preset scraper added
This commit is contained in:
@@ -1,7 +1,10 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"github.com/spf13/viper"
|
||||
|
||||
"git.amok.space/yevhen/resource-scraper/pkg/service"
|
||||
"git.amok.space/yevhen/resource-scraper/types/constant"
|
||||
)
|
||||
|
||||
type Handler struct {
|
||||
@@ -13,16 +16,17 @@ func New(services *service.Service) *Handler {
|
||||
}
|
||||
|
||||
func (h *Handler) InitConsole() string {
|
||||
return h.rutracker()
|
||||
switch viper.GetString(constant.CfgKeyScopeEnable) {
|
||||
case constant.ScopeRuTracker:
|
||||
return h.rutracker()
|
||||
case constant.ScopePrescene:
|
||||
return h.prescene()
|
||||
}
|
||||
|
||||
return "no scope chosen"
|
||||
}
|
||||
|
||||
//func (h *Handler) Base(services *service.Service) *Handler {
|
||||
// return &Handler{services: services}
|
||||
//}
|
||||
|
||||
//func (h *Handler) InitApi() *chi.Mux {
|
||||
// api := chi.NewRouter()
|
||||
// api.Get("/", web.ApiFallbackHandler)
|
||||
//
|
||||
// return api
|
||||
//}
|
||||
func (h *Handler) InitRoutes() string {
|
||||
//TODO:
|
||||
return "i am the web initiator"
|
||||
}
|
||||
|
||||
46
pkg/handler/prescene.go
Normal file
46
pkg/handler/prescene.go
Normal file
@@ -0,0 +1,46 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"math"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/viper"
|
||||
|
||||
"git.amok.space/yevhen/resource-scraper/types/constant"
|
||||
)
|
||||
|
||||
func (h *Handler) prescene() string {
|
||||
pagesToScrape := []string{"1"}
|
||||
levels := viper.GetInt(constant.CfgKeyLevelsToScrape)
|
||||
if levels > 1 {
|
||||
var q = (60.0 / 100.0) * (100.0 / viper.GetFloat64(constant.CfgKeyLevelsToScrape))
|
||||
minute := float64(time.Now().Minute())
|
||||
if minute < 1 {
|
||||
minute = 1
|
||||
}
|
||||
|
||||
fmt.Printf("FLOAT: %f, %f, %v\n", minute, q, math.Ceil(minute/q))
|
||||
|
||||
hour := time.Now().Hour()
|
||||
if hour == 1 {
|
||||
hour = 25
|
||||
} else if hour == 0 {
|
||||
hour = 1
|
||||
}
|
||||
|
||||
page := hour * int(math.Ceil(minute/q))
|
||||
pagesToScrape = append(pagesToScrape, strconv.Itoa(page))
|
||||
}
|
||||
|
||||
_, err := h.services.Prescene.GetPage(pagesToScrape)
|
||||
if err != nil {
|
||||
slog.Error("error occurred while getting page: ", "err", err.Error())
|
||||
}
|
||||
|
||||
fmt.Printf("%+v\n", pagesToScrape)
|
||||
|
||||
return "ps"
|
||||
}
|
||||
@@ -12,10 +12,12 @@ func (h *Handler) rutracker() string {
|
||||
key := fmt.Sprintf("topic.%v", time.Now().Hour())
|
||||
topics := viper.GetStringSlice(key)
|
||||
|
||||
err := h.services.Rutracker.GetTopic(topics)
|
||||
_, err := h.services.Rutracker.GetTopic(topics)
|
||||
if err != nil {
|
||||
slog.Error("error occurred while getting topic: ", err.Error())
|
||||
slog.Error("error occurred while getting topic: ", "err", err.Error())
|
||||
}
|
||||
|
||||
//fmt.Printf("%+v", rt)
|
||||
|
||||
return "rt"
|
||||
}
|
||||
|
||||
108
pkg/repository/prescene.go
Normal file
108
pkg/repository/prescene.go
Normal file
@@ -0,0 +1,108 @@
|
||||
package repository
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/go-shiori/dom"
|
||||
"github.com/golang-module/carbon/v2"
|
||||
"github.com/jmoiron/sqlx"
|
||||
"github.com/spf13/viper"
|
||||
|
||||
"git.amok.space/yevhen/resource-scraper/helper/parser"
|
||||
"git.amok.space/yevhen/resource-scraper/pkg/repository/table"
|
||||
"git.amok.space/yevhen/resource-scraper/types/constant"
|
||||
"git.amok.space/yevhen/resource-scraper/types/model"
|
||||
)
|
||||
|
||||
type Prescene struct {
|
||||
db *sqlx.DB
|
||||
}
|
||||
|
||||
func NewPresceneRepository(db *sqlx.DB) *Prescene {
|
||||
return &Prescene{db: db}
|
||||
}
|
||||
|
||||
func (s *Prescene) GetPage(pageNumbers []string) ([]model.ExternalSources, error) {
|
||||
entries := make([]model.ExternalSources, 0)
|
||||
endpoint := viper.GetString(constant.CfgKeyEndpoint)
|
||||
//scope := viper.GetString(constant.CfgKeyScopeEnable)
|
||||
tags := viper.GetStringMapStringSlice("groups.tags")
|
||||
|
||||
for _, t := range pageNumbers {
|
||||
if t != "1" {
|
||||
endpoint += fmt.Sprintf(viper.GetString(constant.CfgKeyEndpointNext), t)
|
||||
}
|
||||
doc, err := parser.HTMLSourceFromURL(endpoint)
|
||||
//doc, err := parser.HTMLSourceFromURL("https://mdb.amok.space/$/scnlog.html")
|
||||
|
||||
if err != nil {
|
||||
slog.Error("Parse error", "err", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if doc == nil {
|
||||
slog.Warn("Document is nil", "err", err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, item := range dom.QuerySelectorAll(doc, ".post.type-post.category-flac.category-music") {
|
||||
var es model.ExternalSources
|
||||
columns := []string{"`type`", "type_id", "title", "eXsource", "releaser", "created"}
|
||||
|
||||
title := dom.QuerySelector(item, ".title")
|
||||
if title != nil {
|
||||
anchor := dom.QuerySelector(title, "h1 > a")
|
||||
if anchor != nil {
|
||||
es.Type = constant.ScopePrescene
|
||||
es.Title = dom.GetAttribute(anchor, "title")
|
||||
es.ExSource = dom.GetAttribute(anchor, "href")
|
||||
pattern := regexp.MustCompile(`(?is)-(\w+)$`)
|
||||
es.Releaser = pattern.FindStringSubmatch(es.Title)[1]
|
||||
|
||||
for flag, groups := range tags {
|
||||
if slices.Contains(groups, es.Releaser) {
|
||||
es.A = flag
|
||||
es.H = flag
|
||||
columns = append(columns, "a", "h")
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if es.A == constant.TagIgnore {
|
||||
slog.Info("Skipped", "releaser", es.Releaser)
|
||||
continue
|
||||
}
|
||||
|
||||
localtime := dom.QuerySelector(title, "small > span.localtime")
|
||||
if localtime != nil {
|
||||
lc := dom.GetAttribute(localtime, "data-lttime")
|
||||
es.Created = carbon.Parse(lc)
|
||||
}
|
||||
}
|
||||
|
||||
cls := dom.GetAttribute(item, "class")
|
||||
pattern := regexp.MustCompile(`(?s)^post-(\d+)\spost`)
|
||||
es.TypeId, _ = strconv.Atoi(pattern.FindStringSubmatch(cls)[1])
|
||||
|
||||
//fmt.Println("====================== ", i, " ==============================")
|
||||
esModel := table.ExternalSources{Columns: columns}
|
||||
entry := esModel.InsertOnDuplicate(es, s.db)
|
||||
entries = append(entries, entry)
|
||||
|
||||
//fmt.Printf("%+v\n", entry)
|
||||
}
|
||||
|
||||
//fmt.Println("Sleeping...", j)
|
||||
time.Sleep(viper.GetDuration(constant.CfgKeySleepBeforeNextIteration))
|
||||
}
|
||||
|
||||
//fmt.Printf("scope: %v\n", scope)
|
||||
|
||||
return entries, nil
|
||||
}
|
||||
@@ -2,22 +2,18 @@ package repository
|
||||
|
||||
import (
|
||||
"github.com/jmoiron/sqlx"
|
||||
"github.com/spf13/viper"
|
||||
|
||||
"git.amok.space/yevhen/resource-scraper/types"
|
||||
"git.amok.space/yevhen/resource-scraper/types/interface"
|
||||
)
|
||||
|
||||
type Repository struct {
|
||||
types.Rutracker
|
||||
_interface.Rutracker
|
||||
_interface.Prescene
|
||||
}
|
||||
|
||||
func New(db *sqlx.DB) *Repository {
|
||||
switch viper.GetString("scope") {
|
||||
case types.RuTracker:
|
||||
return &Repository{
|
||||
Rutracker: NewRutracker(db),
|
||||
}
|
||||
return &Repository{
|
||||
Rutracker: NewRutrackerRepository(db),
|
||||
Prescene: NewPresceneRepository(db),
|
||||
}
|
||||
|
||||
return &Repository{}
|
||||
}
|
||||
|
||||
@@ -8,60 +8,59 @@ import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/golang-module/carbon/v2"
|
||||
"github.com/jmoiron/sqlx"
|
||||
"github.com/spf13/viper"
|
||||
|
||||
iface "git.amok.space/yevhen/resource-scraper/types"
|
||||
"git.amok.space/yevhen/resource-scraper/types/table"
|
||||
"git.amok.space/yevhen/resource-scraper/pkg/repository/table"
|
||||
"git.amok.space/yevhen/resource-scraper/types/constant"
|
||||
"git.amok.space/yevhen/resource-scraper/types/model"
|
||||
"git.amok.space/yevhen/resource-scraper/types/resource"
|
||||
)
|
||||
|
||||
type Rutracker struct {
|
||||
db *sqlx.DB
|
||||
}
|
||||
|
||||
func NewRutracker(db *sqlx.DB) *Rutracker {
|
||||
func NewRutrackerRepository(db *sqlx.DB) *Rutracker {
|
||||
return &Rutracker{db: db}
|
||||
}
|
||||
|
||||
func (s *Rutracker) GetTopic(topics []string) error {
|
||||
endpoint := viper.GetString("endpoint")
|
||||
func (s *Rutracker) GetTopic(topics []string) ([]model.ExternalSources, error) {
|
||||
endpoint := viper.GetString(constant.CfgKeyEndpoint)
|
||||
entries := make([]model.ExternalSources, 0)
|
||||
columns := []string{"`type`", "type_id", "title", "type_subsection_id", "releaser", "created"}
|
||||
|
||||
for _, t := range topics {
|
||||
topic, err := fetch(fmt.Sprintf(endpoint, t))
|
||||
if err != nil {
|
||||
slog.Error("couldn't parse topic data", "err", err.Error())
|
||||
return entries, err
|
||||
}
|
||||
|
||||
for i, e := range topic.Entry {
|
||||
var id int
|
||||
var es table.ExternalSources
|
||||
|
||||
for _, e := range topic.Entry {
|
||||
var es model.ExternalSources
|
||||
u, _ := url.Parse(e.Link.Href)
|
||||
es.Type = "rutracker"
|
||||
|
||||
es.Type = constant.ScopeRuTracker
|
||||
es.TypeId, _ = strconv.Atoi(u.Query().Get("t"))
|
||||
es.Title = e.Title
|
||||
es.TypeSubsectionId, _ = strconv.Atoi(t)
|
||||
es.Releaser = e.Author.Name
|
||||
es.Created, _ = time.Parse(time.RFC3339, e.Updated)
|
||||
created := es.Created.Format(iface.DateTimeFormat)
|
||||
es.Created = carbon.Parse(e.Updated)
|
||||
|
||||
query := fmt.Sprintf("INSERT INTO %s (`type`, type_id, title, type_subsection_id, releaser, created) VALUES (?, ?, ?, ?, ?, ?) ON DUPLICATE KEY UPDATE title=?, created=? RETURNING id", iface.ExternalSourcesTable)
|
||||
|
||||
row := s.db.QueryRow(query, es.Type, es.TypeId, es.Title, es.TypeSubsectionId, es.Releaser, created, es.Title, created)
|
||||
if err = row.Scan(&id); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Println("<< ----------------- ", i+1, id, " ----------------- >>")
|
||||
esModel := table.ExternalSources{Columns: columns}
|
||||
entry := esModel.InsertOnDuplicate(es, s.db)
|
||||
entries = append(entries, entry)
|
||||
//fmt.Printf("%+v\n\n\n", entry)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
return entries, nil
|
||||
}
|
||||
|
||||
func fetch(endpoint string) (*iface.RutrackerAtomTopic, error) {
|
||||
func fetch(endpoint string) (*resource.RutrackerAtomTopic, error) {
|
||||
resp, err := http.Get(endpoint)
|
||||
if err != nil {
|
||||
slog.Error("couldn't fetch data", endpoint, err.Error())
|
||||
@@ -75,7 +74,7 @@ func fetch(endpoint string) (*iface.RutrackerAtomTopic, error) {
|
||||
}
|
||||
}(resp.Body)
|
||||
|
||||
topic := &iface.RutrackerAtomTopic{}
|
||||
topic := &resource.RutrackerAtomTopic{}
|
||||
|
||||
if err = xml.NewDecoder(resp.Body).Decode(topic); err != nil {
|
||||
return nil, err
|
||||
|
||||
34
pkg/repository/table/external_sources.go
Normal file
34
pkg/repository/table/external_sources.go
Normal file
@@ -0,0 +1,34 @@
|
||||
package table
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/jmoiron/sqlx"
|
||||
|
||||
"git.amok.space/yevhen/resource-scraper/types/constant"
|
||||
"git.amok.space/yevhen/resource-scraper/types/model"
|
||||
)
|
||||
|
||||
type ExternalSources struct {
|
||||
Columns []string
|
||||
}
|
||||
|
||||
func (f *ExternalSources) InsertOnDuplicate(es model.ExternalSources, db *sqlx.DB) model.ExternalSources {
|
||||
stmt := "INSERT INTO %s (%s) VALUES (%s) ON DUPLICATE KEY UPDATE title=:title, created=:created RETURNING id"
|
||||
placeholders := strings.Join(f.Columns, ", :")
|
||||
placeholders = ":" + strings.Replace(placeholders, "`", "", -1)
|
||||
query := fmt.Sprintf(stmt, constant.ExternalSourcesTable, strings.Join(f.Columns, ", "), placeholders)
|
||||
|
||||
//fmt.Printf("%s\n", query)
|
||||
|
||||
if rows, err := db.NamedQuery(query, &es); err == nil {
|
||||
for rows.Next() {
|
||||
es.Error = rows.StructScan(&es)
|
||||
}
|
||||
} else {
|
||||
es.Error = err
|
||||
}
|
||||
|
||||
return es
|
||||
}
|
||||
18
pkg/service/prescene.go
Normal file
18
pkg/service/prescene.go
Normal file
@@ -0,0 +1,18 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"git.amok.space/yevhen/resource-scraper/types/interface"
|
||||
"git.amok.space/yevhen/resource-scraper/types/model"
|
||||
)
|
||||
|
||||
type PresceneService struct {
|
||||
repo _interface.Prescene
|
||||
}
|
||||
|
||||
func NewPresceneService(repo _interface.Prescene) *PresceneService {
|
||||
return &PresceneService{repo: repo}
|
||||
}
|
||||
|
||||
func (p *PresceneService) GetPage(pageNumbers []string) ([]model.ExternalSources, error) {
|
||||
return p.repo.GetPage(pageNumbers)
|
||||
}
|
||||
@@ -1,17 +1,18 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
iface "git.amok.space/yevhen/resource-scraper/types"
|
||||
"git.amok.space/yevhen/resource-scraper/types/interface"
|
||||
"git.amok.space/yevhen/resource-scraper/types/model"
|
||||
)
|
||||
|
||||
type RutrackerService struct {
|
||||
repo iface.Rutracker
|
||||
repo _interface.Rutracker
|
||||
}
|
||||
|
||||
func NewRutrackerService(repo iface.Rutracker) *RutrackerService {
|
||||
func NewRutrackerService(repo _interface.Rutracker) *RutrackerService {
|
||||
return &RutrackerService{repo: repo}
|
||||
}
|
||||
|
||||
func (s *RutrackerService) GetTopic(topic []string) error {
|
||||
func (s *RutrackerService) GetTopic(topic []string) ([]model.ExternalSources, error) {
|
||||
return s.repo.GetTopic(topic)
|
||||
}
|
||||
|
||||
@@ -1,24 +1,18 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"github.com/spf13/viper"
|
||||
|
||||
"git.amok.space/yevhen/resource-scraper/pkg/repository"
|
||||
"git.amok.space/yevhen/resource-scraper/types"
|
||||
"git.amok.space/yevhen/resource-scraper/types/interface"
|
||||
)
|
||||
|
||||
type Service struct {
|
||||
types.Rutracker
|
||||
_interface.Rutracker
|
||||
_interface.Prescene
|
||||
}
|
||||
|
||||
func New(repos *repository.Repository) *Service {
|
||||
|
||||
switch viper.GetString("scope") {
|
||||
case types.RuTracker:
|
||||
return &Service{
|
||||
Rutracker: NewRutrackerService(repos.Rutracker),
|
||||
}
|
||||
return &Service{
|
||||
Rutracker: NewRutrackerService(repos.Rutracker),
|
||||
Prescene: NewPresceneService(repos.Prescene),
|
||||
}
|
||||
|
||||
return &Service{}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user