135 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			135 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package repository
 | |
| 
 | |
| import (
 | |
| 	"errors"
 | |
| 	"fmt"
 | |
| 	"log/slog"
 | |
| 	"regexp"
 | |
| 	"slices"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 	"time"
 | |
| 
 | |
| 	"github.com/dromara/carbon/v2"
 | |
| 	"github.com/go-shiori/dom"
 | |
| 	"github.com/jmoiron/sqlx"
 | |
| 	"github.com/spf13/viper"
 | |
| 
 | |
| 	"git.kplus.net.ua/yevhen/resource-scraper/helper/parser"
 | |
| 	"git.kplus.net.ua/yevhen/resource-scraper/pkg/repository/table"
 | |
| 	"git.kplus.net.ua/yevhen/resource-scraper/types/constant"
 | |
| 	"git.kplus.net.ua/yevhen/resource-scraper/types/model"
 | |
| )
 | |
| 
 | |
| type Prescene struct {
 | |
| 	db *sqlx.DB
 | |
| }
 | |
| 
 | |
| func NewPresceneRepository(db *sqlx.DB) *Prescene {
 | |
| 	return &Prescene{db: db}
 | |
| }
 | |
| 
 | |
| func (s *Prescene) GetPage(pageNumbers []string) ([]model.ExternalSources, error) {
 | |
| 	entries := make([]model.ExternalSources, 0)
 | |
| 	endpoint := viper.GetString(constant.CfgKeyEndpoint)
 | |
| 
 | |
| 	uri := viper.GetString(constant.FlagSingleUri)
 | |
| 	if uri != "" {
 | |
| 		url := fmt.Sprintf("%s/%s", strings.Trim(endpoint, "/"), strings.Trim(uri, "/"))
 | |
| 		result, _ := parseUrl(url, s.db)
 | |
| 		entries = append(entries, result...)
 | |
| 	} else {
 | |
| 		for _, t := range pageNumbers {
 | |
| 			if t != "1" {
 | |
| 				endpoint += fmt.Sprintf(viper.GetString(constant.CfgKeyEndpointNext), t)
 | |
| 			}
 | |
| 
 | |
| 			//doc, err := parser.HTMLSourceFromURL("https://mdb.kplus.net.ua	/$/scnlog.html")
 | |
| 			if result, err := parseUrl(endpoint, s.db); err == nil {
 | |
| 				entries = append(entries, result...)
 | |
| 			} else {
 | |
| 				slog.Error("parsing url", "err", err)
 | |
| 			}
 | |
| 
 | |
| 			//fmt.Println("Sleeping...", j)
 | |
| 			time.Sleep(viper.GetDuration(constant.CfgKeySleepBeforeNextIteration))
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return entries, nil
 | |
| }
 | |
| 
 | |
| func parseUrl(endpoint string, db *sqlx.DB) ([]model.ExternalSources, error) {
 | |
| 	entries := make([]model.ExternalSources, 0)
 | |
| 	tags := viper.GetStringMapStringSlice("groups.tags")
 | |
| 	slog.Info("singleton", "url", endpoint)
 | |
| 	doc, err := parser.HTMLSourceFromURL(endpoint)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	if doc == nil {
 | |
| 		return nil, errors.New("document is nil")
 | |
| 	}
 | |
| 	var validID = regexp.MustCompile(`-\d+\/$`)
 | |
| 
 | |
| 	for i, item := range dom.QuerySelectorAll(doc, ".post.type-post.category-flac.category-music") {
 | |
| 		var es model.ExternalSources
 | |
| 		columns := []string{"`type`", "type_id", "title", "eXsource", "releaser", "created"}
 | |
| 
 | |
| 		title := dom.QuerySelector(item, ".title")
 | |
| 		if title != nil {
 | |
| 			anchor := dom.QuerySelector(title, "h1 > a")
 | |
| 			if anchor != nil {
 | |
| 				es.Type = constant.ScopePrescene
 | |
| 				es.Title = dom.GetAttribute(anchor, "title")
 | |
| 				if es.Title == "Auto Draft" {
 | |
| 					slog.Info("Skipped", "title", es.Title)
 | |
| 					continue
 | |
| 				}
 | |
| 
 | |
| 				es.ExSource = dom.GetAttribute(anchor, "href")
 | |
| 				if validID.MatchString(es.ExSource) {
 | |
| 					continue
 | |
| 				}
 | |
| 
 | |
| 				pattern := regexp.MustCompile(`(?is)-(\w+)$`)
 | |
| 				es.Releaser = pattern.FindStringSubmatch(es.Title)[1]
 | |
| 
 | |
| 				for flag, groups := range tags {
 | |
| 					if slices.Contains(groups, es.Releaser) {
 | |
| 						es.A = flag
 | |
| 						es.H = flag
 | |
| 						columns = append(columns, "a", "h")
 | |
| 						break
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			if es.A == constant.TagIgnore {
 | |
| 				slog.Info("Skipped", "releaser", es.Releaser)
 | |
| 				continue
 | |
| 			}
 | |
| 
 | |
| 			localtime := dom.QuerySelector(title, "small > span.localtime")
 | |
| 			if localtime != nil {
 | |
| 				lc := dom.GetAttribute(localtime, "data-lttime")
 | |
| 				es.Created = carbon.Parse(lc, "Europe/Kyiv")
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		cls := dom.GetAttribute(item, "class")
 | |
| 		pattern := regexp.MustCompile(`(?s)^post-(\d+)\spost`)
 | |
| 		es.TypeId, _ = strconv.Atoi(pattern.FindStringSubmatch(cls)[1])
 | |
| 
 | |
| 		esModel := table.ExternalSources{Columns: columns}
 | |
| 		entry := esModel.InsertOnDuplicate(es, db)
 | |
| 		entries = append(entries, entry)
 | |
| 
 | |
| 		fmt.Println("====================== ", i, " ==============================")
 | |
| 		fmt.Printf("%+v\n", entry)
 | |
| 	}
 | |
| 
 | |
| 	return entries, nil
 | |
| }
 |