updated to 1.24.0; fixed stb and skipped doubles in prescene
This commit is contained in:
@@ -1,11 +1,13 @@
|
||||
package repository
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-shiori/dom"
|
||||
@@ -30,79 +32,103 @@ func NewPresceneRepository(db *sqlx.DB) *Prescene {
|
||||
func (s *Prescene) GetPage(pageNumbers []string) ([]model.ExternalSources, error) {
|
||||
entries := make([]model.ExternalSources, 0)
|
||||
endpoint := viper.GetString(constant.CfgKeyEndpoint)
|
||||
//scope := viper.GetString(constant.CfgKeyScopeEnable)
|
||||
tags := viper.GetStringMapStringSlice("groups.tags")
|
||||
|
||||
for _, t := range pageNumbers {
|
||||
if t != "1" {
|
||||
endpoint += fmt.Sprintf(viper.GetString(constant.CfgKeyEndpointNext), t)
|
||||
}
|
||||
doc, err := parser.HTMLSourceFromURL(endpoint)
|
||||
//doc, err := parser.HTMLSourceFromURL("https://mdb.amok.space/$/scnlog.html")
|
||||
|
||||
if err != nil {
|
||||
slog.Error("Parse error", "err", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if doc == nil {
|
||||
slog.Warn("Document is nil", "err", err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, item := range dom.QuerySelectorAll(doc, ".post.type-post.category-flac.category-music") {
|
||||
var es model.ExternalSources
|
||||
columns := []string{"`type`", "type_id", "title", "eXsource", "releaser", "created"}
|
||||
|
||||
title := dom.QuerySelector(item, ".title")
|
||||
if title != nil {
|
||||
anchor := dom.QuerySelector(title, "h1 > a")
|
||||
if anchor != nil {
|
||||
es.Type = constant.ScopePrescene
|
||||
es.Title = dom.GetAttribute(anchor, "title")
|
||||
es.ExSource = dom.GetAttribute(anchor, "href")
|
||||
pattern := regexp.MustCompile(`(?is)-(\w+)$`)
|
||||
es.Releaser = pattern.FindStringSubmatch(es.Title)[1]
|
||||
|
||||
for flag, groups := range tags {
|
||||
if slices.Contains(groups, es.Releaser) {
|
||||
es.A = flag
|
||||
es.H = flag
|
||||
columns = append(columns, "a", "h")
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if es.A == constant.TagIgnore {
|
||||
slog.Info("Skipped", "releaser", es.Releaser)
|
||||
continue
|
||||
}
|
||||
|
||||
localtime := dom.QuerySelector(title, "small > span.localtime")
|
||||
if localtime != nil {
|
||||
lc := dom.GetAttribute(localtime, "data-lttime")
|
||||
es.Created = carbon.Parse(lc)
|
||||
}
|
||||
uri := viper.GetString(constant.FlagSingleUri)
|
||||
if uri != "" {
|
||||
url := fmt.Sprintf("%s/%s", strings.Trim(endpoint, "/"), strings.Trim(uri, "/"))
|
||||
result, _ := parseUrl(url, s.db)
|
||||
entries = append(entries, result...)
|
||||
} else {
|
||||
for _, t := range pageNumbers {
|
||||
if t != "1" {
|
||||
endpoint += fmt.Sprintf(viper.GetString(constant.CfgKeyEndpointNext), t)
|
||||
}
|
||||
|
||||
cls := dom.GetAttribute(item, "class")
|
||||
pattern := regexp.MustCompile(`(?s)^post-(\d+)\spost`)
|
||||
es.TypeId, _ = strconv.Atoi(pattern.FindStringSubmatch(cls)[1])
|
||||
//doc, err := parser.HTMLSourceFromURL("https://mdb.amok.space/$/scnlog.html")
|
||||
if result, err := parseUrl(endpoint, s.db); err == nil {
|
||||
entries = append(entries, result...)
|
||||
} else {
|
||||
slog.Error("parsing url", "err", err)
|
||||
}
|
||||
|
||||
//fmt.Println("====================== ", i, " ==============================")
|
||||
esModel := table.ExternalSources{Columns: columns}
|
||||
entry := esModel.InsertOnDuplicate(es, s.db)
|
||||
entries = append(entries, entry)
|
||||
|
||||
//fmt.Printf("%+v\n", entry)
|
||||
//fmt.Println("Sleeping...", j)
|
||||
time.Sleep(viper.GetDuration(constant.CfgKeySleepBeforeNextIteration))
|
||||
}
|
||||
|
||||
//fmt.Println("Sleeping...", j)
|
||||
time.Sleep(viper.GetDuration(constant.CfgKeySleepBeforeNextIteration))
|
||||
}
|
||||
|
||||
//fmt.Printf("scope: %v\n", scope)
|
||||
|
||||
return entries, nil
|
||||
}
|
||||
|
||||
func parseUrl(endpoint string, db *sqlx.DB) ([]model.ExternalSources, error) {
|
||||
entries := make([]model.ExternalSources, 0)
|
||||
tags := viper.GetStringMapStringSlice("groups.tags")
|
||||
slog.Info("singleton", "url", endpoint)
|
||||
doc, err := parser.HTMLSourceFromURL(endpoint)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if doc == nil {
|
||||
return nil, errors.New("document is nil")
|
||||
}
|
||||
var validID = regexp.MustCompile(`-\d+\/$`)
|
||||
|
||||
for i, item := range dom.QuerySelectorAll(doc, ".post.type-post.category-flac.category-music") {
|
||||
var es model.ExternalSources
|
||||
columns := []string{"`type`", "type_id", "title", "eXsource", "releaser", "created"}
|
||||
|
||||
title := dom.QuerySelector(item, ".title")
|
||||
if title != nil {
|
||||
anchor := dom.QuerySelector(title, "h1 > a")
|
||||
if anchor != nil {
|
||||
es.Type = constant.ScopePrescene
|
||||
es.Title = dom.GetAttribute(anchor, "title")
|
||||
if es.Title == "Auto Draft" {
|
||||
slog.Info("Skipped", "title", es.Title)
|
||||
continue
|
||||
}
|
||||
|
||||
es.ExSource = dom.GetAttribute(anchor, "href")
|
||||
if validID.MatchString(es.ExSource) {
|
||||
continue
|
||||
}
|
||||
|
||||
pattern := regexp.MustCompile(`(?is)-(\w+)$`)
|
||||
es.Releaser = pattern.FindStringSubmatch(es.Title)[1]
|
||||
|
||||
for flag, groups := range tags {
|
||||
if slices.Contains(groups, es.Releaser) {
|
||||
es.A = flag
|
||||
es.H = flag
|
||||
columns = append(columns, "a", "h")
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if es.A == constant.TagIgnore {
|
||||
slog.Info("Skipped", "releaser", es.Releaser)
|
||||
continue
|
||||
}
|
||||
|
||||
localtime := dom.QuerySelector(title, "small > span.localtime")
|
||||
if localtime != nil {
|
||||
lc := dom.GetAttribute(localtime, "data-lttime")
|
||||
es.Created = carbon.Parse(lc)
|
||||
}
|
||||
}
|
||||
|
||||
cls := dom.GetAttribute(item, "class")
|
||||
pattern := regexp.MustCompile(`(?s)^post-(\d+)\spost`)
|
||||
es.TypeId, _ = strconv.Atoi(pattern.FindStringSubmatch(cls)[1])
|
||||
|
||||
esModel := table.ExternalSources{Columns: columns}
|
||||
entry := esModel.InsertOnDuplicate(es, db)
|
||||
entries = append(entries, entry)
|
||||
|
||||
fmt.Println("====================== ", i, " ==============================")
|
||||
fmt.Printf("%+v\n", entry)
|
||||
}
|
||||
|
||||
return entries, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user