updated to 1.24.0; fixed stb and skipped doubles in prescene

This commit is contained in:
2025-03-03 09:28:41 +02:00
parent 0ecf0ddec1
commit 203f834f65
41 changed files with 1058 additions and 219 deletions

26
pkg/handler/console.go Normal file
View File

@@ -0,0 +1,26 @@
package handler
import (
"fmt"
"slices"
"github.com/logrusorgru/aurora/v4"
"github.com/spf13/viper"
)
// Console checks the command given in the "create" setting against the
// allow-list stored under "console.cmd.create" and, when the command is
// permitted, returns the value of the "env" setting (default "devel").
//
// When the command is not on the allow-list a warning is printed and an empty
// string is returned. A missing "console.cmd" section is treated as an empty
// allow-list: viper.Sub returns nil for an absent key, and calling a method on
// that nil *viper.Viper would otherwise crash the process.
func (h *Handler) Console() string {
	viper.SetDefault("env", "devel")

	cmd := viper.GetString("create")

	var allowCreate []string
	if cmdList := viper.Sub("console.cmd"); cmdList != nil {
		allowCreate = cmdList.GetStringSlice("create")
	}

	allowed := slices.Contains(allowCreate, cmd)
	if !allowed {
		fmt.Printf("%s Not allowed command %s used\n", aurora.BgMagenta("[WARN]"), aurora.Magenta(cmd))
		return ""
	}

	// Diagnostic output kept from the original; always "true" at this point.
	fmt.Printf("%v\n", allowed)

	return viper.GetString("env")
}

View File

@@ -1,6 +1,9 @@
package handler
import (
"reflect"
"github.com/iancoleman/strcase"
"github.com/spf13/viper"
"git.amok.space/yevhen/resource-scraper/pkg/service"
@@ -16,14 +19,13 @@ func New(services *service.Service) *Handler {
}
// InitConsole runs the console handler selected by the enabled-scope setting
// and returns that handler's textual result.
//
// NOTE(review): this span is a rendered diff hunk, so the lines of the former
// switch-based dispatch and of the reflection-based dispatch appear together
// without +/- markers; it does not read as one compilable body.
// NOTE(review): reflect.Value.MethodByName yields the zero Value when no
// method has the requested name, and Call on a zero Value panics — confirm
// the scope flag is validated before it reaches this point.
func (h *Handler) InitConsole() string {
switch viper.GetString(constant.CfgKeyScopeEnable) {
case constant.ScopeRuTracker:
return h.rutracker()
case constant.ScopePrescene:
return h.prescene()
}
methodName := strcase.ToCamel(viper.GetString(constant.FlagScopeEnable))
return "no scope chosen"
immutable := reflect.ValueOf(h)
method := immutable.MethodByName(methodName)
v := method.Call(nil)
return methodName + " launched, " + v[0].String() + "\n"
}
func (h *Handler) InitRoutes() string {

36
pkg/handler/info.go Normal file
View File

@@ -0,0 +1,36 @@
package handler
import (
"fmt"
"log"
"github.com/logrusorgru/aurora/v4"
"github.com/spf13/viper"
"github.com/mewkiz/flac"
"git.amok.space/yevhen/resource-scraper/types/constant"
)
// Info runs the FLAC diagnostic helper, prints the configured environment and
// scope on a single line and returns the literal "info".
func (h *Handler) Info() string {
	md5()

	envLabel := aurora.Cyan("ENV")
	envName := viper.GetString(constant.FlagEnv)
	scopeLabel := aurora.Cyan("SCOPE")
	scopeName := viper.GetString(constant.FlagScopeEnable)

	fmt.Printf("%s: %s; %s: %s\n", envLabel, envName, scopeLabel, scopeName)

	return "info"
}
// md5 parses a sample FLAC file and prints the MD5 signature of its unencoded
// audio, the total sample count and the type of every metadata block.
//
// A file that cannot be parsed is reported through the standard logger and
// the function returns; it no longer terminates the process, so the caller
// still runs on machines where the sample file is absent.
func md5() {
	// Hard-coded sample used for local diagnostics.
	const sample = "C:\\arm.amok.space\\.incoming\\Ancient Storm\\Forever and Never (2024)\\06 Old Mountain.flac"

	stream, err := flac.ParseFile(sample)
	if err != nil {
		log.Printf("parsing flac file %q: %v", sample, err)
		return
	}
	defer stream.Close()

	fmt.Printf("unencoded audio md5sum: %032x\n", stream.Info.MD5sum[:])
	fmt.Printf("Total number of inter-channel samples in the stream: %+v\n", stream.Info.NSamples)

	for i, block := range stream.Blocks {
		fmt.Printf("block %d: %v\n", i, block.Type)
	}
}

View File

@@ -0,0 +1,5 @@
package handler
// MetalArchives returns a fixed placeholder string.
func (h *Handler) MetalArchives() string {
	const placeholder = "MetalArchives................."
	return placeholder
}

View File

@@ -12,7 +12,7 @@ import (
"git.amok.space/yevhen/resource-scraper/types/constant"
)
func (h *Handler) prescene() string {
func (h *Handler) Prescene() string {
pagesToScrape := []string{"1"}
levels := viper.GetInt(constant.CfgKeyLevelsToScrape)
if levels > 1 {

View File

@@ -8,7 +8,7 @@ import (
"github.com/spf13/viper"
)
func (h *Handler) rutracker() string {
func (h *Handler) Rutracker() string {
key := fmt.Sprintf("topic.%v", time.Now().Hour())
topics := viper.GetStringSlice(key)

36
pkg/handler/stb.go Normal file
View File

@@ -0,0 +1,36 @@
package handler
import (
"errors"
"fmt"
"log/slog"
"github.com/spf13/viper"
"git.amok.space/yevhen/resource-scraper/types/constant"
)
// STB reads the endpoint configured for the ShareTheBrutality scope, hands it
// to the ShareTheBrutality service's GetMail, logs out of the returned mail
// session and prints one line per returned record.
//
// It returns "stb" when no endpoint is configured, otherwise a summary with
// the number of returned records.
func (h *Handler) STB() string {
	cfgKey := fmt.Sprintf("%s.%s", constant.ScopeShareTheBrutality, constant.CfgKeyEndpoint)

	endpoint := viper.GetString(cfgKey)
	if len(endpoint) == 0 {
		slog.Error("getting endpoint from config", "err", errors.New("no endpoint provided"))
		return "stb"
	}

	records, session := h.services.ShareTheBrutality.GetMail(endpoint)
	session.LogOut()

	for _, r := range records {
		fmt.Printf("%s %d: %s [#%s]\n", r.Created, r.Id, r.Title, r.Releaser)
	}

	return fmt.Sprintf("Added %d records\n", len(records))
}

View File

@@ -1,11 +1,13 @@
package repository
import (
"errors"
"fmt"
"log/slog"
"regexp"
"slices"
"strconv"
"strings"
"time"
"github.com/go-shiori/dom"
@@ -30,79 +32,103 @@ func NewPresceneRepository(db *sqlx.DB) *Prescene {
// GetPage collects external-source entries for the requested page numbers and
// returns them; the returned error is always nil in the lines shown here.
//
// NOTE(review): this span is a rendered diff hunk, so the former inline
// parsing loop and the newer parseUrl-based flow (including the single-URI
// branch) appear together without +/- markers; it does not read as one
// compilable body.
// NOTE(review): endpoint is extended with += for every page other than "1",
// so the page suffixes accumulate from one iteration to the next — confirm
// this is intended when more than one extra page is requested.
func (s *Prescene) GetPage(pageNumbers []string) ([]model.ExternalSources, error) {
entries := make([]model.ExternalSources, 0)
endpoint := viper.GetString(constant.CfgKeyEndpoint)
//scope := viper.GetString(constant.CfgKeyScopeEnable)
tags := viper.GetStringMapStringSlice("groups.tags")
for _, t := range pageNumbers {
if t != "1" {
endpoint += fmt.Sprintf(viper.GetString(constant.CfgKeyEndpointNext), t)
}
doc, err := parser.HTMLSourceFromURL(endpoint)
//doc, err := parser.HTMLSourceFromURL("https://mdb.amok.space/$/scnlog.html")
if err != nil {
slog.Error("Parse error", "err", err)
continue
}
if doc == nil {
slog.Warn("Document is nil", "err", err)
continue
}
for _, item := range dom.QuerySelectorAll(doc, ".post.type-post.category-flac.category-music") {
var es model.ExternalSources
columns := []string{"`type`", "type_id", "title", "eXsource", "releaser", "created"}
title := dom.QuerySelector(item, ".title")
if title != nil {
anchor := dom.QuerySelector(title, "h1 > a")
if anchor != nil {
es.Type = constant.ScopePrescene
es.Title = dom.GetAttribute(anchor, "title")
es.ExSource = dom.GetAttribute(anchor, "href")
pattern := regexp.MustCompile(`(?is)-(\w+)$`)
es.Releaser = pattern.FindStringSubmatch(es.Title)[1]
for flag, groups := range tags {
if slices.Contains(groups, es.Releaser) {
es.A = flag
es.H = flag
columns = append(columns, "a", "h")
break
}
}
}
if es.A == constant.TagIgnore {
slog.Info("Skipped", "releaser", es.Releaser)
continue
}
localtime := dom.QuerySelector(title, "small > span.localtime")
if localtime != nil {
lc := dom.GetAttribute(localtime, "data-lttime")
es.Created = carbon.Parse(lc)
}
uri := viper.GetString(constant.FlagSingleUri)
if uri != "" {
url := fmt.Sprintf("%s/%s", strings.Trim(endpoint, "/"), strings.Trim(uri, "/"))
result, _ := parseUrl(url, s.db)
entries = append(entries, result...)
} else {
for _, t := range pageNumbers {
if t != "1" {
endpoint += fmt.Sprintf(viper.GetString(constant.CfgKeyEndpointNext), t)
}
cls := dom.GetAttribute(item, "class")
pattern := regexp.MustCompile(`(?s)^post-(\d+)\spost`)
es.TypeId, _ = strconv.Atoi(pattern.FindStringSubmatch(cls)[1])
//doc, err := parser.HTMLSourceFromURL("https://mdb.amok.space/$/scnlog.html")
if result, err := parseUrl(endpoint, s.db); err == nil {
entries = append(entries, result...)
} else {
slog.Error("parsing url", "err", err)
}
//fmt.Println("====================== ", i, " ==============================")
esModel := table.ExternalSources{Columns: columns}
entry := esModel.InsertOnDuplicate(es, s.db)
entries = append(entries, entry)
//fmt.Printf("%+v\n", entry)
//fmt.Println("Sleeping...", j)
time.Sleep(viper.GetDuration(constant.CfgKeySleepBeforeNextIteration))
}
//fmt.Println("Sleeping...", j)
time.Sleep(viper.GetDuration(constant.CfgKeySleepBeforeNextIteration))
}
//fmt.Printf("scope: %v\n", scope)
return entries, nil
}
// parseUrl downloads the page at endpoint, reads every element matching the
// FLAC music post selector and stores each accepted post through
// table.ExternalSources.InsertOnDuplicate, returning the stored entries.
//
// A post is skipped when its title is "Auto Draft", when its link ends in
// "-<digits>/" (presumably a duplicate of an earlier post — confirm), when its
// releaser is tagged with constant.TagIgnore, or when the releaser suffix or
// the numeric post id cannot be extracted. The last two cases used to index a
// nil match slice and crash the scraper on unexpected markup.
//
// An error is returned only when the page cannot be fetched or is nil.
func parseUrl(endpoint string, db *sqlx.DB) ([]model.ExternalSources, error) {
	entries := make([]model.ExternalSources, 0)
	tags := viper.GetStringMapStringSlice("groups.tags")

	slog.Info("singleton", "url", endpoint)

	doc, err := parser.HTMLSourceFromURL(endpoint)
	if err != nil {
		return nil, err
	}
	if doc == nil {
		return nil, errors.New("document is nil")
	}

	// Compiled once per call instead of once per post.
	var (
		validID         = regexp.MustCompile(`-\d+\/$`)
		releaserPattern = regexp.MustCompile(`(?is)-(\w+)$`)
		postIDPattern   = regexp.MustCompile(`(?s)^post-(\d+)\spost`)
	)

	for i, item := range dom.QuerySelectorAll(doc, ".post.type-post.category-flac.category-music") {
		var es model.ExternalSources
		columns := []string{"`type`", "type_id", "title", "eXsource", "releaser", "created"}

		title := dom.QuerySelector(item, ".title")
		if title != nil {
			anchor := dom.QuerySelector(title, "h1 > a")
			if anchor != nil {
				es.Type = constant.ScopePrescene
				es.Title = dom.GetAttribute(anchor, "title")
				if es.Title == "Auto Draft" {
					slog.Info("Skipped", "title", es.Title)
					continue
				}

				es.ExSource = dom.GetAttribute(anchor, "href")
				if validID.MatchString(es.ExSource) {
					continue
				}

				// The releaser is the last "-word" of the title; without it the
				// record cannot be attributed, so the post is skipped.
				releaser := releaserPattern.FindStringSubmatch(es.Title)
				if len(releaser) < 2 {
					slog.Warn("Skipped", "title", es.Title, "reason", "no releaser suffix in title")
					continue
				}
				es.Releaser = releaser[1]

				for flag, groups := range tags {
					if slices.Contains(groups, es.Releaser) {
						es.A = flag
						es.H = flag
						columns = append(columns, "a", "h")
						break
					}
				}
			}

			if es.A == constant.TagIgnore {
				slog.Info("Skipped", "releaser", es.Releaser)
				continue
			}

			localtime := dom.QuerySelector(title, "small > span.localtime")
			if localtime != nil {
				lc := dom.GetAttribute(localtime, "data-lttime")
				es.Created = carbon.Parse(lc)
			}
		}

		// The numeric post id is taken from the leading "post-<id>" class.
		cls := dom.GetAttribute(item, "class")
		postID := postIDPattern.FindStringSubmatch(cls)
		if len(postID) < 2 {
			slog.Warn("Skipped", "class", cls, "reason", "no post id in class attribute")
			continue
		}
		typeID, convErr := strconv.Atoi(postID[1])
		if convErr != nil {
			slog.Warn("Skipped", "class", cls, "err", convErr)
			continue
		}
		es.TypeId = typeID

		esModel := table.ExternalSources{Columns: columns}
		entry := esModel.InsertOnDuplicate(es, db)
		entries = append(entries, entry)

		fmt.Println("====================== ", i, " ==============================")
		fmt.Printf("%+v\n", entry)
	}

	return entries, nil
}

View File

@@ -9,11 +9,13 @@ import (
// Repository groups the repositories of all supported sources by embedding
// one interface per source.
type Repository struct {
_interface.Rutracker
_interface.Prescene
_interface.ShareTheBrutality
}
// New builds a Repository whose source repositories are all constructed from
// the same database handle.
//
// NOTE(review): this span is a rendered diff hunk; the Rutracker and Prescene
// fields appear twice because removed and added lines are shown together
// without +/- markers.
func New(db *sqlx.DB) *Repository {
return &Repository{
Rutracker: NewRutrackerRepository(db),
Prescene: NewPresceneRepository(db),
Rutracker: NewRutrackerRepository(db),
Prescene: NewPresceneRepository(db),
ShareTheBrutality: NewShareTheBrutalityRepository(db),
}
}

154
pkg/repository/stb.go Normal file
View File

@@ -0,0 +1,154 @@
package repository
import (
"fmt"
"net/url"
"regexp"
"strconv"
"strings"
"github.com/emersion/go-imap/v2"
"github.com/go-shiori/dom"
"github.com/golang-module/carbon/v2"
"github.com/jmoiron/sqlx"
"github.com/spf13/viper"
"golang.org/x/net/html"
"git.amok.space/yevhen/resource-scraper/helper/parser"
"git.amok.space/yevhen/resource-scraper/helper/sugar"
"git.amok.space/yevhen/resource-scraper/internal/mail"
_table "git.amok.space/yevhen/resource-scraper/pkg/repository/table"
"git.amok.space/yevhen/resource-scraper/types/constant"
"git.amok.space/yevhen/resource-scraper/types/model"
)
// ShareTheBrutality is the repository for the ShareTheBrutality source: it
// reads messages through EmailService and stores the parsed records via db.
type ShareTheBrutality struct {
// scope prefixes the configuration keys read by this repository.
scope string
// EmailService is the mail session; GetMail replaces it on every call.
EmailService mail.EmailService
// db is the database handle passed on to the table helpers.
db *sqlx.DB
}
// NewShareTheBrutalityRepository returns a repository bound to db and scoped
// to constant.ScopeShareTheBrutality; the mail session is left at its zero
// value.
func NewShareTheBrutalityRepository(db *sqlx.DB) *ShareTheBrutality {
	repo := new(ShareTheBrutality)
	repo.scope = constant.ScopeShareTheBrutality
	repo.db = db

	return repo
}
// GetMail starts a fresh mail session for the given user, lists the messages
// of the configured mailboxes that match the configured text criteria and
// passes them to Processing together with the configured sender description.
//
// It returns the records produced by Processing and a pointer to the session
// held by the repository.
func (s *ShareTheBrutality) GetMail(email string) ([]model.ExternalSources, *mail.EmailService) {
	s.EmailService = mail.EmailService{User: email}
	s.EmailService.Login()

	boxesKey := fmt.Sprintf("%s.mailboxes", s.scope)
	criteriaKey := fmt.Sprintf("%s.search-criteria", s.scope)

	filter := new(imap.SearchCriteria)
	filter.Text = viper.GetStringSlice(criteriaKey)
	s.EmailService.ListMessages(viper.GetStringSlice(boxesKey), filter)

	senderKey := fmt.Sprintf("%s.sender", s.scope)
	records := s.Processing(viper.GetStringMapString(senderKey))

	return records, &s.EmailService
}
// Processing turns the messages held by s.EmailService into external-source
// records, stores them with _table.BatchInsertOnDuplicate and moves every
// handled message to the mailbox that corresponds to the batch status.
//
// sender carries the expected "mailbox", "host" and "subject"; a message that
// does not match all three, or that has no From address, is ignored. For each
// body section with content, the rows of the announcement table are parsed
// into records. A row is skipped (with a warning) when its link, type id,
// releaser/genre line or topic id cannot be resolved.
//
// The returned slice contains the records of every batch whose status is not
// constant.StatusFailed.
//
// Compared with the previous revision this version does not panic on a
// message without sender, on a genre missing from the topics configuration or
// on a section that yields no records. It also compiles the configured
// patterns once and batches each section's records separately instead of
// re-inserting records of earlier sections.
func (s *ShareTheBrutality) Processing(sender map[string]string) []model.ExternalSources {
	columns := []string{"`type`", "type_id", "title", "type_subsection_id", "releaser", "created", "fingerprint"}
	entriesBatched := make([]model.ExternalSources, 0)

	if len(s.EmailService.Messages) == 0 {
		return entriesBatched
	}

	dbType := viper.GetString(fmt.Sprintf("%s.db-type", s.scope))
	regexPatterns := viper.GetStringMapString(fmt.Sprintf("%s.regex", s.scope))
	topics := viper.GetStringMap(fmt.Sprintf("%s.topics", s.scope))

	// The patterns come from configuration; a broken one is reported instead
	// of panicking in the middle of a mailbox run.
	typeIDPattern, compileErr := regexp.Compile(regexPatterns["type-id"])
	if compileErr != nil {
		s.EmailService.Warn(fmt.Sprintf("Regexp => type-id pattern is invalid: %v", compileErr))
		return entriesBatched
	}
	whoGenrePattern, compileErr := regexp.Compile(regexPatterns["who-genre"])
	if compileErr != nil {
		s.EmailService.Warn(fmt.Sprintf("Regexp => who-genre pattern is invalid: %v", compileErr))
		return entriesBatched
	}

	for _, msg := range s.EmailService.Messages {
		// Without a From address the message cannot match the expected sender.
		if len(msg.Envelope.From) == 0 {
			continue
		}

		from := msg.Envelope.From[0]
		if from.Mailbox != sender["mailbox"] || from.Host != sender["host"] || msg.Envelope.Subject != sender["subject"] {
			continue
		}

		for _, section := range msg.BodySection {
			if section.Bytes == nil {
				continue
			}

			doc, err := parser.HTMLSource(string(section.Bytes))
			if s.EmailService.CheckErr("parsing message body", err) {
				continue
			}

			table := dom.QuerySelector(doc, "body > table:nth-of-type(1n) table:nth-of-type(1n) table:nth-of-type(2n) > tbody")
			if table == nil {
				s.EmailService.Warn("dom.QuerySelector had not queried any data, returned nil")
				continue
			}

			// Records of this section only.
			entries := make([]model.ExternalSources, 0)

			for _, td := range dom.QuerySelectorAll(table, "tr > td:nth-child(2)") {
				// A fresh record per row, so nothing leaks from the previous one.
				var es model.ExternalSources

				anchor := dom.QuerySelector(td, "h2 > a")
				if anchor == nil {
					s.EmailService.Warn("dom.QuerySelector couldn't find title")
					continue
				}
				es.Title = sugar.SqueezeLine(dom.InnerHTML(anchor))

				u, err := url.Parse(dom.GetAttribute(anchor, "href"))
				if s.EmailService.CheckErr("parsing url", err) {
					continue
				}
				es.Fingerprint = u.RequestURI()

				typeIdMatch := typeIDPattern.FindStringSubmatch(es.Fingerprint)
				if len(typeIdMatch) != 2 {
					s.EmailService.Warn("Regexp => typeIdMatch not matched")
					continue
				}
				typeID, convErr := strconv.Atoi(typeIdMatch[1])
				if convErr != nil {
					s.EmailService.Warn(fmt.Sprintf("Regexp => typeIdMatch is not a number: %v", convErr))
					continue
				}
				es.TypeId = typeID

				sourceData := dom.QuerySelector(td, "p:first-child")
				if sourceData == nil {
					s.EmailService.Warn("dom.QuerySelector couldn't find sourceData in paragraph")
					continue
				}

				sourceDataString := html.UnescapeString(sugar.SqueezeLine(dom.InnerHTML(sourceData)))
				sourceDataMatch := whoGenrePattern.FindStringSubmatch(sourceDataString)
				if len(sourceDataMatch) != 3 {
					s.EmailService.Warn("Regexp => sourceData not matched")
					continue
				}

				// The topic id must be present in the configuration as an int;
				// an unchecked assertion panics on an unknown genre.
				genre := strings.ToLower(sourceDataMatch[2])
				subsectionID, ok := topics[genre].(int)
				if !ok {
					s.EmailService.Warn(fmt.Sprintf("topic %q has no integer id in configuration", genre))
					continue
				}

				es.Releaser = sourceDataMatch[1]
				es.Created = carbon.Parse(msg.Envelope.Date.String())
				es.Type = dbType
				es.TypeSubsectionId = subsectionID

				entries = append(entries, es)
			}

			// An empty batch has nothing to store; the message is left where
			// it is so it can be inspected or retried.
			if len(entries) == 0 {
				s.EmailService.Warn("no records parsed from message body, batch insert skipped")
				continue
			}

			result, status := _table.BatchInsertOnDuplicate(entries, s.db, columns)
			if status != constant.StatusFailed {
				entriesBatched = append(entriesBatched, result...)
			}

			s.EmailService.MoveMessageToMailbox(msg, status)
		}
	}

	return entriesBatched
}

View File

@@ -2,10 +2,14 @@ package table
import (
"fmt"
"log/slog"
"slices"
"strings"
"github.com/jmoiron/sqlx"
"github.com/logrusorgru/aurora/v4"
"git.amok.space/yevhen/resource-scraper/helper/thither"
"git.amok.space/yevhen/resource-scraper/types/constant"
"git.amok.space/yevhen/resource-scraper/types/model"
)
@@ -20,8 +24,6 @@ func (f *ExternalSources) InsertOnDuplicate(es model.ExternalSources, db *sqlx.D
placeholders = ":" + strings.Replace(placeholders, "`", "", -1)
query := fmt.Sprintf(stmt, constant.ExternalSourcesTable, strings.Join(f.Columns, ", "), placeholders)
//fmt.Printf("%s\n", query)
if rows, err := db.NamedQuery(query, &es); err == nil {
for rows.Next() {
es.Error = rows.StructScan(&es)
@@ -32,3 +34,45 @@ func (f *ExternalSources) InsertOnDuplicate(es model.ExternalSources, db *sqlx.D
return es
}
// BatchInsertOnDuplicate stores every entry through
// ExternalSources.InsertOnDuplicate using the given columns and returns the
// stored entries together with an overall status:
//
//   - constant.StatusSucceed when no entry reported an error (this includes an
//     empty batch),
//   - constant.StatusFailed when every entry reported an error,
//   - constant.StatusSuspicious when only some entries did.
//
// Entries whose TypeId was not present in the table before the batch started
// and that were stored without error are announced as "ADDED" on standard
// output.
//
// Each entry is written exactly once; the previous revision issued the
// statement twice per entry and ignored the error of the second write.
func BatchInsertOnDuplicate(entries []model.ExternalSources, db *sqlx.DB, columns []string) ([]model.ExternalSources, string) {
	// Nothing to store; also keeps GetTypeIds away from an empty slice.
	if len(entries) == 0 {
		return entries, constant.StatusSucceed
	}

	es := &ExternalSources{Columns: columns}

	// Looked up before inserting so new rows can be told apart from updates.
	typeIds := es.GetTypeIds(entries, db)

	errCount := 0
	for i := range entries {
		entry := es.InsertOnDuplicate(entries[i], db)

		switch {
		case entry.Error != nil:
			slog.Error("insert/update entry", "err", entry.Error)
			errCount++
		case !slices.Contains(typeIds, entry.TypeId):
			fmt.Printf("%s: %s\n", aurora.Green("ADDED"), aurora.White(entry.Title))
		}

		entries[i] = entry
	}

	var status string
	switch {
	case errCount == 0:
		status = constant.StatusSucceed
	case errCount == len(entries):
		status = constant.StatusFailed
	default:
		status = constant.StatusSuspicious
	}

	return entries, status
}
// GetTypeIds returns the type_id values that already exist in the external
// sources table for the type of the first entry and the TypeId values of all
// entries.
//
// It returns nil without querying when entries is empty or yields no ids:
// the previous revision indexed entries[0] unconditionally and would have
// produced an empty "IN ()" list. A failing query is logged and also results
// in a nil slice.
func (f *ExternalSources) GetTypeIds(entries []model.ExternalSources, db *sqlx.DB) []int {
	var typeIds []int

	if len(entries) == 0 {
		return typeIds
	}

	ids := thither.FieldValueToStrSlice(entries, "TypeId")
	if len(ids) == 0 {
		return typeIds
	}

	query := fmt.Sprintf("SELECT type_id FROM %s WHERE `type` = '%s' AND type_id IN (%s) LIMIT %d", constant.ExternalSourcesTable, entries[0].Type, strings.Join(ids, ","), len(ids))

	if err := db.Select(&typeIds, query); err != nil {
		slog.Error("getting type ids", "err", err)
	}

	return typeIds
}

7
pkg/service/info.go Normal file
View File

@@ -0,0 +1,7 @@
package service
// InfoService is a stateless service type.
type InfoService struct{}

// NewInfoService returns a pointer to a new InfoService.
func NewInfoService() *InfoService {
	return new(InfoService)
}

View File

@@ -0,0 +1 @@
package service

View File

@@ -8,11 +8,15 @@ import (
// Service groups the services of all supported sources by embedding one
// interface per service.
type Service struct {
_interface.Rutracker
_interface.Prescene
_interface.Info
_interface.ShareTheBrutality
}
// New builds a Service, constructing each source service from the matching
// repository in repos; the Info service takes no repository.
//
// NOTE(review): this span is a rendered diff hunk; the Rutracker and Prescene
// fields appear twice because removed and added lines are shown together
// without +/- markers.
func New(repos *repository.Repository) *Service {
return &Service{
Rutracker: NewRutrackerService(repos.Rutracker),
Prescene: NewPresceneService(repos.Prescene),
Rutracker: NewRutrackerService(repos.Rutracker),
Prescene: NewPresceneService(repos.Prescene),
Info: NewInfoService(),
ShareTheBrutality: NewShareTheBrutalityService(repos.ShareTheBrutality),
}
}

19
pkg/service/stb.go Normal file
View File

@@ -0,0 +1,19 @@
package service
import (
"git.amok.space/yevhen/resource-scraper/internal/mail"
"git.amok.space/yevhen/resource-scraper/types/interface"
"git.amok.space/yevhen/resource-scraper/types/model"
)
// ShareTheBrutalityService is the service layer for the ShareTheBrutality
// source; it forwards its calls to the wrapped repository.
type ShareTheBrutalityService struct {
// repo is the repository every call is delegated to.
repo _interface.ShareTheBrutality
}
// NewShareTheBrutalityService returns a service that delegates to repo.
func NewShareTheBrutalityService(repo _interface.ShareTheBrutality) *ShareTheBrutalityService {
	svc := new(ShareTheBrutalityService)
	svc.repo = repo

	return svc
}
// GetMail forwards email to the repository's GetMail and returns its results
// unchanged: the collected records and the mail session.
func (stb *ShareTheBrutalityService) GetMail(email string) ([]model.ExternalSources, *mail.EmailService) {
	records, session := stb.repo.GetMail(email)

	return records, session
}