162 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			162 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package repository
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"net/url"
 | |
| 	"regexp"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 
 | |
| 	"github.com/dromara/carbon/v2"
 | |
| 	"github.com/emersion/go-imap/v2"
 | |
| 	"github.com/go-shiori/dom"
 | |
| 	"github.com/jmoiron/sqlx"
 | |
| 	"github.com/spf13/viper"
 | |
| 	"golang.org/x/net/html"
 | |
| 
 | |
| 	"git.kplus.net.ua/yevhen/resource-scraper/helper/parser"
 | |
| 	"git.kplus.net.ua/yevhen/resource-scraper/helper/sugar"
 | |
| 	"git.kplus.net.ua/yevhen/resource-scraper/internal/mail"
 | |
| 	_table "git.kplus.net.ua/yevhen/resource-scraper/pkg/repository/table"
 | |
| 	"git.kplus.net.ua/yevhen/resource-scraper/types/constant"
 | |
| 	"git.kplus.net.ua/yevhen/resource-scraper/types/model"
 | |
| )
 | |
| 
 | |
| type ShareTheBrutality struct {
 | |
| 	scope        string
 | |
| 	EmailService mail.EmailService
 | |
| 	db           *sqlx.DB
 | |
| }
 | |
| 
 | |
| func NewShareTheBrutalityRepository(db *sqlx.DB) *ShareTheBrutality {
 | |
| 	return &ShareTheBrutality{db: db, scope: constant.ScopeShareTheBrutality}
 | |
| }
 | |
| 
 | |
| func (s *ShareTheBrutality) GetMail(email string) ([]model.ExternalSources, *mail.EmailService) {
 | |
| 	s.EmailService = mail.EmailService{
 | |
| 		User: email,
 | |
| 	}
 | |
| 
 | |
| 	s.EmailService.Login()
 | |
| 
 | |
| 	mailboxes := fmt.Sprintf("%s.mailboxes", s.scope)
 | |
| 	criteria := fmt.Sprintf("%s.search-criteria", s.scope)
 | |
| 	searchCriteria := &imap.SearchCriteria{
 | |
| 		Text: viper.GetStringSlice(criteria),
 | |
| 	}
 | |
| 	//now := carbon.Now()
 | |
| 	s.EmailService.ListMessages(viper.GetStringSlice(mailboxes), searchCriteria)
 | |
| 	//box.CreateMailbox("INBOX/Processed")
 | |
| 	//s.EmailService.CreateMailbox("Processed")
 | |
| 	//s.EmailService.CreateMailbox("Succeed")
 | |
| 	//s.EmailService.CreateMailbox("Processed/Succeed")
 | |
| 	//s.EmailService.DeleteMailbox("Processed")
 | |
| 	//s.EmailService.CreateMailbox("Processed/Failed")
 | |
| 	//s.EmailService.CreateMailbox("Processed/Suspicious")
 | |
| 	//s.EmailService.MailboxesList()
 | |
| 
 | |
| 	entries := s.Processing(viper.GetStringMapString(fmt.Sprintf("%s.sender", s.scope)))
 | |
| 
 | |
| 	return entries, &s.EmailService
 | |
| }
 | |
| 
 | |
| func (s *ShareTheBrutality) Processing(sender map[string]string) []model.ExternalSources {
 | |
| 	columns := []string{"`type`", "type_id", "title", "type_subsection_id", "releaser", "created", "fingerprint"}
 | |
| 	entriesBatched := make([]model.ExternalSources, 0)
 | |
| 	if len(s.EmailService.Messages) == 0 {
 | |
| 		return entriesBatched
 | |
| 	}
 | |
| 
 | |
| 	//tmpPath := viper.GetString(fmt.Sprintf("%s.storage.filepath", s.scope))
 | |
| 	dbType := viper.GetString(fmt.Sprintf("%s.db-type", s.scope))
 | |
| 	regexPatterns := viper.GetStringMapString(fmt.Sprintf("%s.regex", s.scope))
 | |
| 	topics := viper.GetStringMap(fmt.Sprintf("%s.topics", s.scope))
 | |
| 
 | |
| 	for _, msg := range s.EmailService.Messages {
 | |
| 		entries := make([]model.ExternalSources, 0)
 | |
| 		from := msg.Envelope.From[0]
 | |
| 		subject := msg.Envelope.Subject
 | |
| 
 | |
| 		if !(from.Mailbox == sender["mailbox"] && from.Host == sender["host"] && subject == sender["subject"]) {
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		for _, section := range msg.BodySection {
 | |
| 			//sugar.WriteDataToTmpFile(msg.BodySection, tmpPath)
 | |
| 
 | |
| 			if section.Bytes != nil {
 | |
| 				doc, err := parser.HTMLSource(string(section.Bytes))
 | |
| 				if s.EmailService.CheckErr("parsing message body", err) {
 | |
| 					continue
 | |
| 				}
 | |
| 
 | |
| 				table := dom.QuerySelector(doc, "body > table:nth-of-type(1n) table:nth-of-type(1n) table:nth-of-type(2n) > tbody")
 | |
| 				if table == nil {
 | |
| 					s.EmailService.Warn("dom.QuerySelector had not queried any data, returned nil")
 | |
| 					continue
 | |
| 				}
 | |
| 
 | |
| 				var es model.ExternalSources
 | |
| 				for _, td := range dom.QuerySelectorAll(table, "tr > td:nth-child(2)") {
 | |
| 					anchor := dom.QuerySelector(td, "h2 > a")
 | |
| 					if anchor == nil {
 | |
| 						s.EmailService.Warn("dom.QuerySelector couldn't find title")
 | |
| 						continue
 | |
| 					}
 | |
| 					es.Title = sugar.SqueezeLine(dom.InnerHTML(anchor))
 | |
| 
 | |
| 					u, err := url.Parse(dom.GetAttribute(anchor, "href"))
 | |
| 					if s.EmailService.CheckErr("parsing url", err) {
 | |
| 						continue
 | |
| 					}
 | |
| 
 | |
| 					es.Fingerprint = u.RequestURI()
 | |
| 					pattern := regexp.MustCompile(regexPatterns["type-id"])
 | |
| 					typeIdMatch := pattern.FindStringSubmatch(es.Fingerprint)
 | |
| 					if len(typeIdMatch) != 2 {
 | |
| 						s.EmailService.Warn("Regexp => typeIdMatch not matched")
 | |
| 						continue
 | |
| 					}
 | |
| 					es.TypeId, _ = strconv.Atoi(typeIdMatch[1])
 | |
| 
 | |
| 					sourceData := dom.QuerySelector(td, "p:first-child")
 | |
| 					if sourceData == nil {
 | |
| 						s.EmailService.Warn("dom.QuerySelector couldn't find sourceData in paragraph")
 | |
| 						continue
 | |
| 					}
 | |
| 					sourceDataString := html.UnescapeString(sugar.SqueezeLine(dom.InnerHTML(sourceData)))
 | |
| 					pattern = regexp.MustCompile(regexPatterns["who-genre"])
 | |
| 					sourceDataMatch := pattern.FindStringSubmatch(sourceDataString)
 | |
| 
 | |
| 					if len(sourceDataMatch) != 3 {
 | |
| 						s.EmailService.Warn("Regexp => sourceData not matched")
 | |
| 						continue
 | |
| 					}
 | |
| 
 | |
| 					es.Releaser = sourceDataMatch[1]
 | |
| 					es.Created = carbon.Parse(msg.Envelope.Date.String(), "Europe/Kyiv")
 | |
| 
 | |
| 					es.Type = dbType
 | |
| 
 | |
| 					genre := strings.ToLower(sourceDataMatch[2])
 | |
| 					es.TypeSubsectionId = topics[genre].(int)
 | |
| 
 | |
| 					entries = append(entries, es)
 | |
| 				}
 | |
| 
 | |
| 				//fmt.Println(entries)
 | |
| 				//os.Exit(0)
 | |
| 
 | |
| 				result, status := _table.BatchInsertOnDuplicate(entries, s.db, columns)
 | |
| 				if status != constant.StatusFailed {
 | |
| 					entriesBatched = append(entriesBatched, result...)
 | |
| 				}
 | |
| 
 | |
| 				s.EmailService.MoveMessageToMailbox(msg, status)
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return entriesBatched
 | |
| }
 |