preset scraper added
This commit is contained in:
63
helper/parser/html.go
Normal file
63
helper/parser/html.go
Normal file
@@ -0,0 +1,63 @@
|
||||
package parser
|
||||
|
||||
import (
	"errors"
	"fmt"
	"io"
	"log"
	"log/slog"
	"net/http"
	"strconv"
	"strings"
	"time"

	"golang.org/x/net/html"
)
|
||||
|
||||
// defaultClient is shared by every request so the underlying transport can
// reuse connections. The timeout bounds the whole exchange (connecting,
// redirects and reading the body) so a stalled server cannot block the caller
// indefinitely.
var defaultClient = &http.Client{Timeout: 30 * time.Second}

// setClient issues a GET request for url with a desktop browser User-Agent
// header and returns the raw response.
//
// A malformed url is reported through the returned error rather than by
// terminating the process. On success the caller is responsible for closing
// the response body.
func setClient(url string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, fmt.Errorf("building request for %q: %w", url, err)
	}

	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36")

	return defaultClient.Do(req)
}
|
||||
|
||||
func HTMLSourceFromURL(url string) (*html.Node, error) {
|
||||
resp, err := setClient(url)
|
||||
defer func(Body io.ReadCloser) {
|
||||
fmt.Printf("%v\n", Body == nil)
|
||||
|
||||
err = Body.Close()
|
||||
if err != nil {
|
||||
slog.Error("closing response body", "err", err)
|
||||
}
|
||||
}(resp.Body)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
return nil, errors.New("http status code: " + strconv.Itoa(resp.StatusCode))
|
||||
}
|
||||
|
||||
doc, err := html.Parse(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return doc, nil
|
||||
}
|
||||
|
||||
func HTMLSource(htmlSource string) (*html.Node, error) {
|
||||
doc, err := html.Parse(strings.NewReader(htmlSource))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return doc, nil
|
||||
}
|
||||
|
||||
// Reference on querying the HTML DOM in Go:
// https://ahmadrosid.com/blog/how-to-query-html-dom-in-golang
|
||||
Reference in New Issue
Block a user