66 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			66 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package parser
 | |
| 
 | |
| import (
 | |
| 	"errors"
 | |
| 	"io"
 | |
| 	"log"
 | |
| 	"log/slog"
 | |
| 	"net/http"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 
 | |
| 	"golang.org/x/net/html"
 | |
| )
 | |
| 
 | |
// setClient issues an HTTP GET for url and returns the raw response.
//
// The request carries a desktop Chrome User-Agent header. An error is
// returned when the request cannot be constructed (for example, url does not
// parse) or when the client fails to execute it; when construction fails the
// returned response is nil. On success the caller owns the response and is
// responsible for closing its Body.
func setClient(url string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		// Hand the failure back to the caller instead of terminating the
		// whole process from inside a helper.
		return nil, err
	}

	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36")

	client := &http.Client{}
	return client.Do(req)
}
 | |
| 
 | |
| func HTMLSourceFromURL(url string) (*html.Node, error) {
 | |
| 	resp, err := setClient(url)
 | |
| 	if resp == nil {
 | |
| 		slog.Error("client return nil response", "err", err)
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	defer func(Body io.ReadCloser) {
 | |
| 		err = Body.Close()
 | |
| 		if err != nil {
 | |
| 			slog.Error("closing response body", "err", err)
 | |
| 		}
 | |
| 	}(resp.Body)
 | |
| 
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	if resp.StatusCode >= 400 {
 | |
| 		return nil, errors.New("http status code: " + strconv.Itoa(resp.StatusCode))
 | |
| 	}
 | |
| 
 | |
| 	doc, err := html.Parse(resp.Body)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	return doc, nil
 | |
| }
 | |
| 
 | |
| func HTMLSource(htmlSource string) (*html.Node, error) {
 | |
| 	doc, err := html.Parse(strings.NewReader(htmlSource))
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	return doc, nil
 | |
| }
 | |
| 
 | |
// Reference: https://ahmadrosid.com/blog/how-to-query-html-dom-in-golang
 |