基于Golang的chromedp库实现获取Hostloc的每日热帖

基于Golang的chromedp库实现获取Hostloc的每日热帖

背景

关于获取Hostloc每日热帖推送的话题,本站已经写过两篇博客,分别是《你想收到hostloc每日热帖的邮件么?》以及《如何越过Hostloc的防CC攻击机制获取其每日热帖》。这两篇用的都是Python脚本:前者只是一个简单的脚本,无法应对后来新增的防CC机制;后者中的脚本则很好地解决了这个问题。本篇博客换用一种全新的思路来解决这个问题:借助chromedp(一个把Chrome的CDP协议封装成Golang对象的库)来调用无头的Chrome浏览器访问loc站点。

依赖

下面的脚本依赖以下第三方GitHub库,编译前请先自行安装。

"github.com/anaskhan96/soup"
"github.com/chromedp/chromedp"
"github.com/emersion/go-sasl"
"github.com/emersion/go-smtp"

代码

package main

import (
	"context"
	"encoding/base64"
	"fmt"
	"html"
	"os"
	"runtime/debug"
	"strconv"
	"strings"
	"time"

	"github.com/anaskhan96/soup"
	"github.com/chromedp/chromedp"
	"github.com/emersion/go-sasl"
	"github.com/emersion/go-smtp"
)

// mailSend delivers mailBodyText as the body of an HTML e-mail whose subject
// is "<today's date> Hostloc今日热帖".
//
// The sender address, recipient address, SMTP password and SMTP server are
// the local variables at the top of the function. from, to and password are
// blank in the source and have to be filled in before anything can be sent.
//
// On success a confirmation line is printed. On failure the error text and
// the current stack trace are printed and the process exits with status 1.
func mailSend(mailBodyText string) {
	from := ""
	to := ""
	password := ""
	smtpServer := "smtp.qq.com:587"

	// The subject contains non-ASCII characters, so it is transmitted as a
	// base64 "encoded-word" (=?UTF-8?B?...?=) rather than as raw bytes.
	subject := fmt.Sprintf("%s Hostloc今日热帖", time.Now().Format("2006-01-02"))
	encodedSubject := base64.StdEncoding.EncodeToString([]byte(subject))

	// MIME-Version accompanies Content-Type: a message that declares a MIME
	// content type is expected to identify itself as a MIME message.
	msg := strings.NewReader(
		"From: " + from + "\r\n" +
			"To: " + to + "\r\n" +
			"Subject: =?UTF-8?B?" + encodedSubject + "?=\r\n" +
			"MIME-Version: 1.0\r\n" +
			"Content-Type: text/html; charset=UTF-8\r\n" +
			"\r\n" +
			mailBodyText + "\r\n")

	auth := sasl.NewPlainClient("", from, password)
	if err := smtp.SendMail(smtpServer, auth, from, []string{to}, msg); err != nil {
		fmt.Println(err.Error() + string(debug.Stack()))
		os.Exit(1)
	}
	fmt.Println("成功发送了一封邮件!")
}

// getMailBody loads the Hostloc hot-thread listing in headless Chrome and
// returns an HTML fragment with one "title<br/>link" entry for every listed
// thread that has at least 35 replies. Entries are separated by "<br/>" plus
// a line break; the result is the empty string when no thread qualifies.
//
// Any failure (browser error, missing thread-list container, non-numeric
// reply count) is printed together with a stack trace and terminates the
// process with status 1.
func getMailBody() string {
	const (
		urlPrefix  = "https://www.hostloc.com/"
		minReplies = 35
	)
	fullUrl := urlPrefix + "forum.php?mod=forumdisplay&fid=45&filter=hot"

	// The browser work lives in its own function so that its deferred
	// cleanup has already run by the time os.Exit can be reached here;
	// os.Exit does not run deferred calls.
	hotListHtml, err := fetchThreadListHTML(fullUrl)
	if err != nil {
		fmt.Println(err.Error() + string(debug.Stack()))
		os.Exit(1)
	}

	mailBodyList, err := hotThreadEntries(hotListHtml, urlPrefix, minReplies)
	if err != nil {
		fmt.Println(err.Error() + string(debug.Stack()))
		os.Exit(1)
	}

	// The trailing CRLF after each <br/> does not change how the HTML is
	// rendered but keeps the mail body from being one arbitrarily long line.
	return strings.Join(mailBodyList, "<br/>\r\n")
}

// fetchThreadListHTML opens pageURL in a headless Chrome instance and returns
// the outer HTML of the element with id "threadlist". The whole browser run
// is limited to 15 seconds.
func fetchThreadListHTML(pageURL string) (string, error) {
	// Start from chromedp's default allocator options and add our own.
	options := append(chromedp.DefaultExecAllocatorOptions[:],
		chromedp.Flag("headless", true),
		chromedp.Flag("disable-gpu", true),
		chromedp.UserAgent(`Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36`),
	)
	allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), options...)
	defer cancelAlloc()

	// Create the Chrome instance.
	browserCtx, cancelBrowser := chromedp.NewContext(allocCtx)
	defer cancelBrowser()

	runCtx, cancelRun := context.WithTimeout(browserCtx, 15*time.Second)
	defer cancelRun()

	var threadListHTML string
	err := chromedp.Run(runCtx,
		chromedp.Tasks{
			// Open the page.
			chromedp.Navigate(pageURL),
			// Wait until the page body is visible.
			chromedp.WaitVisible("body", chromedp.ByQuery),
			chromedp.OuterHTML("#threadlist", &threadListHTML, chromedp.ByID),
		})
	if err != nil {
		return "", err
	}
	return threadListHTML, nil
}

// hotThreadEntries parses the thread-list markup and returns one
// "title<br/>link" string per table row whose reply count is at least
// minReplies. Rows that lack the expected reply or title cells are skipped.
func hotThreadEntries(threadListHTML, urlPrefix string, minReplies int) ([]string, error) {
	doc := soup.HTMLParse(threadListHTML)
	if doc.Error != nil {
		return nil, doc.Error
	}
	container := doc.Find("div", "class", "bm_c")
	if container.Error != nil {
		return nil, container.Error
	}

	// The first two rows are skipped, as in the original implementation.
	// NOTE(review): presumably these are header rows - confirm against the
	// live page markup.
	rows := container.FindAll("tr")
	if len(rows) <= 2 {
		return nil, nil
	}

	var entries []string
	for _, row := range rows[2:] {
		replyCell := row.Find("td", "class", "num")
		titleCell := row.Find("th", "class", "new")
		if replyCell.Error != nil || titleCell.Error != nil {
			continue
		}
		replyLink := replyCell.FindStrict("a", "class", "xi2")
		titleLink := titleCell.FindStrict("a", "class", "s xst")
		if replyLink.Error != nil || titleLink.Error != nil {
			continue
		}

		replies, err := strconv.Atoi(strings.TrimSpace(replyLink.Text()))
		if err != nil {
			return nil, err
		}
		if replies < minReplies {
			continue
		}

		// Only prepend the site prefix to relative links.
		link := titleLink.Attrs()["href"]
		if !strings.HasPrefix(link, "http://") && !strings.HasPrefix(link, "https://") {
			link = urlPrefix + strings.TrimPrefix(link, "/")
		}

		// Titles and links come from forum content; escape them before
		// embedding them in the HTML mail body.
		entries = append(entries, html.EscapeString(titleLink.Text())+"<br/>"+html.EscapeString(link))
	}
	return entries, nil
}

func main() {
	mailSend(getMailBody())
}

 

0 评论 在 “基于Golang的chromedp库实现获取Hostloc的每日热帖”

发表评论

电子邮件地址不会被公开。 必填项已用*标注

captcha