背景
关于如何获取Hostloc每日热帖推送这个话题,本站已经写过两篇博客,分别是《你想收到hostloc每日热帖的邮件么?》以及《如何越过Hostloc的防CC攻击机制获取其每日热帖》。这两篇用的都是Python脚本:前者只是一个简单的Python脚本,无法应对后来新增的防CC机制;后者中的脚本则很好地解决了这个问题。本篇博客换用一种全新的思路来解决这个问题:借助chromedp这个把Chrome的CDP(Chrome DevTools Protocol)协议封装成Golang对象的库,调用无头(headless)Chrome浏览器来访问loc站点。
依赖
下面的脚本依赖以下第三方GitHub库,编译前请自行安装。
"github.com/anaskhan96/soup"
"github.com/chromedp/chromedp"
"github.com/emersion/go-sasl"
"github.com/emersion/go-smtp"
代码
package main
import (
"context"
"encoding/base64"
"fmt"
"os"
"runtime/debug"
"strconv"
"strings"
"time"
"github.com/anaskhan96/soup"
"github.com/chromedp/chromedp"
"github.com/emersion/go-sasl"
"github.com/emersion/go-smtp"
)
// mailSend e-mails mailBodyText as an HTML message whose subject is
// "<current date> Hostloc今日热帖".
//
// The sender address, recipient address and password are empty placeholders
// in this function and must be filled in before the program is built.
//
// On a send failure the error and a stack trace are printed and the process
// exits with status 1; on success a confirmation line is printed.
func mailSend(mailBodyText string) {
	// Account settings: placeholders to be filled in by whoever deploys this.
	from := ""
	to := ""
	password := ""
	smtpServer := "smtp.qq.com:587"

	currentDate := time.Now().Format("2006-01-02")
	subject := fmt.Sprintf("%s Hostloc今日热帖", currentDate)
	// The subject contains non-ASCII text, so it is sent as a base64
	// encoded-word ("=?UTF-8?B?...?=") rather than as raw bytes.
	subjectBase := base64.StdEncoding.EncodeToString([]byte(subject))

	auth := sasl.NewPlainClient("", from, password)
	msg := strings.NewReader(
		"From: " + from + "\r\n" +
			"To: " + to + "\r\n" +
			"Subject: =?UTF-8?B?" + subjectBase + "?=\r\n" +
			// A message that carries a Content-Type header must also declare
			// the MIME version, otherwise the Content-Type may be ignored.
			"MIME-Version: 1.0\r\n" +
			"Content-Type: text/html; charset=UTF-8" +
			"\r\n\r\n" +
			mailBodyText + "\r\n")

	if err := smtp.SendMail(smtpServer, auth, from, []string{to}, msg); err != nil {
		fmt.Println(err.Error() + string(debug.Stack()))
		os.Exit(1)
	}
	fmt.Println("成功发送了一封邮件!")
}
// getMailBody loads the Hostloc hot-thread listing in headless Chrome and
// returns an HTML fragment for the mail body. Every thread with at least 35
// replies contributes `title<br/>absolute-link`; entries are separated by
// "<br/>" plus a line break. The result is empty when no thread qualifies.
//
// On a fetch or parse failure the error and a stack trace are printed and the
// process exits with status 1.
func getMailBody() string {
	const (
		urlPrefix  = "https://www.hostloc.com/"
		minReplies = 35
	)
	fullURL := urlPrefix + "forum.php?mod=forumdisplay&fid=45&filter=hot"

	// The browser work lives in its own function so that its deferred
	// cancel calls have already run by the time os.Exit is reached here;
	// os.Exit does not run deferred functions.
	hotListHTML, err := fetchThreadListHTML(fullURL)
	if err != nil {
		fmt.Println(err.Error() + string(debug.Stack()))
		os.Exit(1)
	}

	mailBodyText, err := buildMailBody(hotListHTML, urlPrefix, minReplies)
	if err != nil {
		fmt.Println(err.Error() + string(debug.Stack()))
		os.Exit(1)
	}
	return mailBodyText
}

// fetchThreadListHTML opens pageURL in a headless Chrome instance and returns
// the outer HTML of the element with id "threadlist". The whole operation is
// bounded by a 15 second timeout, and the browser is released before return.
func fetchThreadListHTML(pageURL string) (string, error) {
	// Start from chromedp's defaults and add the flags this program needs.
	options := append(chromedp.DefaultExecAllocatorOptions[:],
		chromedp.Flag("headless", true),
		chromedp.Flag("disable-gpu", true),
		chromedp.UserAgent(`Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36`),
	)

	allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), options...)
	defer cancelAlloc()

	// Create the Chrome instance.
	browserCtx, cancelBrowser := chromedp.NewContext(allocCtx)
	defer cancelBrowser()

	runCtx, cancelRun := context.WithTimeout(browserCtx, 15*time.Second)
	defer cancelRun()

	var hotListHTML string
	err := chromedp.Run(runCtx,
		// Open the listing page.
		chromedp.Navigate(pageURL),
		// Wait until the page body is rendered, then grab the thread list.
		chromedp.WaitVisible("body", chromedp.ByQuery),
		chromedp.OuterHTML("#threadlist", &hotListHTML, chromedp.ByID),
	)
	if err != nil {
		return "", fmt.Errorf("fetching %s: %w", pageURL, err)
	}
	return hotListHTML, nil
}

// buildMailBody extracts the threads with at least minReplies replies from the
// thread-list HTML and renders them as an HTML fragment. Rows that lack the
// expected reply-count or title cells are skipped; a reply count that is not a
// number is reported as an error.
func buildMailBody(hotListHTML, urlPrefix string, minReplies int) (string, error) {
	doc := soup.HTMLParse(hotListHTML)
	if doc.Error != nil {
		return "", fmt.Errorf("parsing thread list: %w", doc.Error)
	}
	container := doc.Find("div", "class", "bm_c")
	if container.Error != nil {
		return "", fmt.Errorf("locating div.bm_c in thread list: %w", container.Error)
	}

	rows := container.FindAll("tr")
	// The first two rows are not processed as threads (presumably table
	// headers — TODO confirm against the live page). With two rows or fewer
	// there is nothing to report.
	if len(rows) <= 2 {
		return "", nil
	}

	// Titles and links come from a public forum and end up inside an HTML
	// mail, so they are escaped before being embedded.
	escaper := strings.NewReplacer(
		"&", "&amp;",
		"'", "&#39;",
		"<", "&lt;",
		">", "&gt;",
		`"`, "&#34;",
	)

	var mailBodyList []string
	for _, row := range rows[2:] {
		replyCell := row.Find("td", "class", "num")
		if replyCell.Error != nil {
			continue
		}
		replyLink := replyCell.FindStrict("a", "class", "xi2")
		if replyLink.Error != nil {
			continue
		}
		titleCell := row.Find("th", "class", "new")
		if titleCell.Error != nil {
			continue
		}
		titleLink := titleCell.FindStrict("a", "class", "s xst")
		if titleLink.Error != nil {
			continue
		}

		replyNumStr := strings.TrimSpace(replyLink.Text())
		replyNum, err := strconv.Atoi(replyNumStr)
		if err != nil {
			return "", fmt.Errorf("parsing reply count %q: %w", replyNumStr, err)
		}
		if replyNum < minReplies {
			continue
		}

		title := titleLink.Text()
		link := titleLink.Attrs()["href"]
		mailBodyList = append(mailBodyList,
			escaper.Replace(title)+"<br/>"+escaper.Replace(urlPrefix+link))
	}

	// "<br/>" is the valid line-break tag (the end-tag form "</br>" is not).
	// The trailing CRLF keeps each physical line of the message short instead
	// of emitting the whole body as one very long line.
	return strings.Join(mailBodyList, "<br/>\r\n"), nil
}
func main() {
mailSend(getMailBody())
}