背景

很早之前写过一篇叫《你想收到hostloc每日热帖的邮件么?》的博客,但是没过多久那个脚本就失效了,因为hostloc站点加了防CC攻击的机制,导致无法直接获取其网页源码。下面分享一个可以通过防CC验证的脚本。注意:本脚本仅用于把loc的热帖推送给自己,请不要用于其它用途。

依赖

requests
pyaes==1.6.1
beautifulsoup4==4.10.0
html5lib==1.1

代码

import re
import smtplib
import ssl
import textwrap
import time
from email.mime.text import MIMEText

import requests
from bs4 import BeautifulSoup
from pyaes import AESModeOfOperationCBC
from requests import Session as req_Session

# SMTP settings used by mail_send(); fill in SENDER, RECEIVER and PWD before running.
HOST = 'smtp.qq.com'  # SMTP server host name
PORT = 587  # SMTP server port
SENDER = ''  # address used as the "From" header and as the SMTP login name
RECEIVER = ''  # address the e-mail is sent to
PWD = ''  # password passed to the SMTP login together with SENDER

# Python port of the toNumbers function written in JS on the anti-CC verification page.
def toNumbers(secret: str) -> list:
    """Convert a hex string into a list of integers.

    The string is cut into chunks of at most two characters and every chunk
    is parsed as a base-16 number.
    """
    return [int(pair, 16) for pair in textwrap.wrap(secret, 2)]

def multiple_replace(s):
    """Return ``s`` with every filtered word replaced by its mask.

    Edit ``replace_mapping`` to change the (word, replacement) pairs; the
    pairs are applied one after another in list order.
    """
    replace_mapping = [
        ("键政", "***"),
        ("ddos", "***"),
        ("DDOS", "***"),
        ("党中央", "***"),
        ("扶墙", "***"),
    ]
    for target, mask in replace_mapping:
        s = s.replace(target, mask)
    return s


# Visit the forum home page without cookies, check whether the anti-CC mechanism is enabled,
# and return the status plus the parameters needed for the AES computation in one dict.
def check_anti_cc() -> dict:
    """Probe the forum home page for the anti-CC JavaScript challenge.

    Returns:
        An empty dict when the page contains no ``toNumbers("...")`` call
        (challenge not active).  Otherwise a dict with key ``"ok"``: ``0``
        when the number of extracted parameters is unexpected, or ``1``
        together with ``"cookie_name"`` and the three extracted strings
        ``"a"``, ``"b"`` and ``"c"`` in the order they appear in the page.

    Raises:
        requests.exceptions.RequestException: if the request fails or times out.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
    }
    home_page = "https://hostloc.com/forum.php"
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.get(home_page, headers=headers, timeout=30)
    # Raw strings: "\(" is not a valid escape sequence in a normal string literal.
    aes_keys = re.findall(r'toNumbers\("(.*?)"\)', res.text)
    cookie_name = re.findall(r'cookie="(.*?)="', res.text)

    if not aes_keys:  # anti-CC mechanism is not enabled
        return {}

    print("检测到防 CC 机制开启!")
    if len(aes_keys) != 3 or len(cookie_name) != 1:
        # The patterns matched, but not the expected number of times (abnormal page).
        return {"ok": 0}

    return {
        "ok": 1,
        "cookie_name": cookie_name[0],
        "a": aes_keys[0],
        "b": aes_keys[1],
        "c": aes_keys[2],
    }


# When the anti-CC mechanism is enabled, run the AES decryption on the fetched values to
# build one cookie (returns empty cookies when the mechanism is not enabled).
def gen_anti_cc_cookies() -> dict:
    """Build the cookie dict that answers the anti-CC challenge.

    Returns an empty dict when check_anti_cc() reports no challenge or reports
    unusable parameters; otherwise a one-entry dict mapping the cookie name to
    the hex-encoded AES-CBC decryption result.
    """
    challenge = check_anti_cc()

    if not challenge:  # empty: anti-CC mechanism is not enabled
        return {}

    if challenge["ok"] == 0:
        print("防 CC 验证过程所需参数不符合要求,页面可能存在错误!")
        return {}

    # Decrypt "c" with AES in CBC mode; "a" and "b" are handed to the cipher
    # constructor as its first and second argument (key and IV in pyaes).
    print("自动模拟计算尝试通过防 CC 验证")
    key, iv, ciphertext = (
        bytes(toNumbers(challenge[field])) for field in ("a", "b", "c")
    )
    plaintext = AESModeOfOperationCBC(key, iv).decrypt(ciphertext)

    return {challenge["cookie_name"]: plaintext.hex()}


def get_source() -> str:
    """Download the hot-thread listing page and return its HTML as text.

    The request carries browser-like headers plus whatever cookies
    gen_anti_cc_cookies() produces.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: on connection errors, timeouts
            or a non-success HTTP status (via ``raise_for_status``).
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
        "origin": "https://hostloc.com",
        "referer": "https://hostloc.com/forum.php",
    }
    hot_url = "https://hostloc.com/forum.php?mod=forumdisplay&fid=45&filter=hot"

    # The context manager closes the session's connections even if a call below raises.
    with req_Session() as s:
        s.headers.update(headers)
        s.cookies.update(gen_anti_cc_cookies())
        # A timeout keeps the script from hanging forever on a stalled connection.
        res = s.get(url=hot_url, timeout=30)
        res.raise_for_status()
        return res.content.decode('utf-8')


def mail_send(subject, mail_body):
    """Send a plain-text UTF-8 e-mail from SENDER to RECEIVER.

    Args:
        subject: Value of the Subject header.
        mail_body: Text used as the message body.

    SMTP errors are caught and printed instead of being raised, so the caller
    is not interrupted when sending fails.
    """
    msg = MIMEText(mail_body, 'plain', 'utf-8')
    msg['Subject'] = subject
    msg['From'] = SENDER
    msg['To'] = RECEIVER
    try:
        # The context manager sends QUIT and closes the socket even when
        # login/sendmail raise; the timeout prevents an indefinite hang.
        with smtplib.SMTP(HOST, PORT, timeout=30) as s:
            s.ehlo()
            # Upgrade to TLS when the server offers it so the password is not
            # sent in clear text; login() re-issues EHLO after the upgrade.
            if s.has_extn('starttls'):
                s.starttls(context=ssl.create_default_context())
            s.login(SENDER, PWD)
            s.sendmail(SENDER, RECEIVER, msg.as_string())
    except smtplib.SMTPException as e:
        print(str(e))


def main(min_replies=35):
    """Collect hot threads from the listing page and e-mail them.

    Args:
        min_replies: Minimum reply count a thread needs to be included.

    Each included thread becomes one line of the mail body: its title, two
    spaces, and its absolute URL.  The body is passed through
    multiple_replace() before sending.
    """
    prefix = 'https://www.hostloc.com/'
    current_date = time.strftime("%Y-%m-%d", time.localtime())
    soup = BeautifulSoup(get_source(), 'html5lib')

    items_title_list = []
    # The first two matched rows are skipped (presumably non-thread rows — verify against the page).
    for item in soup.select('div.bm_c tr')[2:]:
        reply_link = item.select_one('td.num a.xi2')
        title_link = item.select_one('th.new a.s.xst')
        # Rows without both links cannot be reported; skip them instead of crashing.
        if reply_link is None or title_link is None:
            continue
        href = title_link.get('href')
        if not href:
            continue
        try:
            reply_num = int(reply_link.get_text())
        except ValueError:
            # Reply count is not a plain number; skip the row.
            continue
        if reply_num >= min_replies:
            items_title_list.append(title_link.get_text() + '  ' + prefix + href)

    mail_send(subject=current_date + ' ' + 'Hostloc今日热帖',
              mail_body=multiple_replace('\n'.join(items_title_list)))
    # NOTE(review): mail_send() prints SMTP errors and returns normally, so this
    # message is printed even when sending failed.
    print('成功发送了一封邮件!')

# Unguarded entry point: main() runs whenever this file is executed (or imported).
main()

说明

如果需要发邮件给自己,请自行替换脚本中的如下变量:

# SMTP settings used by mail_send(); fill in SENDER, RECEIVER and PWD before running.
HOST = 'smtp.qq.com'  # SMTP server host name
PORT = 587  # SMTP server port
SENDER = ''  # address used as the "From" header and as the SMTP login name
RECEIVER = ''  # address the e-mail is sent to
PWD = ''  # password passed to the SMTP login together with SENDER

如需调整敏感词过滤,请自行修改以下函数中replace_mapping列表的值:

def multiple_replace(s):
    """Return ``s`` with every filtered word replaced by its mask.

    Edit ``replace_mapping`` to change the (word, replacement) pairs; the
    pairs are applied one after another in list order.
    """
    replace_mapping = [
        ("键政", "***"),
        ("ddos", "***"),
        ("DDOS", "***"),
        ("党中央", "***"),
        ("扶墙", "***"),
    ]
    for target, mask in replace_mapping:
        s = s.replace(target, mask)
    return s