背景
很早之前我写过一篇名为《你想收到hostloc每日热帖的邮件么?》的博客,但没过多久那个脚本就失效了:hostloc站点增加了防CC攻击的机制,导致无法再直接获取其网页源码。下面分享一个可以通过防CC验证的脚本。注意:本脚本仅供自用,用于把loc的热帖推送给自己,请不要用于其它用途。
依赖
requests
pyaes==1.6.1
beautifulsoup4==4.10.0
html5lib==1.1
代码
import re
import textwrap
import requests
import time
import smtplib
from bs4 import BeautifulSoup
from email.mime.text import MIMEText
from pyaes import AESModeOfOperationCBC
from requests import Session as req_Session
# SMTP server host and port that mail_send() connects to.
HOST = 'smtp.qq.com'
PORT = 587
# Sender address: used as the SMTP login name and as the mail's From header.
# Must be filled in before running.
SENDER = ''
# Recipient address: used as the envelope recipient and as the To header.
# Must be filled in before running.
RECEIVER = ''
# Password passed to the SMTP login together with SENDER.
# Must be filled in before running.
PWD = ''
def toNumbers(secret: str) -> list:
    """Python port of the JS ``toNumbers`` helper used by the anti-CC page.

    Splits ``secret`` into chunks of at most two characters and parses each
    chunk as a hexadecimal number.

    Args:
        secret: Hex-encoded string, e.g. ``"0aff"``.

    Returns:
        A list with one integer per chunk, e.g. ``[10, 255]``; an empty list
        for an empty string.
    """
    return [int(pair, 16) for pair in textwrap.wrap(secret, 2)]
def multiple_replace(s):
    """Mask every sensitive word found in ``s``.

    Each ``(word, mask)`` pair of ``replace_mapping`` is applied in order with
    a plain, case-sensitive ``str.replace``.

    Args:
        s: Text to filter.

    Returns:
        The text with every listed word replaced by its mask.
    """
    # Edit this list to change which words are masked.
    replace_mapping = [
        ("键政", "***"),
        ("ddos", "***"),
        ("DDOS", "***"),
        ("党中央", "***"),
        ("扶墙", "***"),
    ]
    for word, mask in replace_mapping:
        s = s.replace(word, mask)
    return s
def check_anti_cc() -> dict:
    """Request the forum home page without cookies and report the anti-CC state.

    The response body is searched for ``toNumbers("<hex>")`` calls and for a
    ``cookie="<name>="`` assignment; an enabled anti-CC page is expected to
    contain exactly three of the former and one of the latter.

    Returns:
        ``{}`` when no ``toNumbers(...)`` call is found (anti-CC is off);
        ``{"ok": 0}`` when parameters are found but their count is unexpected;
        ``{"ok": 1, "cookie_name": ..., "a": ..., "b": ..., "c": ...}`` when
        exactly three hex parameters and one cookie name are found.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
    }
    home_page = "https://hostloc.com/forum.php"
    # Without a timeout a stalled connection would hang the script forever.
    res = requests.get(home_page, headers=headers, timeout=15)

    # Raw strings: "\(" is an invalid escape sequence in a normal literal.
    aes_keys = re.findall(r'toNumbers\("(.*?)"\)', res.text)
    cookie_name = re.findall(r'cookie="(.*?)="', res.text)

    if not aes_keys:
        # No challenge parameters on the page: anti-CC is not enabled.
        return {}

    print("检测到防 CC 机制开启!")
    if len(aes_keys) != 3 or len(cookie_name) != 1:
        # The regexes matched, but not the expected number of parameters.
        return {"ok": 0}

    return {
        "ok": 1,
        "cookie_name": cookie_name[0],
        "a": aes_keys[0],
        "b": aes_keys[1],
        "c": aes_keys[2],
    }
def gen_anti_cc_cookies() -> dict:
    """Compute the cookie that passes the anti-CC check, if one is needed.

    Uses the parameters returned by ``check_anti_cc()``: ``a`` and ``b`` are
    passed to ``AESModeOfOperationCBC`` (key and IV positions), ``c`` is
    decrypted, and the hex form of the result becomes the cookie value.

    Returns:
        ``{cookie_name: value}`` when the challenge parameters were parsed
        successfully; an empty dict when anti-CC is off or the parameters
        were malformed.
    """
    params = check_anti_cc()
    if not params:
        # Empty dict: anti-CC is not enabled, so no cookie is required.
        return {}

    if params["ok"] == 0:
        print("防 CC 验证过程所需参数不符合要求,页面可能存在错误!")
        return {}

    print("自动模拟计算尝试通过防 CC 验证")
    key, iv, ciphertext = (
        bytes(toNumbers(params[field])) for field in ("a", "b", "c")
    )
    plaintext = AESModeOfOperationCBC(key, iv).decrypt(ciphertext)
    return {params["cookie_name"]: plaintext.hex()}
def get_source() -> str:
    """Download the hot-thread listing page and return its HTML.

    A session is created with browser-like headers and the cookie produced by
    ``gen_anti_cc_cookies()`` (empty when anti-CC is off), then the hot-thread
    URL is fetched.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: if the request fails or times out.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
        "origin": "https://hostloc.com",
        "referer": "https://hostloc.com/forum.php",
    }
    hot_url = "https://hostloc.com/forum.php?mod=forumdisplay&fid=45&filter=hot"
    # The context manager closes the session's connections when done.
    with req_Session() as s:
        s.headers.update(headers)
        s.cookies.update(gen_anti_cc_cookies())
        # Without a timeout a stalled connection would hang the script forever.
        res = s.get(url=hot_url, timeout=15)
        res.raise_for_status()
        return res.content.decode('utf-8')
def mail_send(subject, mail_body):
    """Send a plain-text UTF-8 e-mail from SENDER to RECEIVER.

    Connects to HOST:PORT, upgrades the connection with STARTTLS when the
    server advertises it, logs in with SENDER/PWD and sends the message.
    SMTP errors are printed rather than raised.

    Args:
        subject: Value of the Subject header.
        mail_body: Plain-text message body.
    """
    msg = MIMEText(mail_body, 'plain', 'utf-8')
    msg['Subject'] = subject
    msg['From'] = SENDER
    msg['To'] = RECEIVER
    try:
        # The context manager closes the connection even if login or
        # sendmail raises.
        with smtplib.SMTP(HOST, PORT) as s:
            s.ehlo_or_helo_if_needed()
            # Encrypt the session before sending credentials whenever the
            # server supports it; otherwise continue as a plain connection.
            if s.has_extn('starttls'):
                s.starttls()
            s.login(SENDER, PWD)
            s.sendmail(SENDER, RECEIVER, msg.as_string())
    except smtplib.SMTPException as e:
        print(str(e))
def main(min_replies: int = 35):
    """Collect today's hot threads and mail them as "title URL" lines.

    Fetches the hot-thread page, keeps every thread row whose reply count is
    at least ``min_replies``, passes the joined list through the
    sensitive-word filter and sends it by e-mail.

    Args:
        min_replies: Minimum reply count for a thread to be included.
    """
    prefix = 'https://www.hostloc.com/'
    current_date = time.strftime("%Y-%m-%d", time.localtime())
    content = get_source()
    soup = BeautifulSoup(content, 'html5lib')

    items_title_list = []
    # The first two matched rows are skipped (presumably table header rows;
    # verify against the page markup).
    for item in soup.select('div.bm_c tr')[2:]:
        reply_link = item.select_one('td.num a.xi2')
        title_link = item.select_one('th.new a.s.xst')
        if reply_link is None or title_link is None:
            # Row lacks the expected cells; it is not a thread row.
            continue
        href = title_link.get('href')
        if href is None:
            continue
        try:
            reply_num = int(reply_link.get_text())
        except ValueError:
            # Reply count is not a plain number; skip the row.
            continue
        if reply_num >= min_replies:
            items_title_list.append(title_link.get_text() + ' ' + prefix + href)

    mail_send(subject=current_date + ' ' + 'Hostloc今日热帖',
              mail_body=multiple_replace('\n'.join(items_title_list)))
    # NOTE: mail_send prints SMTP errors instead of raising, so this line is
    # reached even when delivery failed.
    print('成功发送了一封邮件!')
# Script entry: run the job immediately when this file is executed.
main()
说明
如果需要把邮件发给自己,请自行修改脚本中的如下变量:
# SMTP server host and port that mail_send() connects to.
HOST = 'smtp.qq.com'
PORT = 587
# Sender address: used as the SMTP login name and as the mail's From header.
SENDER = ''
# Recipient address: used as the envelope recipient and as the To header.
RECEIVER = ''
# Password passed to the SMTP login together with SENDER.
PWD = ''
如需调整敏感词过滤,请自行修改以下函数中replace_mapping列表的值:
def multiple_replace(s):
    """Mask every sensitive word found in ``s``.

    Each ``(word, mask)`` pair of ``replace_mapping`` is applied in order with
    a plain, case-sensitive ``str.replace``.

    Args:
        s: Text to filter.

    Returns:
        The text with every listed word replaced by its mask.
    """
    # Edit this list to change which words are masked.
    replace_mapping = [
        ("键政", "***"),
        ("ddos", "***"),
        ("DDOS", "***"),
        ("党中央", "***"),
        ("扶墙", "***"),
    ]
    for word, mask in replace_mapping:
        s = s.replace(word, mask)
    return s