# Imports were collapsed onto one line in the original paste; restored one per line.
import requests
from bs4 import BeautifulSoup

cookie = 'this is cookies'
# Parse the raw "k=v; k=v" cookie header string into the dict requests expects.
# NOTE(review): split("=")[-1] keeps only the text after the LAST '=' — if a
# cookie value itself contains '=', this corrupts it; split("=", 1) is safer.
cookies = {i.split("=")[0]: i.split("=")[-1] for i in cookie.split("; ")}
headers = {
    'User-Agent': 'this is user-agent',
}

# Fetch the first 10 listing pages of the "ark" sub-forum and append every
# topic title to data_ark.txt.  (Original paste had the syntax error
# "for page inrange(10)" — missing space.)
for page in range(10):
    url = {
        "ng2": 'https://bbs.nga.cn/thread.php?fid=-447601&page=%s' % page,
        "ark": 'https://bbs.nga.cn/thread.php?fid=-34587507&page=%s' % page,
    }
    response = requests.get(url['ark'], headers=headers, cookies=cookies)
    soup = BeautifulSoup(response.text, 'lxml')
    # Open once per page in append mode, instead of once per title as the
    # original did; the with-block also guarantees the file is closed even
    # if parsing raises.
    with open('data_ark.txt', 'a', encoding='utf-8') as f:
        for each in soup.find_all('tbody'):
            link = each.find('a', class_='topic')
            if link is None:
                # Some <tbody> rows carry no topic link; the original would
                # crash with AttributeError here.
                continue
            f.write(link.get_text(strip=True) + '\n')
    print('response.status_code ==', response.status_code)
cookie = 'this is cookies'
# Turn the "k=v; k=v" cookie string into a dict for requests.
cookies = {i.split("=")[0]: i.split("=")[-1] for i in cookie.split("; ")}
headers = {
    'User-Agent': 'this is user-agent',
}

# Fetch and parse the first 10 listing pages of the "ark" sub-forum.
# (Original paste had the syntax error "for page inrange(10)" — missing space.)
for page in range(10):
    url = {
        "ng2": 'https://bbs.nga.cn/thread.php?fid=-447601&page=%s' % page,
        "ark": 'https://bbs.nga.cn/thread.php?fid=-34587507&page=%s' % page,
    }
    response = requests.get(url['ark'], headers=headers, cookies=cookies)
    soup = BeautifulSoup(response.text, 'lxml')
然后是写入文件,这部分折磨了我好久。我一开始的版本是:
3
1 2 3 4 5 6
# The author's first attempt, kept as the post's buggy example.
# BUG (intentional, for illustration): mode 'w' truncates the file on every
# single title, so after the loop only the last title survives.  The explicit
# f.close() inside the with-block is also redundant.  Only the paste-mangled
# "withopen" token has been repaired here.
for each in soup.find_all('tbody'):
    title = each.find('a', class_='topic').get_text(strip=True)
    with open('data_ark.txt', 'w', encoding='utf-8') as f:
        f.write('\n'.join([title]))
        f.write('\n')
        f.close()
但这样写会让文件的写入覆盖前一个,也就是说,无数轮循环结束,文件里只能保留最后一个标题。
但我不知道为什么,怀疑是with open('data'+'_'+'ark'+'.txt', 'w', encoding='utf-8') as f:的问题。我将其改成了f = open('data'+'_'+'ark'+'.txt', 'w', encoding='utf-8'),但还是同样的问题。
其实我的怀疑是对的,但没怀疑到点子上。
事实上是这个:
3
1
withopen('data'+'_'+'ark'+'.txt', 'a', encoding='utf-8') as f: