正则爬取'豆瓣之乘风破浪的姐姐'的评论并存入excel文档

2023-03-08

import requests
import re
import pandas as pd


def extract_comments(text):
    """Extract reply authors and plain-text reply bodies from topic-page HTML.

    Args:
        text: HTML source of the topic page.

    Returns:
        A ``(authors, comments)`` tuple of two lists of strings. Authors are
        the link texts captured by the author pattern; comments are the
        reply bodies with any nested tags removed and surrounding whitespace
        stripped.

    Note:
        The two patterns are matched independently of each other, so the two
        lists are not guaranteed to have the same length.
    """
    authors = re.findall(
        r'<div class="bg-img-green">.*?<h4>.*?<a.*?>(.*?)</a>',
        text,
        re.DOTALL,
    )
    raw_replies = re.findall(
        r'<p class=" reply-content">(.*?)</p>',
        text,
        re.DOTALL,
    )
    # Reply bodies can contain inline markup (links, line breaks); drop the
    # tags so that only the visible text is kept.
    comments = [re.sub(r'<.*?>', "", reply).strip() for reply in raw_replies]
    return authors, comments


def parse_page(url, output_path='23333.xlsx'):
    """Download a topic page and save its replies to an Excel workbook.

    Args:
        url: Address of the topic page to fetch.
        output_path: Destination of the workbook. Defaults to the file name
            the script has always written to.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within 10 seconds.
        ValueError: Raised by pandas when the number of extracted authors
            differs from the number of extracted comments.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299'
    }
    # requests applies no timeout by default, so an unresponsive server
    # would otherwise block the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an error response instead of parsing an error page.
    response.raise_for_status()

    authors, comments = extract_comments(response.text)
    df = pd.DataFrame({
        '作者': authors,
        '评论': comments,
    })
    # The ``encoding`` argument of ``to_excel`` was removed in pandas 2.0 and
    # passing it raises TypeError there, so it is intentionally omitted.
    df.to_excel(output_path)


def main():
    """Scrape the hard-coded topic page into the default workbook."""
    url = 'https://www.douban.com/group/topic/184693273/'
    parse_page(url)


if __name__ == '__main__':
    main()

正则爬取'豆瓣之乘风破浪的姐姐'的评论并存入excel文档的相关教程结束。

《正则爬取'豆瓣之乘风破浪的姐姐'的并存入excel文档.doc》

下载本文的Word格式文档,以方便收藏与打印。