股票数据定向爬虫.py（亲测有效）

import requests

from bs4 import BeautifulSoup

import traceback

import re

def getHTMLText(url, code='utf-8'):
    """Fetch ``url`` with HTTP GET and return the body decoded as ``code``.

    Args:
        url: Address to request.
        code: Encoding assigned to the response before its text is read.

    Returns:
        The decoded response text, or ``""`` when the request fails
        (connection error, timeout after 30 seconds, or an HTTP error
        status).
    """
    try:
        r = requests.get(url, timeout=30)
        # Treat 4xx/5xx answers as failures so an error page is never
        # handed to the HTML parsers as if it were real content.
        r.raise_for_status()
        r.encoding = code
        return r.text
    except requests.RequestException:
        # Best effort: the empty string is the "page unavailable" signal.
        return ""

def getStockList(lst, stockURL):
    """Append the codes found in the ``<tr id=...>`` rows of a listing page.

    The page at ``stockURL`` is downloaded and every ``tr`` element that has
    an ``id`` attribute is inspected. The first substring of that id matching
    ``[tr]\\d{6}`` -- one letter (``t`` or ``r``) followed by six digits -- is
    appended to ``lst``. Rows without an id, or whose id has no match, are
    skipped.

    Args:
        lst: List that receives the matched strings; modified in place.
        stockURL: Address of the listing page.

    Returns:
        None.
    """
    html = getHTMLText(stockURL)
    soup = BeautifulSoup(html, 'html.parser')
    for row in soup.find_all('tr'):
        row_id = row.attrs.get('id')
        if not isinstance(row_id, str):
            continue
        # NOTE: ``[tr]`` is a character class, so each entry keeps a single
        # leading letter in front of the six digits; getStockInfo drops that
        # first character when it builds the detail URL.
        match = re.search(r'[tr]\d{6}', row_id)
        if match:
            lst.append(match.group(0))

def getStockInfo(lst, stockURL, fpath):
    """Download the detail page of every entry in ``lst`` and save its data.

    For each entry the first character is dropped and the rest is inserted
    into ``stockURL + <code> + ".html"``. From the page's
    ``div.merchandiseDetail`` element the function collects the first
    whitespace-separated token of the ``fundDetail-tit`` element (stored
    under the key ``'股票名称'``) and the text of each ``dt``/``dd`` pair.
    The resulting dict is appended to ``fpath`` as one ``str(dict)`` line,
    encoded as UTF-8.

    Pages that cannot be fetched are skipped. Pages that cannot be parsed
    are skipped after their traceback is printed. A progress percentage is
    printed after every entry, whether or not it succeeded.

    Args:
        lst: Entries as produced by ``getStockList`` (one prefix character
            followed by the code).
        stockURL: Base address of the detail pages.
        fpath: Path of the output file; records are appended.

    Returns:
        None.
    """
    total = len(lst)
    for count, stock in enumerate(lst, start=1):
        url = stockURL + stock[1:] + ".html"
        html = getHTMLText(url)
        if html != "":
            try:
                soup = BeautifulSoup(html, 'html.parser')
                stockInfo = soup.find('div', attrs={'class': 'merchandiseDetail'})
                name = stockInfo.find_all(attrs={'class': 'fundDetail-tit'})[0]
                infoDict = {'股票名称': name.text.split()[0]}

                # Pair each <dt> label with the <dd> value at the same position.
                labels = stockInfo.find_all('dt')
                values = stockInfo.find_all('dd')
                for label, value in zip(labels, values):
                    infoDict[label.text] = value.text

                with open(fpath, 'a', encoding='utf-8') as f:
                    f.write(str(infoDict) + '\n')
            except Exception:
                # One malformed page must not abort the whole crawl; report
                # it and move on to the next entry.
                traceback.print_exc()
        # Count every processed entry, including unavailable pages, so the
        # progress figure reaches 100% at the end of the list.
        print('\r当前速度：{:.2f}%'.format(count * 100 / total), end='')

def main():
    """Collect the codes from the listing page and save every detail record.

    The listing URL, the detail-page base URL and the output file path are
    fixed inside this function.
    """
    stock_list_url = 'https://fund.eastmoney.com/fund.html#os_0;isall_0;ft_;pt_1'
    stock_info_url = 'https://fund.eastmoney.com/'
    output_file = 'D://桌面//BaiduStockInfo.txt'
    slist = []
    getStockList(slist, stock_list_url)
    getStockInfo(slist, stock_info_url, output_file)


# Run the crawl only when executed as a script, so importing this module does
# not start network requests or write files.
if __name__ == '__main__':
    main()

股票数据定向爬虫.py（亲测有效）的相关教程结束。

《股票数据定向爬虫.py（亲测有效）.doc》

下载本文的Word格式文档，以方便收藏与打印。

股票数据定向爬虫.py（亲测有效）

股票数据定向爬虫.py（亲测有效）的相关教程结束。

相关推荐

Python网络爬虫实战案例之：7000本电子书下载（2）

python爬虫爬取笔趣网小说网站过程图解

scrapy爬虫如何爬取javascript内容

python使用selenium实现爬虫知乎

爬虫之header

【爬虫+数据清洗+可视化】用Python分析“淄博烧烤”的评论数据

python爬虫防止IP被封的一些措施(转)

python利用urllib实现的爬取京东网站商品图片的爬虫