1. 首页
  2. > 商标注册 >

上交所财务报表怎样下载(下载财务报表的网站)

"""Download announcement PDFs listed by the Shanghai Stock Exchange (SSE).

The script fetches the bulletin index published at ``SseCrawl.url``, extracts
each record's PDF path and title with regular expressions, and saves every
PDF into a local directory.
"""
import os
import random
import re
import shutil
import urllib.parse
import urllib.request


class SseCrawl:
    """Crawler for the SSE bulletin index and the PDFs it links to."""

    # Characters that are rejected in file names on common file systems.
    _UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')

    # Size of the chunks streamed to disk (8 KiB; the original author chose
    # it to match the default UFS block size).
    _CHUNK_SIZE = 8192

    def __init__(self):
        self.url = (
            "http://www.sse.com.cn/disclosure/listedinfo/announcement/json/"
            "stock_bulletin_publish_order.json?v=0.46853839377888784"
        )
        # Pool of header dicts; one is picked at random for each download.
        self.headers = [
            {
                "User-Agent": (
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/78.0.3904.116 Safari/537.36"
                )
            },
        ]
        self.server = "http://www.sse.com.cn/"
        # One record: everything between '{"' and the next '},'.
        # NOTE: a record that is not followed by '},' is not matched.
        self.root_pattern = re.compile(r'\{"([\s\S]*?)\},')
        self.pdf_pattern = re.compile(
            r'"bulletinUrl":"([\s\S]*?)","securityCode"'
        )
        self.name_pattern = re.compile(
            r'"bulletinTitle":"([\s\S]*?)","bulletinClassic"'
        )

    def get_html(self):
        """Fetch the bulletin index and return its body decoded as UTF-8.

        Raises:
            requests.RequestException: on connection errors, timeouts or a
                non-2xx HTTP status.
        """
        # Imported here so that parsing and downloading remain usable when
        # the third-party ``requests`` package is not installed.
        import requests

        response = requests.get(self.url, timeout=30)
        response.raise_for_status()
        response.encoding = "utf-8"
        return response.text

    def analysis(self, htmls):
        """Extract download targets from the raw index text.

        Args:
            htmls: Text of the bulletin index as returned by ``get_html``.

        Returns:
            A list of dicts with the keys ``"name"`` (list of title strings
            matched in the record) and ``"address"`` (absolute PDF URL).
            Records without a ``bulletinUrl`` are skipped.
        """
        anchors = []
        for record in self.root_pattern.findall(htmls):
            pdf_paths = self.pdf_pattern.findall(record)
            if not pdf_paths:
                # Nothing to download; joining an empty path would point
                # at the server root instead of a document.
                continue
            anchors.append(
                {
                    "name": self.name_pattern.findall(record),
                    "address": urllib.parse.urljoin(self.server, pdf_paths[0]),
                }
            )
        return anchors

    @classmethod
    def _safe_filename(cls, name, address):
        """Build a file-system-safe file name for one download target.

        Args:
            name: Title string or list of title fragments.
            address: URL of the document; supplies the fallback name and
                the file extension.

        Returns:
            The title with unsafe characters replaced by ``_`` and the
            URL's extension appended when the title does not carry it.
        """
        url_basename = urllib.parse.urlparse(address).path.rsplit("/", 1)[-1]
        title = "".join(name).strip() or url_basename or "download"
        title = cls._UNSAFE_FILENAME_CHARS.sub("_", title)
        extension = os.path.splitext(url_basename)[1]
        if extension and not title.lower().endswith(extension.lower()):
            title += extension
        return title

    def download(self, anchors, directory="Pdf_Download"):
        """Download every anchor's document into ``directory``.

        Args:
            anchors: Iterable of dicts as produced by ``analysis``.
            directory: Target directory; created when missing. The process
                working directory is left untouched.

        Raises:
            urllib.error.URLError: if a document cannot be retrieved.
            OSError: if a file cannot be written.
        """
        os.makedirs(directory, exist_ok=True)
        for anchor in anchors:
            file_url = "".join(anchor["address"])
            # Request() needs a single header mapping, not the whole pool.
            request = urllib.request.Request(
                file_url, headers=random.choice(self.headers)
            )
            target = os.path.join(
                directory, self._safe_filename(anchor["name"], file_url)
            )
            with urllib.request.urlopen(request, timeout=60) as response, \
                    open(target, "wb") as output:
                shutil.copyfileobj(response, output, self._CHUNK_SIZE)

    def main(self):
        """Fetch the index, parse it and download all referenced PDFs."""
        self.download(self.analysis(self.get_html()))


if __name__ == "__main__":
    SseCrawl().main()

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权或违法违规的内容,请发送邮件至 123456@qq.com 举报,一经查实,本站将立刻删除。

联系我们

工作日:9:30-18:30,节假日休息