#!/usr/bin/env python
# coding:utf-8
"""Fetch paginated records from the Guizhou tax bureau API and store new ones.

Each page is POSTed to the ``taxApi`` endpoint.  Records whose ``docpuburl``
is not yet a member of the Redis set ``n26`` are inserted into the primary
and backup Mongo collections, and their URLs are then added to that set.
"""
import json
from urllib import parse

import requests
from lxml import etree

from setting import proxies
from mongo_cho import myco26, r_myco15, myco26_b
from rety import retry

API_URL = 'http://guizhou.chinatax.gov.cn/import/taxApi'
SITE_ID = 502424
# Single source of truth for the page size: it is sent in the request payload
# and also used to turn the reported total into a page count.
PAGE_SIZE = 10
# Redis set that holds the URLs already stored (change per data source).
SEEN_KEY = 'n26'
# Seconds before a stalled request is aborted so it cannot hang forever.
REQUEST_TIMEOUT = 30
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
}

r = requests.session()
# NOTE(review): requests' Session does not appear to expose a ``keep_alive``
# attribute, so this assignment likely has no effect; kept as in the
# original -- confirm before removing.
r.keep_alive = False


def _page_count(total, page_size=PAGE_SIZE):
    """Return how many pages are needed to hold ``total`` records."""
    # Ceiling division: a trailing, partially filled page still counts.
    return (total + page_size - 1) // page_size


@retry(3)
def r1(ny1, ny2, pg):
    """Fetch one result page and persist the records not seen before.

    Args:
        ny1: Year, sent to the API formatted as ``"<ny1>年"``.
        ny2: Month, sent to the API formatted as ``"<ny2>月"``.
        pg: 1-based page number to request.

    Returns:
        The number of pages available for this year/month, derived from the
        ``total`` field of the response (assumed to be a record count --
        TODO confirm against the API).
    """
    data = {
        "pageNum": pg,
        "pageSize": PAGE_SIZE,
        "siteId": SITE_ID,
        "months": "{}月".format(ny2),
        "years": "{}年".format(ny1),
        "isPage": True,
    }
    response = r.post(
        url=API_URL,
        json=data,
        headers=HEADERS,
        proxies=proxies,
        timeout=REQUEST_TIMEOUT,
    )
    rsd = response.json()
    # Guard against a null list on an empty page.
    records = rsd['data']['list'] or []
    rpg = _page_count(rsd['data']['total'])

    new_records = []
    for record in records:
        url1 = record['docpuburl']
        print(url1)
        if r_myco15.sismember(SEEN_KEY, url1):
            print('已存在,>>>n26')
        else:
            new_records.append(record)

    if new_records:
        myco26.insert_many(new_records)
        print('已存入原始库')
        myco26_b.insert_many(new_records)
        print('已存入备份原始库')
        # Mark URLs as seen only after both inserts have succeeded.
        for record in new_records:
            r_myco15.sadd(SEEN_KEY, record['docpuburl'])
    return rpg


def runs(ny1, ny2):
    """Crawl every page for the given year (``ny1``) and month (``ny2``).

    The first page is fetched to learn the page count, then pages 2..N are
    fetched in order.
    """
    print(ny1, ny2, '=========')
    rpg = r1(ny1, ny2, pg=1)
    if rpg > 0:
        for pg in range(2, rpg + 1):
            r1(ny1, ny2, pg)


# NOTE: runs on import as well as on direct execution, as in the original.
runs(2023, 4)