#!/usr/bin/env python
# coding:utf-8
"""Scrape the Hebei tax bureau "tax dishonesty cases" listing.

Each result page is fetched with a form POST; rows whose ``uid`` is not yet
recorded in the ``n12`` set are written to the primary and backup
collections, and their uids are then added to that set.
"""
import json

import requests
from lxml import etree
from urllib import parse

from mongo_cho import myco12, myco12_b, r_myco15
from rety import retry
from setting import proxies

r = requests.session()
# NOTE(review): requests.Session does not document a `keep_alive` attribute,
# so this assignment presumably has no effect; kept to preserve behaviour.
r.keep_alive = False

# Listing page this endpoint backs:
# http://wzyy.hebei.chinatax.gov.cn/LawPublicity/law/adPenalty/taxDishonestyCases
LIST_URL = (
    'http://wzyy.hebei.chinatax.gov.cn/LawPublicity/app-publicity-service'
    '/law/penalty/findTaxDishonestyCasesList?cid=27&uid='
)
PAGE_SIZE = 10        # rows requested per page (sent as "pageSize")
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can block forever
SEEN_SET_KEY = 'n12'  # set of already-stored uids (author's marker: "change")


def _store_new_rows(rows):
    """Insert rows whose uid is not yet in the seen-set, then record the uids."""
    new_rows = []
    new_uids = []
    for row in rows or []:
        uid = row['uid']
        if r_myco15.sismember(SEEN_SET_KEY, uid):
            print('已存在,>>>n12')
            continue
        new_uids.append(uid)
        new_rows.append(row)

    if not new_rows:
        return

    myco12.insert_many(new_rows)
    print('已存入原始库')
    myco12_b.insert_many(new_rows)
    print('已存入备份原始库')
    # Mark uids as seen only after both inserts succeeded.
    for uid in new_uids:
        r_myco15.sadd(SEEN_SET_KEY, uid)


@retry(3)
def r1(ny1, ny2, pg):
    """Fetch one result page for a given period and store its new rows.

    Args:
        ny1: First part of the query date (the caller passes a year).
        ny2: Second part of the query date (the caller passes a month).
            Sent to the server as ``"<ny1>.<ny2>"``.
        pg: Page number to request (sent as ``pageNum``).

    Returns:
        The ``total`` field of the JSON response, unchanged.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            status.
        KeyError: if the response JSON lacks ``rows`` or ``total``.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
    }
    data = {
        "column": "adPenalty/taxDishonestyCases",
        "queryCity": "",
        "queryType": "",
        "queryDate": str(ny1) + '.' + str(ny2),
        "NSRMC": "",
        "NSRSBH": "",
        "ZCDZ": "",
        "ZZJGDM": "",
        "FDDBR": "",
        "FDDBZJH": "",
        "CWFZR": "",
        "CWFZRZJH": "",
        "pageSize": str(PAGE_SIZE),
        "pageNum": pg,
        "orderByColumn": "",
        "isAsc": "asc",
    }
    response = r.post(
        url=LIST_URL,
        headers=headers,
        data=data,
        proxies=proxies,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    payload = response.json()
    _store_new_rows(payload['rows'])
    return payload['total']


def runs(ny1, ny2):
    """Fetch every result page for the period ``ny1``.``ny2``.

    Page 1 is fetched first to learn ``total``; the remaining pages are then
    requested in order.

    NOTE(review): ``total`` is assumed to be the number of matching records
    (the pageSize/pageNum/rows/total response shape suggests so), so the last
    page is ``ceil(total / PAGE_SIZE)``. The previous ``range(2, total)``
    requested about ten times more pages than exist. Confirm against the API.
    """
    total = int(r1(ny1, ny2, pg=1))
    last_page = -(-total // PAGE_SIZE)  # ceiling division
    for pg in range(2, last_page + 1):
        print(pg, '===')
        r1(ny1, ny2, pg)


# Kept at module level (unguarded) so running or importing this file behaves
# as before.
runs('2023', '8')