#!/usr/bin/env python
# coding:utf-8
"""Fetch paged records from the ``hardcasegetdatanew`` endpoint and store them.

For a given year/month the script POSTs a search form page by page, skips
records whose ``id`` is already a member of the Redis set ``n20``, inserts the
remaining records into two MongoDB collections (primary and backup) and then
adds their ids to the Redis set.
"""
import json

import requests
from setting import proxies
from urllib import parse
from lxml import etree
from mongo_cho import myco20, r_myco15, myco20_b
from rety import retry

URL = 'http://hunan.chinatax.gov.cn/hardcasegetdatanew'
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
}
# Name of the Redis set holding the ids of records that were already stored.
SEEN_KEY = 'n20'
# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would block forever and the retry decorator could never kick in.
REQUEST_TIMEOUT = 30

r = requests.session()
# NOTE(review): requests.Session documents no ``keep_alive`` attribute, so this
# assignment probably has no effect; kept as-is to avoid changing behavior.
r.keep_alive = False


def _format_period(ny1, ny2):
    """Return ``(type_value, date_label)`` for year *ny1* and month *ny2*.

    The month is zero-padded to two digits, so ``('2022', '3')`` gives
    ``('202203', '2022/03')`` and ``('2022', '11')`` gives
    ``('202211', '2022/11')``.
    """
    year = str(ny1)
    month = str(ny2).zfill(2)
    return year + month, year + '/' + month


@retry(3)
def r1(ny1, ny2, pg):
    """Fetch page *pg* for the given period and persist records not seen before.

    Args:
        ny1: Year, e.g. ``'2022'``.
        ny2: Month number, with or without a leading zero, e.g. ``'3'`` or ``'11'``.
        pg: Page number to request (sent as the ``page`` form field).

    Returns:
        The total number of pages, read from ``hardCasePage.totalPages`` in
        the JSON response, as an ``int``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        (How exceptions interact with ``@retry(3)`` depends on the ``rety``
        module, which is not visible here.)
    """
    type_value, dt = _format_period(ny1, ny2)
    data = {
        "type": "3",
        "type_value": type_value,
        "case_type": "1",
        "page": pg,
        "limit": "10",
        "is_search": "0",
        "taxpayerName": "",
        "taxpayerNumber": "",
        "organizationalCode": "",
        "place": "",
        "legalName": "",
        "legalIdCard": "",
        "financeName": "",
        "financeIdCard": "",
        "personName": "",
        "personIdCard": "",
        # NOTE(review): hard-coded CSRF token; presumably the server does not
        # validate it per session -- confirm.
        "_csrf": "fe7aeeb7-63a9-4770-9f35-84869a82d042",
    }
    response = r.post(
        url=URL,
        headers=HEADERS,
        data=data,
        proxies=proxies,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail with a clear HTTP error instead of an opaque JSON/KeyError later on.
    response.raise_for_status()
    rsd = response.json()
    records = rsd['data']
    total_pages = rsd['hardCasePage']['totalPages']

    new_records = []
    new_ids = []
    # ``records`` may be empty/None on pages without data; treat that as no rows.
    for record in records or []:
        record['date'] = dt
        print(record)
        record_id = record['id']
        # Membership in the Redis set marks the record as already stored.
        if r_myco15.sismember(SEEN_KEY, record_id):
            print('已存在,>>>n20')
            continue
        new_records.append(record)
        new_ids.append(record_id)

    if new_records:
        myco20.insert_many(new_records)
        print('已存入原始库')
        myco20_b.insert_many(new_records)
        print('已存入备份原始库')
        # Mark ids as seen only after both inserts succeeded.
        for record_id in new_ids:
            r_myco15.sadd(SEEN_KEY, record_id)

    return int(total_pages)


def runs(ny1, ny2):
    """Fetch and store every result page for year *ny1* and month *ny2*.

    The first request reports the total page count; pages 2..N are then
    fetched in order.
    """
    total_pages = r1(ny1, ny2, pg=1)
    for pg in range(2, total_pages + 1):
        print(pg, '====')
        r1(ny1, ny2, pg)


# Runs whenever this file is executed (or imported), as in the original script.
runs('2022', '11')