1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- #!/usr/bin/env python
- # coding:utf-8
- import requests,json
- from setting import proxies
- from urllib import parse
- from lxml import etree
- from mongo_cho import myco12,myco12_b,r_myco15
- from rety import retry
# Single HTTP session shared by every request made in this script.
r = requests.Session()
# NOTE(review): presumably meant to turn off connection keep-alive; confirm
# that requests actually honors a `keep_alive` attribute on a Session.
r.keep_alive = False
- #http://wzyy.hebei.chinatax.gov.cn/LawPublicity/law/adPenalty/taxDishonestyCases
@retry(3)
def r1(ny1, ny2, pg, timeout=30):
    """Fetch one page of the tax-dishonesty case list and store unseen rows.

    Posts the query to the Hebei tax bureau endpoint through the shared
    session ``r`` (using the configured ``proxies``). Rows whose ``uid`` is
    not yet a member of the Redis set ``'n12'`` are inserted into ``myco12``
    and ``myco12_b``, and their uids are then added to that set.

    Args:
        ny1: First part of the query date (joined to ``ny2`` with a dot,
            e.g. ``'2023'`` and ``'8'`` become ``'2023.8'``).
        ny2: Second part of the query date.
        pg: Value sent as ``pageNum``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``total`` field of the JSON response.
        NOTE(review): presumably the total number of matching records
        rather than a page count -- confirm against the service.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the response lacks ``rows``/``total`` or a row lacks
            ``uid``.

    NOTE(review): the function is wrapped by the project's ``retry(3)``
    decorator, whose exact behavior on these exceptions is not visible here.
    """
    url = 'http://wzyy.hebei.chinatax.gov.cn/LawPublicity/app-publicity-service/law/penalty/findTaxDishonestyCasesList?cid=27&uid='
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
    }
    data = {
        "column": "adPenalty/taxDishonestyCases",
        "queryCity": "",
        "queryType": "",
        "queryDate": f"{ny1}.{ny2}",
        "NSRMC": "",
        "NSRSBH": "",
        "ZCDZ": "",
        "ZZJGDM": "",
        "FDDBR": "",
        "FDDBZJH": "",
        "CWFZR": "",
        "CWFZRZJH": "",
        "pageSize": "10",
        "pageNum": pg,
        "orderByColumn": "",
        "isAsc": "asc",
    }
    # Without a timeout a stalled proxy or server would block the crawl forever.
    response = r.post(url=url, headers=headers, data=data,
                      proxies=proxies, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    rsd = response.json()
    rsl = rsd['rows']

    new_rows = []
    new_uids = []
    # Uids accepted from this page so far; Redis is only updated after the
    # inserts, so it cannot catch a uid repeated within the same page.
    seen_in_page = set()
    for row in rsl or ():
        uid = row['uid']
        if uid in seen_in_page or r_myco15.sismember('n12', uid):
            print('已存在,>>>n12')
            continue
        seen_in_page.add(uid)
        new_uids.append(uid)
        new_rows.append(row)

    if new_rows:
        myco12.insert_many(new_rows)
        print('已存入原始库')
        myco12_b.insert_many(new_rows)
        print('已存入备份原始库')
        # Mark uids as stored only after both inserts have succeeded.
        for uid in new_uids:
            r_myco15.sadd('n12', uid)
    return rsd['total']
- # r1()
def runs(ny1, ny2, fetch_page=None):
    """Crawl every result page for the given query period.

    Page 1 is fetched first; its return value is used to work out how many
    further pages to request.

    Args:
        ny1: First part of the query date, passed through to the fetcher.
        ny2: Second part of the query date, passed through to the fetcher.
        fetch_page: Callable ``(ny1, ny2, pg) -> total`` used to fetch and
            store one page. Defaults to the module-level ``r1``.
    """
    if fetch_page is None:
        fetch_page = r1
    # Must agree with the "pageSize" value ("10") that r1 sends to the server.
    page_size = 10

    total = fetch_page(ny1, ny2, pg=1)
    # NOTE(review): `total` is treated as the number of matching records
    # (the request/response field names look like RuoYi-style paging) --
    # confirm. The previous `range(2, total)` used it directly as an
    # exclusive page bound, which requests far too many pages for a record
    # count and would skip the last page for a page count.
    last_page = (total + page_size - 1) // page_size
    for pg in range(2, last_page + 1):
        print(pg, '===')
        fetch_page(ny1, ny2, pg)
# Script entry: crawl the query period 2023 / 8 as soon as the module runs.
runs(ny1='2023', ny2='8')
|