1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
- #!/usr/bin/env python
- # coding:utf-8
- import requests,json
- from setting import proxies
- from urllib import parse
- from lxml import etree
- from mongo_cho import myco24,r_myco15,myco24_b
- from rety import retry
# Shared HTTP session used by every request in this module.
r = requests.Session()
# NOTE(review): requests.Session does not read a `keep_alive` attribute, so
# this assignment alone does not disable connection reuse. It is kept only in
# case other code inspects it.
r.keep_alive = False
# Ask the server (and any proxy) to close the connection after each response,
# which is what the assignment above appears to have been intended to do.
r.headers["Connection"] = "close"
@retry(3)
def r1(ny1, ny2, pg, timeout=30):
    """Fetch one result page for a year/month and store the unseen records.

    The endpoint is queried with ``cx_xsrq=<ny1>-<ny2>`` (month left-padded
    to two characters) and ``page=<pg>``. Each record's ``ajbh`` value is
    checked against the ``'n24'`` set via ``r_myco15.sismember``; records
    whose key is not in the set are inserted into ``myco24`` and
    ``myco24_b`` and their keys are then added to the set.

    The function is wrapped by the project's ``retry(3)`` decorator; its
    exact retry semantics are defined in the ``rety`` module.

    Args:
        ny1: Year component of the query (formatted into the URL as-is).
        ny2: Month component of the query; zero-padded to two characters.
        pg: Page number to request.
        timeout: Seconds passed to ``requests`` as the request timeout so a
            stalled proxy or server cannot block the run indefinitely.

    Returns:
        The ``totalPage`` value reported by the response, as an ``int``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the response JSON lacks the expected keys.
    """
    month = str(ny2).zfill(2)
    url = (
        'http://shanxi.chinatax.gov.cn/common/extQuery'
        '?sqlid=web_zdsswf&limit=10&cx_lx=0'
        '&cx_xsrq={ny1}-{ny2}&page={pg}'
    ).format(ny1=ny1, ny2=month, pg=pg)
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
    }
    data = {
        "start": "0",
    }

    response = r.post(
        url=url,
        headers=headers,
        data=data,
        proxies=proxies,
        timeout=timeout,
    )
    # Fail with a clear HTTP error instead of an opaque JSON decode error.
    response.raise_for_status()
    message = response.json()['message']
    # A null list is treated as "no records on this page".
    records = message['list'] or []
    total_pages = message['totalPage']

    new_records = []
    new_keys = []
    seen_on_page = set()
    for record in records:
        key = record['ajbh']
        # NOTE: the 'n24' set name was flagged "change" in the original;
        # presumably it must be adjusted per data source - confirm.
        if key in seen_on_page or r_myco15.sismember('n24', key):
            print('已存在,>>>n24')
            continue
        seen_on_page.add(key)
        new_records.append(record)
        new_keys.append(key)

    if new_records:
        myco24.insert_many(new_records)
        print('已存入原始库')
        myco24_b.insert_many(new_records)
        print('已存入备份原始库')
        # Mark keys as seen only after both inserts have succeeded.
        for key in new_keys:
            r_myco15.sadd('n24', key)
    return int(total_pages)
- # if rsl:
- # print('1')
- # myco24.insert_many(rsl)
- # ny1 = 2020
- # ny2 = 12
def runs(ny1, ny2):
    """Crawl every result page for the given year and month.

    Page 1 is requested first because its response carries the total page
    count; the remaining pages (2..total) are then requested in order, so
    no page is fetched more than once.

    Args:
        ny1: Year component forwarded to ``r1``.
        ny2: Month component forwarded to ``r1``.
    """
    print(ny1, ny2, '---------')
    print(1, '==================')
    total_pages = r1(ny1, ny2, pg=1)
    for pg in range(2, total_pages + 1):
        print(pg, '==================')
        r1(ny1, ny2, pg)
# Year and month to crawl; the crawl starts as soon as this module is run.
ny1, ny2 = 2023, 12
runs(ny1, ny2)
|