12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667 |
- #!/usr/bin/env python
- # coding:utf-8
- import requests,json
- from setting import proxies
- from urllib import parse
- from lxml import etree
- from mongo_cho import myco26,r_myco15,myco26_b
- from rety import retry
# Shared HTTP session used by every request in this script.
r = requests.session()
# requests.Session has no ``keep_alive`` attribute, so assigning to it is a
# silent no-op.  Sending ``Connection: close`` on every request is what
# actually stops the connection from being kept alive and reused.
r.headers['Connection'] = 'close'
@retry(3)
def r1(ny1, ny2, pg):
    """Fetch one result page from the Guizhou tax API and store unseen records.

    A JSON query for the given year/month and page number is POSTed to the
    API.  Each returned record is keyed by its ``docpuburl``; records whose
    URL is already a member of the ``'n26'`` set in ``r_myco15`` are skipped.
    The remaining records are inserted into ``myco26`` and ``myco26_b`` and
    their URLs are then added to the ``'n26'`` set.

    NOTE(review): ``r_myco15`` is presumably a Redis client and ``myco26`` /
    ``myco26_b`` MongoDB collections (primary and backup) -- confirm against
    ``mongo_cho``.

    Args:
        ny1: Year to query; sent as ``"<ny1>年"``.
        ny2: Month to query; sent as ``"<ny2>月"``.
        pg: 1-based page number to request.

    Returns:
        The number of result pages, i.e. ``data.total`` divided by the page
        size and rounded up, so a final partially filled page is counted.
        Assumes ``data.total`` is the total record count -- TODO confirm.
    """
    url = 'http://guizhou.chinatax.gov.cn/import/taxApi'
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
    }
    page_size = 10
    data = {
        "pageNum": pg,
        "pageSize": page_size,
        "siteId": 502424,
        "months": "{}月".format(ny2),
        "years": "{}年".format(ny1),
        "isPage": True,
    }
    # Without a timeout a stalled proxy would block this call forever and the
    # retry decorator would never get a chance to run again.
    response = r.post(url=url, json=data, headers=headers, proxies=proxies,
                      timeout=30)
    rsd = response.json()
    # Treat a null/absent record list as an empty page instead of crashing.
    rsl = rsd['data']['list'] or []
    tpg = rsd['data']['total']
    # Ceiling division: plain ``total // page_size`` drops the last page
    # whenever the total is not an exact multiple of the page size.
    rpg = -(-tpg // page_size)

    new_docs = []
    new_urls = []
    for doc in rsl:
        doc_url = doc['docpuburl']
        print(doc_url)
        # 'n26' is the dedup set key for this site; the original author
        # flagged it as a value to change when adapting the script.
        if r_myco15.sismember('n26', doc_url):
            print('已存在,>>>n26')
            continue
        new_docs.append(doc)
        new_urls.append(doc_url)

    if new_docs:
        myco26.insert_many(new_docs)
        print('已存入原始库')
        myco26_b.insert_many(new_docs)
        print('已存入备份原始库')
        # Mark URLs as seen only after both inserts have succeeded.
        for doc_url in new_urls:
            r_myco15.sadd('n26', doc_url)
    return rpg
def runs(ny1, ny2):
    """Crawl all result pages for one year/month combination.

    Page 1 is fetched first; the value ``r1`` returns for it is used as the
    number of the last page, and pages 2 through that number are then fetched
    in order.

    Args:
        ny1: Year to crawl.
        ny2: Month to crawl.
    """
    print(ny1, ny2, '=========')
    last_page = r1(ny1, ny2, pg=1)
    if last_page <= 0:
        # Nothing beyond the first page.
        return
    for page in range(2, last_page + 1):
        r1(ny1, ny2, pg=page)
# Script entry: crawl the listings for April 2023.
runs(ny1=2023, ny2=4)
|