12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576 |
- #!/usr/bin/env python
- # coding:utf-8
- import requests,json
- from setting import proxies
- from urllib import parse
- from lxml import etree
- from mongo_cho import myco19,r_myco15,myco19_b
- from rety import retry
# Module-wide HTTP session reused by every page request in this script.
r = requests.Session()
# NOTE(review): ``keep_alive`` is not a documented requests.Session attribute;
# this assignment may have no effect on connection reuse - confirm.
r.keep_alive = False
@retry(3)
def r1(ny, pg, timeout=30):
    """Fetch one result page for period ``ny`` and store the unseen records.

    The page is requested from the ``gzcxAction.do`` endpoint with a fixed
    page size of 15.  Each returned record is identified by its ``'LSH'``
    field; records whose id is already a member of the ``'n19'`` set are
    skipped.  The remaining records are inserted into ``myco19`` and
    ``myco19_b`` and their ids are then added to the ``'n19'`` set.

    Args:
        ny: Period string used as the ``ssnd`` query value
            (``runs`` builds it as ``'<year>年<month>月'``).
        pg: Page number to request.
        timeout: Seconds to wait for the HTTP response before requests
            raises a timeout error instead of blocking indefinitely.

    Returns:
        The exclusive upper bound for ``range()`` over page numbers, i.e.
        ``ceil(count / 15) + 1`` where ``count`` comes from the response.
    """
    page_size = 15
    url = 'https://etax.hubei.chinatax.gov.cn/webroot/gzcxAction.do'
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
    }
    params = {
        "method": "zdsswfajcx",
        "page": pg,
        "limit": str(page_size),
        "nsrsbh": "",
        "zzjgdm": "",
        "nsrmc": "",
        "fddbrmc": "",
        "fddbrzjh": "",
        "cwfzrmc": "",
        "cwfzrzjh": "",
        "nsrlx": "",
        "ds": "",
        "zcdz": "",
        "ajxz": "",
        # NOTE(review): requests percent-encodes query params itself, so this
        # pre-quoted value goes out double-encoded.  Kept unchanged because
        # the endpoint may rely on it - confirm against the server.
        "ssnd": parse.quote(ny),
    }
    response = r.get(
        url=url,
        headers=headers,
        params=params,
        proxies=proxies,
        timeout=timeout,
    )
    payload = response.json()
    total = payload['count']
    # A page past the end may carry no data; treat a null payload as empty.
    records = payload['data'] or []

    # 'n19' is the set key flagged "change" in the original comments
    # (presumably edited per crawler - confirm).
    fresh_records = []
    fresh_ids = []
    for record in records:
        serial = record['LSH']
        if r_myco15.sismember('n19', serial):
            print('已存在,>>>n01')
            continue
        fresh_records.append(record)
        fresh_ids.append(serial)

    if fresh_records:
        myco19.insert_many(fresh_records)
        print('已存入原始库')
        myco19_b.insert_many(fresh_records)
        print('已存入备份原始库')
        # Mark ids as seen only after both inserts succeeded; one round trip.
        r_myco15.sadd('n19', *fresh_ids)

    # Ceiling division: an exact multiple of page_size must not add a page.
    return (total + page_size - 1) // page_size + 1
def runs(ny1, ny2):
    """Crawl every result page for one year/month period.

    Builds the period string ``'<ny1>年<ny2>月'``, fetches page 1 through
    ``r1`` to learn the page bound, then fetches pages 2 up to (but not
    including) that bound.

    Args:
        ny1: Year, converted with ``str()``.
        ny2: Month, converted with ``str()``.
    """
    period = f"{ny1!s}年{ny2!s}月"
    page_stop = r1(period, pg=1)
    print(period, '======')
    if not page_stop > 1:
        return
    # The bound from page 1 fixes the range; later return values are unused.
    for page in range(2, page_stop):
        print(page, '==')
        r1(period, page)
# Script entry: crawl the 2023 / month 11 period as soon as the module runs.
runs(ny1='2023', ny2='11')
|