n26_guizhou.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. #!/usr/bin/env python
  2. # coding:utf-8
  3. import requests,json
  4. from setting import proxies
  5. from urllib import parse
  6. from lxml import etree
  7. from mongo_cho import myco26,r_myco15,myco26_b
  8. from rety import retry
  9. r = requests.session()
  10. r.keep_alive = False
  11. @retry(3)
  12. def r1(ny1,ny2,pg):
  13. url = 'http://guizhou.chinatax.gov.cn/import/taxApi'
  14. headers = {
  15. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
  16. }
  17. data = {
  18. "pageNum": pg,
  19. "pageSize": 10,
  20. "siteId": 502424,
  21. "months": "{}月".format(ny2),
  22. "years": "{}年".format(ny1),
  23. "isPage": True
  24. }
  25. response = r.post(url=url,json=data,headers=headers,proxies=proxies)
  26. # print(response.text)
  27. rsd = response.json()
  28. rsl = rsd['data']['list']
  29. tpg = rsd['data']['total']
  30. rpg = tpg//10
  31. # print(rpg)
  32. # if rsl:
  33. # myco26.insert_many(rsl)
  34. list1 = []
  35. list2 = []
  36. for i in rsl:
  37. # print(i)
  38. url1 = i['docpuburl']
  39. print(url1)
  40. utf = r_myco15.sismember('n26', url1) ##更改
  41. if not utf:
  42. list1.append(i)
  43. list2.append(url1)
  44. else:
  45. print('已存在,>>>n26')
  46. if list1:
  47. myco26.insert_many(list1)
  48. print('已存入原始库')
  49. if list2:
  50. myco26_b.insert_many(list1)
  51. print('已存入备份原始库')
  52. for mis in list2:
  53. r_myco15.sadd('n26', mis) ##更改
  54. return rpg
  55. def runs(ny1,ny2):
  56. # for ny1 in range(2021,2022):
  57. # for ny2 in range(1,4):
  58. print(ny1,ny2,'=========')
  59. rpg = r1(ny1,ny2,pg=1)
  60. if rpg > 0:
  61. for pg in range(2,rpg+1):
  62. r1(ny1, ny2,pg)
  63. # pass
  64. runs(2023,4)