n12_hebei.py 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. #!/usr/bin/env python
  2. # coding:utf-8
  3. import requests,json
  4. from setting import proxies
  5. from urllib import parse
  6. from lxml import etree
  7. from mongo_cho import myco12,myco12_b,r_myco15
  8. from rety import retry
  9. r = requests.session()
  10. r.keep_alive = False
  11. #http://wzyy.hebei.chinatax.gov.cn/LawPublicity/law/adPenalty/taxDishonestyCases
  12. @retry(3)
  13. def r1(ny1,ny2,pg):
  14. url = 'http://wzyy.hebei.chinatax.gov.cn/LawPublicity/app-publicity-service/law/penalty/findTaxDishonestyCasesList?cid=27&uid='
  15. headers = {
  16. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
  17. }
  18. data = {
  19. "column":"adPenalty/taxDishonestyCases",
  20. "queryCity": "",
  21. "queryType": "",
  22. "queryDate": str(ny1) + '.' + str(ny2),
  23. "NSRMC": "",
  24. "NSRSBH": "",
  25. "ZCDZ": "",
  26. "ZZJGDM": "",
  27. "FDDBR": "",
  28. "FDDBZJH": "",
  29. "CWFZR": "",
  30. "CWFZRZJH": "",
  31. "pageSize": "10",
  32. "pageNum": pg,
  33. "orderByColumn": "",
  34. "isAsc": "asc",
  35. }
  36. response = r.post(url=url,headers=headers,data=data,proxies=proxies)
  37. # print(response.text)
  38. rsd = response.json()
  39. # print(rsd)
  40. rsl = rsd['rows']
  41. # print(rsl)
  42. list1 = []
  43. list2 = []
  44. if rsl:
  45. for i in rsl:
  46. uid = i['uid']
  47. utf = r_myco15.sismember('n12', uid) ##更改
  48. if not utf:
  49. list2.append(uid)
  50. list1.append(i)
  51. else:
  52. print('已存在,>>>n12')
  53. if list1:
  54. myco12.insert_many(list1)
  55. print('已存入原始库')
  56. if list2:
  57. myco12_b.insert_many(list1)
  58. print('已存入备份原始库')
  59. for mis in list2:
  60. r_myco15.sadd('n12', mis) ##更改
  61. # myco12.insert_many(rsl)
  62. # for i in rsl:
  63. # print(i)
  64. return rsd['total']
  65. # r1()
  66. def runs(ny1,ny2):
  67. tpg = r1(ny1,ny2,pg=1)
  68. for pg in range(2,tpg):
  69. print(pg,'===')
  70. r1(ny1,ny2,pg)
  71. runs('2023','8')