n20_hunnan.py 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. #!/usr/bin/env python
  2. # coding:utf-8
  3. import requests,json
  4. from setting import proxies
  5. from urllib import parse
  6. from lxml import etree
  7. from mongo_cho import myco20,r_myco15,myco20_b
  8. from rety import retry
  9. r = requests.session()
  10. r.keep_alive = False
  11. @retry(3)
  12. def r1(ny1,ny2,pg):
  13. url = 'http://hunan.chinatax.gov.cn/hardcasegetdatanew'
  14. headers = {
  15. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
  16. }
  17. type_value = str(ny1) + '0' + str(ny2)
  18. dt = str(ny1) + '/0' + str(ny2)
  19. data = {
  20. "type":"3",
  21. "type_value": type_value,
  22. "case_type": "1",
  23. "page": pg,
  24. "limit": "10",
  25. "is_search": "0",
  26. "taxpayerName": "",
  27. "taxpayerNumber": "",
  28. "organizationalCode": "",
  29. "place": "",
  30. "legalName": "",
  31. "legalIdCard": "",
  32. "financeName": "",
  33. "financeIdCard": "",
  34. "personName": "",
  35. "personIdCard": "",
  36. "_csrf": "fe7aeeb7-63a9-4770-9f35-84869a82d042",
  37. }
  38. response = r.post(url=url,headers=headers,data=data,proxies=proxies)
  39. # print(response.text)
  40. rsd = response.json()
  41. rsl = rsd['data']
  42. rpg = rsd['hardCasePage']['totalPages']
  43. # print(rpg)
  44. list1 = []
  45. list2 = []
  46. if rsl:
  47. for i in rsl:
  48. i['date'] = dt
  49. print(i)
  50. url1 = i['id']
  51. utf = r_myco15.sismember('n20', url1) ##更改
  52. if not utf:
  53. list1.append(i)
  54. list2.append(url1)
  55. else:
  56. print('已存在,>>>n20')
  57. if list1:
  58. myco20.insert_many(list1)
  59. print('已存入原始库')
  60. if list2:
  61. myco20_b.insert_many(list1)
  62. print('已存入备份原始库')
  63. for mis in list2:
  64. r_myco15.sadd('n20', mis) ##更改
  65. # if list1:
  66. # myco20.insert_many(list1)
  67. return int(rpg)
  68. def runs(ny1,ny2):
  69. rpg = r1(ny1,ny2,pg=1)
  70. if rpg>1:
  71. for pg in range(2,rpg+1):
  72. print(pg,'====')
  73. r1(ny1,ny2,pg)
  74. runs('2022','11')