n30_xj.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. #!/usr/bin/env python
  2. # coding:utf-8
  3. import requests,json,time
  4. import random,string
  5. from setting import proxies
  6. from urllib import parse
  7. from lxml import etree
  8. from pymongo import MongoClient
  9. from a_cfg_dg_zsq import tail_call_optimized
# Client for the MongoDB server at 127.0.0.1:27017.
myclients = MongoClient("mongodb://127.0.0.1:27017/")
# Collection 'xinjiang' of database 'shuiwu_ml'; runs() iterates it and reads
# each document's 'name' field to obtain the names to query.
myco_jb1 = myclients['shuiwu_ml']['xinjiang']
import urllib3
# The HTTP calls in this file pass verify=False; silence the
# InsecureRequestWarning that would otherwise be emitted for them.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# myco30 is the destination r1() writes fetched records to via insert_many
# (presumably a MongoDB collection -- confirm in mongo_cho).
from mongo_cho import myco30
# Single HTTP session shared by every request made in this file.
r = requests.session()
# NOTE(review): requests' Session does not appear to define a `keep_alive`
# attribute, so this assignment probably has no effect on connection reuse.
# If disabling keep-alive is really intended, sending a "Connection: close"
# header is the usual way -- confirm before relying on this line.
r.keep_alive = False
  17. def retry(times, exceptions=None):
  18. exceptions = exceptions if exceptions is not None else Exception
  19. def wrapper(func):
  20. def wrapper(*args, **kwargs):
  21. last_exception = None
  22. for _ in range(times):
  23. try:
  24. return func(*args, **kwargs)
  25. except exceptions as e:
  26. last_exception = e
  27. raise last_exception
  28. return wrapper
  29. return wrapper
  30. @tail_call_optimized
  31. @retry(3)
  32. def r1(name):
  33. etm = int(time.time() * 1000)
  34. url = 'https://etax.xinjiang.chinatax.gov.cn/yhs-web/api/yhsyzm/get?{}'.format(etm)
  35. headers = {
  36. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
  37. }
  38. response = r.get(url=url,headers=headers,proxies=proxies,verify=False)
  39. time.sleep(1)
  40. num = string.ascii_letters + string.digits
  41. yzm = "".join(random.sample(num, 4))
  42. # code_img = response.content
  43. # with open('{}.png'.format('1'), 'wb') as fp:
  44. # fp.write(code_img)
  45. # print(code_img)
  46. # a = input()
  47. # print(a)
  48. data = {
  49. "yzm":yzm,
  50. "nsrmc": name,
  51. "pageSize": 10,
  52. "sswfrlx": "00",
  53. "pageIndex": 1
  54. }
  55. url1 = 'https://etax.xinjiang.chinatax.gov.cn/yhs-web/api/zdwfaj/ajlbcx'
  56. response1 = r.post(url=url1,headers=headers,json=data,proxies=proxies,verify=False)
  57. print(response1.json())
  58. # print(response1.text)
  59. rsd = response1.json()
  60. rst = rsd['value']['result']
  61. list1 = []
  62. for i in rst:
  63. pid = i['id']
  64. etm1 = int(time.time() * 1000)
  65. params = {
  66. "id":pid,
  67. "sswfrlx": "00",
  68. "timestamp": etm1,
  69. }
  70. url2 = 'https://etax.xinjiang.chinatax.gov.cn/yhs-web/api/zdwfaj/ajmxcx'
  71. response2= r.get(url=url2,headers=headers,params=params,proxies=proxies,verify=False)
  72. rsd1 = response2.json()
  73. rsd2 = rsd1['value']
  74. list1.append(rsd2)
  75. # print(response2.text)
  76. time.sleep(1)
  77. print(list1)
  78. if list1:
  79. myco30.insert_many(list1)
  80. # r1(name)
  81. def runs():
  82. a = myco_jb1.find().skip(275171+34665+4542)
  83. num = 0
  84. for i in a:
  85. num +=1
  86. name = i['name']
  87. print(num,name )
  88. r1(name)
  89. runs()