czyc hace 4 meses
commit
456bf8d4d1
Se han modificado 40 ficheros con 4574 adiciones y 0 borrados
  1. 3 0
      Readme.md
  2. 23 0
      a_cfg_dg_zsq.py
  3. 449 0
      daocsv.py
  4. 705 0
      gx_mong.py
  5. 85 0
      mongo_cho.py
  6. 111 0
      n01_ah.py
  7. 49 0
      n02_nb.py
  8. 154 0
      n03_bj.py
  9. 101 0
      n04_sz.py
  10. 89 0
      n05_sc.py
  11. 84 0
      n06_js.py
  12. 117 0
      n07_zj.py
  13. 78 0
      n08_fj.py
  14. 174 0
      n09_sd.py
  15. 125 0
      n10_gd.py
  16. 90 0
      n11_gx.py
  17. 77 0
      n12_hb.py
  18. 128 0
      n13_ln.py
  19. 100 0
      n14_hlj.py
  20. 126 0
      n15_jl.py
  21. 115 0
      n16_gs.py
  22. 110 0
      n17_qh.py
  23. 112 0
      n18_hn.py
  24. 76 0
      n19_hb.py
  25. 79 0
      n20_hn.py
  26. 95 0
      n21_jx.py
  27. 90 0
      n22_yn.py
  28. 116 0
      n23_hn.py
  29. 68 0
      n24_sx.py
  30. 125 0
      n25_sx.py
  31. 67 0
      n26_gz.py
  32. 82 0
      n27_nmg.py
  33. 136 0
      n28_nx.py
  34. 157 0
      n29_xz.py
  35. 95 0
      n30_xj.py
  36. 75 0
      n31_tj.py
  37. 69 0
      n33_sh.py
  38. 16 0
      rety.py
  39. 23 0
      setting.py
  40. 0 0
      shuiwulei.py

+ 3 - 0
Readme.md

@@ -0,0 +1,3 @@
+01 安徽
+02 宁波税务�
+20

+ 23 - 0
a_cfg_dg_zsq.py

@@ -0,0 +1,23 @@
+# !/usr/bin/env python
+# -*- coding:utf-8 -*-
+import sys
+from functools import wraps
+class TailRecurseException(BaseException):
+    """Signal raised by ``tail_call_optimized`` to hand over the next call's arguments."""
+
+    def __init__(self, args):
+        # ``args`` is the sequence of positional arguments for the next call;
+        # BaseException keeps it as the ``args`` tuple read back by the handler.
+        BaseException.__init__(self, *args)
+
+def tail_call_optimized(g):
+    """Decorate ``g`` so that a call it makes to itself is executed as a loop.
+
+    The wrapper inspects the call stack: when its grandparent frame is another
+    activation of the same wrapper, it raises ``TailRecurseException`` carrying
+    the new arguments, and the outer activation catches it and calls ``g``
+    again with them.  Only positional arguments are accepted (``*args``).
+    """
+    @wraps(g)
+    def func(*args):
+        f = sys._getframe()
+        # f.f_back is whoever called this wrapper; f.f_back.f_back running the
+        # same code object means an outer activation of this wrapper exists.
+        if f.f_back and f.f_back.f_back and f.f_back.f_back.f_code == f.f_code:
+            # Unwind to that outer activation and pass it the new arguments.
+            raise TailRecurseException(args)
+        else:
+            while 1:
+                try:
+                    return g(*args)
+                except TailRecurseException as e:
+                    # Retry g with the arguments of the intercepted call.
+                    args = e.args
+
+    # Copies the docstring again; functools.wraps above already did so.
+    func.__doc__ = g.__doc__
+    return func

+ 449 - 0
daocsv.py

@@ -0,0 +1,449 @@
+#!/usr/bin/env python
+# coding:utf-8
+import csv
+from pymongo import MongoClient
+# Client for the local MongoDB server; the handles below are module globals.
+myclient = MongoClient("mongodb://127.0.0.1:27017/")
+# Database that w1() indexes by collection name (one collection per source).
+mycoup = myclient['shuiwu06']
+# Collection that r1() inserts into and ser_m() queries.
+# NOTE(review): 'base_date' may be a typo for 'base_data' — confirm before renaming.
+mycohz = myclient['shuiwu_hz']['base_date']
+# Collection that ser_m() copies matching records into.
+mycopp = myclient['shuiwu_210709']['base_data']
+def w1(dbn):
+    """Read every document of collection ``dbn`` and map it to a flat 14-item row.
+
+    Each source collection uses its own field names, so every output column is
+    filled through an if/elif chain keyed on ``dbn``.
+
+    Args:
+        dbn: collection name inside ``mycoup``, e.g. ``'01_ah'``.
+
+    Returns:
+        A list of rows ``[n1, ..., n14]``: taxpayer name, region, taxpayer ID,
+        organisation code, registered address, legal representative, legal
+        representative during the violation period, responsible financial
+        officer, actual person in charge, intermediary information, case
+        nature, main facts, date, and ``dbn`` itself.
+
+    NOTE(review): the chains for n3, n6 and n12 have no ``else`` branch, so a
+    ``dbn`` not listed there leaves the variable unbound (or stale from the
+    previous document).  Several branches use a bare ``except`` that hides any
+    error, not only a missing key.
+    """
+    result = mycoup[dbn].find()
+    list1 = []
+    for i in result:
+        print(i)
+        # ---- n1: taxpayer name ----
+        if dbn in ['08_fj']:
+            n1 = i['S1']
+        elif dbn in ['11_gx']:
+            n1 = i['NSR_NAME']
+        elif dbn in ['12_hb','19_hb']:
+            n1 = i['NSRMC']
+        elif dbn in ['20_hn']:
+            n1 = i['taxpayerName']
+        elif dbn in ['24_sx','26_gz','30_xj','33_sh']:
+            n1 = i['nsrmc']
+        else:
+            n1 = i['纳税人名称']
+        # ---- n2: region (empty when the source has no such field) ----
+        if dbn in ['01_ah','07_zj','22_yn']:
+            n2 = i['所属地区']
+        elif dbn in ['08_fj']:
+            n2 = i['S20']
+        elif dbn in ['11_gx']:
+            n2 = i['ADDR']
+        elif dbn in ['19_hb']:
+            n2 = i['SHENGF'] + '/' + i['DIS']
+        elif dbn in ['20_hn']:
+            n2 = i['siteName']
+        elif dbn in ['24_sx']:
+            n2 = i['dqmc']
+        elif dbn in ['26_gz']:
+            n2 = i['area']
+        else:
+            n2 = ''
+        # ---- n3: taxpayer ID or social credit code (no else branch) ----
+        if dbn in ['01_ah','04_sz','06_js','10_gd','16_gs','17_qh','21_jx','22_yn','25_sx','29_xz','31_tj']:
+            n3 = str(i['纳税人识别号'])
+        elif dbn in ['02_nb','03_bj','05_sc','07_zj','09_sd','13_ln','14_hlj','15_jl','18_hn','28_nx']:
+            n3 = str(i['纳税人识别号或社会信用代码'])
+        elif dbn in ['27_nmg']:
+            # Keep only the part before the first non-breaking space.
+            n3 = str(i['纳税人识别号或社会信用代码']).split('\xa0')[0]
+        elif dbn in ['08_fj']:
+            n3 = str(i['S2'])
+        elif dbn in ['11_gx']:
+            n3 = str(i['NSR_ID'])
+        elif dbn in ['12_hb','19_hb']:
+            n3 = str(i['NSRSBH'])
+        elif dbn in ['20_hn']:
+            n3 = str(i['taxpayerNumber'])
+        elif dbn in ['23_hn']:
+            n3 = str(i['统一社会信用代码(纳税人识别号)'])
+        elif dbn in ['26_gz','30_xj']:
+            n3 = str(i['nsrsbh'])
+        elif dbn in ['33_sh']:
+            n3 = str(i['nsrsbm'])
+        elif dbn in ['24_sx']:
+            try:
+                n3 = str(i['nsrsbh'])
+            except:
+                n3 = ''
+        # ---- n4: organisation code ----
+        if dbn in ['08_fj']:
+            n4 = str(i['S3'])
+        elif dbn in ['11_gx']:
+            n4 = ''
+        elif dbn in ['12_hb']:
+            n4 = str(i['ZZJGDM'])
+        elif dbn in ['19_hb']:
+            n4 = str(i['ZZJG'])
+        elif dbn in ['20_hn']:
+            n4 = str(i['organizationalCode'])
+        elif dbn in ['26_gz','30_xj','33_sh']:
+            try:
+                n4 = str(i['zzjgdm'])
+            except:
+                n4 = ''
+        elif dbn in ['24_sx']:
+            # Same handling as the branch above.
+            try:
+                n4 = str(i['zzjgdm'])
+            except:
+                n4 = ''
+        else:
+            n4 = str(i['组织机构代码'])
+        # ---- n5: registered address ----
+        if dbn in ['08_fj']:
+            n5 = i['S4']
+        elif dbn in ['11_gx']:
+            n5 = i['REG_ADDR']
+        elif dbn in ['12_hb','19_hb']:
+            n5 = i['ZCDZ']
+        elif dbn in ['20_hn']:
+            n5 = i['place']
+        elif dbn in ['24_sx','26_gz','30_xj','33_sh']:
+            n5 = i['zcdz']
+        else:
+            n5 = i['注册地址']
+        # ---- n6: legal representative / person in charge: name, sex, ID (no else branch) ----
+        if dbn in ['01_ah','07_zj','22_yn']:
+            # The branches below differ only in the exact wording of the source's column header.
+            n6 = i['法定代表人或负责人姓名、性别及身份证号码(或其他证件号码)']
+        elif dbn in ['02_nb','03_bj','05_sc','13_ln','15_jl','18_hn','28_nx']:
+            n6 = i['法定代表人或者负责人姓名、性别及身份证号码(或其他证件号码)']
+        elif dbn in ['25_sx',]:
+            n6 = i['法定代表人或者负责人姓名、性别、证件名称及号码']
+        elif dbn in ['27_nmg',]:
+            n6 = i['法定代表人或者负责人姓名、性别、及身份证号码(或者其他证件号码)']
+        elif dbn in ['29_xz',]:
+            n6 = i['法人信息']
+        elif dbn in ['14_hlj',]:
+            n6 = i['法定代表人或者负责人姓名、性别及身份证号码(或者其他证件号码)']
+        elif dbn in ['04_sz']:
+            n6 = i['法定代表人或负责人姓名、性别、证件名称及号码']
+        elif dbn in ['10_gd','16_gs','17_qh','21_jx']:
+            n6 = i['法定代表人或者负责人姓名、性别、证件名称及号码']
+        elif dbn in ['06_js']:
+            n6 = i['法定代表人姓名、性别及身份证号码']
+        elif dbn in ['09_sd']:
+            # Sources with separate fields are joined with the '。' separator.
+            n6 = i['法定代表人或者负责人姓名'] + '。' + i['性别']
+        elif dbn in ['08_fj']:
+            n6 = i['S5'] + '。' + i['S6'] + '。' + i['S7']
+        elif dbn in ['12_hb']:
+            n6 = i['FDDBR'] + '。' + i['FDDBSEX'] + '。' + i['FDDBZJ']+ '。' + i['FDDBZJH']
+        elif dbn in ['11_gx']:
+            n6 = i['FDDBR_NAME'] + '。' + i['FDDBR_SEX'] + '。' + i['FDDBR_SFZHM']
+        elif dbn in ['19_hb']:
+            n6 = i['FDDBR']
+        elif dbn in ['23_hn',]:
+            n6 = i['法定代表人、负责人或者经法院判决确定的实际责任人的姓名、性别、证件名称及号码']
+        elif dbn in ['30_xj']:
+            n6 = i['fddbrxm'] + '。' + i['fddbrxb'] + '。' + i['fddbrzjlx']+ '。' + i['fddbrzjhm']
+        elif dbn in ['31_tj']:
+            n6 = i['姓名'] + '。' + i['性别'] + '。' + i['证件名称'] + '。' + i['证件号码']
+        elif dbn in ['26_gz']:
+            n6 = i['fddbrhzfzrxm'] + '。' + i['fddbrhzfzrxb'] + '。' + i['fddbrhzfzrzjmc']+ '。' + i['fddbrhzfzrzjhm']
+        elif dbn in ['33_sh']:
+            # NOTE(review): 'rowno' sits where other sources put the sex field — confirm it is intended.
+            n6 = i['frdbmc'] + '。' + i['rowno'] + '。' + i['frdbxb']+ '。' + i['frdbsfz']
+        elif dbn in ['24_sx']:
+            try:
+                n6 = i['fddbrxm'] + '。' + i['fddbrxb'] + '。' + i['fddbrsfzhm']
+            except:
+                n6 = ''
+        elif dbn in ['20_hn']:
+            # Concatenate whichever parts are present, without a separator.
+            n6 = ''
+            if i['legalName']:
+                n6 += i['legalName']
+            if i['legalSex']:
+                # 1 is rendered as '男'; any other truthy value as '女'.
+                if i['legalSex'] == 1:
+                    fgt = '男'
+                else:
+                    fgt = '女'
+                n6 += fgt
+            if i['legalIdCard']:
+                n6 += i['legalIdCard']
+        # ---- n7: legal representative during the violation period ----
+        if dbn in ['01_ah','02_nb','03_bj','05_sc','07_zj','22_yn','28_nx']:
+            # As for n6, branches differ in the wording of the column header.
+            n7 = i['违法期间法人代表或者负责人姓名、性别及身份证号码(或其他证件号码)']
+        elif dbn in ['18_hn']:
+            n7 = i['违法期间法定代表人或者负责人姓名、性别及身份证号码(或其他证件号码)']
+        elif dbn in ['14_hlj']:
+            n7 = i['违法期间法人代表或者负责人姓名、性别及身份证号码(或者其他证件号码)']
+        elif dbn in ['10_gd']:
+            n7 = i['违法期间法人代表或者负责人姓名、性别、证件名称及号码']
+        elif dbn in ['13_ln','15_jl']:
+            n7 = i['违法期间法人代表姓名及身份证号码']
+        elif dbn in ['25_sx']:
+            n7 = i['违法期间法人代表或者负责人姓名性别及身份证号码(或其他证件号码)']
+        elif dbn in ['27_nmg']:
+            n7 = i['违法期间法定代表人或责任人姓名、性别、身份证号码(或者其他证件号码)']
+        elif dbn in ['08_fj']:
+            n7 = i['S8'] + '。' + i['S9'] + '。' + i['S10']
+        elif dbn in ['30_xj']:
+            try:
+                n7 = i['cwfzrxm'] + '。' + i['cwfzrxb'] + '。' + i['cwfzrzjlx']+ '。' + i['cwfzrzjhm']
+            except:
+                n7 = ''
+        elif dbn in ['20_hn']:
+            n7 = ''
+            if i['legalNameDuring']:
+                n7 +=  i['legalNameDuring']
+            if i['legalSexDuring']:
+                if i['legalSexDuring'] == 1:
+                    fgt = '男'
+                else:
+                    fgt = '女'
+                n7 +=  fgt
+            if i['legalIdCardDuring']:
+                n7 +=  i['legalIdCardDuring']
+        else:
+            n7 = ''
+        # ---- n8: directly responsible financial officer: name, sex, ID ----
+        if dbn in ['01_ah', '02_nb', '03_bj','05_sc','07_zj','13_ln','15_jl','18_hn','22_yn','27_nmg']:
+                   # Header wording varies between sources.
+            n8 = i['负有直接责任的财务人员姓名、性别及身份证号码(或其他证件号码)']
+        elif dbn in ['16_gs','25_sx']:
+            n8 = i['负有直接责任的财务负责人姓名性别及身份证号码(或其他证件号码)']
+        elif dbn in ['14_hlj']:
+            n8 = i['负有直接责任的财务人员姓名、性别及身份证号码(或者其他证件号码)']
+        elif dbn in ['28_nx']:
+            n8 = i['负有直接责任的财务负责人姓名、性别及身份证号码(或其他证件号码)']
+        elif dbn in ['04_sz','06_js','10_gd','17_qh','21_jx']:
+            n8 = i['负有直接责任的财务负责人姓名、性别、证件名称及号码']
+        elif dbn in ['08_fj']:
+            n8 = i['S11'] + '。' + i['S12'] + '。' + i['S13']
+        elif dbn in ['23_hn']:
+            n8 = i['经法院裁判确定的负有直接责任的财务人员、团伙成员的姓名、性别、证件名称及号码']
+        elif dbn in ['20_hn']:
+            n8 = ''
+            if i['financeName']:
+                n8 += i['financeName']
+            if i['financeSex']:
+                if i['financeSex'] == 1:
+                    fgt = '男'
+                else:
+                    fgt = '女'
+                n8 += fgt
+            if i['financeIdCard']:
+                n8 += i['financeIdCard']
+        else:
+            n8 = ''
+        # ---- n9: actual person in charge: name, sex, ID ----
+        if dbn in ['01_ah', '02_nb', '03_bj','05_sc','07_zj','13_ln','15_jl','18_hn','22_yn','27_nmg','28_nx']:
+                   # Header wording varies between sources.
+            n9 = i['实际负责人姓名、性别及身份证号码(或其他证件号码)']
+        elif dbn in ['16_gs','25_sx']:
+            n9 = i['实际负责人姓名性别及身份证号码(或其他证件号码)']
+        elif dbn in ['06_js']:
+            n9 = i['经法院裁判确定的实际责任人姓名、性别、证件名称及号码']
+        elif dbn in ['14_hlj']:
+            n9 = i['实际负责人姓名、性别及身份证号码(或者其他证件号码)']
+        elif dbn in ['20_hn']:
+            n9 = ''
+            if i['principalName']:
+                n9 += i['principalName']
+            if i['principalSex']:
+                if i['principalSex'] == 1:
+                    fgt = '男'
+                else:
+                    fgt = '女'
+                n9 += fgt
+            if i['principalIdCard']:
+                n9 += i['principalIdCard']
+            # n9 = i['principalName'] + '。' + i['principalSex'] + '。' + i['principalIdCard']
+        else:
+            n9 = ''
+        # ---- n10: directly responsible intermediary information ----
+        if dbn in ['01_ah', '02_nb', '03_bj','05_sc','07_zj','13_ln','14_hlj','15_jl','18_hn','22_yn','27_nmg','28_nx']:
+            n10 = i['负有直接责任的中介机构信息']
+        elif dbn in ['04_sz','06_js','10_gd','16_gs','17_qh','21_jx','23_hn','25_sx']:
+            n10 = i['负有直接责任的中介机构信息及其从业人员信息']
+        elif dbn in ['20_hn']:
+            n10 = i['agency']
+        else:
+            n10 = ''
+        # ---- n11: case nature ----
+        if dbn in ['08_fj']:
+            n11 = i['S18']
+        elif dbn in ['09_sd']:
+            n11 = ''
+        elif dbn in ['11_gx','12_hb','19_hb']:
+            n11 = i['AJXZ']
+        elif dbn in ['20_hn']:
+            n11 = i['hardCaseType']['typeName']
+        elif dbn in ['26_gz','30_xj']:
+            n11 = i['ajxz']
+        elif dbn in ['29_xz']:
+            n11 = i['违法案件性质']
+        elif dbn in ['33_sh']:
+            n11 = i['ajMc']
+        elif dbn in ['24_sx']:
+            try:
+                n11 = i['ajxzmc']
+            except:
+                n11 = ''
+        else:
+            n11 = i['案件性质']
+        # ---- n12: main facts, legal basis and penalty (no else branch) ----
+        if dbn in ['01_ah','02_nb','05_sc','07_zj','13_ln','14_hlj','15_jl','16_gs','18_hn','22_yn','25_sx','27_nmg']:
+            n12 = i['主要违法事实相关法律依据及税务处理处罚情况']
+        elif dbn in ['03_bj','29_xz']:
+            n12 = i['主要违法事实']
+        elif dbn in ['06_js']:
+            n12 = i['主要违法事实、相关法律依据及税务处理处罚情况']
+        elif dbn in ['04_sz','10_gd','17_qh','28_nx','31_tj']:
+            n12 = i['主要违法事实'] + '。' +i['相关法律依据及税务处理处罚情况']
+        elif dbn in ['12_hb']:
+            n12 = i['ZYWFSS'] + '。' +i['FLYJ']
+        elif dbn in ['30_xj']:
+            n12 = i['wfss'] + '。' +i['swclcfqk']
+        elif dbn in ['09_sd']:
+            # The second key ends with a space; presumably that is how the field was stored — verify.
+            n12 = i['主要违法事实'] + '。' + i['相关法律依据及税务处理处罚情况 ']
+        elif dbn in ['08_fj']:
+            n12 = i['S19']
+        elif dbn in ['11_gx']:
+            n12 = i['ZYWFSS']
+        elif dbn in ['19_hb']:
+            n12 = i['WFSS']
+        elif dbn in ['33_sh']:
+            n12 = i['wfss']
+        elif dbn in ['20_hn']:
+            n12 = i['content']
+        elif dbn in ['21_jx']:
+            n12 = i['主要违法事实直接法律依据及税务处理处罚情况']
+        elif dbn in ['24_sx']:
+            n12 = i['zywfss'] + '。' + i['flyj_cljg']
+        elif dbn in ['26_gz']:
+            n12 = i['zywfss'] + '。' + i['xgflyjjswclcfqk']
+        elif dbn in ['23_hn']:
+            n12 = i['主要违法事实'] + '。' + i['相关法律依据及税务处理、税务行政处罚等情况']
+        # ---- n13: date, with separators rewritten to '/' for some sources ----
+        if dbn in ['08_fj']:
+            n13 = i['docreltime'].replace('-','/')
+        elif dbn in ['11_gx']:
+            n13 = i['DOCPUBTIME'].split(' ')[0].replace('.','/')
+        elif dbn in ['12_hb']:
+            n13 = i['GBRQ'].split(' ')[0]
+        elif dbn in ['19_hb']:
+            n13 = i['GBRQ'].split(' ')[0].replace('年','/').replace('月','')
+        elif dbn in ['24_sx']:
+            n13 = i['xsrq'].split(' ')[0].replace('-','/')
+        elif dbn in ['26_gz']:
+            n13 = i['CrTime'].split(' ')[0].replace('-','/')
+        elif dbn in ['30_xj']:
+            n13 = ''
+        else:
+            n13 = i['date']
+        # n14 records which source collection the row came from.
+        n14 = dbn
+        list1.append([n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, n11, n12, n13, n14])
+    return list1
+
+
+# 01_ah 02_nb
+# dbn = '33_sh'
+
+
+
+
+def drcav():
+    """Write the normalised rows of every source collection to ``minglu.csv``.
+
+    Collections are processed in the fixed order below; each one is converted
+    by ``w1`` and its rows are appended to the CSV (UTF-8, no header row).
+    """
+    l1 = ['01_ah', '02_nb', '03_bj', '04_sz', '05_sc', '06_js', '07_zj', '08_fj', '09_sd', '10_gd', '11_gx', '12_hb',
+          '13_ln', '14_hlj', '15_jl', '16_gs', '17_qh', '18_hn', '19_hb', '20_hn', '21_jx', '22_yn', '23_hn', '24_sx'
+        , '25_sx', '26_gz', '27_nmg', '28_nx', '29_xz', '30_xj', '31_tj', '33_sh']
+    # The context manager closes (and flushes) the file even when w1() raises
+    # part-way through, so rows already written are not lost.
+    with open('minglu.csv','w',encoding='utf-8',newline='') as f:
+        csv_writer = csv.writer(f)
+        for dbn in l1:
+            csv_writer.writerows(w1(dbn))
+
+
+
+
+def r1():
+    """Load ``minglu.csv`` into the ``mycohz`` collection, one document per row.
+
+    Columns are mapped by position to the field names listed in ``fields``.
+    A running row counter is printed as progress output.
+
+    Raises:
+        IndexError: if a row has fewer than 14 columns.
+    """
+    # Document keys in CSV column order (the order w1() builds its rows in).
+    fields = ("nsrmc", "ssdq", "nsrsbh", "zzjgdm", "zcdz", "fddbr", "wffddbr",
+              "zjfzcwr", "sjfzr", "zjzrzj", "ajxz", "zywf", "date", "source")
+    # newline='' lets the csv module handle line endings itself, so newlines
+    # embedded in quoted fields come back exactly as they were written.
+    with open('minglu.csv', 'r', encoding='utf-8', newline='') as f:
+        for num, row in enumerate(csv.reader(f), start=1):
+            print(num)
+            # Index explicitly so that a short row still fails loudly.
+            dict1 = {key: row[pos] for pos, key in enumerate(fields)}
+            mycohz.insert_one(dict1)
+
+# r1()
+#
+
+def ser_m(name):
+    """Copy every ``mycohz`` document whose ``nsrmc`` equals ``name`` into ``mycopp``."""
+    for doc in mycohz.find({"nsrmc":name}):
+        print(doc)
+        # Remove the source document's _id before inserting the copy.
+        del doc['_id']
+        mycopp.insert_one(doc)
+
+def w_test():
+    """Write a single sample row ``1,2`` to ``test.csv`` (CSV writer smoke test)."""
+    i1= ['1','2']
+    # The context manager closes the file even if the write fails.
+    with open('test.csv', 'w', encoding='utf-8', newline='') as f:
+        csv.writer(f).writerow(i1)
+# w_test()
+
+
+import xlrd
+#打开excel
+def r2():
+    """Look up every name in the first column of the match-list workbook.
+
+    Opens sheet ``Sheet1`` of the workbook and, for every row (the first row
+    included), passes the first cell to ``ser_m``.  A running row counter is
+    printed as progress output.
+
+    NOTE(review): xlrd 2.x no longer reads .xlsx files — confirm the installed
+    version still supports this workbook.
+    """
+    sheet = xlrd.open_workbook('税务违法企业匹配名单.xlsx').sheet_by_name('Sheet1')
+    for row_idx in range(sheet.nrows):
+        print(row_idx + 1)
+        ser_m(sheet.row_values(row_idx)[0])
+# r2()
+
+# drcav()  #将库内历史数据导入csv,用来备份上传至另一个mongo --写入minglu.csv
+# r1()    #将上一步csv导入mongo                         --写入shuiwu_hz mongoDB
+#           查询前添加mongo索引  db.***.createIndex({})
+# r2()      #将xlsx文件查询,数据写入 mongodb
+#           下命令导出csv
+# mongoexport.exe -h 127.0.0.1 --port 27017 -d shuiwu_210709 -c base_data --csv -f nsrmc,ssdq,nsrsbh,zzjgdm,zcdz,fddbr,wffddbr,zjfzcwr,sjfzr,zjzrzj,ajxz,zywf,date,source -o shuiwu.csv

+ 705 - 0
gx_mong.py

@@ -0,0 +1,705 @@
+#!/usr/bin/env python
+# coding:utf-8
+import n01_ah ,n03_bj ,n04_sz,n05_sc,n06_js,n07_zj ,n08_fj,n09_sd,n10_gd ,n11_gx,n12_hb,n13_ln ,n14_hlj ,n15_jl ,n16_gs,n17_qh,n18_hn,n19_hb,n20_hn,n21_jx,n22_yn,n23_hn,n24_sx ,n25_sx,n26_gz ,n27_nmg  ,n28_nx ,n29_xz,n33_sh #,n30_xj,n31_tj,n32_cq,n33_sh
+from mongo_cho import myco1,myco2,myco3,myco4,myco5,myco6,myco7,myco8,myco9,myco10,myco11,myco12,myco13,myco14,myco15,myco16,myco17,myco18,myco19,myco20,myco21,myco22,myco23,myco24,myco25,myco26,myco27,myco28,myco29,myco30,myco31,myco32,myco33
+from mongo_cho import r_myco15,mycoup,myco1_b,myco3_b,myco5_b,myco7_b,myco10_b,myco13_b,myco14_b,myco15_b,myco16_b,myco21_b,myco23_b,myco25_b,myco27_b,myco28_b
+import arrow
+
+
+class Mg_upd():
+    """Run the per-source crawlers and record how far each one has got.
+
+    Progress is kept in ``mycoup`` as one document per source, looked up by
+    ``dsbh`` (e.g. ``"n03"``) with the last processed period stored as a
+    string in ``emon``.  Most ``n_XX`` methods resume from that period, call
+    the matching crawler module once per period up to the target, and store
+    the period after each call.
+    """
+
+    # Numeric suffixes of the n_XX methods that qidong() runs, in order.
+    # NOTE(review): n_13 is defined but was not part of the original batch.
+    # Original remarks: n12 needs its cookie replaced and n25 needs its cookie
+    # updated.
+    _BATCH = (1, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20,
+              21, 22, 23, 24, 25, 26, 27, 28, 29, 33)
+
+    def __init__(self):
+        sfmth = arrow.now()
+        # Target month is the month before the current one.
+        # NOTE(review): in January this is 0 while ``yea`` is already the new
+        # year, so every monthly range is empty — confirm this is intended.
+        self.mon = int(sfmth.month) - 1
+        self.yea = int(sfmth.year)
+
+    def se_ms(self,dsbh):
+        """Return the progress document of source ``dsbh``, or None if absent."""
+        return mycoup.find_one({"dsbh": dsbh})
+
+    def up_ms(self,dsbh,emon):
+        """Store ``emon`` (as a string) as the last processed period of ``dsbh``."""
+        mycoup.update_one({"dsbh": dsbh}, {"$set": {"emon":str(emon)}})
+        print('>>>update OK')
+
+    def _start_period(self, dsbh):
+        """Return the stored period of ``dsbh`` as an int; fail clearly if unseeded."""
+        record = self.se_ms(dsbh)
+        if record is None:
+            raise LookupError(
+                'no progress record for {!r}; insert one into mycoup first'.format(dsbh))
+        return int(record['emon'])
+
+    def _quarter_stop(self):
+        """Return the exclusive upper quarter bound derived from ``self.mon``."""
+        for stop, months in ((2, (1, 2, 3)), (3, (4, 5, 6)), (4, (7, 8, 9))):
+            if self.mon in months:
+                return stop
+        return 5
+
+    def _catch_up(self, dsbh, stop, crawl, unit='月', show_year=True, halt_on=None):
+        """Call ``crawl(period)`` for each period from the stored one up to ``stop``.
+
+        The period is saved after every call.  When ``halt_on`` is given and
+        ``crawl`` returns that value, the loop stops after saving.
+        """
+        for period in range(self._start_period(dsbh), stop):
+            label = '{}{}'.format(period, unit)
+            if show_year:
+                print(label, self.yea)
+            else:
+                print(label)
+            outcome = crawl(period)
+            self.up_ms(dsbh, period)
+            if halt_on is not None and outcome == halt_on:
+                return
+
+    def _monthly(self, dsbh, module, show_year=True, halt_on=None):
+        """Catch up month by month using ``module.runs(year, month)``."""
+        year = self.yea
+        self._catch_up(dsbh, self.mon + 1, lambda month: module.runs(year, month),
+                       show_year=show_year, halt_on=halt_on)
+
+    def _quarterly(self, dsbh, crawl):
+        """Catch up quarter by quarter using ``crawl(year, quarter)``."""
+        year = self.yea
+        self._catch_up(dsbh, self._quarter_stop(), lambda quarter: crawl(year, quarter),
+                       unit='季度', show_year=False)
+
+    def n_01(self):
+        """Source n01: the upper bound comes from ``n01_ah.get_ny()``."""
+        rpg = n01_ah.get_ny()
+        print(rpg)
+        # NOTE(review): the year label is hard-coded rather than derived from
+        # self.yea — confirm before changing.
+        year = '2021年'
+        lpg = self._start_period('n01')
+        tpg = int(rpg) + 1
+        for i in range(lpg, tpg):
+            searhvalue = '{}月'.format(i)
+            print(searhvalue,year)
+            n01_ah.r1(searhvalue, year)
+            self.up_ms('n01', i)
+
+    def n_03(self):
+        """Source n03, monthly."""
+        self._monthly('n03', n03_bj)
+
+    def n_04(self):
+        """Source n04, monthly."""
+        self._monthly('n04', n04_sz)
+
+    def n_06(self):
+        """Source n06, monthly."""
+        self._monthly('n06', n06_js)
+
+    def n_07(self):
+        """Source n07, monthly."""
+        self._monthly('n07', n07_zj)
+
+    def n_08(self):
+        """Source n08, monthly; stops once the crawler returns ``'er1'``.
+
+        Original remark: when a month genuinely has no data, the stored month
+        has to be advanced by hand.
+        """
+        self._monthly('n08', n08_fj, halt_on='er1')
+
+    def n_09(self):
+        """Source n09, monthly."""
+        self._monthly('n09', n09_sd)
+
+    def n_10(self):
+        """Source n10, monthly."""
+        self._monthly('n10', n10_gd)
+
+    def n_11(self):
+        """Source n11, monthly."""
+        self._monthly('n11', n11_gx)
+
+    def n_12(self):
+        """Source n12, monthly."""
+        self._monthly('n12', n12_hb)
+
+    def n_13(self):
+        """Source n13, monthly (not run by qidong)."""
+        self._monthly('n13', n13_ln)
+
+    def n_14(self):
+        """Source n14, monthly; its crawler takes only the month."""
+        self._catch_up('n14', self.mon + 1, n14_hlj.runs, show_year=False)
+
+    def n_15(self):
+        """Source n15, monthly."""
+        self._monthly('n15', n15_jl, show_year=False)
+
+    def n_16(self):
+        """Source n16, quarterly."""
+        self._quarterly('n16', n16_gs.r1)
+
+    def n_17(self):
+        """Source n17: one call for the current year, no progress record."""
+        n17_qh.runs(self.yea)
+
+    def n_18(self):
+        """Source n18: single call, no progress record."""
+        n18_hn.runs()
+
+    def n_19(self):
+        """Source n19, monthly."""
+        self._monthly('n19', n19_hb, show_year=False)
+
+    def n_20(self):
+        """Source n20, quarterly."""
+        self._quarterly('n20', n20_hn.runs)
+
+    def n_21(self):
+        """Source n21: single call, no progress record."""
+        n21_jx.runs()
+
+    def n_22(self):
+        """Source n22, monthly."""
+        self._monthly('n22', n22_yn)
+
+    def n_23(self):
+        """Source n23, monthly."""
+        self._monthly('n23', n23_hn)
+
+    def n_24(self):
+        """Source n24, monthly."""
+        self._monthly('n24', n24_sx)
+
+    def n_25(self):
+        """Source n25, monthly."""
+        self._monthly('n25', n25_sx)
+
+    def n_26(self):
+        """Source n26, monthly."""
+        self._monthly('n26', n26_gz)
+
+    def n_27(self):
+        """Source n27: single call, no progress record."""
+        n27_nmg.r1()
+
+    def n_28(self):
+        """Source n28, monthly; its crawler takes only the month."""
+        self._catch_up('n28', self.mon + 1, n28_nx.runs, show_year=False)
+
+    def n_29(self):
+        """Source n29: single call, no progress record."""
+        n29_xz.runs()
+
+    def n_33(self):
+        """Source n33, monthly."""
+        self._monthly('n33', n33_sh)
+
+    def qidong(self):
+        """Run every source listed in ``_BATCH`` in order, printing a marker after each."""
+        for num in self._BATCH:
+            getattr(self, 'n_{:02d}'.format(num))()
+            print('n{}--ok-----'.format(num))
+
+# Example of seeding a progress record for one source (kept for reference):
+# d1 = {"dsbh":"n13","eyear":"2021","emon":"4"}
+# mycoup.insert_one(d1)
+# Instantiated at import time; Mg_upd.__init__ only reads the current date.
+qd = Mg_upd()
+# Uncomment to run the whole batch:
+# qd.qidong()
+
+## Delete records with a bad url
+def mong_del():
+    """Scan ``myco10`` and delete one ``uid == "#pageIndex"`` document per hit.
+
+    A running counter of scanned documents is printed as progress output.
+    """
+    bad_uid = "#pageIndex"
+    for count, doc in enumerate(myco10.find(), start=1):
+        print(count)
+        if doc['uid'] == bad_uid:
+            myco10.delete_one({"uid": bad_uid})
+
+# De-duplicate collections by url / uid
+class del_rpt():
+    """Remove duplicate documents from a collection, using a Redis set as the seen-list.
+
+    Each ``clXX_tb`` method walks one collection: a key not yet in the Redis
+    set is added to it, a key already present triggers deletion of one
+    document with that key.  The Redis set must therefore be emptied before a
+    run, otherwise documents recorded earlier are treated as duplicates.
+    """
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def _drop_repeats(coll, field, set_name):
+        """Delete one document from ``coll`` per repeated value of ``field``."""
+        for count, doc in enumerate(coll.find(), start=1):
+            print(count)
+            key = doc[field]
+            if r_myco15.sismember(set_name, key):
+                # Filter on the same field the key was read from.
+                coll.delete_one({field: key})
+            else:
+                r_myco15.sadd(set_name, key)
+
+    def cl20_tb(self):  # Redis set 'n20' must be cleared first
+        """De-duplicate ``myco20`` on ``id``."""
+        self._drop_repeats(myco20, 'id', 'n20')
+
+    def cl23_tb(self):  # Redis set 'n23' must be cleared first
+        """De-duplicate ``myco23`` on ``url``."""
+        self._drop_repeats(myco23, 'url', 'n23')
+
+    def cl24_tb(self):  # Redis set 'n24' must be cleared first
+        """De-duplicate ``myco24`` on ``ajbh``."""
+        self._drop_repeats(myco24, 'ajbh', 'n24')
+
+    def cl28_tb(self):  # Redis set 'n28' must be cleared first
+        """De-duplicate ``myco28`` on ``url``.
+
+        The original filtered the delete on ``ajbh`` with a url value, which
+        matched nothing; the delete now uses ``url``.
+        """
+        self._drop_repeats(myco28, 'url', 'n28')
+
+    def qidong(self):
+        # Original remark: in theory no de-duplication is needed.
+        pass
+
+
+def cl1():  # original remark: unresolved
+    """Add the ``djxh`` of every ``myco33`` document to the Redis set ``n33``.
+
+    Prints a running counter and each document as progress output.
+    """
+    for count, doc in enumerate(myco33.find(), start=1):
+        print(count)
+        print(doc)
+        r_myco15.sadd('n33',doc['djxh'])
+
+# cl1()
+
+
+#补录数据
+class bulu_mes():
+    def __init__(self):
+        # Nothing to initialise here.
+        pass
+
+    def mongo_upd01(self):
+        """Fill in details for ``myco1_b`` documents that have exactly three fields.
+
+        For each such document the detail dict returned by ``n01_ah.r1_d`` is
+        ``$set`` on the document with the same ``url`` in ``myco1`` and
+        ``myco1_b``, then printed.
+        """
+        for doc in myco1_b.find():
+            dt, url = doc['date'], doc['url']
+            # Documents with any other field count are left untouched.
+            if len(doc) != 3:
+                continue
+            rsd = n01_ah.r1_d(url,dt)
+            change = {"$set":rsd}
+            for coll in (myco1, myco1_b):
+                coll.update_one({"url":url}, change)
+            print(rsd)
+
+    def mongo_upd03(self):
+        a = myco3_b.find()
+        for i in a:
+            tfl = len(i)
+            dt = i['date']
+            url = i['uid']
+            if tfl == 3:
+                dt1 = dt.replace('/','年度') + '月'
+                rsd = n03_bj.r1_d(url,dt1)
+                myquery = {"uid":url}
+                newv = {"$set":rsd}
+                myco3.update_one(myquery,newv)
+                myco3_b.update_one(myquery, newv)
+                print(rsd)
+
+    def mongo_upd05(self):
+        a = myco5_b.find()
+        for i in a:
+            tfl = len(i)
+            dt = i['date']
+            url = i['url']
+            if tfl == 3:
+                # dt1 = dt.replace('/','年度') + '月'
+                # rsd = n05_sc.r1_d(url,dt1)
+                myquery = {"url":url}
+                myco5.delete_one(myquery)
+                myco5_b.delete_one(myquery)
+                # print(rsd)
+
+
+    def mongo_upd07(self):
+        a = myco7_b.find()
+        for i in a:
+            tfl = len(i)
+            dt = i['date']
+            url = i['url']
+            if tfl == 3:
+                rsd = n07_zj.r1_d(url,dt)
+                print(rsd)
+                myquery = {"url":url}
+                newv = {"$set":rsd}
+                myco7.update_one(myquery,newv)
+                myco7_b.update_one(myquery, newv)
+
+    def mongo_upd10(self):
+        a = myco10_b.find()
+        for i in a:
+            tfl = len(i)
+            dt = i['date']
+            url = i['uid']
+            if tfl == 3:
+                rsd = n10_gd.r1_d(url,dt)
+                print(rsd)
+                myquery = {"uid":url}
+                newv = {"$set":rsd}
+                myco10.update_one(myquery,newv)
+                myco10_b.update_one(myquery, newv)
+
+    def mongo_upd13(self):
+        a = myco13_b.find()
+        for i in a:
+            tfl = len(i)
+            dt = i['date']
+            url = i['url']
+            if tfl == 3:
+                rsd = n13_ln.r1_d(url,dt)
+                print(rsd)
+                myquery = {"url":url}
+                newv = {"$set":rsd}
+                myco13.update_one(myquery,newv)
+                myco13_b.update_one(myquery, newv)
+
+    def mongo_upd14(self):
+        a = myco14_b.find()
+        for i in a:
+            tfl = len(i)
+            dt = i['date']
+            url = i['url']
+            if tfl == 3:
+                rsd = n14_hlj.r1_d(url,dt)
+                print(rsd)
+                myquery = {"url":url}
+                newv = {"$set":rsd}
+                myco14.update_one(myquery,newv)
+                myco14_b.update_one(myquery, newv)
+
+    def mongo_upd15(self):
+        a = myco15_b.find()
+        for i in a:
+            tfl = len(i)
+            dt = i['date']
+            url = i['url']
+            if tfl == 3:
+                rsd = n15_jl.r1_d(url,dt)
+                print(rsd)
+                myquery = {"url":url}
+                newv = {"$set":rsd}
+                myco15.update_one(myquery,newv)
+                myco15_b.update_one(myquery, newv)
+
+    def mongo_upd16(self):
+        a = myco16_b.find()
+        for i in a:
+            tfl = len(i)
+            dt = i['date']
+            url = i['url']
+            if tfl == 3:
+                rsd = n16_gs.r1_d(url,dt)
+                print(rsd)
+                myquery = {"url":url}
+                newv = {"$set":rsd}
+                myco16.update_one(myquery,newv)
+                myco16_b.update_one(myquery, newv)
+
+    def mongo_upd21(self):
+        a = myco21_b.find()
+        for i in a:
+            tfl = len(i)
+            dt = i['date']
+            url = i['uid']
+            if tfl == 3:
+                print(url,'=====')
+                rsd = n21_jx.r2(dt,url)
+                print(rsd)
+                myquery = {"uid":url}
+                newv = {"$set":rsd}
+                myco21.update_one(myquery,newv)
+                myco21_b.update_one(myquery, newv)
+
+    def mongo_upd23(self):
+        a = myco23.find()
+        for i in a:
+            tfl = len(i)
+            dt = i['date']
+            url = i['url']
+            if tfl == 3:
+                print(i)
+                rsd = n23_hn.r1_d(url,dt)
+                print(rsd)
+                myquery = {"url":url}
+                newv = {"$set":rsd}
+                myco23.update_one(myquery,newv)
+                myco23_b.update_one(myquery, newv)
+
+    def mongo_upd25(self):
+        a = myco25_b.find()
+        for i in a:
+            tfl = len(i)
+            dt = i['date']
+            url = i['url']
+            if tfl == 3:
+                rsd = n25_sx.r1_d(url,dt)
+                print(rsd)
+                myquery = {"url":url}
+                newv = {"$set":rsd}
+                myco25.update_one(myquery,newv)
+                myco25_b.update_one(myquery, newv)
+
+    def mongo_upd27(self):
+        a = myco27_b.find()
+        for i in a:
+            tfl = len(i)
+            dt = i['date']
+            url = i['url']
+            if tfl == 3:
+                rsd = n27_nmg.r1_d(url,dt)
+                print(rsd)
+                myquery = {"url":url}
+                newv = {"$set":rsd}
+                myco27.update_one(myquery,newv)
+                myco27_b.update_one(myquery, newv)
+
+    def mongo_upd28(self):
+        a = myco28_b.find()
+        for i in a:
+            tfl = len(i)
+            dt = i['date']
+            url = i['url']
+            if tfl == 3:
+                rsd = n28_nx.r1_d(url,dt)
+                print(rsd)
+                myquery = {"url":url}
+                newv = {"$set":rsd}
+                myco28.update_one(myquery,newv)
+                myco28_b.update_one(myquery, newv)
+
+    def qidong(self):
+        self.mongo_upd01()
+        print('n01-补录-ok=====')
+        self.mongo_upd03()
+        print('n03-补录-ok=====')
+        self.mongo_upd07()
+        print('n07-补录-ok=====')
+        self.mongo_upd10()
+        print('n10-补录-ok=====')
+        self.mongo_upd13()
+        print('n13-补录-ok=====')
+        self.mongo_upd14()
+        print('n14-补录-ok=====')
+        self.mongo_upd15()
+        print('n15-补录-ok=====')
+        self.mongo_upd16()
+        print('n16-补录-ok=====')
+        self.mongo_upd21()
+        print('n21-补录-ok=====')
+        self.mongo_upd23()
+        print('n23-补录-ok=====')
+        self.mongo_upd25()
+        print('n25-补录-ok=====')
+        self.mongo_upd27()
+        print('n27-补录-ok=====')
+        self.mongo_upd28()
+        print('n28-补录-ok=====')
+# Module-level backfill helper; the actual runs below are left commented out
+# and are enabled by hand when needed.
+upqd = bulu_mes()
+# upqd.qidong()
+# upqd.mongo_upd28()
+import datetime
+# a = myco15.find()
+# for i in a:
+#     if len(i) == 3:
+#         print(i)

+ 85 - 0
mongo_cho.py

@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+# coding:utf-8
+from pymongo import MongoClient
+# One shared client for the local MongoDB instance; every collection handle
+# below lives in the 'shuiwu06' database.
+myclient = MongoClient("mongodb://127.0.0.1:27017/")
+mycoup = myclient['shuiwu06']['upd_ms']
+# Primary collections: mycoN is the handle imported by the scraper module
+# nNN_xx.py (e.g. n01_ah imports myco1).
+myco1 = myclient['shuiwu06']['01_ah']
+myco2 = myclient['shuiwu06']['02_nb']
+myco3 = myclient['shuiwu06']['03_bj']
+myco4 = myclient['shuiwu06']['04_sz']
+myco5 = myclient['shuiwu06']['05_sc']
+myco6 = myclient['shuiwu06']['06_js']
+myco7 = myclient['shuiwu06']['07_zj']
+myco8 = myclient['shuiwu06']['08_fj']
+myco9 = myclient['shuiwu06']['09_sd']
+myco10 = myclient['shuiwu06']['10_gd']
+myco11 = myclient['shuiwu06']['11_gx'] ##
+myco12 = myclient['shuiwu06']['12_hb']
+myco13 = myclient['shuiwu06']['13_ln']
+myco14 = myclient['shuiwu06']['14_hlj']
+myco15 = myclient['shuiwu06']['15_jl']
+myco16 = myclient['shuiwu06']['16_gs']
+myco17 = myclient['shuiwu06']['17_qh']
+myco18 = myclient['shuiwu06']['18_hn']
+myco19 = myclient['shuiwu06']['19_hb']
+myco20 = myclient['shuiwu06']['20_hn']
+myco21 = myclient['shuiwu06']['21_jx']
+myco22 = myclient['shuiwu06']['22_yn']
+myco23 = myclient['shuiwu06']['23_hn']
+myco24 = myclient['shuiwu06']['24_sx']
+myco25 = myclient['shuiwu06']['25_sx']
+myco26 = myclient['shuiwu06']['26_gz']
+myco27 = myclient['shuiwu06']['27_nmg']
+myco28 = myclient['shuiwu06']['28_nx']
+myco29 = myclient['shuiwu06']['29_xz']
+myco30 = myclient['shuiwu06']['30_xj']
+myco31 = myclient['shuiwu06']['31_tj']
+# NOTE(review): '32cq' has no underscore, unlike its siblings; confirm this
+# matches the real collection name before "fixing" it.
+myco32 = myclient['shuiwu06']['32cq']
+myco33 = myclient['shuiwu06']['33_sh']
+
+# Backup collections: same naming with a '_b' suffix; the scrapers insert
+# each new batch into both the primary and the backup collection.
+myco1_b = myclient['shuiwu06']['01_ah_b']
+myco2_b = myclient['shuiwu06']['02_nb_b']
+myco3_b = myclient['shuiwu06']['03_bj_b']
+myco4_b = myclient['shuiwu06']['04_sz_b']
+myco5_b = myclient['shuiwu06']['05_sc_b']
+myco6_b = myclient['shuiwu06']['06_js_b']
+myco7_b = myclient['shuiwu06']['07_zj_b']
+myco8_b = myclient['shuiwu06']['08_fj_b']
+myco9_b = myclient['shuiwu06']['09_sd_b']
+myco10_b = myclient['shuiwu06']['10_gd_b']
+myco11_b = myclient['shuiwu06']['11_gx_b'] ##
+myco12_b = myclient['shuiwu06']['12_hb_b']
+myco13_b = myclient['shuiwu06']['13_ln_b']
+myco14_b = myclient['shuiwu06']['14_hlj_b']
+myco15_b = myclient['shuiwu06']['15_jl_b']
+myco16_b = myclient['shuiwu06']['16_gs_b']
+myco17_b = myclient['shuiwu06']['17_qh_b']
+myco18_b = myclient['shuiwu06']['18_hn_b']
+myco19_b = myclient['shuiwu06']['19_hb_b']
+myco20_b = myclient['shuiwu06']['20_hn_b']
+myco21_b = myclient['shuiwu06']['21_jx_b']
+myco22_b = myclient['shuiwu06']['22_yn_b']
+myco23_b = myclient['shuiwu06']['23_hn_b']
+myco24_b = myclient['shuiwu06']['24_sx_b']
+myco25_b = myclient['shuiwu06']['25_sx_b']
+myco26_b = myclient['shuiwu06']['26_gz_b']
+myco27_b = myclient['shuiwu06']['27_nmg_b']
+myco28_b = myclient['shuiwu06']['28_nx_b']
+myco29_b = myclient['shuiwu06']['29_xz_b']
+myco30_b = myclient['shuiwu06']['30_xj_b']
+myco31_b = myclient['shuiwu06']['31_tj_b']
+myco32_b = myclient['shuiwu06']['32cq_b']
+myco33_b = myclient['shuiwu06']['33_sh_b']
+
+import redis
+hostname = "127.0.0.1"
+# Two handles on the local redis server: db 2 and db 15.  The scrapers use
+# r_myco15 to hold the sets of already-stored keys ('n01', 'n03', ...).
+r_myco = redis.StrictRedis(host=hostname, port=6379, decode_responses=True, db=2)
+r_myco15 = redis.StrictRedis(host=hostname, port=6379, decode_responses=True, db=15)
+
+# myco23 = myclient['shuiwu06']['23_hn']
+# a = myco23.find()
+# for i in a:
+#     uid = i['url']
+#     r_myco15.sadd('n23',uid)
+
+#

+ 111 - 0
n01_ah.py

@@ -0,0 +1,111 @@
+import requests,json,time
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco1,r_myco15,myco1_b
+r = requests.session()
+r.keep_alive = False
+from rety import retry
+
+def zh1(list1):
+    str1 = ''
+    for i in list1:
+        str1 += i.replace(' ','').replace('\r','').replace('\n','').replace('\t','')
+    return str1
+
+@retry(3)
+def r1_d(url,dt1):
+    # url = 'http://anhui.chinatax.gov.cn/art/2021/3/3/art_20155_6021.html'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,proxies=proxies)
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//tr[@class="rlbbox"]')
+    dict1 = {}
+    for i in a:
+        k1 = i.xpath('td[1]/div/text()')
+        # print(k1)
+        k2 = zh1(k1)
+        # print(k2)
+        v1 = i.xpath('td[2]/div/text()')
+        # print(v1)
+        v2 = zh1(v1)
+        # print(v2)
+        dict1[k2] = v2
+
+        # dict1 = {k2:v2}
+    # print(dict1)
+    dict1['date'] = dt1
+    dict1['url'] = url
+    return dict1
+
+@retry(3)
+def r1(searhvalue,year):
+    url = 'http://anhui.chinatax.gov.cn/module/jslib/bulletin/ajaxdata.jsp'
+    headers = {
+        "User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data = {
+        "searhvalue":parse.quote(searhvalue),
+        "searchkey": "jd1",
+        "year": parse.quote(year),
+    }
+    response = r.post(url=url,headers=headers,data=data,proxies=proxies)
+    html = response.text
+    # print(html)
+    selector = etree.HTML(html)
+    a = selector.xpath('//tr[@class="rlbbox"]')
+    list1 = []
+    list2 = []
+    for i in a:
+        url1 = i.xpath('td[5]/div/a/@href')[0]
+        print(url1)
+        # url1='http://anhui.chinatax.gov.cn/art/2020/5/9/art_19687_3782.html'
+        utf = r_myco15.sismember('n01',url1) ##更改
+        if not utf:
+            dt1 = year.replace('年','/') +  searhvalue.replace('月','')
+            rsd = r1_d(url1,dt1)
+            if rsd:
+                list1.append(rsd)
+                list2.append(url1)
+        else:
+            print('已存在,>>>n01')
+    print(list1)
+    if list1:
+        myco1.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco1_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n01', mis)  ##更改
+
+@retry(3)
+def get_ny():
+    url ='http://anhui.chinatax.gov.cn//module/jslib/bulletin/bullenleft.jsp'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url, headers=headers, proxies=proxies)
+    html = response.text
+    # print(html)
+    selector = etree.HTML(html)
+    len1=selector.xpath('//tr[@id="jiduonclick1"]/td/span//a')
+    list1 = []
+    for i in len1:
+        ny = i.xpath('text()')[0].replace('月','')
+        list1.append(ny)
+    eny = list1[-1]
+    print(eny,'>>>from n01_ah***')
+    return eny
+# get_ny()
+
+def runs():
+    year = '2024年'
+    for i in range(1,2):
+        searhvalue = '{}月'.format(i)
+        r1(searhvalue,year)
+runs()
+

+ 49 - 0
n02_nb.py

@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json,time
+from setting import proxies
+# from urllib import parse
+# from pymongo import MongoClient
+# myclient = MongoClient("mongodb://127.0.0.1:27017/")
+# myco2 = myclient['shuiwu06']['02_nb']
+# myco2_b = myclient['shuiwu06']['02_nb']
+from lxml import etree
+from mongo_cho import myco2,myco2_b
+r = requests.session()
+r.keep_alive = False
+
+#########见30的
+
+# http://ningbo.chinatax.gov.cn/col/col6300/index.html
+
+def r1(year,mon,day):
+    dict1 = {}
+    url = 'http://ningbo.chinatax.gov.cn/art/{year}/{mon}/{day}/art_6166_7114.html'.format(year=year,mon=mon,day=mon)
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    html = requests.get(url=url,headers=headers,proxies=proxies)
+    selector = etree.HTML(html)
+    a = selector.xpath('//div[@id="zoom"]//table//tr')
+    for i in a:
+        k1 = i.xpath('td[1]')
+        if k1:
+            k2 = k1[0].xpath('string(.)').strip()
+            k3 = k2.replace('\r','').replace('\t','').replace('\n','').replace(' ','')
+            # print(k2)
+            v1 = i.xpath('td[2]')
+            if v1:
+                v2 = v1[0].xpath('string(.)').strip()
+                v3 = v2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+                # print(v3)
+            else:
+                v3 = ''
+            dict1[k3] = v3
+    dict1['url'] = url
+    dict1['date'] = str(year)+'/'+str('04')
+    print(dict1)
+    myco2_b.insert_one(dict1)
+
+# Probe days 1..29 of 2023/4.
+# NOTE(review): this runs at import time as well as when executed as a script.
+for i in range(1,30):
+    r1('2023','4',i)

+ 154 - 0
n03_bj.py

@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import re
+from rety import retry
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco3,r_myco15,myco3_b
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1_d(cid,ny):
+    url = 'http://beijing.chinatax.gov.cn/bjsat/office/jsp/zdsswfaj/wwidquery'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data = {
+        "id":cid,
+        "dq": "",
+        "ajlx": "",
+        "ndjd": ny,
+        "bz": "ndjd",
+        "dqy": "2",
+        "ymdx": "",
+        "nsrmc": "",
+        "nsrsbh": "",
+        "zcdz": "",
+        "zzjgdm": "",
+        "fddbrmc": "",
+        "fddbrsfzhm": "",
+        "cwfzrmc": "",
+        "cwfzrsfzhm": "",
+        "orgCode": "11100000000",
+    }
+    response = r.post(url=url,headers=headers,data=data,proxies=proxies)
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('/html/body/table/tbody/tr/td/table/tbody//tr')
+    dict1 = {}
+    for i in a:
+        try:
+            k1 = i.xpath('td[1]/text()')[0].replace(' ','').replace('\r','').replace('\n','').replace('\t','')
+            v1 = i.xpath('td[2]/text()')[0].replace(' ','').replace('\r','').replace('\n','').replace('\t','')
+
+            dict1[k1] = v1
+        except:
+            pass
+    dict1['uid'] = cid
+    dict1['date'] = ny.replace('年度','/').replace('月','').replace(' ','')
+    # print(dict1)
+    return dict1
+# r3_d()
+
+@retry(3)
+def r1(ny,dqy):
+    url = 'http://beijing.chinatax.gov.cn/bjsat/office/jsp/zdsswfaj/wwquery'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data1 = {
+        "orgCode":"11100000000",
+        "bz": "ndjd",
+        "ndjd": ny,
+    }
+    data = {
+        "id": '',
+        "dq": "",
+        "ajlx": "",
+        "ndjd": ny,
+        "bz": "ndjd",
+        "dqy": dqy,
+        "ymdx": "",
+        "nsrmc": "",
+        "nsrsbh": "",
+        "zcdz": "",
+        "zzjgdm": "",
+        "fddbrmc": "",
+        "fddbrsfzhm": "",
+        "cwfzrmc": "",
+        "cwfzrsfzhm": "",
+        "orgCode": "11100000000",
+    }
+    response = r.post(url=url,headers=headers,data=data,proxies=proxies)
+    html = response.text
+    rpg = re.findall(r'果(.*?)页',html)
+    tpg = 0
+    if rpg:
+        tpg = rpg[0].replace(' ','')
+    print(tpg)
+    selector = etree.HTML(html)
+    a = selector.xpath('/html/body/table/tbody/tr/td/table[2]/tbody//tr')
+    list1 = []
+    list2 = []
+    for i in a:
+        rst = i.xpath('td[5]/input/@onclick')
+        if rst:
+            codt = rst[0]
+            cd1 = re.findall(r"'(.*?)'",codt)[0]
+            print(cd1)
+            utf = r_myco15.sismember('n03', cd1)
+            if not utf:
+                rsd = r1_d(cd1, ny)
+                list1.append(rsd)
+                list2.append(cd1)
+            else:
+                print('已存在,>>>n03')
+                pass
+    if list1:
+        myco3.insert_many(list1)
+    if list2:
+        myco3_b.insert_many(list1)
+        for mis in list2:
+            r_myco15.sadd('n03', mis)
+    # myco3.insert_many(list1)
+    return int(tpg)
+
+@retry(3)
+def get_ny():
+    url ='http://beijing.chinatax.gov.cn/bjsat/office/jsp/zdsswfaj/ndjd.jsp?orgCode=11100000000'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url, headers=headers, proxies=proxies)
+    html = response.text
+    # print(html)
+    a = re.findall(r"cx\('2021年度(.*?)  月'\)",html)
+    list1 = []
+    for i in a:
+        # print(i)
+        ny = i.replace(' ','')
+        list1.append(ny)
+    eny = list1[-1]
+    print(eny,'>>>from n03_ah***')
+    return eny
+# get_ny()
+
+
+def runs(ny1,ny2):
+    if len(str(ny2)) == 1:
+        ny = str(ny1) + "年度" + str(ny2) + "  月"
+    else:
+        ny = str(ny1) + "年度" + str(ny2) + " 月"
+    for dqy in range(1,2):
+        print(ny1,ny2,dqy,'页=================')
+        tpg = r1(ny,dqy)
+        if tpg >1:
+            for i in range(2,tpg+1):
+                print(ny1,ny2,i,'页=================')
+                r1(ny, i)
+
+runs(2021,12)

+ 101 - 0
n04_sz.py

@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json,re
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco4,r_myco15,myco4_b
+r = requests.session()
+r.keep_alive = False
+import urllib3
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+from rety import retry
+
+@retry(3)
+def r1_d(url,dt):
+    # url = 'https://shenzhen.chinatax.gov.cn/mhsofpro/otherproject/wgtg/data.jsp?tags=ps_18756&fh=true'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,verify=False,proxies=proxies)
+    html = response.text
+    # print(html)
+    selector = etree.HTML(html)
+    a = selector.xpath('//tr')
+    dict1 = {}
+    for i in a:
+        # print(i.xpath('td[1]/text()'))
+        # print(i.xpath('td[2]/text()'))
+
+        try:
+            k1 = i.xpath('td[1]')
+            text = k1[0].xpath('string(.)').strip()
+            # text
+            v1 = i.xpath('td[2]')
+            text1 = v1[0].xpath('string(.)').strip()
+            v2 = text1.replace(' ', '').replace('\r', '').replace('\t', '').replace('\n', '')
+            dict1[text] = v2
+        except:
+            pass
+    dict1['url'] = url
+    dict1['date'] = dt
+    print(dict1)
+    return dict1
+
+
+# r1_d()
+@retry(3)
+def r1(ny,cpg,tpg,dt):
+    url = 'https://shenzhen.chinatax.gov.cn/mhsofpro/otherproject/page/page.jsp?type=w_date&msg={}'.format(ny)
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data = {
+        "curPage":cpg,
+        "totalPages": tpg,
+        "pageNum": "1",
+    }
+    response = r.post(url=url,headers=headers,verify=False,data=data,proxies=proxies)
+    html = response.text
+    selector = etree.HTML(html)
+    rpg = re.findall(r'\/(.*?)页',html)
+    tpg = rpg[1]
+    # print(rpg)
+    a = selector.xpath('//input[@id="button2"]/@onclick')
+    list1 = []
+    list2 = []
+    for i in a:
+        # print(i)
+        aa = re.findall("'(.*?)'",i)[0]
+        url1 = 'https://shenzhen.chinatax.gov.cn'+aa
+        print(url1,dt)
+        utf = r_myco15.sismember('n04', url1)
+        if not utf:
+            rsd = r1_d(url1, dt)
+            list1.append(rsd)
+            list2.append(url1)
+        else:
+            print('已存在,>>>n04')
+            pass
+    if list1:
+        myco4.insert_many(list1)
+    if list2:
+        myco4_b.insert_many(list1)
+        for mis in list2:
+            r_myco15.sadd('n04', mis)
+    # myco4.insert_many(list1)
+    return int(tpg)
+
+
+def runs(ny1='2021',ny2='9'):
+    ny=str(ny2)+'_'+ str(ny1)
+    # cpg='1'
+    dt=str(ny1)+'/'+str(ny2)+'/1'
+    tpg = r1(ny, '1', '1', dt)
+    if tpg >1:
+        for i in range(2,tpg+1):
+            print(i,'页============')
+            r1(ny,i,tpg,dt)
+
+runs()

+ 89 - 0
n05_sc.py

@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json,re
+from rety import retry
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco5
+r = requests.session()
+r.keep_alive = False
+import urllib3
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+@retry(3)
+def r1_d(url,dt):
+    # url = 'https://sichuan.chinatax.gov.cn/art/2021/3/23/art_15873_10537.html'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,verify=False,proxies=proxies)
+    response.encoding = 'UTF-8'
+    html= response.text
+    # print(html)
+    selector = etree.HTML(html)
+    a = selector.xpath('//tbody//tr')
+    dict1 = {}
+    try:
+        for i in a:
+            k1 = i.xpath('td[1]')
+            text = k1[0].xpath('string(.)').strip()
+            # print(text)
+            v1 = i.xpath('td[2]')
+            text1 = v1[0].xpath('string(.)').strip()
+            v2 = text1.replace(' ','').replace('\r','').replace('\t','').replace('\n','')
+            dict1[text] = v2
+    except:
+        pass
+    print(dict1)
+    dict1['url'] = url
+    dict1['date'] = dt
+    return dict1
+
+# r1_d()
+@retry(3)
+def r1(icid,cpg,dt):
+    url = 'https://sichuan.chinatax.gov.cn/module/search/index.jsp'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    params = {
+        "vc_name":"",
+        "field_439": "",
+        "field_440": "",
+        "field_441": "",
+        "field_442": "",
+        "field_443": "",
+        "strSelectID": "390,439,440,441,442,443",
+        "i_columnid": icid,
+        "field": "vc_name:1:0,field_439:1:0,field_440:1:0,field_441:1:0,field_442:1:0,field_443:1:0",
+        "currpage": cpg,
+    }
+    response = r.get(url=url,headers=headers,params=params,verify=False,proxies=proxies)
+    html = response.text
+    # print(html)
+    selector = etree.HTML(html)
+    a = selector.xpath('//tr/td[5]/a/@href')
+    list1 = []
+    for i in a:
+        # print(i)
+        url1 = i.replace('../..','https://sichuan.chinatax.gov.cn')
+        print(url1)
+        rsd = r1_d(url1,dt)
+        list1.append(rsd)
+    # print(list1)
+    myco5.insert_many(list1)
+        # 'https://sichuan.chinatax.gov.cn'
+
+
+# https://sichuan.chinatax.gov.cn/col/col15873/index.html
+##季度更新,对比上次页数,
+def runs():
+    icid='15873'  #季度id
+    tpg=9         #总共几页
+    dt='2023/10'   #时限
+    for i in range(1,tpg+1):
+        print(i,'==================')
+        r1(icid,i,dt)
+runs()

+ 84 - 0
n06_js.py

@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+from mongo_cho import myco6,r_myco15,myco6_b
+#https://jiangsu.chinatax.gov.cn/col/col16916/index.html
+
+@retry(3)
+def r1_d(url,dt):
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,proxies=proxies)
+    response.encoding = 'UTF-8'
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//body/table/tbody/tr')
+    dict1 = {}
+    for i in a:
+        k1 = i.xpath('td[1]')
+        text = k1[0].xpath('string(.)').strip()
+        # print(text)
+        v1 = i.xpath('td[2]')
+        text1 = v1[0].xpath('string(.)').strip()
+        text2 = text1.replace(' ', '').replace('\r', '').replace('\t', '').replace('\n', '')
+        dict1[text] = text2
+    dict1['url'] = url
+    dict1['date'] = dt
+    print(dict1)
+    return dict1
+# r1_d()
+
+@retry(3)
+def r1(ny1,ny2,dt):
+    url = 'https://jiangsu.chinatax.gov.cn/module/jslib/bulletin/lpajaxdata.jsp?startrecord=1&endrecord=36&perpage=11&rowpage=1'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data = {
+        "searhvalue":ny2,
+        "searchkey": "jd",
+        "year": ny1,
+    }
+    response = r.post(url=url,data=data,headers=headers,proxies=proxies)
+    html = response.text
+    print(html)
+    selector = etree.HTML(html)
+    a = selector.xpath('//a/@href')
+    # print(a)
+    list1 = []
+    list2 = []
+    for i in a:
+        print(i)
+        utf = r_myco15.sismember('n06', i)
+        if not utf:
+            rsd = r1_d(i, dt)
+            list1.append(rsd)
+            list2.append(i)
+        else:
+            print('已存在,>>>n06')
+            pass
+    if list1:
+        myco6.insert_many(list1)
+    if list2:
+        myco6_b.insert_many(list1)
+        for mis in list2:
+            r_myco15.sadd('n06', mis)
+    # myco6.insert_many(list1)
+
+# https://jiangsu.chinatax.gov.cn/col/col16916/index.html
+
+def runs():
+    ny1 = '2021'
+    ny2 = '8'
+    dt= str(ny1) + '/' + str(ny2)
+    r1(ny1,ny2,dt)
+
+runs()

+ 117 - 0
n07_zj.py

@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import re
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco7,r_myco15,myco7_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+def zh1(list1):
+    str1 = ''
+    for i in list1:
+        str1 += i.replace(' ','').replace('\r','').replace('\n','').replace('\t','')
+    return str1
+
+@retry(3)
+def r1_d(url,dt):
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,proxies=proxies)
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//tr[@class="rlbbox"]')
+    dict1 = {}
+    for i in a:
+        k1 = i.xpath('td[1]/div/text()')
+        # print(k1)
+        k2 = zh1(k1)
+        # print(k2)
+        v1 = i.xpath('td[2]/div/text()')
+        # print(v1)
+        v2 = zh1(v1)
+        # print(v2)
+        dict1[k2] = v2
+        # dict1 = {k2:v2}
+    # print(dict1)
+    dict1['url'] = url
+    dict1['date'] = dt
+    return dict1
+# r1_d()
+@retry(3)
+def r1(searhvalue,year,pg,dt):
+    # url = 'http://anhui.chinatax.gov.cn/module/jslib/bulletin/ajaxdata.jsp'
+    pg1 = str(pg *10 -9)
+    pg2 = str(pg *10)
+    url = 'http://zhejiang.chinatax.gov.cn/module/jslib/bulletin/ajaxdata.jsp?startrecord={pg1}&endrecord={pg2}'.format(pg1=pg1,pg2=pg2)
+    headers = {
+        "User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data = {
+        "searhvalue":parse.quote(searhvalue),
+        "searchkey": "jd1",
+        "year": parse.quote(year),
+    }
+    response = r.post(url=url,headers=headers,data=data,proxies=proxies)
+    html = response.text
+    # print(html)
+    aa = re.findall(r"http://zhejiang.chinatax.gov.cn/art/(.*?).html", html)
+    list1 = []
+    list2 = []
+    for i1 in aa:
+        # print(i1)
+        url1 = "http://zhejiang.chinatax.gov.cn/art/" + i1 + ".html"
+        print(url1)
+        utf = r_myco15.sismember('n07', url1)  ##更改
+        if not utf:
+            rsd = r1_d(url1, dt)
+            list1.append(rsd)
+            list2.append(url1)
+        else:
+            print('已存在,>>>n07')
+            pass
+    if list1:
+        myco7.insert_many(list1)
+    if list2:
+        myco7_b.insert_many(list1)
+        for mis in list2:
+            r_myco15.sadd('n07', mis)  ##更改
+    # print(list1)
+    # myco7.insert_many(list1)
+
+@retry(3)
+def get_pg(ny1,ny2):
+    url ='http://zhejiang.chinatax.gov.cn/module/jslib/bulletin/bullenright.jsp?searhvalue={ny2}%E6%9C%88&searchkey=jd1&year={ny1}%E5%B9%B4%E5%BA%A6'.format(ny1=ny1,ny2=ny2)
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url, headers=headers, proxies=proxies)
+    html = response.text
+    # print(html)
+    rpg = re.findall(r"var totalRecord = '(.*?)'",html)
+    if rpg:
+        tpg = rpg[0]
+        tpg1 = int(tpg)//10 + 2
+        # print(tpg1)
+        return tpg1
+
+# get_pg('2021','1')
+
+
+
+def runs(ny1,ny2):
+    searhvalue = str(ny2) + '月'
+    year =  str(ny1) + '年度'
+    pg = get_pg(ny1, ny2)
+    dt = str(ny1) + '/' + str(ny2)
+    # print(dt)
+    for i in range(1,pg):
+        print(i,'页===========')
+        r1(searhvalue,year,i,dt)
+
+runs(2023,11)

+ 78 - 0
n08_fj.py

@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json,re
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco8,myco8_b,r_myco15
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+
+@retry(3)
+def r1(ny1,ny2,pg):
+    url = 'http://fujian.chinatax.gov.cn/was5/web/search'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    params = {
+        "channelid":"291316",
+        "templet": "zdaj.jsp",
+        "sortfield": "-datefor",
+        "classsql": "datefor={ny1}\-{ny2}".format(ny1=ny1,ny2=ny2),
+        "r": "0.31052286956801844",
+        "prepage": "8",
+        "page": pg,
+    }
+    response = r.get(url=url,headers=headers,params=params,proxies=proxies)
+    html = response.text
+    tpg = re.findall('"pagenum":"(.*?)"',html)[0]
+    print(tpg,'===========')
+    # a = json.loads(html)
+    print(html)
+    html1 = html.replace(" ","").replace("\r","").replace("\n","").replace("\t","")
+    res1 = re.findall('"docs":\[(.*?)\]',html1)
+    res2 = res1[0]
+    res3 = re.findall('\{(.*?)\}',res2)
+    list1 = []
+    listurl = []
+    for i in res3:
+        i1 = "{" + i + "}"
+        i2 = json.loads(i1)
+        print(i2)
+        url1 = i2['url']
+        utf = r_myco15.sismember('n08', url1)  ##更改
+        if not utf:
+            listurl.append(url1)
+            list1.append(i2)
+        else:
+            print('已存在,>>>n08')
+            pass
+    list2 = list1[:-1]
+    # print(list2)
+    # if list2:
+    #     myco8.insert_many(list2)
+    #     if listurl:
+    #         myco8_b.insert_many(list2)
+    #         for mis in listurl:
+    #             r_myco15.sadd('n08', mis)  ##更改
+
+    return tpg
+
+
+
+def runs(ny1,ny2):
+    rpg = r1(ny1,ny2,pg=1)
+    # print(rpg)
+    # print(type(rpg))
+    if rpg == '0':
+        print('122')
+        return 'er1'
+    else:
+        for pg2 in range(2,int(rpg)+1):
+            r1(ny1, ny2, pg2)
+
+for pg2 in range(3,7):
+    r1('2023', '10', pg2)

+ 174 - 0
n09_sd.py

@@ -0,0 +1,174 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import re
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco9,r_myco15,myco9_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+import urllib3
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+@retry(3)
+def r2(ny,cid,dicts):
+    url = 'https://etax.shandong.chinatax.gov.cn/DZSWJ/DZSWJ_SSWFSXAJ_CX_NAVIGATE?method=queryMx&nsrmc=&nsrsbh={cid}&zcdz=&zzjgdm=&fddbrxm=&fddbrsfzhm=&cwfzrxm=&cwfzrsfzhm=&cxnd={ny}%D4%C2&cxdq=&ajxz='.format(cid=cid,ny=ny)
+    headers = {
+        "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0",
+    }
+    data = {
+        "s_nsrsbh":"",
+        "nsrmc": "",
+        "zcdz": "",
+        "zzjgdm": "",
+        "fddbrxm": "",
+        "fddbrsfzhm": "",
+        "cwfzrxm": "",
+        "cwfzrsfzhm": "",
+        "cxdq": "",
+        "ajxz": "",
+        "cxnd": "{}月".format(ny),
+    }
+    response = r.post(url=url,data=data,headers=headers,verify=False,proxies=proxies)
+    html = response.text
+    # print(html)
+    try:
+        dict1 = {}
+        NSRMC = re.findall(r'<NSRMC>(.*?)</NSRMC>',html)
+        dict1['纳税人名称'] = NSRMC[0]
+        NSRSBH = re.findall(r'<NSRSBH>(.*?)</NSRSBH>', html)
+        dict1['纳税人识别号或社会信用代码'] = NSRSBH[0]
+        ZZJGDM = re.findall(r'<ZZJGDM>(.*?)</ZZJGDM>', html)
+        dict1['组织机构代码'] = ZZJGDM[0]
+        ZCDZ = re.findall(r'<ZCDZ>(.*?)</ZCDZ>', html)
+        dict1['注册地址'] = ZCDZ[0]
+        FDDBRHFZRXM = re.findall(r'<FDDBRHFZRXM>(.*?)</FDDBRHFZRXM>', html)
+        dict1['法定代表人或者负责人姓名'] = FDDBRHFZRXM[0]
+        FDDBRHFZRXB = re.findall(r'<FDDBRHFZRXB>(.*?)</FDDBRHFZRXB>', html)
+        dict1['性别'] = FDDBRHFZRXB[0]
+        FDDBRHFZRZJHM = re.findall(r'<FDDBRHFZRZJHM>(.*?)</FDDBRHFZRZJHM>', html)
+        dict1['证件号码1'] = FDDBRHFZRZJHM[0]
+        FDRZJHM = re.findall(r'<FDRZJHM>(.*?)</FDRZJHM>', html)
+        dict1['证件号码2'] = FDRZJHM[0]
+        AJXZ = re.findall(r'<AJXZ>(.*?)</AJXZ>', html)
+        dict1['案件性质'] = AJXZ[0]
+        ZYWFSS = re.findall(r'<ZYWFSS>(.*?)</ZYWFSS>', html)
+        dict1['主要违法事实'] = ZYWFSS[0]
+        XGFLYJJSWCLCFQK = re.findall(r'<XGFLYJJSWCLCFQK>(.*?)</XGFLYJJSWCLCFQK>', html)
+        dict1['相关法律依据及税务处理处罚情况 '] = XGFLYJJSWCLCFQK[0]
+        dict1['date'] = ny[:4] +'/'+ ny[4:]
+        dict1['uid'] = cid
+        # print(dict1)
+        return dict1
+    except:
+        return dicts
+# r2()
+
+@retry(3)
+def r1(ny,pg):
+    url = 'https://etax.shandong.chinatax.gov.cn/DZSWJ/DZSWJ_SSWFSXAJ_CX_NAVIGATE?method=queryMxFh&nsrmc=&nsrsbh=&zcdz=&zzjgdm=&fddbrxm=&fddbrsfzhm=&cwfzrxm=&cwfzrsfzhm=&cxnd={ny}%D4%C2&cxdq=&ajxz=&page={pg}'.format(ny=ny,pg=pg)
+    # url = 'https://etax.shandong.chinatax.gov.cn/DZSWJ/DZSWJ_SSWFSXAJ_CX_NAVIGATE?method=queryBynd&nsrmc=&nsrsbh=&zcdz=&zzjgdm=&fddbrxm=&fddbrsfzhm=&cwfzrxm=&cwfzrsfzhm=&cxnd=201911%D4%C2'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0",
+    }
+    data = {
+        "s_nsrsbh":"",
+        "nsrmc": "",
+        "zcdz": "",
+        "zzjgdm": "",
+        "fddbrxm": "",
+        "fddbrsfzhm": "",
+        "cwfzrxm": "",
+        "cwfzrsfzhm": "",
+        "cxdq": "",
+        "ajxz": "",
+        "cxnd": "20201年度",
+    }
+    params = {
+        "method":"queryMxFh",
+        "nsrmc": "",
+        "nsrsbh": "",
+        "zcdz": "",
+        "zzjgdm": "",
+        "fddbrxm": "",
+        "fddbrsfzhm": "",
+        "cwfzrxm": "",
+        "cwfzrsfzhm": "",
+        "cxnd": "20195%D4%C2",
+        "cxdq": "",
+        "ajxz": "",
+        "page": "2",
+    }
+    response = r.get(url=url,headers=headers,verify=False,proxies=proxies)
+    html = response.text
+    # print(html)
+    selector = etree.HTML(html)
+    b = selector.xpath('//tr')
+    try:
+        num = 0
+        list1 = []
+        list2 = []
+        for i in b:
+            num += 1
+            if num > 1:
+                dict1 = {}
+                td2 = i.xpath('td[2]/text()')
+                td22 = td2[0].replace(' ', '').replace('\r', '').replace('\t', '').replace('\n', '').replace('\xa0', '')
+                dict1['纳税人名称'] = td22
+                td3 = i.xpath('td[3]/text()')
+                td33 = td3[0].replace(' ', '').replace('\r', '').replace('\t', '').replace('\n', '').replace('\xa0', '')
+                dict1['纳税人识别号或社会信用代码'] = td33
+                td4 = i.xpath('td[4]/text()')
+                td44 = td4[0].replace(' ', '').replace('\r', '').replace('\t', '').replace('\n', '').replace('\xa0', '')
+                dict1['案件性质'] = td44
+                td5 = i.xpath('td[5]/input[@id="xxxx"]/@onclick')
+                td55 = td5[0].replace(' ', '').replace('\r', '').replace('\t', '').replace('\n', '').replace('\xa0', '')
+                cid = re.findall(r"\('(.*?)'\)",td55)[0]
+                dict1['组织机构代码'] = ''
+                dict1['注册地址'] = ''
+                dict1['法定代表人或者负责人姓名'] = ''
+                dict1['性别'] = ''
+                dict1['证件号码1'] = ''
+                dict1['证件号码2'] = ''
+                dict1['主要违法事实'] = ''
+                dict1['相关法律依据及税务处理处罚情况 '] = ''
+                dict1['date'] = ny[:4] + '/' + ny[4:]
+                dict1['uid'] = cid
+                utf = r_myco15.sismember('n09', cid)  ##更改
+                if not utf:
+                    rsd = r2(ny, cid, dict1)
+                    print(rsd)
+                    list1.append(rsd)
+                    list2.append(cid)
+                else:
+                    print('已存在,>>>n09')
+                    pass
+
+        if list1:
+            myco9.insert_many(list1)
+        if list2:
+            myco9_b.insert_many(list1)
+            for mis in list2:
+                r_myco15.sadd('n09', mis)  ##更改
+        return '1'
+    except:
+        return '2'
+    # myco9.insert_many(list1)
+            # break
+            # print(dict1)
+            # print(cid)
+
+def runs(ny1,ny2):
+    ny = str(ny1) + str(ny2)
+    tpg = 100
+    for pg in range(1,tpg):
+        print(pg,'===================')
+        btf = r1(ny,pg)
+        if btf == "2":
+            break
+
+
+runs('2023','11')

+ 125 - 0
n10_gd.py

@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import re
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco10,r_myco15,myco10_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1_d(cid,dt):
+    url = 'http://guangdong.chinatax.gov.cn/siteapps/webpage/gdtax/zdsswfaj/service.jsp'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data = {
+        "manuscriptId": cid,
+    }
+    response = r.post(url=url, headers=headers, data=data,proxies=proxies)
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//tr')
+    dict1 = {}
+    for i in a:
+        k1 = i.xpath('td[1]/text()')
+        v1 = i.xpath('td[2]/text()')
+
+        if k1:
+            k2 = k1[0].replace(' ','').replace('\r','').replace('\t','').replace('\n','')
+            # print(k2)
+            if v1:
+                v2 = v1[0].replace(' ', '').replace('\r', '').replace('\t', '').replace('\n', '')
+                # print(v2)
+            else:
+                v2 = ''
+            if k2:
+                dict1[k2] = v2
+    dict1["uid"] = cid
+    dict1['date'] = dt
+    print(dict1)
+    return dict1
+
+# r1_d('42da48b512b046d488189ce36a833fa8','9')
+
+@retry(3)
+def r1(ny1,ny2,pg):
+    url = 'http://guangdong.chinatax.gov.cn/siteapps/webpage/gdtax/zdsswfaj/query.jsp'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data = {
+        "yf": "{ny1}_{ny2}".format(ny1=ny1,ny2=ny2),
+        "pageSize":"20",
+        "pageNo": pg,
+        "channelId": "",
+        "taxNature": "",
+        "quarter": "",
+        "nsr_mc": "",
+        "nsr_sbh": "",
+        "fddbr_xm": "",
+        "zcdz": "",
+        "zzjgdm": "",
+        "fddbrzjhm": "",
+        "cwfzrxm": "",
+        "cwfzrzjhm": "",
+    }
+    response = r.post(url=url,headers=headers,data=data,proxies=proxies)
+    html = response.text
+    # print(html)
+    selector = etree.HTML(html)
+    rpg = re.findall('共(.*?)页',html)[0].replace(' ','')
+    # a1 = selector.xpath('//*[@id="zdss_tb"]/tbody/tr[2]/td[5]/text()')
+    # print(a1)
+    # for i in a:
+    #     print(i.xpath('a/@onclick'))
+    a = selector.xpath('//a/@onclick')
+    list1 = []
+    list2 = []
+    for i in a:
+        i1 = re.findall("'(.*?)'",i)
+        if i1:
+            cid = i1[0]
+
+            dt = str(ny1) + '/' + str(ny2+1)
+            if cid == '#pageIndex':
+                pass
+            else:
+                utf = r_myco15.sismember('n10', cid)  ##更改
+                if not utf:
+                    print(cid)
+                    rsd = r1_d(cid, dt)
+                    list1.append(rsd)
+                    list2.append(cid)
+                else:
+                    print('已存在,>>>n10')
+                    pass
+    # if list1:
+    if list1:
+        myco10.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco10_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n10', mis)  ##更改
+    #     myco10.insert_many(list1)
+    return rpg
+
+def runs(ny1,ny2):
+    # for ny1 in range(2021,2022):
+    #     for ny2 in range(0,4):
+    #         print(ny1,ny2,'===========')
+    rpg = r1(ny1,ny2,pg=1)
+    print(ny1,ny2)
+    if int(rpg) > 1:
+        for pg in range(2,int(rpg)+1):
+            print(pg,'==============')
+            r1(ny1,ny2,pg)
+
+
+runs(2023,11)

+ 90 - 0
n11_gx.py

@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json,re
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco11,r_myco15,myco11_b
+r = requests.session()
+r.keep_alive = False
+from rety import retry
+import urllib3
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+@retry(3)
+def r1(pg):
+    url = 'http://guangxi.chinatax.gov.cn/restSearch'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    params = {
+        "channelid":"290909",
+        "searchword": "",
+        "orderby": "RELEVANCE",
+        "page": pg,
+        "pageSize": "10",
+    }
+    response = r.get(url=url,headers=headers,params=params,proxies=proxies)
+    # print(response.text)
+    rsd = response.json()
+    print(rsd['pager'])
+    rsl = rsd['datas']
+    # myco11.insert_many(rsl)
+    # for i in rsl:
+    #     print(i)
+
+# for pg in range(109,310):
+#     print(pg,'============')
+#     r1(pg)
+
+def r2(ny1,ny2,pg):
+    url = 'http://guangxi.chinatax.gov.cn/restSearch'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    params = {
+        "channelid": "290909",
+        "searchword": "(NF={ny1} and YF={ny2}月)".format(ny1=ny1,ny2=ny2),
+        "orderby": "RELEVANCE",
+        "page": pg,
+        "pageSize": "10",
+    }
+    response = r.get(url=url,headers=headers,params=params,proxies=proxies)
+    # print(response.text)
+    rsd = response.json()
+    rpg = rsd['pager']['pageCount']
+    rsl = rsd['datas']
+    list1 = []
+    list2 = []
+    for i in rsl:
+        url1 = i['DOCPUBURL']
+        utf = r_myco15.sismember('n11', url1)  ##更改
+        if not utf:
+            list2.append(url1)
+            list1.append(rsd)
+        else:
+            print('已存在,>>>n11')
+    if list1:
+        myco11.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco11_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n11', mis)  ##更改
+    # print(response.text)
+    return int(rpg)
+
+# r2(2)
+
+def runs(ny1, ny2):
+    rpg = r2(ny1, ny2, pg=1)
+    if rpg > 1:
+        for pg in range(2,rpg+1):
+            print(pg,'==========')
+            r2(ny1, ny2, pg)
+
+ny1 = 2023
+ny2 = 11
+runs(ny1, ny2)

+ 77 - 0
n12_hb.py

@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco12,myco12_b,r_myco15
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+#http://wzyy.hebei.chinatax.gov.cn/LawPublicity/law/adPenalty/taxDishonestyCases
+@retry(3)
+def r1(ny1,ny2,pg):
+    url = 'http://wzyy.hebei.chinatax.gov.cn/LawPublicity/app-publicity-service/law/penalty/findTaxDishonestyCasesList?cid=27&uid='
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data = {
+        "column":"adPenalty/taxDishonestyCases",
+        "queryCity": "",
+        "queryType": "",
+        "queryDate": str(ny1) + '.' + str(ny2),
+        "NSRMC": "",
+        "NSRSBH": "",
+        "ZCDZ": "",
+        "ZZJGDM": "",
+        "FDDBR": "",
+        "FDDBZJH": "",
+        "CWFZR": "",
+        "CWFZRZJH": "",
+        "pageSize": "10",
+        "pageNum": pg,
+        "orderByColumn": "",
+        "isAsc": "asc",
+    }
+    response = r.post(url=url,headers=headers,data=data,proxies=proxies)
+    # print(response.text)
+    rsd = response.json()
+    # print(rsd)
+    rsl = rsd['rows']
+    # print(rsl)
+    list1 = []
+    list2 = []
+    if rsl:
+        for i in rsl:
+            uid = i['uid']
+            utf = r_myco15.sismember('n12', uid)  ##更改
+            if not utf:
+                list2.append(uid)
+                list1.append(i)
+            else:
+                print('已存在,>>>n12')
+    if list1:
+        myco12.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco12_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n12', mis)  ##更改
+    #     myco12.insert_many(rsl)
+    # for i in rsl:
+    #     print(i)
+    return rsd['total']
+
+# r1()
+
+def runs(ny1,ny2):
+    tpg = r1(ny1,ny2,pg=1)
+    for pg in range(2,tpg):
+        print(pg,'===')
+        r1(ny1,ny2,pg)
+
+runs('2023','8')
+

+ 128 - 0
n13_ln.py

@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+# coding:utf-8
+import re
+import time
+
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco13,r_myco15,myco13_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1_d(url,dt):
+    # url = 'http://liaoning.chinatax.gov.cn/art/2020/12/8/art_5883_1808.html'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,proxies=proxies)
+    response.encoding = 'UTF-8'
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//table[@class="contentTable"]//tr')
+    dict1 = {}
+    for i in a:
+
+        k1 = i.xpath('td[1]')
+        if k1:
+            k2 = k1[0].xpath('string(.)').strip()
+
+            k3 = k2.replace('\r','').replace('\t','').replace('\n','').replace(' ','')
+            # print(k2)
+            v1 = i.xpath('td[2]')
+            if v1:
+                v2 = v1[0].xpath('string(.)').strip()
+                v3 = v2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+                # print(v3)
+            else:
+                v3 = ''
+            if k3:
+                dict1[k3] = v3
+    dict1['url'] = url
+    dict1['date'] = dt
+    # print(dict1)
+    return dict1
+# r1_d()
+
+@retry(3)
+def r1(ny,pg,dt):
+    url = 'http://liaoning.chinatax.gov.cn/module/search/index.jsp'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    params= {
+        "a":"",
+        "b": "",
+        "d": "",
+        "c": "",
+        "e": "",
+        "h": "",
+        "k": "",
+        "n": "",
+        "t": "",
+        "x_large": "",
+        "x_small": "",
+        "y_large": "",
+        "y_small": "",
+        "z_large": ny,
+        "z_small": ny,
+        "strSelectID": "1754,1755,1756,1757,1758,1761,1764,1767,1777,1778,1779",
+        "i_columnid": "5883",
+        "field": "a:1:0,b:1:1,c:1:1,d:1:0,e:1:1,h:1:0,k:1:1,n:1:0,t:1:0,x:0:1,y:0:1,z:0:1",
+        "initKind": "FieldForm",
+        "type": "0,1,1,0,1,0,1,0,0,1,1,1",
+        "currpage":pg,
+        "currentplace": "",
+        "splitflag": "",
+        "fullpath": "0",
+    }
+    response = r.get(url=url,headers=headers,params=params,proxies=proxies)
+    html = response.text
+    # print(html)
+    rpg = re.findall(r'共&nbsp;(.*?)&nbsp',html)[0]
+    # print(rpg)
+    # time.sleep(9)
+    selector = etree.HTML(html)
+    rsl = selector.xpath('//a[@class="xxxx"]/@href')
+    list1 = []
+    list2 = []
+    for i in rsl:
+        # print(i)
+        url1 = 'http://liaoning.chinatax.gov.cn' + i.replace('../..','')
+        print(url1)
+        utf = r_myco15.sismember('n13', url1)  ##更改
+        if not utf:
+            rsd = r1_d(url1, dt)
+            print(rsd)
+            list1.append(rsd)
+            list2.append(url1)
+        else:
+            print('已存在,>>>n13')
+    if list1:
+        myco13.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco13_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n13', mis)  ##更改
+    return int(rpg)
+
+
+# http://liaoning.chinatax.gov.cn/col/col5883/index.html
+
+def runs(ny1,ny2):
+    if len(str(ny2)) == 1:
+        ny = str(ny1) + '0' + str(ny2)
+    else:
+        ny = str(ny1) + str(ny2)
+    pg = 1
+    dt = str(ny1) + '/' + str(ny2)
+    tpg = r1(ny,pg,dt)
+    for pg in range(1,tpg+1):
+        print(pg,'=====')
+        r1(ny,pg,dt)
+runs(2023,4)

+ 100 - 0
n14_hlj.py

@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import re
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco14,myco14_b,r_myco15
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1_d(url,dt):
+    # url = 'http://heilongjiang.chinatax.gov.cn/art/2021/4/10/art_6410_962.html'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,proxies=proxies)
+    response.encoding = 'UTF-8'
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//tr')
+    dict1 = {}
+    for i in a:
+
+        k1 = i.xpath('td[1]')
+        if k1:
+            k2 = k1[0].xpath('string(.)').strip()
+
+            k3 = k2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+            # print(k2)
+            v1 = i.xpath('td[2]')
+            if v1:
+                v2 = v1[0].xpath('string(.)').strip()
+                v3 = v2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+                # print(v3)
+            else:
+                v3 = ''
+            if k3:
+                dict1[k3] = v3
+    # print(dict1)
+    dict1['url'] = url
+    dict1['date'] = dt
+    # print(dict1)
+    return dict1
+# r1_d()
+
+@retry(3)
+def r1(ny1,ny2):
+    # url = 'http://heilongjiang.chinatax.gov.cn/module/jslib/bulletin/ajaxdata.jsp?startrecord=1&endrecord=2&perpage=11'
+    url = 'http://heilongjiang.chinatax.gov.cn/module/jslib/bulletin/ajaxdata.jsp'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data = {
+        "searhvalue":ny2,
+        "searchkey": "jd",
+        "year": ny1,
+    }
+    response = r.post(url=url,headers=headers,data=data,proxies=proxies)
+    html = response.text
+    hfs = re.findall("href='(.*?)'",html)
+    if ny1 == 1:
+        y1 = '2019'
+    elif ny1 == 2:
+        y1 = '2020'
+    elif ny1 == 3:
+        y1 = '2021'
+    dt = y1 + '/' +str(ny2+1)
+    list1 = []
+    list2 = []
+    for url1 in hfs:
+        print(url1)
+        utf = r_myco15.sismember('n14', url1)  ##更改
+        if not utf:
+            rsd = r1_d(url1, dt)
+            list1.append(rsd)
+            list2.append(url1)
+        else:
+            print('已存在,>>>n14')
+    if list1:
+        myco14.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco14_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n14', mis)  ##更改
+    # if list1:
+    #     myco14.insert_many(list1)
+
+def runs(ny2):
+    ny1 = 3
+    ny3 = int(ny2) - 1
+    print('2023',ny2,'=========')
+    r1(ny1, ny2)
+
+runs(10)

+ 126 - 0
n15_jl.py

@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import re
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco15,r_myco15,myco15_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1_d(url,dt):
+    # url = 'http://jilin.chinatax.gov.cn/art/2021/3/3/art_19972_7390.html'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,proxies=proxies)
+    response.encoding = 'UTF-8'
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//table[@class="zdwf"]//tr')
+    dict1 = {}
+    for i in a:
+
+        k1 = i.xpath('td[1]/text()')[0]
+        v1 = i.xpath('td[2]/text()')
+        # print(k1,v1)
+        v3 = ''
+        for v2 in v1:
+            v3 += v2
+        k2 = k1.replace(' ', '').replace('\r', '').replace('\n', '').replace('\t', '')
+        v4 = v3.replace(' ','').replace('\r','').replace('\n','').replace('\t','')
+        if k2:
+            dict1[k2] = v4
+    dict1['url'] = url
+    dict1['date'] = dt
+    # print(dict1)
+    return dict1
+# r1_d('1','2')
+
+@retry(3)
+def r1(ny, dt, pg):
+    url = 'http://jilin.chinatax.gov.cn/module/search/index.jsp'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    params = {
+        "field_1136_large":"",
+        "field_1136_small": "",
+        "field_1137_large": "",
+        "field_1137_small": "",
+        "field_1138_large": ny,
+        "field_1138_small": ny,
+        "field_1113": "",
+        "field_1114": "",
+        "field_1115": "",
+        "field_1116": "",
+        "field_1117": "",
+        "field_1120": "",
+        "field_1123": "",
+        "field_1126": "",
+        "strSelectID": "1113,1114,1115,1116,1117,1120,1123,1126,1136,1137,1138",
+        "i_columnid": "19972",
+        "field": "field_1113:1:1,field_1114:1:1,field_1115:1:1,field_1116:1:1,field_1117:1:1,field_1120:1:1,field_1123:1:1,field_1126:1:1,field_1136:0:1,field_1137:0:1,field_1138:0:1",
+        "initKind": "FieldForm",
+        "type": "1,1,1,1,1,1,1,1,1,1,1",
+        "currentplace": "",
+        "currpage": pg,
+        "splitflag": "",
+        "fullpath": "0",
+    }
+    response = r.get(url=url,headers=headers,params=params,proxies=proxies)
+    # print(response.text)
+    html = response.text
+    tpg = re.findall('共(.*?)页',html)[0].replace('&nbsp;','')
+    selector = etree.HTML(html)
+    rsl = selector.xpath('//a[@class="xxxx"]/@href')
+    list1 = []
+    list2 = []
+    for i in rsl:
+        # print(i)
+        url1 = 'http://jilin.chinatax.gov.cn/' + i.replace('../..', '')
+        # print(url1)
+        utf = r_myco15.sismember('n15', url1)  ##更改
+        if not utf:
+            rsd = r1_d(url1, dt)
+            print(rsd)
+            list2.append(url1)
+            list1.append(rsd)
+        else:
+            print('已存在,>>>n15')
+    if list1:
+        myco15.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco15_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n15', mis)  ##更改
+    # if list1:
+    #     myco15.insert_many(list1)
+    return tpg
+
+# ny= '202001'
+# pg = '1'
+# dt = '0'
+def runs(ny1,ny2):
+    # for ny1 in range(2019,2021): #2021
+    #     for ny2 in range(1,13): # 1  4
+    if len(str(ny2)) ==1:
+        ny2 = '0' +str(ny2)
+    ny = str(ny1) + str(ny2)
+    dt = str(ny1) + '/' +str(ny2)
+    print(ny,dt,'======')
+    rpg = r1(ny, dt, pg=1)
+    if int(rpg) >1:
+        for pg1 in range(2,int(rpg)+1):
+            r1(ny, dt, pg1)
+
+
+ny1 = '2023'
+ny2 = '10'
+runs(ny1,ny2)

+ 115 - 0
n16_gs.py

@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco16,r_myco15,myco16_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1_d(url,dt):
+    # url = 'http://gansu.chinatax.gov.cn/art/2020/3/10/art_8350_65.html'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,proxies=proxies)
+    response.encoding = 'UTF-8'
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//table[@class="zdsc_con"]//tr')
+    dict1 = {}
+    for i in a:
+
+        k1 = i.xpath('td[1]')
+        if k1:
+            k2 = k1[0].xpath('string(.)').strip()
+
+            k3 = k2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+            # print(k2)
+            v1 = i.xpath('td[2]')
+            if v1:
+                v2 = v1[0].xpath('string(.)').strip()
+                v3 = v2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+                # print(v3)
+            else:
+                v3 = ''
+            dict1[k3] = v3
+    dict1['url'] = url
+    dict1['date'] = dt
+    # print(dict1)
+    return dict1
+
+# r1_d()
+
+@retry(3)
+def r1(f86,f85):
+    url = 'http://gansu.chinatax.gov.cn/module/search/index.jsp'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    params = {
+        "field_849":"",
+        "field_850": "",
+        "field_857": "",
+        "field_868": "",
+        "field_867": "",
+        "field_860": "",
+        "field_851": "",
+        "field_852": "",
+        "field_855": f85,
+        "field_866": "",
+        "field_856": "",
+        "field_865": f86,
+        "strSelectID": "849,850,868,857,867,860,851,852,855,866,865,856",
+        "i_columnid": "8350",
+        "field": "field_849:1:1,field_850:1:1,field_851:1:1,field_852:1:1,field_857:1:1,field_860:1:1,field_867:1:1,field_868:1:1,field_855:1:1,field_866:1:1,field_865:1:1,field_856:1:1",
+        "initKind": "FieldForm",
+        "type": "1,1,1,1,1,1,1,1,1,1,1,1",
+        "currentplace": "",
+        "splitflag": "",
+        "fullpath": "0",
+        "currpage":"1",
+    }
+    response = r.get(url=url,headers=headers,params=params,proxies=proxies)
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//a/@href')
+    list1 = []
+    list2 = []
+    for i in a:
+        if "art" in i:
+            # print(i)
+            url1 = 'http://gansu.chinatax.gov.cn' + i.replace('../..','')
+            # print(url1)
+            dt = str(f86) + '/' + str(f85)
+            utf = r_myco15.sismember('n16', url1)  ##更改
+            if not utf:
+                rsd = r1_d(url1, dt)
+                print(rsd)
+                list1.append(rsd)
+                list2.append(url1)
+            else:
+                print('已存在,>>>n16')
+    if list1:
+        myco16.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco16_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n16', mis)  ##更改
+    # if list1:
+        # print(list1)
+        # myco16.insert_many(list1)
+
+
+def runs():
+    ny1 = '2023' #年度
+    ny2 = '1'  #季度
+    r1(ny1,ny2)
+
+runs()

+ 110 - 0
n17_qh.py

@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco17,r_myco15,myco17_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+# http://qinghai.chinatax.gov.cn/web/zdsswfsxaj/zdaj.shtml
+
+@retry(3)
+def r1_d(url):
+    # url = 'http://qinghai.chinatax.gov.cn/web/2020nd/202007/e4856c576fa04e059eff6762dc47bf0c.shtml'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url, headers=headers, proxies=proxies)
+    # html = response.text
+    # print(html)
+    html = response.text
+    selector = etree.HTML(html)
+    dt = selector.xpath('//*[@id="page-newContent"]/div[2]/div/div[1]/div/div[1]/div/span[1]/text()')
+    dt1 = dt[0].replace('发布时间:','').replace('\r','').replace('\n','').replace(' ','').replace('-','/')
+    dt2 = dt1[:-5]
+    print(dt2)
+    a = selector.xpath('//tr')
+    dict1 = {}
+    for i in a:
+
+        k1 = i.xpath('td[1]')
+        if k1:
+            k2 = k1[0].xpath('string(.)').strip()
+
+            k3 = k2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+            # print(k2)
+            v1 = i.xpath('td[2]')
+            if v1:
+                v2 = v1[0].xpath('string(.)').strip()
+                v3 = v2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+                # print(v3)
+            else:
+                v3 = ''
+            dict1[k3] = v3
+    dict1['url'] = url
+    # dt=''
+    dict1['date'] = dt2
+    print(dict1)
+    return dict1
+
+# r1_d(url)
+
+
+@retry(3)
+def r1(ny,pg):
+    if pg ==1:
+        url = 'http://qinghai.chinatax.gov.cn/web/{}nd/iframe.shtml'.format(ny)
+    else:
+        url = 'http://qinghai.chinatax.gov.cn/web/{ny}nd/iframe_{pg}.shtml'.format(ny=ny,pg=pg)
+    # url = 'http://qinghai.chinatax.gov.cn/web/2021nd/iframe.shtml'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url, headers=headers, proxies=proxies)
+    html = response.text
+    # print(html)
+    if "404 Not Found" in html:
+        print('zz')
+        return 'zz'
+    selector = etree.HTML(html)
+    a = selector.xpath('//a/@href')
+    list1 = []
+    list2 = []
+    for i in a:
+        print(i)
+        url1 = 'http://qinghai.chinatax.gov.cn' + i
+        utf = r_myco15.sismember('n17', url1)  ##更改
+        if not utf:
+            rsd = r1_d(url1)
+            list1.append(rsd)
+            list2.append(url1)
+        else:
+            print('已存在,>>>n17')
+    if list1:
+        myco17.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco17_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n17', mis)  ##更改
+    # if list1:
+    #     myco17.insert_many(list1)
+        # print('1')
+
+# r1(pg=1)
+def runs(ny):
+    for pg in range(1,100):
+        print(pg, '===========')
+        tf = r1(ny,pg)
+        if tf == "zz":
+            break
+
+runs(2021)
+for pg in range(54,55):
+    print(pg,'===========')
+    r1(2021,pg)

+ 112 - 0
n18_hn.py

@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco18,myco18_b,r_myco15
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1_d(url,dt):
+    # url = 'https://henan.chinatax.gov.cn/henanchinatax/xxgk/zdsswfsxaj/2021060109153715435/index.html'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,proxies=proxies)
+    response.encoding = 'UTF-8'
+    html = response.text
+    seletor = etree.HTML(html)
+    a = seletor.xpath('//table[@class="zhongdatable"]//th')
+    list1 = []
+    for i in a:
+        text = i.xpath('string(.)').strip()
+        t1 = text.replace(' ','').replace('\r','').replace('\t','').replace('\n','')
+        list1.append(t1)
+    b = seletor.xpath('//table[@class="zhongdatable"]//td')
+    list2 = []
+    for i in b:
+        text = i.xpath('string(.)').strip()
+        t2 = text.replace(' ','').replace('\r','').replace('\t','').replace('\n','')
+        list2.append(t2)
+    dict1 = {}
+    # print(list1)
+    # print(list2)
+    for i in range(len(a)):
+        # print(i)
+        k1 = list1[i]
+        v1 = list2[i]
+        # print(k1,v1)
+        dict1[k1] = v1
+    # print(dict1)
+    dict1['url'] = url
+    dict1['date'] = dt
+    # print(dict1)
+    return dict1
+    # print(len(b))
+    # for i1 in b:
+    #     print(i1.replace(' ','').replace('\r','').replace('\t','').replace('\n',''))
+    # print(response.text)
+# r1_d()
+
+@retry(3)
+def r1(pg,dt):
+    url = 'https://henan.chinatax.gov.cn/eportal/ui?pageId=bdfef9dfa679454c86d68f2203a69e84&currentPage={}&moduleId=143e1aeaa3b6405ea0fe04142c021d5b&staticRequest=yes'.format(pg)
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    # data = {
+    #     "filter_LIKE_EXT_STR15":dt
+    # }
+    data = {
+        "filter_LIKE_EXT_STR6":"",
+        "filter_LIKE_main.TITLE": "",
+        "filter_LIKE_EXT_STR2": "",
+        "filter_LIKE_EXT_STR4": "",
+        "filter_LIKE_EXT_STR3": "",
+        "filter_LIKE_EXT_STR8": "",
+        "filter_LIKE_EXT_STR19": "",
+        "filter_LIKE_EXT_STR10": "",
+        "filter_LIKE_EXT_STR23": "",
+    }
+    response = r.post(url=url,headers=headers,data=data,proxies=proxies)
+    html = response.text
+    # print(html)
+    seletor = etree.HTML(html)
+    a = seletor.xpath('//a[@istitle="true"]')
+    list1 = []
+    list2 = []
+    print(a)
+    for i in a:
+        # print(i.xpath('@title'))
+        url1 = 'https://henan.chinatax.gov.cn' + i.xpath('@href')[0]
+        print(url1)
+        utf = r_myco15.sismember('n18', url1)  ##更改
+        if not utf:
+            rsd = r1_d(url1, dt)
+            print(rsd)
+            list1.append(rsd)
+            list2.append(url1)
+        else:
+            print('已存在,>>>n18')
+    if list1:
+        myco18.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco18_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n18', mis)  ##更改
+    # if list1:
+    #     myco18.insert_many(list1)
+
+
+def runs():
+    dt = '2023'
+    for pg in range(1,2):
+        print(pg,'========')
+        r1(pg,dt)
+runs()

+ 76 - 0
n19_hb.py

@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco19,r_myco15,myco19_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1(ny,pg):
+    url = 'https://etax.hubei.chinatax.gov.cn/webroot/gzcxAction.do'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    params = {
+        "method":"zdsswfajcx",
+        "page": pg,
+        "limit": "15",
+        "nsrsbh": "",
+        "zzjgdm": "",
+        "nsrmc": "",
+        "fddbrmc": "",
+        "fddbrzjh": "",
+        "cwfzrmc": "",
+        "cwfzrzjh": "",
+        "nsrlx": "",
+        "ds": "",
+        "zcdz": "",
+        "ajxz": "",
+        "ssnd": parse.quote(ny),
+    }
+    response = r.get(url=url,headers=headers,params=params,proxies=proxies)
+    # print(response.text)
+    rsd = response.json()
+    cot = rsd['count']
+    rpg = cot//15 + 2
+
+    rsl = rsd['data']
+    # if rsl:
+    #     myco19.insert_many(rsl)
+    list1 = []
+    list2 = []
+    for i in rsl:
+        url1 = i['LSH']
+        utf = r_myco15.sismember('n19', url1)  ##更改
+        if not utf:
+            list1.append(i)
+            list2.append(url1)
+        else:
+            print('已存在,>>>n01')
+    if list1:
+        myco19.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco19_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n19', mis)  ##更改
+    # print(rpg)
+    return rpg
+
+def runs(ny1,ny2):
+    # for ny1 in range(2020,2021):
+    #     for ny2 in range(1,13):
+    ny = str(ny1)+'年'+str(ny2)+'月'
+    rpg = r1(ny,pg=1)
+    print(ny,'======')
+    if rpg >1:
+        for pg in range(2,rpg):
+            print(pg,'==')
+            rpg = r1(ny, pg)
+runs('2023','11')

+ 79 - 0
n20_hn.py

@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco20,r_myco15,myco20_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1(ny1,ny2,pg):
+    url = 'http://hunan.chinatax.gov.cn/hardcasegetdatanew'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    type_value = str(ny1) + '0' + str(ny2)
+    dt = str(ny1)  + '/0' + str(ny2)
+    data = {
+        "type":"3",
+        "type_value": type_value,
+        "case_type": "1",
+        "page": pg,
+        "limit": "10",
+        "is_search": "0",
+        "taxpayerName": "",
+        "taxpayerNumber": "",
+        "organizationalCode": "",
+        "place": "",
+        "legalName": "",
+        "legalIdCard": "",
+        "financeName": "",
+        "financeIdCard": "",
+        "personName": "",
+        "personIdCard": "",
+        "_csrf": "fe7aeeb7-63a9-4770-9f35-84869a82d042",
+    }
+    response = r.post(url=url,headers=headers,data=data,proxies=proxies)
+    # print(response.text)
+    rsd = response.json()
+    rsl = rsd['data']
+    rpg = rsd['hardCasePage']['totalPages']
+    # print(rpg)
+    list1 = []
+    list2 = []
+    if rsl:
+        for i in rsl:
+            i['date'] = dt
+            print(i)
+            url1 = i['id']
+            utf = r_myco15.sismember('n20', url1)  ##更改
+            if not utf:
+                list1.append(i)
+                list2.append(url1)
+            else:
+                print('已存在,>>>n20')
+    if list1:
+        myco20.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco20_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n20', mis)  ##更改
+    # if list1:
+    #     myco20.insert_many(list1)
+    return int(rpg)
+
+
+def runs(ny1,ny2):
+    rpg = r1(ny1,ny2,pg=1)
+    if rpg>1:
+        for pg in range(2,rpg+1):
+            print(pg,'====')
+            r1(ny1,ny2,pg)
+
+runs('2022','11')

+ 95 - 0
n21_jx.py

@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import re,time
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco21,myco21_b,r_myco15
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r2(dt,uid):
+    url = 'http://jiangxi.chinatax.gov.cn/taxmap/front/getdetail.do'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0",
+    }
+    params = {
+        "iid":uid
+    }
+    response = r.get(url=url,params=params,headers=headers,proxies=proxies)
+    response.encoding = 'UTF-8'
+    html = response.text
+    # print(html)
+    selector = etree.HTML(html)
+    a = selector.xpath('//table[@class="xxTable"]//tr')
+    dict1 = {}
+    for i in a:
+        k1 = i.xpath('th/text()')
+        if k1:
+            str1 = ''
+            for kk1 in k1:
+                str1 += kk1
+            v1 =i.xpath('td/text()')
+            str2 = ''
+            if v1:
+                for vv1 in v1:
+                    str2 += vv1
+            dict1[str1] = str2
+    dict1['date'] = dt
+    dict1['uid'] = uid
+    # print(dict1)
+    return dict1
+
+# r2()
+
+@retry(3)
+def r1(pg):
+    dt = '2021/05'
+    url = 'http://jiangxi.chinatax.gov.cn/taxmap/front/result2.do'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0",
+    }
+    params = {
+        "region":"",
+        "nature": "",
+        "year": "",
+        "pageno": pg,
+        "_": int(round(time.time() * 1000)),
+    }
+    response = r.get(url=url,headers=headers,params=params,proxies=proxies)
+    html = response.text
+    # print(html)
+    a = re.findall('getDetail\((.*?)\)',html)
+    list1 = []
+    list2 = []
+    for uid in a:
+        if uid != 'iid':
+            print(uid)
+            utf = r_myco15.sismember('n21', uid)  ##更改
+            if not utf:
+                rsd = r2(dt, uid)
+                list1.append(rsd)
+                list2.append(uid)
+            else:
+                print('已存在,>>>n21')
+    if list1:
+        myco21.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco21_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n21', mis)  ##更改
+
+
+# pg = '1'
+def runs():
+    for pg in range(1,3):
+        print(pg,'===============================')
+        r1(pg)
+
+runs()

+ 90 - 0
n22_yn.py

@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import re
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco22,r_myco15,myco22_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+import urllib3
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+@retry(3)
+def r1_d(url,dt):
+    # url = 'https://yunnan.chinatax.gov.cn/art/2021/2/9/art_8101_588.html'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,proxies=proxies,verify=False)
+    response.encoding = 'UTF-8'
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//tr')
+    dict1 = {}
+    for i in a:
+        k1 = i.xpath('td[1]/div/text()')
+        v1 = i.xpath('td[2]/div/text()')
+        # print(k1,v1)
+        k2 = ''
+        if k1:
+            for i1 in k1:
+                k2 += i1
+            if v1:
+                v2 = ''
+                for i2 in v1:
+                    v2+=i2
+            else:
+                v2 = ''
+            dict1[k2] = v2
+    dict1['url'] = url
+    dict1['date'] = dt
+    print(dict1)
+    return dict1
+
+# r1_d()
+
+@retry(3)
+def r1(ny1,ny2):
+    url = 'https://yunnan.chinatax.gov.cn/bulletin/ajaxdata.jsp?startrecord=1&endrecord=8&perpage=11&rowpage=1'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data = {
+        "searhvalue":"{}%E6%9C%88".format(ny2),
+        "searchkey": "jd1",
+        "year": "{}%E5%B9%B4%E5%BA%A6".format(ny1),
+    }
+    response = r.post(url=url,data=data,headers=headers,proxies=proxies,verify=False)
+    html = response.text
+    a = re.findall("href='(.*?)'", html)
+    list1 = []
+    list2 = []
+    for i in a:
+        print(i)
+        dt = str(ny1) + '/' + str(ny2)
+        utf = r_myco15.sismember('n22', i)  ##更改
+        if not utf:
+            rsl = r1_d(i, dt)
+            list1.append(rsl)
+            list2.append(i)
+        else:
+            print('已存在,>>>n22')
+    if list1:
+        myco22.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco22_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n22', mis)  ##更改
+    # if list1:
+    #     myco22.insert_many(list1)
+
+def runs(ny1,ny2):
+    print(ny1,ny2,'=========')
+    r1(ny1, ny2)
+runs('2023','5')

+ 116 - 0
n23_hn.py

@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import re
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco23,r_myco15,myco23_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1_d(uid,dt):
+    url = 'http://hainan.chinatax.gov.cn/weifaCase/weifa_case_list.htm'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data = {
+        "id":uid
+    }
+    response = r.post(url=url, headers=headers, data=data,proxies=proxies)
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//table[@class="div2-table3"]//tr')
+    dict1 = {}
+    for i in a:
+
+        k1 = i.xpath('th')
+        if k1:
+            k2 = k1[0].xpath('string(.)').strip()
+
+            k3 = k2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+            # print(k2)
+            v1 = i.xpath('td')
+            if v1:
+                v2 = v1[0].xpath('string(.)').strip()
+                v3 = v2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+                # print(v3)
+            else:
+                v3 = ''
+            dict1[k3] = v3
+    dict1['url'] = uid
+    # dt = ''
+    dict1['date'] = dt
+    # print(dict1)
+    return dict1
+
+# r1_d('1')
+
+@retry(3)
+def r1(ny1,ny2,pg):
+    url = 'http://hainan.chinatax.gov.cn/weifaCase/weifa_case_list.htm?pageNo={}'.format(pg)
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    dt = str(ny1) + '/' + str(ny2)
+    data = {
+        "area":"",
+        "ajinformation": "",
+        "startDate": str(ny1) + '-' + str(ny2),
+        "month": "1",
+        "nsrname": "",
+        "nsridentify": "",
+        "regaddress": "",
+        "organization": "",
+        "legal": "",
+        "legalId": "",
+        "finance": "",
+        "financeId": "",
+    }
+    response = r.post(url=url,headers=headers,data=data,proxies=proxies)
+    html = response.text
+    # print(html)
+    tpg = re.findall(r'共<em>(.*?)</em>条',html)
+    # print(tpg)
+    if tpg:
+        rpg = int(tpg[0])
+    else:
+        rpg = 0
+    selector = etree.HTML(html)
+    a = selector.xpath('//input/@onclick')
+    list1 = []
+    list2 = []
+    for i in a:
+        uid = i.replace('weifaCaseDetail(','').replace(')','')
+        # print(uid)
+        utf = r_myco15.sismember('n23', uid)  ##更改
+        if not utf:
+            rsd = r1_d(uid, dt)
+            print(rsd)
+            list1.append(rsd)
+    if list1:
+        myco23.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco23_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n23', mis)  ##更改
+    return rpg
+    # if list1:
+    #     myco23.insert_many(list1)
+
+
+def runs():
+    ny1= '2023'
+    ny2 = '11'
+    # pg = 2
+    rpg = r1(ny1,ny2,pg=1)
+    tpg = rpg//15 +1
+    for pg in range(2,tpg):
+        r1(ny1,ny2,pg)
+
+runs()

+ 68 - 0
n24_sx.py

@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco24,r_myco15,myco24_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1(ny1,ny2,pg):
+    d2 = str(ny2)
+    if len(d2) == 1:
+        d3 = '0' + str(d2)
+    else:
+        d3 = str(ny2)
+    url = 'http://shanxi.chinatax.gov.cn/common/extQuery?sqlid=web_zdsswf&limit=10&cx_lx=0&cx_xsrq={ny1}-{ny2}&page={pg}'.format(ny1=ny1,ny2=d3,pg=pg)
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data = {
+        "start":"0",
+    }
+    response = r.post(url=url,headers=headers,data=data,proxies=proxies)
+    # print(response.text)
+    rsd = response.json()
+    # print(rsd)
+    rsl = rsd['message']['list']
+    rpg = rsd['message']['totalPage']
+    list1 = []
+    list2 = []
+    for i in rsl:
+        url1 = i['ajbh']
+        utf = r_myco15.sismember('n24', url1)  ##更改
+        if not utf:
+            list1.append(i)
+            list2.append(url1)
+        else:
+            print('已存在,>>>n24')
+    if list1:
+        myco24.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco24_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n24', mis)  ##更改
+    return int(rpg)
+    # if rsl:
+    #     print('1')
+    #     myco24.insert_many(rsl)
+
+
+# ny1 = 2020
+# ny2 = 12
+def runs(ny1,ny2):
+    print(ny1,ny2,'---------')
+    rpg = r1(ny1,ny2,pg=1)
+    for pg in range(1,rpg+1):
+        print(pg,'==================')
+        r1(ny1,ny2,pg)
+
+ny1 = 2023
+ny2 = 12
+runs(ny1,ny2)

+ 125 - 0
n25_sx.py

@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import re
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco25,r_myco15,myco25_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1_d(url,dt):
+    # url = 'http://shaanxi.chinatax.gov.cn/art/2021/4/15/art_15616_7502.html'
+    headers = {
+        "User-Agent": "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0; .NET CLR 2.0.50727; SLCC2; .NET4.0C; .NET4.0E; .NET CLR 3.5.30729; .NET CLR 3.0.30729)",
+    }
+    response = r.get(url=url,headers=headers,proxies=proxies)
+    response.encoding = 'UTF-8'
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//table[@class="zdsc_con"]//tr')
+    dict1 = {}
+    for i in a:
+
+        k1 = i.xpath('td[1]')
+        if k1:
+            k2 = k1[0].xpath('string(.)').strip()
+
+            k3 = k2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+            # print(k2)
+            v1 = i.xpath('td[2]')
+            if v1:
+                v2 = v1[0].xpath('string(.)').strip()
+                v3 = v2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+                # print(v3)
+            else:
+                v3 = ''
+            dict1[k3] = v3
+    dict1['url'] = url
+    # dt = ''
+    dict1['date'] = dt
+    # print(dict1)
+    return dict1
+
+# r1_d()
+@retry(3)
+def r1(ny1,ny2,pg):
+    url = 'http://shaanxi.chinatax.gov.cn/module/search/index.jsp'
+    headers = {
+        "User-Agent": "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0; .NET CLR 2.0.50727; SLCC2; .NET4.0C; .NET4.0E; .NET CLR 3.5.30729; .NET CLR 3.0.30729)",
+    }
+    params = {
+        "field_2166": "",
+        "field_1656": "",
+        "field_1652": "",
+        "field_1663": "",
+        "field_1653": "",
+        "field_2390": "",
+        "field_2213": "",
+        "field_2391": "",
+        "field_2410": ny2,
+        "field_1670": "",
+        "field_1672": ny1,
+        "currpage": pg,
+        "field_1651": "",
+        "strSelectID": "style_2166,1656,1663,1652,1653,2390,2213,2391,1651,1672,2410,1670",
+        "i_columnid": "style_3",
+        "field": "field_2166:1:0,field_2213:1:0,field_1656:1:0,field_2391:1:0,field_2410:12:0,field_1651:12:0,field_1652:1:0,field_2390:1:0,field_1672:12:0,field_1670:12:0,field_1653:1:0,field_1663:1:0",
+        "initKind": "FieldFormMetadata",
+        "type": "0,0,0,0,0,0,0,0,0,0,0,0",
+        "currentplace": "",
+        "splitflag": "",
+        "fullpath": "0",
+    }
+    response = r.get(url=url, headers=headers, params=params,proxies=proxies)
+    html = response.text
+    # print(html)
+    rpg = re.findall("<font color='red'>(.*?)</font>",html)[0]
+    # print(rpg)
+    selector = etree.HTML(html)
+    a = selector.xpath('//li//a/@href')
+    list1 = []
+    list2 = []
+    for i in a:
+        # print(i)
+        url1 = 'http://shaanxi.chinatax.gov.cn' + i.replace('../..','')
+        dt = str(ny1) + '/' + str(ny2+1)
+        utf = r_myco15.sismember('n25', url1)  ##更改
+        if not utf:
+            rsd = r1_d(url1, dt)
+            print(rsd)
+            list1.append(rsd)
+            list2.append(url1)
+        else:
+            print('已存在,>>>n25')
+    if list1:
+        myco25.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco25_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n25', mis)  ##更改
+    # if list1:
+    #     myco25.insert_many(list1)
+    return int(rpg)
+
+def runs(ny1,ny2):
+    # for ny1 in range(2021,2022):
+    #     for ny2 in range(1,2):
+    #         print(ny1,ny2,'========')
+    ny3 = int(ny2) - 1
+    rpg = r1(ny1,ny3,pg=1)
+    # print(rpg,'----------------')
+    tpg = rpg//20
+    if tpg >1:
+        for pg in range(2,tpg +2):
+            print(pg,'=======')
+            r1(ny1,ny3,pg)
+
+
+runs(2023,11)

+ 67 - 0
n26_gz.py

@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco26,r_myco15,myco26_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1(ny1,ny2,pg):
+    url = 'http://guizhou.chinatax.gov.cn/import/taxApi'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data = {
+        "pageNum": pg,
+        "pageSize": 10,
+        "siteId": 502424,
+        "months": "{}月".format(ny2),
+        "years": "{}年".format(ny1),
+        "isPage": True
+    }
+    response = r.post(url=url,json=data,headers=headers,proxies=proxies)
+    # print(response.text)
+    rsd = response.json()
+    rsl = rsd['data']['list']
+    tpg = rsd['data']['total']
+    rpg = tpg//10
+    # print(rpg)
+    # if rsl:
+    #     myco26.insert_many(rsl)
+    list1 = []
+    list2 = []
+    for i in rsl:
+        # print(i)
+        url1 = i['docpuburl']
+        print(url1)
+        utf = r_myco15.sismember('n26', url1)  ##更改
+        if not utf:
+            list1.append(i)
+            list2.append(url1)
+        else:
+            print('已存在,>>>n26')
+    if list1:
+        myco26.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco26_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n26', mis)  ##更改
+    return rpg
+
+def runs(ny1,ny2):
+    # for ny1 in range(2021,2022):
+    #     for ny2 in range(1,4):
+    print(ny1,ny2,'=========')
+    rpg = r1(ny1,ny2,pg=1)
+    if rpg > 0:
+        for pg in range(2,rpg+1):
+            r1(ny1, ny2,pg)
+                # pass
+runs(2023,4)

+ 82 - 0
n27_nmg.py

@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import re
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco27,r_myco15,myco27_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+def r1_d(url,dt):
+    # url = 'http://neimenggu.chinatax.gov.cn/nsfw/sscx/zdaj/hlbeszdwfaj/202106/t20210609_751387.html'
+    headers = {
+            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+        }
+    response = r.get(url=url,headers=headers,proxies=proxies)
+    response.encoding = 'UTF-8'
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//table//tr')
+    dict1 = {}
+    for i in a:
+        k1 = i.xpath('td[1]')
+        if k1:
+            k2 = k1[0].xpath('string(.)').strip()
+
+            k3 = k2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+            # print(k2)
+            v1 = i.xpath('td[2]')
+            if v1:
+                v2 = v1[0].xpath('string(.)').strip()
+                v3 = v2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+                # print(v3)
+            else:
+                v3 = ''
+            if k3:
+                dict1[k3] = v3
+    dict1['url'] = url
+    dict1['date'] = dt
+    print(dict1)
+    return dict1
+
+# r1_d('1')
+
+
+@retry(3)
+def r1():
+    url = 'http://neimenggu.chinatax.gov.cn/nsfw/sscx/zdaj/sj/2023/'  ##查看时间
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,proxies=proxies)
+    response.encoding = 'UTF-8'
+    html = response.text
+    # print(html)
+    selector = etree.HTML(html)
+    a = re.findall('href="(.*?)"',html)
+    for i in a:
+        # print(i)
+        if "html" in i:
+            url1 = 'http://neimenggu.chinatax.gov.cn/nsfw/sscx/zdaj' + i.replace('../..','')
+            print(url1)
+            u1 = url1.split('/')
+            u2 = u1[-1].split('_')[0]
+            # print(u2)
+            dt = u2[1:5] + '/' + u2[5:7] + '/' + u2[7:9]
+            print(dt)
+            utf = r_myco15.sismember('n27', url1)  ##更改
+            if not utf:
+                rsd = r1_d(url1, dt)
+                myco27.insert_one(rsd)
+                myco27_b.insert_one(rsd)
+                r_myco15.sadd('n27', url1)
+                print('存入主备库,>>>n27')
+            else:
+                print('已存在,>>>n27')
+            # time.sleep(10)
+
+r1()

+ 136 - 0
n28_nx.py

@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import re
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco28,r_myco15,myco28_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1_d(url,dt):
+    # url = 'http://ningxia.chinatax.gov.cn/art/2021/3/3/art_14329_8626.html'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,proxies=proxies)
+    response.encoding = 'UTF-8'
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//table[@class="color"]//tr')
+    dict1 = {}
+    for i in a:
+        k1 = i.xpath('td[1]')
+        if k1:
+            k2 = k1[0].xpath('string(.)').strip()
+
+            k3 = k2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+            # print(k2)
+            v1 = i.xpath('td[2]')
+            if v1:
+                v2 = v1[0].xpath('string(.)').strip()
+                v3 = v2.replace('\r', '').replace('\t', '').replace('\n', '').replace(' ', '')
+                # print(v3)
+            else:
+                v3 = ''
+            dict1[k3] = v3
+    dict1['url'] = url
+    dict1['date'] = dt
+    # print(dict1)
+    return dict1
+# r1_d('1')
+
+@retry(3)
+def r1(ny,pg):
+    url = 'http://ningxia.chinatax.gov.cn/module/search/index.jsp'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    params = {
+        "vc_name":"",
+        "field_147": "",
+        "field_149": "",
+        "field_148": "",
+        "field_150": "",
+        "field_151": "",
+        "strSelectID": "104,147,148,149,150,151",
+        "i_columnid": ny,   #202103
+        "currpage":pg,
+        "field": "field_148:1,field_149:1,vc_name:1,field_147:1,field_150:1,field_151:1",
+        "initKind": "FieldForm",
+        "currentplace": "",
+        "splitflag": "",
+        "fullpath": "0",
+    }
+    response = r.get(url=url,headers=headers,params=params,proxies=proxies)
+    html = response.text
+    # print(html)
+    rpg = re.findall(r'共&nbsp;(.*?)&nbsp;页',html)
+    # print(rpg)
+    selector = etree.HTML(html)
+    a = selector.xpath('//a/@href')
+    list1 = []
+    list2 = []
+    for i in a:
+        # print(i)
+        if "art" in i:
+            url1 = 'http://ningxia.chinatax.gov.cn' + i.replace('../..','')
+            print(url1)
+            dt1 = i.split('/')
+            # print(dt1)
+            dt=dt1[3] + '/' + dt1[4] + '/' +dt1[5]
+            print(dt)
+            utf = r_myco15.sismember('n28', url1)  ##更改
+            if not utf:
+                rsd = r1_d(url1, dt)
+                print(rsd)
+                list1.append(rsd)
+                list2.append(url1)
+            else:
+                print('已存在,>>>n28')
+    if list1:
+        myco28.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco28_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n28', mis)  ##更改
+    return rpg[0]
+    # if list1:
+    #     myco28.insert_many(list1)
+
+def get_pg(ny):
+    url = 'http://ningxia.chinatax.gov.cn/col/col14330/index.html'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url, headers=headers, proxies=proxies)
+    response.encoding = 'UTF-8'
+    html = response.text
+    # print(html)
+    selector = etree.HTML(html)
+    ### niandu3对应2021年
+    a = selector.xpath('//dl[@id="niandu3"]//dt[@class="open"]//a')
+    for i in a:
+        yf = i.xpath('text()')[0]
+        if yf == '{}月'.format(ny):
+            href = i.xpath('@href')[0]
+            print(href)
+            h1 = re.findall(r'col/col(.*?)/i',href)
+            return h1[0]
+
+# yf = get_pg(3)
+# print(yf)
+def runs(ny):
+    yf = get_pg(ny)
+    rpg = r1(yf, pg=1)
+    for pg in range(2,int(rpg)+1):
+        print(pg,'=============')
+        r1(ny, pg)
+
+runs(1)

+ 157 - 0
n29_xz.py

@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco29,r_myco15,myco29_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1_d(cmpname,url):
+    # url = 'https://xizang.chinatax.gov.cn/art/2019/6/26/art_2371_382.html'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,proxies=proxies)
+    response.encoding = 'UTF-8'
+    html = response.text
+    if "附件下载" in html:
+        return {}
+    if "市局频道" in html:
+        return {}
+    selector = etree.HTML(html)
+    dts = selector.xpath('//div[@class="main"]//div[@class="main_content"]//span/text()')
+    dt = ''
+    for i in dts:
+        if '发布时间' in i:
+            i1 = i.split(' ')[0]
+            dt = i2 = i1.split(':')[1].replace('-','/')
+    print(dt)
+    dict1 = {}
+    dict1['纳税人名称'] = cmpname
+    result = selector.xpath('//div[@id="zoom"]')
+    result1 = result[0].xpath('string(.)').strip()
+    # print(result1)
+    reu1 = result1.split('注册地址:')
+    # print(reu1[1])
+    dict1['注册地址'] = reu1[1]
+    reu2 = reu1[0].split('主要违法事实:')
+    # print(reu2[1])
+    dict1['主要违法事实'] =reu2[1]
+    reu3 = reu2[0].split('违法案件性质:')
+    # print(reu3[1])
+    dict1['违法案件性质'] =reu3[1]
+    reu4 = reu3[0].split('组织机构代码:')
+    # print(reu4[1])
+    dict1['组织机构代码'] =reu4[1]
+    reu5 = reu4[0].split('法人信息:')
+    # print(reu5[1])
+    dict1['法人信息'] =reu5[1]
+    reu6 = reu5[0].split('纳税人识别号:')
+    # print(reu6[1])
+    dict1['纳税人识别号'] =reu6[1]
+    dict1['url'] = url
+    dict1['date'] = dt
+    print(dict1)
+    return dict1
+# r1_d('')
+
+@retry(3)
+def r1(pg):
+    url = 'https://xizang.chinatax.gov.cn/module/search/index.jsp'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    params = {
+        "field":"vc_name:1,field_406:1,field_407:1,field_408:1",
+        "i_columnid": "style_63",
+        "vc_name": "",
+        "field_406": "",
+        "field_407": "",
+        "field_408": "",
+        "currpage": pg,
+    }
+    response = r.get(url=url,headers=headers,params=params,proxies=proxies)
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//td//a/@href')
+    list1 = []
+    list2 = []
+    for i in a:
+        # print(i)
+        if "art" in i:
+            url1 = "https://xizang.chinatax.gov.cn" + i.replace('../..','')
+            print(url1)
+            utf = r_myco15.sismember('n29', url1)  ##更改
+            if not utf:
+                rsd = r1_d(url1)
+                if rsd:
+                    list1.append(rsd)
+                    list2.append(url1)
+            else:
+                print('已存在,>>>n29')
+    # if list1:
+    #     myco29.insert_many(list1)
+    #     print('已存入原始库')
+    # if list2:
+    #     myco29_b.insert_many(list1)
+    #     print('已存入备份原始库')
+    #     for mis in list2:
+    #         r_myco15.sadd('n29', mis)  ##更改
+    # if list1:
+    #     myco29.insert_many(list1)
+
+@retry(3)
+def r2(pg):
+    url = 'https://xizang.chinatax.gov.cn/module/search/index.jsp'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    params = {
+        "field":"vc_name:1,field_406:1,field_407:1,field_408:1",
+        "i_columnid": "style_63",
+        "vc_name": "",
+        "field_406": "",
+        "field_407": "",
+        "field_408": "",
+        "currpage": pg,
+    }
+    response = r.get(url=url,headers=headers,params=params,proxies=proxies)
+    html = response.text
+    selector = etree.HTML(html)
+    a = selector.xpath('//tr[@class="form-list"]')
+    list1 = []
+    list2 = []
+    for i in a:
+        cmpname = i.xpath('td[2]/text()')[0]
+        # print(cmpname)
+        urlz = i.xpath('td[4]//a/@href')[0]
+        # print(urlz)
+        url1 = "https://xizang.chinatax.gov.cn" + urlz.replace('../..', '')
+        utf = r_myco15.sismember('n29', url1)  ##更改
+        if not utf:
+            rsd = r1_d(cmpname,url1)
+            if rsd:
+                list1.append(rsd)
+                list2.append(url1)
+        else:
+            print('已存在,>>>n29')
+    if list1:
+        myco29.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco29_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n29', mis)  ##更改
+
+def runs():
+    for pg in range(1,4):
+        print(pg,'================')
+        r2(pg)
+
+runs()

+ 95 - 0
n30_xj.py

@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json,time
+import random,string
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from pymongo import MongoClient
+from a_cfg_dg_zsq import tail_call_optimized
+myclients = MongoClient("mongodb://127.0.0.1:27017/")
+myco_jb1 = myclients['shuiwu_ml']['xinjiang']
+import urllib3
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+from mongo_cho import myco30
+r = requests.session()
+r.keep_alive = False
+def retry(times, exceptions=None):
+    exceptions = exceptions if exceptions is not None else Exception
+    def wrapper(func):
+        def wrapper(*args, **kwargs):
+            last_exception = None
+            for _ in range(times):
+                try:
+                    return func(*args, **kwargs)
+                except exceptions as e:
+                    last_exception = e
+            raise  last_exception
+        return wrapper
+    return wrapper
+
+@tail_call_optimized
+@retry(3)
+def r1(name):
+    etm = int(time.time() * 1000)
+    url = 'https://etax.xinjiang.chinatax.gov.cn/yhs-web/api/yhsyzm/get?{}'.format(etm)
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    response = r.get(url=url,headers=headers,proxies=proxies,verify=False)
+    time.sleep(1)
+    num = string.ascii_letters + string.digits
+    yzm = "".join(random.sample(num, 4))
+    # code_img = response.content
+    # with open('{}.png'.format('1'), 'wb') as fp:
+    #     fp.write(code_img)
+    # print(code_img)
+    # a = input()
+    # print(a)
+    data = {
+        "yzm":yzm,
+        "nsrmc": name,
+        "pageSize": 10,
+        "sswfrlx": "00",
+        "pageIndex": 1
+    }
+    url1 = 'https://etax.xinjiang.chinatax.gov.cn/yhs-web/api/zdwfaj/ajlbcx'
+    response1 = r.post(url=url1,headers=headers,json=data,proxies=proxies,verify=False)
+    print(response1.json())
+    # print(response1.text)
+    rsd = response1.json()
+    rst = rsd['value']['result']
+    list1 = []
+    for i in rst:
+        pid = i['id']
+        etm1 = int(time.time() * 1000)
+        params = {
+            "id":pid,
+            "sswfrlx": "00",
+            "timestamp": etm1,
+        }
+        url2 = 'https://etax.xinjiang.chinatax.gov.cn/yhs-web/api/zdwfaj/ajmxcx'
+        response2= r.get(url=url2,headers=headers,params=params,proxies=proxies,verify=False)
+        rsd1 = response2.json()
+        rsd2 = rsd1['value']
+        list1.append(rsd2)
+        # print(response2.text)
+        time.sleep(1)
+    print(list1)
+    if list1:
+        myco30.insert_many(list1)
+
+# r1(name)
+
+def runs():
+    a = myco_jb1.find().skip(275171+34665+4542)
+    num = 0
+    for i in a:
+        num +=1
+
+        name = i['name']
+        print(num,name )
+        r1(name)
+
+runs()

+ 75 - 0
n31_tj.py

@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import re
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco31,r_myco15,myco31_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1(ny1,ny2,pg):
+    url = 'http://tianjin.chinatax.gov.cn/wzcx/sjcx_cxqyxx.action'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    data = {
+        "szsf":"11200000000",
+        "nfjd": str(ny1) + '0' + str(ny2),
+        "page":pg,
+        "pageCount":"15"
+    }
+    response = r.post(url=url,headers=headers,data=data,proxies=proxies)
+    html = response.text
+    # print(html)
+    html1 = html.replace(' ','').replace('\r','').replace('\t','').replace('\n','')
+    rpg1 = re.findall(r"pageCount='(.*?)'", html1)[0]
+    # print(rpg1)
+    selector = etree.HTML(html)
+    a = selector.xpath('//span[@class="mxxx"]')
+    dt = str(ny1) + '/0' + str(ny2)
+    list1 = []
+    list2 = []
+    for i in a:
+        dict1 = {}
+        nsrm = i.xpath('@data-nsrmc')
+        dict1['纳税人名称'] = nsrm[0]
+        nsrsbh = i.xpath('@data-nsrsbh')
+        dict1['纳税人识别号'] =nsrsbh[0]
+        zzjgdm = i.xpath('@data-zzjgdm')
+        dict1['组织机构代码'] =zzjgdm[0]
+        zcjydz = i.xpath('@data-zcjydz')
+        dict1['注册地址'] =zcjydz[0]
+        fddbrxm = i.xpath('@data-fddbrxm')
+        dict1['姓名'] =fddbrxm[0]
+        fddbrxb = i.xpath('@data-fddbrxb')
+        dict1['性别'] =fddbrxb[0]
+        fddbrzjmc = i.xpath('@data-fddbrzjmc')
+        dict1['证件名称'] =fddbrzjmc[0]
+        fddbrzjhm = i.xpath('@data-fddbrzjhm')
+        dict1['证件号码'] =fddbrzjhm[0]
+        ajlxmc = i.xpath('@data-ajlxmc')
+        dict1['案件性质'] =ajlxmc[0]
+        zywfss = i.xpath('@data-zywfss')
+        dict1['主要违法事实'] =zywfss[0]
+        clqk = i.xpath('@data-clqk')
+        dict1['相关法律依据及税务处理处罚情况'] =clqk[0]
+        dict1['date'] = dt
+        print(dict1)
+        list1.append(dict1)
+    # return int(rpg1)
+    if list1:
+        myco31.insert_many(list1)
+
+
+# http://tianjin.chinatax.gov.cn/wzcx/cx_zdwfaj.action?szsf=11200000000
+# 此数据无法去重,遂单独更新,查看页数,季度
+def runs():
+    for pg in range(1,2):
+        print(pg,'==========')
+        r1(2023,5,pg)  ##中间2为季度,每次更新前务必加1季度
+r1()

+ 69 - 0
n33_sh.py

@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+import requests,json
+from setting import proxies
+from urllib import parse
+from lxml import etree
+from mongo_cho import myco33,r_myco15,myco33_b
+from rety import retry
+r = requests.session()
+r.keep_alive = False
+
+@retry(3)
+def r1(ny1,ny2,pg):
+    url = 'http://shanghai.chinatax.gov.cn/newxbwz/tycx/TYCXzdsswfajgblCtrl-getxxsByTj.pfv'
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36",
+    }
+    params = {
+        "nd":str(ny1)+str(ny2),
+        "qjswjgdm": "",
+        "curPage":pg,
+        "time": "Tue Jun 08 2021 08:11:49 GMT 0800 (中国标准时间)",
+    }
+    response = r.get(url=url,headers=headers,params=params,proxies=proxies)
+    # print(response.text)
+    rsd =response.json()
+    rpg = rsd['pageCount']
+    rsl = rsd['pageData']
+    list1 = []
+    list2 = []
+    for i in rsl:
+        i['date'] = ny1 + '/' + ny2
+        i.pop('toChar(t2.ajDm)')
+        print(i)
+        url1 = i['djxh']
+        utf = r_myco15.sismember('n33', url1)  ##更改
+        if not utf:
+            list1.append(i)
+            list2.append(url1)
+        else:
+            print('已存在,>>>n33')
+    if list1:
+        myco33.insert_many(list1)
+        print('已存入原始库')
+    if list2:
+        myco33_b.insert_many(list1)
+        print('已存入备份原始库')
+        for mis in list2:
+            r_myco15.sadd('n33', mis)  ##更改
+    return int(rpg)
+
+# l1=['2019','2020']
+# l2 = ['01','02','03','04','05','06','07','08','09','10','11','12']
+
+def runs(ny1,ny2):
+    ny11 = str(ny1)
+    ny22 = str(ny2)
+    if len(ny22) ==1:
+        ny33 = '0' + ny22
+    else:
+        ny33 = ny22
+    rpg = r1(ny11,ny33,pg=1)
+    print(ny11,ny33,'===')
+    if rpg >1:
+        for pg in range(2,rpg+1):
+            print(pg,'===')
+            r1(ny11,ny33,pg)
+runs(2020,1)

+ 16 - 0
rety.py

@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+# coding:utf-8
+
+def retry(times, exceptions=None):
+    exceptions = exceptions if exceptions is not None else Exception
+    def wrapper(func):
+        def wrapper(*args, **kwargs):
+            last_exception = None
+            for _ in range(times):
+                try:
+                    return func(*args, **kwargs)
+                except exceptions as e:
+                    last_exception = e
+            raise  last_exception
+        return wrapper
+    return wrapper

+ 23 - 0
setting.py

@@ -0,0 +1,23 @@
+proxyHost = "proxy.abuyun.com"
+proxyPort = "9020"
+# proxyUser = "H313YMK8WI70863D"
+# proxyPass = "5F1F50A0850928F6"
+# proxyUser = "H51O63883U62IA5D"
+# proxyPass = "619355632CFFC0AC"
+# proxyUser = "H9D44J5P1I18321D"
+# proxyPass = "C9E60A74404A7A9F"
+proxyUser = "H4ZX1CL3L0535Y5D"
+proxyPass = "6C1BEE51BA5C341C"
+proxyMeta = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
+    "host": proxyHost,
+    "port": proxyPort,
+    "user": proxyUser,
+    "pass": proxyPass,
+}
+proxies = {
+    "http": proxyMeta,
+    "https": proxyMeta,
+}
+
+# a = 1//10
+# print(a)

+ 0 - 0
shuiwulei.py