123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449 |
- #!/usr/bin/env python
- # coding:utf-8
- import csv
- from pymongo import MongoClient
# Shared MongoDB handles used by every function in this script.
myclient = MongoClient("mongodb://127.0.0.1:27017/")
# Database holding one raw collection per source (e.g. '01_ah', '08_fj').
mycoup = myclient.get_database('shuiwu06')
# Collection that r1() fills and ser_m() queries.
# NOTE(review): 'base_date' looks like a typo of 'base_data', but it is the
# name the existing data lives under, so it is kept as is.
mycohz = myclient.get_database('shuiwu_hz').get_collection('base_date')
# Collection that receives the records matched by ser_m().
mycopp = myclient.get_database('shuiwu_210709').get_collection('base_data')
_SEP = '。'  # delimiter placed between source columns merged into one field


def _joined(doc, *keys):
    """Return the values of *keys* in *doc* concatenated with '。'."""
    return _SEP.join(doc[key] for key in keys)


def _joined_or_blank(doc, *keys):
    """Like _joined, but return '' if a key is missing or a value is not text."""
    try:
        return _joined(doc, *keys)
    except (KeyError, TypeError):
        return ''


def _optional(doc, key, as_text=False):
    """Return doc[key] (optionally passed through str), or '' if the key is absent."""
    try:
        value = doc[key]
    except KeyError:
        return ''
    return str(value) if as_text else value


def _hn_person(doc, name_key, sex_key, id_key):
    """Build the '20_hn' person string: name + 男/女 + id, skipping falsy parts."""
    text = ''
    if doc[name_key]:
        text += doc[name_key]
    if doc[sex_key]:
        # The source stores the gender as a number; 1 is rendered as male,
        # any other truthy value as female.
        text += '男' if doc[sex_key] == 1 else '女'
    if doc[id_key]:
        text += doc[id_key]
    return text


def _taxpayer_name(dbn, doc):
    """Column 1: taxpayer name."""
    if dbn == '08_fj':
        return doc['S1']
    if dbn == '11_gx':
        return doc['NSR_NAME']
    if dbn in ('12_hb', '19_hb'):
        return doc['NSRMC']
    if dbn == '20_hn':
        return doc['taxpayerName']
    if dbn in ('24_sx', '26_gz', '30_xj', '33_sh'):
        return doc['nsrmc']
    return doc['纳税人名称']


def _region(dbn, doc):
    """Column 2: region the taxpayer belongs to ('' when the source has none)."""
    if dbn in ('01_ah', '07_zj', '22_yn'):
        return doc['所属地区']
    if dbn == '08_fj':
        return doc['S20']
    if dbn == '11_gx':
        return doc['ADDR']
    if dbn == '19_hb':
        return doc['SHENGF'] + '/' + doc['DIS']
    if dbn == '20_hn':
        return doc['siteName']
    if dbn == '24_sx':
        return doc['dqmc']
    if dbn == '26_gz':
        return doc['area']
    return ''


def _tax_id(dbn, doc):
    """Column 3: taxpayer identification number / social credit code, as text."""
    if dbn in ('01_ah', '04_sz', '06_js', '10_gd', '16_gs', '17_qh',
               '21_jx', '22_yn', '25_sx', '29_xz', '31_tj'):
        return str(doc['纳税人识别号'])
    if dbn in ('02_nb', '03_bj', '05_sc', '07_zj', '09_sd', '13_ln',
               '14_hlj', '15_jl', '18_hn', '28_nx'):
        return str(doc['纳税人识别号或社会信用代码'])
    if dbn == '27_nmg':
        # Keep only the part before the first non-breaking space.
        return str(doc['纳税人识别号或社会信用代码']).split('\xa0')[0]
    if dbn == '08_fj':
        return str(doc['S2'])
    if dbn == '11_gx':
        return str(doc['NSR_ID'])
    if dbn in ('12_hb', '19_hb'):
        return str(doc['NSRSBH'])
    if dbn == '20_hn':
        return str(doc['taxpayerNumber'])
    if dbn == '23_hn':
        return str(doc['统一社会信用代码(纳税人识别号)'])
    if dbn in ('26_gz', '30_xj'):
        return str(doc['nsrsbh'])
    if dbn == '33_sh':
        return str(doc['nsrsbm'])
    if dbn == '24_sx':
        return _optional(doc, 'nsrsbh', as_text=True)
    # Unlisted source: blank, consistent with the other optional columns
    # (previously this left the variable unbound).
    return ''


def _org_code(dbn, doc):
    """Column 4: organisation code, as text."""
    if dbn == '08_fj':
        return str(doc['S3'])
    if dbn == '11_gx':
        return ''
    if dbn == '12_hb':
        return str(doc['ZZJGDM'])
    if dbn == '19_hb':
        return str(doc['ZZJG'])
    if dbn == '20_hn':
        return str(doc['organizationalCode'])
    if dbn in ('24_sx', '26_gz', '30_xj', '33_sh'):
        return _optional(doc, 'zzjgdm', as_text=True)
    return str(doc['组织机构代码'])


def _registered_address(dbn, doc):
    """Column 5: registered address."""
    if dbn == '08_fj':
        return doc['S4']
    if dbn == '11_gx':
        return doc['REG_ADDR']
    if dbn in ('12_hb', '19_hb'):
        return doc['ZCDZ']
    if dbn == '20_hn':
        return doc['place']
    if dbn in ('24_sx', '26_gz', '30_xj', '33_sh'):
        return doc['zcdz']
    return doc['注册地址']


def _legal_representative(dbn, doc):
    """Column 6: legal representative / person in charge (name, gender, id)."""
    if dbn in ('01_ah', '07_zj', '22_yn'):
        return doc['法定代表人或负责人姓名、性别及身份证号码(或其他证件号码)']
    if dbn in ('02_nb', '03_bj', '05_sc', '13_ln', '15_jl', '18_hn', '28_nx'):
        return doc['法定代表人或者负责人姓名、性别及身份证号码(或其他证件号码)']
    if dbn in ('10_gd', '16_gs', '17_qh', '21_jx', '25_sx'):
        return doc['法定代表人或者负责人姓名、性别、证件名称及号码']
    if dbn == '27_nmg':
        return doc['法定代表人或者负责人姓名、性别、及身份证号码(或者其他证件号码)']
    if dbn == '29_xz':
        return doc['法人信息']
    if dbn == '14_hlj':
        return doc['法定代表人或者负责人姓名、性别及身份证号码(或者其他证件号码)']
    if dbn == '04_sz':
        return doc['法定代表人或负责人姓名、性别、证件名称及号码']
    if dbn == '06_js':
        return doc['法定代表人姓名、性别及身份证号码']
    if dbn == '09_sd':
        return _joined(doc, '法定代表人或者负责人姓名', '性别')
    if dbn == '08_fj':
        return _joined(doc, 'S5', 'S6', 'S7')
    if dbn == '12_hb':
        return _joined(doc, 'FDDBR', 'FDDBSEX', 'FDDBZJ', 'FDDBZJH')
    if dbn == '11_gx':
        return _joined(doc, 'FDDBR_NAME', 'FDDBR_SEX', 'FDDBR_SFZHM')
    if dbn == '19_hb':
        return doc['FDDBR']
    if dbn == '23_hn':
        return doc['法定代表人、负责人或者经法院判决确定的实际责任人的姓名、性别、证件名称及号码']
    if dbn == '30_xj':
        return _joined(doc, 'fddbrxm', 'fddbrxb', 'fddbrzjlx', 'fddbrzjhm')
    if dbn == '31_tj':
        return _joined(doc, '姓名', '性别', '证件名称', '证件号码')
    if dbn == '26_gz':
        return _joined(doc, 'fddbrhzfzrxm', 'fddbrhzfzrxb',
                       'fddbrhzfzrzjmc', 'fddbrhzfzrzjhm')
    if dbn == '33_sh':
        return _joined(doc, 'frdbmc', 'rowno', 'frdbxb', 'frdbsfz')
    if dbn == '24_sx':
        return _joined_or_blank(doc, 'fddbrxm', 'fddbrxb', 'fddbrsfzhm')
    if dbn == '20_hn':
        return _hn_person(doc, 'legalName', 'legalSex', 'legalIdCard')
    # Unlisted source: blank (previously this left the variable unbound).
    return ''


def _legal_representative_during(dbn, doc):
    """Column 7: legal representative during the violation period."""
    if dbn in ('01_ah', '02_nb', '03_bj', '05_sc', '07_zj', '22_yn', '28_nx'):
        return doc['违法期间法人代表或者负责人姓名、性别及身份证号码(或其他证件号码)']
    if dbn == '18_hn':
        return doc['违法期间法定代表人或者负责人姓名、性别及身份证号码(或其他证件号码)']
    if dbn == '14_hlj':
        return doc['违法期间法人代表或者负责人姓名、性别及身份证号码(或者其他证件号码)']
    if dbn == '10_gd':
        return doc['违法期间法人代表或者负责人姓名、性别、证件名称及号码']
    if dbn in ('13_ln', '15_jl'):
        return doc['违法期间法人代表姓名及身份证号码']
    if dbn == '25_sx':
        return doc['违法期间法人代表或者负责人姓名性别及身份证号码(或其他证件号码)']
    if dbn == '27_nmg':
        return doc['违法期间法定代表人或责任人姓名、性别、身份证号码(或者其他证件号码)']
    if dbn == '08_fj':
        return _joined(doc, 'S8', 'S9', 'S10')
    if dbn == '30_xj':
        return _joined_or_blank(doc, 'cwfzrxm', 'cwfzrxb',
                                'cwfzrzjlx', 'cwfzrzjhm')
    if dbn == '20_hn':
        return _hn_person(doc, 'legalNameDuring', 'legalSexDuring',
                          'legalIdCardDuring')
    return ''


def _finance_officer(dbn, doc):
    """Column 8: directly responsible finance officer."""
    if dbn in ('01_ah', '02_nb', '03_bj', '05_sc', '07_zj', '13_ln',
               '15_jl', '18_hn', '22_yn', '27_nmg'):
        return doc['负有直接责任的财务人员姓名、性别及身份证号码(或其他证件号码)']
    if dbn in ('16_gs', '25_sx'):
        return doc['负有直接责任的财务负责人姓名性别及身份证号码(或其他证件号码)']
    if dbn == '14_hlj':
        return doc['负有直接责任的财务人员姓名、性别及身份证号码(或者其他证件号码)']
    if dbn == '28_nx':
        return doc['负有直接责任的财务负责人姓名、性别及身份证号码(或其他证件号码)']
    if dbn in ('04_sz', '06_js', '10_gd', '17_qh', '21_jx'):
        return doc['负有直接责任的财务负责人姓名、性别、证件名称及号码']
    if dbn == '08_fj':
        return _joined(doc, 'S11', 'S12', 'S13')
    if dbn == '23_hn':
        return doc['经法院裁判确定的负有直接责任的财务人员、团伙成员的姓名、性别、证件名称及号码']
    if dbn == '20_hn':
        return _hn_person(doc, 'financeName', 'financeSex', 'financeIdCard')
    return ''


def _actual_controller(dbn, doc):
    """Column 9: actual person in charge."""
    if dbn in ('01_ah', '02_nb', '03_bj', '05_sc', '07_zj', '13_ln',
               '15_jl', '18_hn', '22_yn', '27_nmg', '28_nx'):
        return doc['实际负责人姓名、性别及身份证号码(或其他证件号码)']
    if dbn in ('16_gs', '25_sx'):
        return doc['实际负责人姓名性别及身份证号码(或其他证件号码)']
    if dbn == '06_js':
        return doc['经法院裁判确定的实际责任人姓名、性别、证件名称及号码']
    if dbn == '14_hlj':
        return doc['实际负责人姓名、性别及身份证号码(或者其他证件号码)']
    if dbn == '20_hn':
        return _hn_person(doc, 'principalName', 'principalSex',
                          'principalIdCard')
    return ''


def _intermediary(dbn, doc):
    """Column 10: directly responsible intermediary agency information."""
    if dbn in ('01_ah', '02_nb', '03_bj', '05_sc', '07_zj', '13_ln',
               '14_hlj', '15_jl', '18_hn', '22_yn', '27_nmg', '28_nx'):
        return doc['负有直接责任的中介机构信息']
    if dbn in ('04_sz', '06_js', '10_gd', '16_gs', '17_qh', '21_jx',
               '23_hn', '25_sx'):
        return doc['负有直接责任的中介机构信息及其从业人员信息']
    if dbn == '20_hn':
        return doc['agency']
    return ''


def _case_nature(dbn, doc):
    """Column 11: nature of the case."""
    if dbn == '08_fj':
        return doc['S18']
    if dbn == '09_sd':
        return ''
    if dbn in ('11_gx', '12_hb', '19_hb'):
        return doc['AJXZ']
    if dbn == '20_hn':
        return doc['hardCaseType']['typeName']
    if dbn in ('26_gz', '30_xj'):
        return doc['ajxz']
    if dbn == '29_xz':
        return doc['违法案件性质']
    if dbn == '33_sh':
        return doc['ajMc']
    if dbn == '24_sx':
        return _optional(doc, 'ajxzmc')
    return doc['案件性质']


def _violation_facts(dbn, doc):
    """Column 12: main violation facts, legal basis and penalties."""
    if dbn in ('01_ah', '02_nb', '05_sc', '07_zj', '13_ln', '14_hlj',
               '15_jl', '16_gs', '18_hn', '22_yn', '25_sx', '27_nmg'):
        return doc['主要违法事实相关法律依据及税务处理处罚情况']
    if dbn in ('03_bj', '29_xz'):
        return doc['主要违法事实']
    if dbn == '06_js':
        return doc['主要违法事实、相关法律依据及税务处理处罚情况']
    if dbn in ('04_sz', '10_gd', '17_qh', '28_nx', '31_tj'):
        return _joined(doc, '主要违法事实', '相关法律依据及税务处理处罚情况')
    if dbn == '12_hb':
        return _joined(doc, 'ZYWFSS', 'FLYJ')
    if dbn == '30_xj':
        return _joined(doc, 'wfss', 'swclcfqk')
    if dbn == '09_sd':
        # The second column name really ends with a space in this source.
        return _joined(doc, '主要违法事实', '相关法律依据及税务处理处罚情况 ')
    if dbn == '08_fj':
        return doc['S19']
    if dbn == '11_gx':
        return doc['ZYWFSS']
    if dbn == '19_hb':
        return doc['WFSS']
    if dbn == '33_sh':
        return doc['wfss']
    if dbn == '20_hn':
        return doc['content']
    if dbn == '21_jx':
        return doc['主要违法事实直接法律依据及税务处理处罚情况']
    if dbn == '24_sx':
        return _joined(doc, 'zywfss', 'flyj_cljg')
    if dbn == '26_gz':
        return _joined(doc, 'zywfss', 'xgflyjjswclcfqk')
    if dbn == '23_hn':
        return _joined(doc, '主要违法事实', '相关法律依据及税务处理、税务行政处罚等情况')
    # Unlisted source: blank (previously this left the variable unbound).
    return ''


def _publish_date(dbn, doc):
    """Column 13: publication date, with '/' separators where the source is rewritten."""
    if dbn == '08_fj':
        return doc['docreltime'].replace('-', '/')
    if dbn == '11_gx':
        return doc['DOCPUBTIME'].split(' ')[0].replace('.', '/')
    if dbn == '12_hb':
        return doc['GBRQ'].split(' ')[0]
    if dbn == '19_hb':
        return doc['GBRQ'].split(' ')[0].replace('年', '/').replace('月', '')
    if dbn == '24_sx':
        return doc['xsrq'].split(' ')[0].replace('-', '/')
    if dbn == '26_gz':
        return doc['CrTime'].split(' ')[0].replace('-', '/')
    if dbn == '30_xj':
        return ''
    return doc['date']


def _normalize(dbn, doc):
    """Map one raw source document onto the common 14-column row."""
    return [
        _taxpayer_name(dbn, doc),
        _region(dbn, doc),
        _tax_id(dbn, doc),
        _org_code(dbn, doc),
        _registered_address(dbn, doc),
        _legal_representative(dbn, doc),
        _legal_representative_during(dbn, doc),
        _finance_officer(dbn, doc),
        _actual_controller(dbn, doc),
        _intermediary(dbn, doc),
        _case_nature(dbn, doc),
        _violation_facts(dbn, doc),
        _publish_date(dbn, doc),
        dbn,
    ]


def w1(dbn, records=None):
    """Normalise every document of source collection *dbn* into a common row.

    Each source names its columns differently (Chinese headers, pinyin
    abbreviations, English keys, or opaque 'S1'..'S20' slots); the helpers
    above pick the right column(s) per source.

    Args:
        dbn: Source collection name such as '01_ah' or '08_fj'. It selects
            the column mapping and is stored as the last element of each row.
        records: Optional iterable of documents (mappings). When omitted,
            all documents of ``mycoup[dbn]`` are read.

    Returns:
        A list of 14-element lists: taxpayer name, region, tax id,
        organisation code, registered address, legal representative, legal
        representative during the violation, finance officer, actual person
        in charge, intermediary info, case nature, violation facts,
        publication date, source name.

    Raises:
        KeyError: If a document lacks a column that its source mapping
            requires (only the lookups marked optional fall back to '').
    """
    if records is None:
        records = mycoup[dbn].find()
    return [_normalize(dbn, doc) for doc in records]
- # 01_ah 02_nb
- # dbn = '33_sh'
def drcav():
    """Export the normalised rows of every source collection to 'minglu.csv'.

    Calls w1() for each source name in turn and appends its rows to the CSV
    (UTF-8, no header row). The file is opened in a ``with`` block so it is
    closed even if one of the sources fails part-way through.
    """
    sources = ['01_ah', '02_nb', '03_bj', '04_sz', '05_sc', '06_js', '07_zj',
               '08_fj', '09_sd', '10_gd', '11_gx', '12_hb', '13_ln', '14_hlj',
               '15_jl', '16_gs', '17_qh', '18_hn', '19_hb', '20_hn', '21_jx',
               '22_yn', '23_hn', '24_sx', '25_sx', '26_gz', '27_nmg', '28_nx',
               '29_xz', '30_xj', '31_tj', '33_sh']
    with open('minglu.csv', 'w', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f)
        for dbn in sources:
            csv_writer.writerows(w1(dbn))
def r1():
    """Load 'minglu.csv' into the ``mycohz`` collection, one document per row.

    The 14 CSV columns are stored under the keys listed in ``field_names``
    in the same order. The 1-based row number is printed as a progress
    indicator before each insert.

    Raises:
        IndexError: If a row has fewer than 14 columns.
    """
    field_names = (
        'nsrmc', 'ssdq', 'nsrsbh', 'zzjgdm', 'zcdz', 'fddbr', 'wffddbr',
        'zjfzcwr', 'sjfzr', 'zjzrzj', 'ajxz', 'zywf', 'date', 'source',
    )
    # newline='' is required by the csv module so that line breaks embedded
    # in quoted fields are read back exactly as they were written.
    with open('minglu.csv', 'r', encoding='utf-8', newline='') as f:
        for num, row in enumerate(csv.reader(f), start=1):
            print(num)
            # Index explicitly (rather than zip) so that a short row still
            # fails loudly instead of producing a partial document.
            document = {name: row[idx] for idx, name in enumerate(field_names)}
            mycohz.insert_one(document)
- # r1()
- #
def ser_m(name):
    """Copy every ``mycohz`` document whose 'nsrmc' equals *name* into ``mycopp``.

    Each match is printed first, then inserted without its '_id' field
    (presumably so the target collection assigns a fresh one).
    """
    for match in mycohz.find({"nsrmc": name}):
        print(match)
        del match['_id']
        mycopp.insert_one(match)
def w_test():
    """Write a one-row sample file 'test.csv' (smoke test for the CSV writer).

    The file is opened in a ``with`` block so the handle is released even if
    writing fails.
    """
    with open('test.csv', 'w', encoding='utf-8', newline='') as f:
        csv.writer(f).writerow(['1', '2'])
- # w_test()
- import xlrd
- #打开excel
def r2():
    """Look up every name in the first column of the match-list workbook.

    Opens '税务违法企业匹配名单.xlsx', walks all rows of 'Sheet1' (including
    the first row) and passes the first cell of each row to ser_m(). The
    1-based row count is printed as a progress indicator.
    """
    # NOTE(review): xlrd 2.0+ reads only .xls files; confirm the installed
    # xlrd version can still open this .xlsx workbook.
    workbook = xlrd.open_workbook('税务违法企业匹配名单.xlsx')
    # Locate the worksheet by name.
    sheet = workbook.sheet_by_name('Sheet1')
    for row_idx in range(sheet.nrows):
        print(row_idx + 1)
        first_cell = sheet.row_values(row_idx)[0]
        ser_m(first_cell)
- # r2()
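# Workflow -- uncomment one step at a time:
# drcav()  # export the historical data in the database to CSV, as a backup to upload to another MongoDB -- writes minglu.csv
# r1()     # import the CSV from the previous step into MongoDB -- writes to the shuiwu_hz database
# Before querying, add a MongoDB index: db.***.createIndex({})
# r2()     # look up the names from the xlsx file and write the matching records to MongoDB
# Finally, export to CSV with the following command:
# mongoexport.exe -h 127.0.0.1 --port 27017 -d shuiwu_210709 -c base_data --csv -f nsrmc,ssdq,nsrsbh,zzjgdm,zcdz,fddbr,wffddbr,zjfzcwr,sjfzr,zjzrzj,ajxz,zywf,date,source -o shuiwu.csv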
|