#!/usr/bin/env python
# coding:utf-8
"""Merge per-province tax-violation collections into one flat record layout.

Workflow (each step is run by hand, in this order):

1. ``drcav()`` - dump every source collection of the ``shuiwu06`` database
   into ``minglu.csv`` (one normalised 14-column row per document).
2. ``r1()``    - load ``minglu.csv`` into the ``shuiwu_hz`` database.
   Create an index on the target collection before querying it
   (``db.***.createIndex({})``).
3. ``r2()``    - read company names from the xlsx file and copy the matching
   documents into the ``shuiwu_210709`` database.
4. Export the result with mongoexport, e.g.::

       mongoexport.exe -h 127.0.0.1 --port 27017 -d shuiwu_210709 -c base_data --csv -f nsrmc,ssdq,nsrsbh,zzjgdm,zcdz,fddbr,wffddbr,zjfzcwr,sjfzr,zjzrzj,ajxz,zywf,date,source -o shuiwu.csv
"""
import csv

import xlrd
from pymongo import MongoClient

myclient = MongoClient("mongodb://127.0.0.1:27017/")
mycoup = myclient['shuiwu06']
# NOTE(review): the collection below is named 'base_date' while the next one
# is 'base_data' - confirm that the different spelling is intentional.
mycohz = myclient['shuiwu_hz']['base_date']
mycopp = myclient['shuiwu_210709']['base_data']

# Column order of minglu.csv and, at the same time, the keys of the documents
# that r1() inserts.
_CSV_FIELDS = (
    "nsrmc", "ssdq", "nsrsbh", "zzjgdm", "zcdz", "fddbr", "wffddbr",
    "zjfzcwr", "sjfzr", "zjzrzj", "ajxz", "zywf", "date", "source",
)

# Errors treated as "field not available" for the sources whose documents do
# not always carry every key (a missing key, or a value that cannot be joined).
_LOOKUP_ERRORS = (KeyError, TypeError)


def _joined(doc, *keys):
    """Return the values stored under *keys* in *doc*, joined with '。'."""
    return '。'.join([doc[key] for key in keys])


def _hn_person(doc, name_key, sex_key, id_key):
    """Concatenate name, sex and id number of one person of a 20_hn record.

    Falsy parts are skipped.  A sex value equal to 1 is written as '男', any
    other truthy value as '女'.  The parts are joined without a separator.
    """
    text = ''
    if doc[name_key]:
        text += doc[name_key]
    if doc[sex_key]:
        text += '男' if doc[sex_key] == 1 else '女'
    if doc[id_key]:
        text += doc[id_key]
    return text


def _unsupported(dbn, field):
    """Build the error raised when *dbn* has no mapping for a mandatory field."""
    return ValueError(
        'w1: collection {!r} has no mapping for {}'.format(dbn, field))


def _build_row(dbn, doc):
    """Translate one source document into the common 14-column row.

    Args:
        dbn: name of the source collection (for example ``'01_ah'``); it
            selects which keys of *doc* are read.
        doc: one document of that collection.

    Returns:
        ``[n1, ..., n14]`` in the column order of ``_CSV_FIELDS``.

    Raises:
        ValueError: *dbn* has no mapping for the taxpayer id, the legal
            representative or the violation facts.
        KeyError: *doc* lacks a key that the mapping of *dbn* requires.
    """
    # ---- n1: taxpayer name -------------------------------------------------
    if dbn == '08_fj':
        n1 = doc['S1']
    elif dbn == '11_gx':
        n1 = doc['NSR_NAME']
    elif dbn in ('12_hb', '19_hb'):
        n1 = doc['NSRMC']
    elif dbn == '20_hn':
        n1 = doc['taxpayerName']
    elif dbn in ('24_sx', '26_gz', '30_xj', '33_sh'):
        n1 = doc['nsrmc']
    else:
        n1 = doc['纳税人名称']

    # ---- n2: region --------------------------------------------------------
    if dbn in ('01_ah', '07_zj', '22_yn'):
        n2 = doc['所属地区']
    elif dbn == '08_fj':
        n2 = doc['S20']
    elif dbn == '11_gx':
        n2 = doc['ADDR']
    elif dbn == '19_hb':
        n2 = doc['SHENGF'] + '/' + doc['DIS']
    elif dbn == '20_hn':
        n2 = doc['siteName']
    elif dbn == '24_sx':
        n2 = doc['dqmc']
    elif dbn == '26_gz':
        n2 = doc['area']
    else:
        n2 = ''

    # ---- n3: taxpayer id / unified social credit code ----------------------
    if dbn in ('01_ah', '04_sz', '06_js', '10_gd', '16_gs', '17_qh', '21_jx',
               '22_yn', '25_sx', '29_xz', '31_tj'):
        n3 = str(doc['纳税人识别号'])
    elif dbn in ('02_nb', '03_bj', '05_sc', '07_zj', '09_sd', '13_ln',
                 '14_hlj', '15_jl', '18_hn', '28_nx'):
        n3 = str(doc['纳税人识别号或社会信用代码'])
    elif dbn == '27_nmg':
        # Keep only the part in front of the first non-breaking space.
        n3 = str(doc['纳税人识别号或社会信用代码']).split('\xa0')[0]
    elif dbn == '08_fj':
        n3 = str(doc['S2'])
    elif dbn == '11_gx':
        n3 = str(doc['NSR_ID'])
    elif dbn in ('12_hb', '19_hb'):
        n3 = str(doc['NSRSBH'])
    elif dbn == '20_hn':
        n3 = str(doc['taxpayerNumber'])
    elif dbn == '23_hn':
        n3 = str(doc['统一社会信用代码(纳税人识别号)'])
    elif dbn in ('26_gz', '30_xj'):
        n3 = str(doc['nsrsbh'])
    elif dbn == '33_sh':
        n3 = str(doc['nsrsbm'])
    elif dbn == '24_sx':
        try:
            n3 = str(doc['nsrsbh'])
        except _LOOKUP_ERRORS:
            n3 = ''
    else:
        raise _unsupported(dbn, 'the taxpayer id')

    # ---- n4: organisation code ---------------------------------------------
    if dbn == '08_fj':
        n4 = str(doc['S3'])
    elif dbn == '11_gx':
        n4 = ''
    elif dbn == '12_hb':
        n4 = str(doc['ZZJGDM'])
    elif dbn == '19_hb':
        n4 = str(doc['ZZJG'])
    elif dbn == '20_hn':
        n4 = str(doc['organizationalCode'])
    elif dbn in ('26_gz', '30_xj', '33_sh', '24_sx'):
        try:
            n4 = str(doc['zzjgdm'])
        except _LOOKUP_ERRORS:
            n4 = ''
    else:
        n4 = str(doc['组织机构代码'])

    # ---- n5: registered address --------------------------------------------
    if dbn == '08_fj':
        n5 = doc['S4']
    elif dbn == '11_gx':
        n5 = doc['REG_ADDR']
    elif dbn in ('12_hb', '19_hb'):
        n5 = doc['ZCDZ']
    elif dbn == '20_hn':
        n5 = doc['place']
    elif dbn in ('24_sx', '26_gz', '30_xj', '33_sh'):
        n5 = doc['zcdz']
    else:
        n5 = doc['注册地址']

    # ---- n6: legal representative / person in charge -----------------------
    # (name, sex and id-card or other certificate number)
    if dbn in ('01_ah', '07_zj', '22_yn'):
        n6 = doc['法定代表人或负责人姓名、性别及身份证号码(或其他证件号码)']
    elif dbn in ('02_nb', '03_bj', '05_sc', '13_ln', '15_jl', '18_hn',
                 '28_nx'):
        n6 = doc['法定代表人或者负责人姓名、性别及身份证号码(或其他证件号码)']
    elif dbn == '25_sx':
        n6 = doc['法定代表人或者负责人姓名、性别、证件名称及号码']
    elif dbn == '27_nmg':
        n6 = doc['法定代表人或者负责人姓名、性别、及身份证号码(或者其他证件号码)']
    elif dbn == '29_xz':
        n6 = doc['法人信息']
    elif dbn == '14_hlj':
        n6 = doc['法定代表人或者负责人姓名、性别及身份证号码(或者其他证件号码)']
    elif dbn == '04_sz':
        n6 = doc['法定代表人或负责人姓名、性别、证件名称及号码']
    elif dbn in ('10_gd', '16_gs', '17_qh', '21_jx'):
        n6 = doc['法定代表人或者负责人姓名、性别、证件名称及号码']
    elif dbn == '06_js':
        n6 = doc['法定代表人姓名、性别及身份证号码']
    elif dbn == '09_sd':
        n6 = _joined(doc, '法定代表人或者负责人姓名', '性别')
    elif dbn == '08_fj':
        n6 = _joined(doc, 'S5', 'S6', 'S7')
    elif dbn == '12_hb':
        n6 = _joined(doc, 'FDDBR', 'FDDBSEX', 'FDDBZJ', 'FDDBZJH')
    elif dbn == '11_gx':
        n6 = _joined(doc, 'FDDBR_NAME', 'FDDBR_SEX', 'FDDBR_SFZHM')
    elif dbn == '19_hb':
        n6 = doc['FDDBR']
    elif dbn == '23_hn':
        n6 = doc['法定代表人、负责人或者经法院判决确定的实际责任人的姓名、性别、证件名称及号码']
    elif dbn == '30_xj':
        n6 = _joined(doc, 'fddbrxm', 'fddbrxb', 'fddbrzjlx', 'fddbrzjhm')
    elif dbn == '31_tj':
        n6 = _joined(doc, '姓名', '性别', '证件名称', '证件号码')
    elif dbn == '26_gz':
        n6 = _joined(doc, 'fddbrhzfzrxm', 'fddbrhzfzrxb', 'fddbrhzfzrzjmc',
                     'fddbrhzfzrzjhm')
    elif dbn == '33_sh':
        # NOTE(review): 'rowno' sits between the name and the sex field;
        # kept exactly as in the original mapping - confirm it is wanted.
        n6 = _joined(doc, 'frdbmc', 'rowno', 'frdbxb', 'frdbsfz')
    elif dbn == '24_sx':
        try:
            n6 = _joined(doc, 'fddbrxm', 'fddbrxb', 'fddbrsfzhm')
        except _LOOKUP_ERRORS:
            n6 = ''
    elif dbn == '20_hn':
        n6 = _hn_person(doc, 'legalName', 'legalSex', 'legalIdCard')
    else:
        raise _unsupported(dbn, 'the legal representative')

    # ---- n7: legal representative during the violation period --------------
    if dbn in ('01_ah', '02_nb', '03_bj', '05_sc', '07_zj', '22_yn', '28_nx'):
        n7 = doc['违法期间法人代表或者负责人姓名、性别及身份证号码(或其他证件号码)']
    elif dbn == '18_hn':
        n7 = doc['违法期间法定代表人或者负责人姓名、性别及身份证号码(或其他证件号码)']
    elif dbn == '14_hlj':
        n7 = doc['违法期间法人代表或者负责人姓名、性别及身份证号码(或者其他证件号码)']
    elif dbn == '10_gd':
        n7 = doc['违法期间法人代表或者负责人姓名、性别、证件名称及号码']
    elif dbn in ('13_ln', '15_jl'):
        n7 = doc['违法期间法人代表姓名及身份证号码']
    elif dbn == '25_sx':
        n7 = doc['违法期间法人代表或者负责人姓名性别及身份证号码(或其他证件号码)']
    elif dbn == '27_nmg':
        n7 = doc['违法期间法定代表人或责任人姓名、性别、身份证号码(或者其他证件号码)']
    elif dbn == '08_fj':
        n7 = _joined(doc, 'S8', 'S9', 'S10')
    elif dbn == '30_xj':
        try:
            n7 = _joined(doc, 'cwfzrxm', 'cwfzrxb', 'cwfzrzjlx', 'cwfzrzjhm')
        except _LOOKUP_ERRORS:
            n7 = ''
    elif dbn == '20_hn':
        n7 = _hn_person(doc, 'legalNameDuring', 'legalSexDuring',
                        'legalIdCardDuring')
    else:
        n7 = ''

    # ---- n8: directly responsible financial officer ------------------------
    if dbn in ('01_ah', '02_nb', '03_bj', '05_sc', '07_zj', '13_ln', '15_jl',
               '18_hn', '22_yn', '27_nmg'):
        n8 = doc['负有直接责任的财务人员姓名、性别及身份证号码(或其他证件号码)']
    elif dbn in ('16_gs', '25_sx'):
        n8 = doc['负有直接责任的财务负责人姓名性别及身份证号码(或其他证件号码)']
    elif dbn == '14_hlj':
        n8 = doc['负有直接责任的财务人员姓名、性别及身份证号码(或者其他证件号码)']
    elif dbn == '28_nx':
        n8 = doc['负有直接责任的财务负责人姓名、性别及身份证号码(或其他证件号码)']
    elif dbn in ('04_sz', '06_js', '10_gd', '17_qh', '21_jx'):
        n8 = doc['负有直接责任的财务负责人姓名、性别、证件名称及号码']
    elif dbn == '08_fj':
        n8 = _joined(doc, 'S11', 'S12', 'S13')
    elif dbn == '23_hn':
        n8 = doc['经法院裁判确定的负有直接责任的财务人员、团伙成员的姓名、性别、证件名称及号码']
    elif dbn == '20_hn':
        n8 = _hn_person(doc, 'financeName', 'financeSex', 'financeIdCard')
    else:
        n8 = ''

    # ---- n9: actual person in charge ---------------------------------------
    if dbn in ('01_ah', '02_nb', '03_bj', '05_sc', '07_zj', '13_ln', '15_jl',
               '18_hn', '22_yn', '27_nmg', '28_nx'):
        n9 = doc['实际负责人姓名、性别及身份证号码(或其他证件号码)']
    elif dbn in ('16_gs', '25_sx'):
        n9 = doc['实际负责人姓名性别及身份证号码(或其他证件号码)']
    elif dbn == '06_js':
        n9 = doc['经法院裁判确定的实际责任人姓名、性别、证件名称及号码']
    elif dbn == '14_hlj':
        n9 = doc['实际负责人姓名、性别及身份证号码(或者其他证件号码)']
    elif dbn == '20_hn':
        n9 = _hn_person(doc, 'principalName', 'principalSex',
                        'principalIdCard')
    else:
        n9 = ''

    # ---- n10: directly responsible intermediary ----------------------------
    if dbn in ('01_ah', '02_nb', '03_bj', '05_sc', '07_zj', '13_ln', '14_hlj',
               '15_jl', '18_hn', '22_yn', '27_nmg', '28_nx'):
        n10 = doc['负有直接责任的中介机构信息']
    elif dbn in ('04_sz', '06_js', '10_gd', '16_gs', '17_qh', '21_jx',
                 '23_hn', '25_sx'):
        n10 = doc['负有直接责任的中介机构信息及其从业人员信息']
    elif dbn == '20_hn':
        n10 = doc['agency']
    else:
        n10 = ''

    # ---- n11: nature of the case -------------------------------------------
    if dbn == '08_fj':
        n11 = doc['S18']
    elif dbn == '09_sd':
        n11 = ''
    elif dbn in ('11_gx', '12_hb', '19_hb'):
        n11 = doc['AJXZ']
    elif dbn == '20_hn':
        n11 = doc['hardCaseType']['typeName']
    elif dbn in ('26_gz', '30_xj'):
        n11 = doc['ajxz']
    elif dbn == '29_xz':
        n11 = doc['违法案件性质']
    elif dbn == '33_sh':
        n11 = doc['ajMc']
    elif dbn == '24_sx':
        try:
            n11 = doc['ajxzmc']
        except _LOOKUP_ERRORS:
            n11 = ''
    else:
        n11 = doc['案件性质']

    # ---- n12: main violation facts, legal basis and penalty ----------------
    if dbn in ('01_ah', '02_nb', '05_sc', '07_zj', '13_ln', '14_hlj', '15_jl',
               '16_gs', '18_hn', '22_yn', '25_sx', '27_nmg'):
        n12 = doc['主要违法事实相关法律依据及税务处理处罚情况']
    elif dbn in ('03_bj', '29_xz'):
        n12 = doc['主要违法事实']
    elif dbn == '06_js':
        n12 = doc['主要违法事实、相关法律依据及税务处理处罚情况']
    elif dbn in ('04_sz', '10_gd', '17_qh', '28_nx', '31_tj'):
        n12 = _joined(doc, '主要违法事实', '相关法律依据及税务处理处罚情况')
    elif dbn == '12_hb':
        n12 = _joined(doc, 'ZYWFSS', 'FLYJ')
    elif dbn == '30_xj':
        n12 = _joined(doc, 'wfss', 'swclcfqk')
    elif dbn == '09_sd':
        # The second key really ends with a space; do not "tidy" it.
        n12 = _joined(doc, '主要违法事实', '相关法律依据及税务处理处罚情况 ')
    elif dbn == '08_fj':
        n12 = doc['S19']
    elif dbn == '11_gx':
        n12 = doc['ZYWFSS']
    elif dbn == '19_hb':
        n12 = doc['WFSS']
    elif dbn == '33_sh':
        n12 = doc['wfss']
    elif dbn == '20_hn':
        n12 = doc['content']
    elif dbn == '21_jx':
        n12 = doc['主要违法事实直接法律依据及税务处理处罚情况']
    elif dbn == '24_sx':
        n12 = _joined(doc, 'zywfss', 'flyj_cljg')
    elif dbn == '26_gz':
        n12 = _joined(doc, 'zywfss', 'xgflyjjswclcfqk')
    elif dbn == '23_hn':
        n12 = _joined(doc, '主要违法事实', '相关法律依据及税务处理、税务行政处罚等情况')
    else:
        raise _unsupported(dbn, 'the violation facts')

    # ---- n13: date ---------------------------------------------------------
    if dbn == '08_fj':
        n13 = doc['docreltime'].replace('-', '/')
    elif dbn == '11_gx':
        n13 = doc['DOCPUBTIME'].split(' ')[0].replace('.', '/')
    elif dbn == '12_hb':
        n13 = doc['GBRQ'].split(' ')[0]
    elif dbn == '19_hb':
        n13 = doc['GBRQ'].split(' ')[0].replace('年', '/').replace('月', '')
    elif dbn == '24_sx':
        n13 = doc['xsrq'].split(' ')[0].replace('-', '/')
    elif dbn == '26_gz':
        n13 = doc['CrTime'].split(' ')[0].replace('-', '/')
    elif dbn == '30_xj':
        n13 = ''
    else:
        n13 = doc['date']

    # ---- n14: source collection --------------------------------------------
    n14 = dbn

    return [n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, n11, n12, n13, n14]


def w1(dbn):
    """Read every document of collection *dbn* and normalise it.

    Args:
        dbn: collection name inside the ``shuiwu06`` database.

    Returns:
        A list with one 14-element row per document (see ``_build_row``).
        Each raw document is also printed before it is converted.
    """
    list1 = []
    for doc in mycoup[dbn].find():
        print(doc)
        list1.append(_build_row(dbn, doc))
    return list1


def drcav():
    """Write the normalised rows of all source collections to minglu.csv."""
    l1 = ['01_ah', '02_nb', '03_bj', '04_sz', '05_sc', '06_js', '07_zj',
          '08_fj', '09_sd', '10_gd', '11_gx', '12_hb', '13_ln', '14_hlj',
          '15_jl', '16_gs', '17_qh', '18_hn', '19_hb', '20_hn', '21_jx',
          '22_yn', '23_hn', '24_sx', '25_sx', '26_gz', '27_nmg', '28_nx',
          '29_xz', '30_xj', '31_tj', '33_sh']
    # The context manager closes the file even when a collection fails.
    with open('minglu.csv', 'w', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f)
        for dbn in l1:
            csv_writer.writerows(w1(dbn))


def r1():
    """Insert every row of minglu.csv into the ``mycohz`` collection.

    Prints a running row count.  A row with fewer than 14 columns raises
    ``IndexError``.
    """
    # newline='' lets the csv module handle line breaks inside quoted fields
    # itself instead of having them rewritten by universal-newline decoding.
    with open('minglu.csv', 'r', encoding='utf-8', newline='') as f:
        for num, row in enumerate(csv.reader(f), 1):
            print(num)
            dict1 = {key: row[idx] for idx, key in enumerate(_CSV_FIELDS)}
            mycohz.insert_one(dict1)


def ser_m(name):
    """Copy all ``mycohz`` documents whose ``nsrmc`` equals *name* to ``mycopp``.

    Each match is printed; its ``_id`` is removed before the insert.
    """
    for doc in mycohz.find({"nsrmc": name}):
        print(doc)
        doc.pop('_id')
        mycopp.insert_one(doc)


def w_test():
    """Write the single row ``['1', '2']`` to test.csv (csv writer smoke test)."""
    with open('test.csv', 'w', encoding='utf-8', newline='') as f:
        csv.writer(f).writerow(['1', '2'])


def r2():
    """Run ``ser_m`` for the first cell of every row of sheet 'Sheet1'.

    The workbook is '税务违法企业匹配名单.xlsx' in the working directory.
    Every row is processed, including the first one; a running count is
    printed.
    """
    # NOTE(review): xlrd 2.x only reads .xls files - confirm that the
    # installed xlrd version can still open this .xlsx workbook.
    wb = xlrd.open_workbook('税务违法企业匹配名单.xlsx')
    sh = wb.sheet_by_name('Sheet1')
    for row_idx in range(sh.nrows):
        print(row_idx + 1)
        ser_m(sh.row_values(row_idx)[0])