daocsv.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. #!/usr/bin/env python
  2. # coding:utf-8
  3. import csv
  4. from pymongo import MongoClient
  5. myclient = MongoClient("mongodb://127.0.0.1:27017/")
  6. mycoup = myclient['shuiwu06']
  7. mycohz = myclient['shuiwu_hz']['base_date']
  8. mycopp = myclient['shuiwu_210709']['base_data']
  9. def w1(dbn):
  10. result = mycoup[dbn].find()
  11. list1 = []
  12. for i in result:
  13. print(i)
  14. # ***********纳税人名称**********************************************#
  15. if dbn in ['08_fj']:
  16. n1 = i['S1']
  17. elif dbn in ['11_gx']:
  18. n1 = i['NSR_NAME']
  19. elif dbn in ['12_hb','19_hb']:
  20. n1 = i['NSRMC']
  21. elif dbn in ['20_hn']:
  22. n1 = i['taxpayerName']
  23. elif dbn in ['24_sx','26_gz','30_xj','33_sh']:
  24. n1 = i['nsrmc']
  25. else:
  26. n1 = i['纳税人名称']
  27. #************所属地区*****************************************#
  28. if dbn in ['01_ah','07_zj','22_yn']:
  29. n2 = i['所属地区']
  30. elif dbn in ['08_fj']:
  31. n2 = i['S20']
  32. elif dbn in ['11_gx']:
  33. n2 = i['ADDR']
  34. elif dbn in ['19_hb']:
  35. n2 = i['SHENGF'] + '/' + i['DIS']
  36. elif dbn in ['20_hn']:
  37. n2 = i['siteName']
  38. elif dbn in ['24_sx']:
  39. n2 = i['dqmc']
  40. elif dbn in ['26_gz']:
  41. n2 = i['area']
  42. else:
  43. n2 = ''
  44. #*************纳税人识别号或社会信用代码******************************#
  45. if dbn in ['01_ah','04_sz','06_js','10_gd','16_gs','17_qh','21_jx','22_yn','25_sx','29_xz','31_tj']:
  46. n3 = str(i['纳税人识别号'])
  47. elif dbn in ['02_nb','03_bj','05_sc','07_zj','09_sd','13_ln','14_hlj','15_jl','18_hn','28_nx']:
  48. n3 = str(i['纳税人识别号或社会信用代码'])
  49. elif dbn in ['27_nmg']:
  50. n3 = str(i['纳税人识别号或社会信用代码']).split('\xa0')[0]
  51. elif dbn in ['08_fj']:
  52. n3 = str(i['S2'])
  53. elif dbn in ['11_gx']:
  54. n3 = str(i['NSR_ID'])
  55. elif dbn in ['12_hb','19_hb']:
  56. n3 = str(i['NSRSBH'])
  57. elif dbn in ['20_hn']:
  58. n3 = str(i['taxpayerNumber'])
  59. elif dbn in ['23_hn']:
  60. n3 = str(i['统一社会信用代码(纳税人识别号)'])
  61. elif dbn in ['26_gz','30_xj']:
  62. n3 = str(i['nsrsbh'])
  63. elif dbn in ['33_sh']:
  64. n3 = str(i['nsrsbm'])
  65. elif dbn in ['24_sx']:
  66. try:
  67. n3 = str(i['nsrsbh'])
  68. except:
  69. n3 = ''
  70. #****************组织机构代码************************************#
  71. if dbn in ['08_fj']:
  72. n4 = str(i['S3'])
  73. elif dbn in ['11_gx']:
  74. n4 = ''
  75. elif dbn in ['12_hb']:
  76. n4 = str(i['ZZJGDM'])
  77. elif dbn in ['19_hb']:
  78. n4 = str(i['ZZJG'])
  79. elif dbn in ['20_hn']:
  80. n4 = str(i['organizationalCode'])
  81. elif dbn in ['26_gz','30_xj','33_sh']:
  82. try:
  83. n4 = str(i['zzjgdm'])
  84. except:
  85. n4 = ''
  86. elif dbn in ['24_sx']:
  87. try:
  88. n4 = str(i['zzjgdm'])
  89. except:
  90. n4 = ''
  91. else:
  92. n4 = str(i['组织机构代码'])
  93. #***************注册地址************************************#
  94. if dbn in ['08_fj']:
  95. n5 = i['S4']
  96. elif dbn in ['11_gx']:
  97. n5 = i['REG_ADDR']
  98. elif dbn in ['12_hb','19_hb']:
  99. n5 = i['ZCDZ']
  100. elif dbn in ['20_hn']:
  101. n5 = i['place']
  102. elif dbn in ['24_sx','26_gz','30_xj','33_sh']:
  103. n5 = i['zcdz']
  104. else:
  105. n5 = i['注册地址']
  106. #***********法定代表人或负责人姓名、性别及身份证号码(或其他证件号码)******#
  107. if dbn in ['01_ah','07_zj','22_yn']:
  108. # 法定代表人或者负责人姓名、性别及身份证号码(或其他证件号码)
  109. n6 = i['法定代表人或负责人姓名、性别及身份证号码(或其他证件号码)']
  110. elif dbn in ['02_nb','03_bj','05_sc','13_ln','15_jl','18_hn','28_nx']:
  111. n6 = i['法定代表人或者负责人姓名、性别及身份证号码(或其他证件号码)']
  112. elif dbn in ['25_sx',]:
  113. n6 = i['法定代表人或者负责人姓名、性别、证件名称及号码']
  114. elif dbn in ['27_nmg',]:
  115. n6 = i['法定代表人或者负责人姓名、性别、及身份证号码(或者其他证件号码)']
  116. elif dbn in ['29_xz',]:
  117. n6 = i['法人信息']
  118. elif dbn in ['14_hlj',]:
  119. n6 = i['法定代表人或者负责人姓名、性别及身份证号码(或者其他证件号码)']
  120. elif dbn in ['04_sz']:
  121. n6 = i['法定代表人或负责人姓名、性别、证件名称及号码']
  122. elif dbn in ['10_gd','16_gs','17_qh','21_jx']:
  123. n6 = i['法定代表人或者负责人姓名、性别、证件名称及号码']
  124. elif dbn in ['06_js']:
  125. n6 = i['法定代表人姓名、性别及身份证号码']
  126. elif dbn in ['09_sd']:
  127. n6 = i['法定代表人或者负责人姓名'] + '。' + i['性别']
  128. elif dbn in ['08_fj']:
  129. n6 = i['S5'] + '。' + i['S6'] + '。' + i['S7']
  130. elif dbn in ['12_hb']:
  131. n6 = i['FDDBR'] + '。' + i['FDDBSEX'] + '。' + i['FDDBZJ']+ '。' + i['FDDBZJH']
  132. elif dbn in ['11_gx']:
  133. n6 = i['FDDBR_NAME'] + '。' + i['FDDBR_SEX'] + '。' + i['FDDBR_SFZHM']
  134. elif dbn in ['19_hb']:
  135. n6 = i['FDDBR']
  136. elif dbn in ['23_hn',]:
  137. n6 = i['法定代表人、负责人或者经法院判决确定的实际责任人的姓名、性别、证件名称及号码']
  138. elif dbn in ['30_xj']:
  139. n6 = i['fddbrxm'] + '。' + i['fddbrxb'] + '。' + i['fddbrzjlx']+ '。' + i['fddbrzjhm']
  140. elif dbn in ['31_tj']:
  141. n6 = i['姓名'] + '。' + i['性别'] + '。' + i['证件名称'] + '。' + i['证件号码']
  142. elif dbn in ['26_gz']:
  143. n6 = i['fddbrhzfzrxm'] + '。' + i['fddbrhzfzrxb'] + '。' + i['fddbrhzfzrzjmc']+ '。' + i['fddbrhzfzrzjhm']
  144. elif dbn in ['33_sh']:
  145. n6 = i['frdbmc'] + '。' + i['rowno'] + '。' + i['frdbxb']+ '。' + i['frdbsfz']
  146. elif dbn in ['24_sx']:
  147. try:
  148. n6 = i['fddbrxm'] + '。' + i['fddbrxb'] + '。' + i['fddbrsfzhm']
  149. except:
  150. n6 = ''
  151. elif dbn in ['20_hn']:
  152. n6 = ''
  153. if i['legalName']:
  154. n6 += i['legalName']
  155. if i['legalSex']:
  156. if i['legalSex'] == 1:
  157. fgt = '男'
  158. else:
  159. fgt = '女'
  160. n6 += fgt
  161. if i['legalIdCard']:
  162. n6 += i['legalIdCard']
  163. #************违法期间法人代表或者负责人姓名、性别、证件名称及号码*************#
  164. if dbn in ['01_ah','02_nb','03_bj','05_sc','07_zj','22_yn','28_nx']:
  165. # 违法期间法人代表或者负责人姓名、性别及身份证号码(或其他证件号码)
  166. n7 = i['违法期间法人代表或者负责人姓名、性别及身份证号码(或其他证件号码)']
  167. elif dbn in ['18_hn']:
  168. n7 = i['违法期间法定代表人或者负责人姓名、性别及身份证号码(或其他证件号码)']
  169. elif dbn in ['14_hlj']:
  170. n7 = i['违法期间法人代表或者负责人姓名、性别及身份证号码(或者其他证件号码)']
  171. elif dbn in ['10_gd']:
  172. n7 = i['违法期间法人代表或者负责人姓名、性别、证件名称及号码']
  173. elif dbn in ['13_ln','15_jl']:
  174. n7 = i['违法期间法人代表姓名及身份证号码']
  175. elif dbn in ['25_sx']:
  176. n7 = i['违法期间法人代表或者负责人姓名性别及身份证号码(或其他证件号码)']
  177. elif dbn in ['27_nmg']:
  178. n7 = i['违法期间法定代表人或责任人姓名、性别、身份证号码(或者其他证件号码)']
  179. elif dbn in ['08_fj']:
  180. n7 = i['S8'] + '。' + i['S9'] + '。' + i['S10']
  181. elif dbn in ['30_xj']:
  182. try:
  183. n7 = i['cwfzrxm'] + '。' + i['cwfzrxb'] + '。' + i['cwfzrzjlx']+ '。' + i['cwfzrzjhm']
  184. except:
  185. n7 = ''
  186. elif dbn in ['20_hn']:
  187. n7 = ''
  188. if i['legalNameDuring']:
  189. n7 += i['legalNameDuring']
  190. if i['legalSexDuring']:
  191. if i['legalSexDuring'] == 1:
  192. fgt = '男'
  193. else:
  194. fgt = '女'
  195. n7 += fgt
  196. if i['legalIdCardDuring']:
  197. n7 += i['legalIdCardDuring']
  198. else:
  199. n7 = ''
  200. #*************负有直接责任的财务负责人姓名、性别、证件名称及号码*********#
  201. if dbn in ['01_ah', '02_nb', '03_bj','05_sc','07_zj','13_ln','15_jl','18_hn','22_yn','27_nmg']:
  202. #负有直接责任的财务负责人姓名、性别及身份证号码(或其他证件号码)
  203. n8 = i['负有直接责任的财务人员姓名、性别及身份证号码(或其他证件号码)']
  204. elif dbn in ['16_gs','25_sx']:
  205. n8 = i['负有直接责任的财务负责人姓名性别及身份证号码(或其他证件号码)']
  206. elif dbn in ['14_hlj']:
  207. n8 = i['负有直接责任的财务人员姓名、性别及身份证号码(或者其他证件号码)']
  208. elif dbn in ['28_nx']:
  209. n8 = i['负有直接责任的财务负责人姓名、性别及身份证号码(或其他证件号码)']
  210. elif dbn in ['04_sz','06_js','10_gd','17_qh','21_jx']:
  211. n8 = i['负有直接责任的财务负责人姓名、性别、证件名称及号码']
  212. elif dbn in ['08_fj']:
  213. n8 = i['S11'] + '。' + i['S12'] + '。' + i['S13']
  214. elif dbn in ['23_hn']:
  215. n8 = i['经法院裁判确定的负有直接责任的财务人员、团伙成员的姓名、性别、证件名称及号码']
  216. elif dbn in ['20_hn']:
  217. n8 = ''
  218. if i['financeName']:
  219. n8 += i['financeName']
  220. if i['financeSex']:
  221. if i['financeSex'] == 1:
  222. fgt = '男'
  223. else:
  224. fgt = '女'
  225. n8 += fgt
  226. if i['financeIdCard']:
  227. n8 += i['financeIdCard']
  228. else:
  229. n8 = ''
  230. #***********实际负责人姓名、性别及身份证号码(或其他证件号码)************#
  231. if dbn in ['01_ah', '02_nb', '03_bj','05_sc','07_zj','13_ln','15_jl','18_hn','22_yn','27_nmg','28_nx']:
  232. #实际负责人姓名、性别及身份证号码(或其他证件号码)
  233. n9 = i['实际负责人姓名、性别及身份证号码(或其他证件号码)']
  234. elif dbn in ['16_gs','25_sx']:
  235. n9 = i['实际负责人姓名性别及身份证号码(或其他证件号码)']
  236. elif dbn in ['06_js']:
  237. n9 = i['经法院裁判确定的实际责任人姓名、性别、证件名称及号码']
  238. elif dbn in ['14_hlj']:
  239. n9 = i['实际负责人姓名、性别及身份证号码(或者其他证件号码)']
  240. elif dbn in ['20_hn']:
  241. n9 = ''
  242. if i['principalName']:
  243. n9 += i['principalName']
  244. if i['principalSex']:
  245. if i['principalSex'] == 1:
  246. fgt = '男'
  247. else:
  248. fgt = '女'
  249. n9 += fgt
  250. if i['principalIdCard']:
  251. n9 += i['principalIdCard']
  252. # n9 = i['principalName'] + '。' + i['principalSex'] + '。' + i['principalIdCard']
  253. else:
  254. n9 = ''
  255. #*************负有直接责任的中介机构信息****************************#
  256. if dbn in ['01_ah', '02_nb', '03_bj','05_sc','07_zj','13_ln','14_hlj','15_jl','18_hn','22_yn','27_nmg','28_nx']:
  257. n10 = i['负有直接责任的中介机构信息']
  258. elif dbn in ['04_sz','06_js','10_gd','16_gs','17_qh','21_jx','23_hn','25_sx']:
  259. n10 = i['负有直接责任的中介机构信息及其从业人员信息']
  260. elif dbn in ['20_hn']:
  261. n10 = i['agency']
  262. else:
  263. n10 = ''
  264. #*************案件性质*********************************#
  265. if dbn in ['08_fj']:
  266. n11 = i['S18']
  267. elif dbn in ['09_sd']:
  268. n11 = ''
  269. elif dbn in ['11_gx','12_hb','19_hb']:
  270. n11 = i['AJXZ']
  271. elif dbn in ['20_hn']:
  272. n11 = i['hardCaseType']['typeName']
  273. elif dbn in ['26_gz','30_xj']:
  274. n11 = i['ajxz']
  275. elif dbn in ['29_xz']:
  276. n11 = i['违法案件性质']
  277. elif dbn in ['33_sh']:
  278. n11 = i['ajMc']
  279. elif dbn in ['24_sx']:
  280. try:
  281. n11 = i['ajxzmc']
  282. except:
  283. n11 = ''
  284. else:
  285. n11 = i['案件性质']
  286. #***********主要违法事实相关法律依据及税务处理处罚情况**********************#
  287. if dbn in ['01_ah','02_nb','05_sc','07_zj','13_ln','14_hlj','15_jl','16_gs','18_hn','22_yn','25_sx','27_nmg']:
  288. n12 = i['主要违法事实相关法律依据及税务处理处罚情况']
  289. elif dbn in ['03_bj','29_xz']:
  290. n12 = i['主要违法事实']
  291. elif dbn in ['06_js']:
  292. n12 = i['主要违法事实、相关法律依据及税务处理处罚情况']
  293. elif dbn in ['04_sz','10_gd','17_qh','28_nx','31_tj']:
  294. n12 = i['主要违法事实'] + '。' +i['相关法律依据及税务处理处罚情况']
  295. elif dbn in ['12_hb']:
  296. n12 = i['ZYWFSS'] + '。' +i['FLYJ']
  297. elif dbn in ['30_xj']:
  298. n12 = i['wfss'] + '。' +i['swclcfqk']
  299. elif dbn in ['09_sd']:
  300. n12 = i['主要违法事实'] + '。' + i['相关法律依据及税务处理处罚情况 ']
  301. elif dbn in ['08_fj']:
  302. n12 = i['S19']
  303. elif dbn in ['11_gx']:
  304. n12 = i['ZYWFSS']
  305. elif dbn in ['19_hb']:
  306. n12 = i['WFSS']
  307. elif dbn in ['33_sh']:
  308. n12 = i['wfss']
  309. elif dbn in ['20_hn']:
  310. n12 = i['content']
  311. elif dbn in ['21_jx']:
  312. n12 = i['主要违法事实直接法律依据及税务处理处罚情况']
  313. elif dbn in ['24_sx']:
  314. n12 = i['zywfss'] + '。' + i['flyj_cljg']
  315. elif dbn in ['26_gz']:
  316. n12 = i['zywfss'] + '。' + i['xgflyjjswclcfqk']
  317. elif dbn in ['23_hn']:
  318. n12 = i['主要违法事实'] + '。' + i['相关法律依据及税务处理、税务行政处罚等情况']
  319. #**************************************************************#
  320. if dbn in ['08_fj']:
  321. n13 = i['docreltime'].replace('-','/')
  322. elif dbn in ['11_gx']:
  323. n13 = i['DOCPUBTIME'].split(' ')[0].replace('.','/')
  324. elif dbn in ['12_hb']:
  325. n13 = i['GBRQ'].split(' ')[0]
  326. elif dbn in ['19_hb']:
  327. n13 = i['GBRQ'].split(' ')[0].replace('年','/').replace('月','')
  328. elif dbn in ['24_sx']:
  329. n13 = i['xsrq'].split(' ')[0].replace('-','/')
  330. elif dbn in ['26_gz']:
  331. n13 = i['CrTime'].split(' ')[0].replace('-','/')
  332. elif dbn in ['30_xj']:
  333. n13 = ''
  334. else:
  335. n13 = i['date']
  336. n14 = dbn
  337. list1.append([n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, n11, n12, n13, n14])
  338. return list1
  339. # 01_ah 02_nb
  340. # dbn = '33_sh'
  341. def drcav():
  342. l1 = ['01_ah', '02_nb', '03_bj', '04_sz', '05_sc', '06_js', '07_zj', '08_fj', '09_sd', '10_gd', '11_gx', '12_hb',
  343. '13_ln', '14_hlj', '15_jl', '16_gs', '17_qh', '18_hn', '19_hb', '20_hn', '21_jx', '22_yn', '23_hn', '24_sx'
  344. , '25_sx', '26_gz', '27_nmg', '28_nx', '29_xz', '30_xj', '31_tj', '33_sh']
  345. f = open('minglu.csv','w',encoding='utf-8',newline='')
  346. csv_writer = csv.writer(f)
  347. for dbn in l1:
  348. list1 = w1(dbn)
  349. for i1 in list1:
  350. csv_writer.writerow(i1)
  351. f.close()
  352. def r1():
  353. with open('minglu.csv', 'r', encoding='utf-8') as f:
  354. csv_writer = csv.reader(f)
  355. num = 0
  356. for i in csv_writer:
  357. num +=1
  358. print(num)
  359. nsrmc = i[0]
  360. ssdq =i[1]
  361. nsrsbh= i[2]
  362. zzjgdm=i[3]
  363. zcdz= i[4]
  364. fddbr=i[5]
  365. wffddbr= i[6]
  366. zjfzcwr= i[7]
  367. sjfzr= i[8]
  368. zjzrzj= i[9]
  369. ajxz=i[10]
  370. zywf= i[11]
  371. date=i[12]
  372. soure=i[13]
  373. dict1 = {
  374. "nsrmc":nsrmc,
  375. "ssdq":ssdq,
  376. "nsrsbh":nsrsbh,
  377. "zzjgdm":zzjgdm,
  378. "zcdz":zcdz,
  379. "fddbr":fddbr,
  380. "wffddbr":wffddbr,
  381. "zjfzcwr":zjfzcwr,
  382. "sjfzr":sjfzr,
  383. "zjzrzj":zjzrzj,
  384. "ajxz":ajxz,
  385. "zywf":zywf,
  386. "date": date,
  387. "source": soure,
  388. }
  389. # print(dict1)
  390. # break
  391. mycohz.insert_one(dict1)
  392. # r1()
  393. #
  394. def ser_m(name):
  395. a = mycohz.find({"nsrmc":name})
  396. for i in a:
  397. print(i)
  398. i.pop('_id')
  399. mycopp.insert_one(i)
  400. def w_test():
  401. f = open('test.csv', 'w', encoding='utf-8', newline='')
  402. csv_writer = csv.writer(f)
  403. i1= ['1','2']
  404. csv_writer.writerow(i1)
  405. f.close()
  406. # w_test()
  407. import xlrd
  408. #打开excel
  409. def r2():
  410. wb = xlrd.open_workbook('税务违法企业匹配名单.xlsx')
  411. #按工作簿定位工作表
  412. sh = wb.sheet_by_name('Sheet1')
  413. # print(sh.nrows)#有效数据行数
  414. # print(sh.ncols)#有效数据列数
  415. # print(sh.cell(0,0).value)#输出第一行第一列的值
  416. # print(sh.row_values(0))#输出第一行的所有值
  417. # #将数据和标题组合成字典
  418. # print(dict(zip(sh.row_values(0),sh.row_values(1))))
  419. #遍历excel,打印所有数据
  420. num = 0
  421. for i in range(sh.nrows):
  422. num +=1
  423. print(num)
  424. name = sh.row_values(i)[0]
  425. ser_m(name)
  426. # print(name)
  427. # r2()
  428. # drcav() #将库内历史数据导入csv,用来备份上传至另一个mongo --写入minglu.csv
  429. # r1() #将上一步csv导入mongo --写入shuiwu_hz mongoDB
  430. # 查询前添加mongo索引 db.***.createIndex({})
  431. # r2() #将xlsx文件查询,数据写入 mongodb
  432. # 下命令导出csv
  433. # mongoexport.exe -h 127.0.0.1 --port 27017 -d shuiwu_210709 -c base_data --csv -f nsrmc,ssdq,nsrsbh,zzjgdm,zcdz,fddbr,wffddbr,zjfzcwr,sjfzr,zjzrzj,ajxz,zywf,date,source -o shuiwu.csv