get_message.py

# -*- coding: utf-8 -*-
# @Author : ChenZhaoyuchen
# @Time : 2024/9/26 16:20
# @File : get_message.py
import requests
from bs4 import BeautifulSoup
from anjuke.utils.anjuke_response import *
from anjuke.utils.setting import *

# Province level
def get_province():
    province_name_list = []
    province_url_list = []
    # url_start and headers are expected to come from the star imports above
    response_province = requests.get(url=url_start, headers=headers).content.decode('utf8')
    soup = BeautifulSoup(response_province, 'html.parser')
    # print(soup)  # debug output
    filter_area_wrap = soup.find(class_="filter-area-wrap")
    # print(filter_area_wrap)  # debug output
    for province_ in filter_area_wrap.find_all('a'):
        province_name = province_.text
        province_url = province_.get('href')
        province_name_list.append(province_name)
        province_url_list.append('https://www.anjuke.com' + province_url)
    # Discard the first collected entry
    del province_name_list[0], province_url_list[0]
    return province_name_list, province_url_list
# City level
def get_city():
    province_name_list, province_url_list = get_province()
    city_name_list = []
    city_url_list = []
    for province_name, province_url in zip(province_name_list, province_url_list):
        response_city = requests.get(url=province_url, headers=headers).content.decode('utf8')
        soup = BeautifulSoup(response_city, 'html.parser')
        filter_area_wrap = soup.find(class_="sel-content bank")
        for city_ in filter_area_wrap.find_all('a'):
            # Prefix each city name with its province name
            city_name = province_name + city_.text
            city_url = city_.get('href')
            city_name_list.append(city_name)
            city_url_list.append(city_url)
    # Discard the first collected entry, matching the other functions
    del city_name_list[0], city_url_list[0]
    return city_name_list, city_url_list
# District level
def get_area():
    area_name_list = []
    area_url_list = []
    # NOTE: the target URL was left empty in the original source; supply a
    # page URL (e.g. one returned by get_city) before calling this function.
    response_area = requests.get(url='', headers=headers).content.decode('utf8')
    soup = BeautifulSoup(response_area, 'html.parser')
    filter_area_wrap = soup.find(class_="filter-area-wrap")
    for area_ in filter_area_wrap.find_all('a'):
        area_name = area_.text
        area_url = area_.get('href')
        area_name_list.append(area_name)
        area_url_list.append('https://www.anjuke.com' + area_url)
    # Discard the first collected entry
    del area_name_list[0], area_url_list[0]
    return area_name_list, area_url_list
# Surrounding areas
def get_periphery():
    periphery_name_list = []
    periphery_url_list = []
    # NOTE: the target URL was left empty in the original source; supply a
    # page URL before calling this function.
    response_periphery = requests.get(url='', headers=headers).content.decode('utf8')
    soup = BeautifulSoup(response_periphery, 'html.parser')
    filter_area_wrap = soup.find(class_="filter-area-wrap")
    for periphery_ in filter_area_wrap.find_all('a'):
        periphery_name = periphery_.text
        periphery_url = periphery_.get('href')
        periphery_name_list.append(periphery_name)
        periphery_url_list.append('https://www.anjuke.com' + periphery_url)
    # Discard the first collected entry
    del periphery_name_list[0], periphery_url_list[0]
    return periphery_name_list, periphery_url_list
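
# The four scraping functions above repeat one pattern: fetch a page, find a
# wrapper element, and collect the <a> names and hrefs. A hedged refactor
# sketch of that shared logic (scrape_links is a new helper, not part of the
# original module; dropping the first entry mirrors the `del ...[0]` lines):
def scrape_links(page_url, wrapper_class, prefix='https://www.anjuke.com'):
    html = requests.get(url=page_url, headers=headers).content.decode('utf8')
    soup = BeautifulSoup(html, 'html.parser')
    wrap = soup.find(class_=wrapper_class)
    names = [a.text for a in wrap.find_all('a')]
    urls = [prefix + a.get('href') for a in wrap.find_all('a')]
    # Skip the first entry, matching the `del ...[0]` behavior above
    return names[1:], urls[1:]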
# Get the available years
def get_Year():
    Years_list = []
    # response_origion is expected to come from the star imports above
    soup = BeautifulSoup(response_origion, 'html.parser')
    years = soup.find(class_="year-list")
    for year_ in years.find_all('a'):
        year = year_.text
        Years_list.append(year)
    return Years_list
# Test entry point
if __name__ == '__main__':
    # print('Year range:', get_Year())
    print('Province-level units:', get_province())
    # print('City-level units:', get_city())
    # print('District-level units:', get_area())
    # print('Surrounding units:', get_periphery())
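
# A hedged usage sketch for crawling politely: adding a timeout, a status
# check, and a pause between requests (none of these appear in the original
# code; the one-second delay is an arbitrary choice):
#
#   import time
#   for name, url in zip(*get_province()):
#       resp = requests.get(url, headers=headers, timeout=10)
#       resp.raise_for_status()
#       print(name, resp.status_code)
#       time.sleep(1)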