get_message.py

# -*- coding: utf-8 -*-
# @Author : ChenZhaoyuchen
# @Time : 2024/9/26 16:20
# @File : get_message.py
# The star import supplies the pre-fetched pages and URL templates used below:
# response_HTML, response_HTML_province, urls_template_high/low, response_200, response_2002.
from anjuke.utils.setting import *
from bs4 import BeautifulSoup


def get_province_area():
    Area_list = []
    soup = BeautifulSoup(response_HTML_province, 'html.parser')
    div_table = soup.find(class_='sel-content')
    for area_ in div_table.find_all('a'):
        area = area_.text
        Area_list.append(area)
    # Earlier bug: this `del` was mis-indented into the for loop, so each
    # appended item was immediately deleted and the function returned an
    # empty list. It must run once, after the loop, to drop the leading
    # placeholder entry (see the sketch after this function).
    del Area_list[0]
    return Area_list
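
# For contrast, the buggy shape described in the comment above looked like
# this (illustrative sketch, not code from this repo):
#     for area_ in div_table.find_all('a'):
#         Area_list.append(area_.text)
#         del Area_list[0]   # deletes what was just appended -> empty list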


def get_city_area():
    Area_list = []
    soup = BeautifulSoup(response_HTML_province, 'html.parser')
    div_table = soup.find(class_='sel-sec')
    for area_ in div_table.find_all('a'):
        area = area_.text
        Area_list.append(area)
    del Area_list[0]
    return Area_list


def get_qu_area():
    Area_list = []
    soup = BeautifulSoup(response_HTML, 'html.parser')
    div_table = soup.find(class_='sel-content')
    for area_ in div_table.find_all('a'):
        area = area_.text
        Area_list.append(area)
    del Area_list[0]
    return Area_list


def get_zhoubian_area():
    Area_list = []
    soup = BeautifulSoup(response_HTML, 'html.parser')
    div_table = soup.find(class_='sel-sec')
    for area_ in div_table.find_all('a'):
        area = area_.text
        Area_list.append(area)
    del Area_list[0]
    return Area_list


def get_Year():
    Years_list = []
    soup = BeautifulSoup(response_HTML, 'html.parser')
    years = soup.find(class_="year-list")
    for year_ in years.find_all('a'):
        year = year_.text
        Years_list.append(year)
    return Years_list


def get_area_url():
    Urls_list = []
    soup = BeautifulSoup(response_HTML, 'html.parser')
    urls = soup.find(class_='sel-content')
    for url_area in urls.find_all('a'):
        href = url_area.get('href')
        # District hrefs are site-relative, so prefix the site root.
        Urls_list.append('https://www.anjuke.com' + href)
    del Urls_list[0]
    return Urls_list


def get_zhoubian_url():
    Urls_list = []
    soup = BeautifulSoup(response_HTML, 'html.parser')
    urls = soup.find(class_='sel-sec')
    for url_area in urls.find_all('a'):
        href = url_area.get('href')
        # Unlike get_area_url(), these hrefs are appended as-is, without the
        # site-root prefix.
        Urls_list.append(href)
    del Urls_list[0]
    return Urls_list
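

# The six scrapers above differ only in which pre-fetched page they parse,
# which container class they scan, and whether they keep link text or hrefs.
# A minimal consolidated sketch follows; `_extract_links` and its parameters
# are hypothetical names, not part of anjuke.utils.setting. It mirrors the
# area/url helpers (which drop the leading placeholder entry), not get_Year().
def _extract_links(html, selector, base_url=''):
    """Return ([link texts], [link hrefs]) for every <a> inside the first
    element with class `selector`, skipping the leading placeholder entry."""
    soup = BeautifulSoup(html, 'html.parser')
    container = soup.find(class_=selector)
    names, hrefs = [], []
    for a in container.find_all('a'):
        names.append(a.text)
        hrefs.append(base_url + (a.get('href') or ''))
    return names[1:], hrefs[1:]
# Usage sketch, with names as used in this file:
#     provinces, _ = _extract_links(response_HTML_province, 'sel-content')
#     _, district_urls = _extract_links(response_HTML, 'sel-content',
#                                       base_url='https://www.anjuke.com')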


if __name__ == '__main__':
    # URL templates and response values
    print(urls_template_high, response_200)
    print(urls_template_low, response_2002)
    # Year span and unique area URLs
    print('Year span:', get_Year())
    # Sub-URLs
    print('Provinces/municipalities:', get_province_area())
    print('Cities:', get_city_area())
    # Sub-URLs
    print('Districts:', get_qu_area())
    print('District unique URLs:', get_area_url())
    print('Surrounding areas:', get_zhoubian_area())
    print('Surrounding-area unique URLs:', get_zhoubian_url())
    # # HTML content
    # print(response_HTML)