@@ -0,0 +1,145 @@
+# -*- coding: utf-8 -*-
+# @Author : ChenZhaoyuchen
+# @Time : 2024/9/26 16:20
+# @File : get_message.py
+
+# The star import is expected to provide the fetched pages and templates
+# used below: response_HTML, response_HTML_province, urls_template_high,
+# urls_template_low, response_200, response_2002.
+from anjuke.utils.setting import *
+from bs4 import BeautifulSoup
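+
+# For local experimentation those names could be stubbed with requests
+# (a sketch under assumptions: the real setting module is not shown, and
+# which template maps to which page is inferred from the prints below):
+#
+#   import requests
+#   response_HTML_province = requests.get(urls_template_high).text
+#   response_HTML = requests.get(urls_template_low).text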
+
+
+def get_province_area():
+    # Province-level area names, taken from the 'sel-content' block of
+    # the province page.
+    Area_list = []
+    soup = BeautifulSoup(response_HTML_province, 'html.parser')
+    div_table = soup.find(class_='sel-content')
+
+    for area_ in div_table.find_all('a'):
+        area = area_.text
+        Area_list.append(area)
+    # An earlier bug: this del was mis-indented into the for loop, so each
+    # append was immediately followed by a del and the function returned
+    # an empty list. It must stay at this level, outside the loop.
+    del Area_list[0]
+    return Area_list
+
+
+def get_city_area():
+    # City-level area names: same province page, but the 'sel-sec' block.
+    Area_list = []
+    soup = BeautifulSoup(response_HTML_province, 'html.parser')
+    div_table = soup.find(class_='sel-sec')
+
+    for area_ in div_table.find_all('a'):
+        area = area_.text
+        Area_list.append(area)
+    del Area_list[0]
+    return Area_list
+
+def get_qu_area():
+    # District-level area names, from the 'sel-content' block of the city page.
+    Area_list = []
+    soup = BeautifulSoup(response_HTML, 'html.parser')
+    div_table = soup.find(class_='sel-content')
+
+    for area_ in div_table.find_all('a'):
+        area = area_.text
+        Area_list.append(area)
+    del Area_list[0]
+    return Area_list
+
+def get_zhoubian_area():
+    # Surrounding-area names, from the 'sel-sec' block of the city page.
+    Area_list = []
+    soup = BeautifulSoup(response_HTML, 'html.parser')
+    div_table = soup.find(class_='sel-sec')
+
+    for area_ in div_table.find_all('a'):
+        area = area_.text
+        Area_list.append(area)
+    del Area_list[0]
+    return Area_list
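+
+# The four extractors above share one pattern; a consolidated helper is
+# sketched here. `extract_area_names` is an illustrative name, not part
+# of the original module:
+def extract_area_names(html, css_class):
+    # Parse the page, find the selector block, and return the anchor
+    # texts, skipping the leading anchor as the functions above do.
+    soup = BeautifulSoup(html, 'html.parser')
+    block = soup.find(class_=css_class)
+    return [a.text for a in block.find_all('a')][1:]
+# e.g. extract_area_names(response_HTML, 'sel-content') should match
+# get_qu_area().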
+
+def get_Year():
+    # Available years from the 'year-list' block; no leading entry to drop here.
+    Years_list = []
+    soup = BeautifulSoup(response_HTML, 'html.parser')
+    years = soup.find(class_="year-list")
+    for year_ in years.find_all('a'):
+        year = year_.text
+        Years_list.append(year)
+    return Years_list
+
+def get_area_url():
+    # District URLs from the same 'sel-content' anchors; the hrefs here
+    # are site-relative, so the anjuke host is prefixed.
+    Urls_list = []
+    soup = BeautifulSoup(response_HTML, 'html.parser')
+    urls = soup.find(class_='sel-content')
+
+    for url_area in urls.find_all('a'):
+        href = url_area.get('href')
+        Urls_list.append('https://www.anjuke.com' + href)
+    del Urls_list[0]
+    return Urls_list
+
+def get_zhoubian_url():
+    # Surrounding-area URLs from the 'sel-sec' anchors; unlike
+    # get_area_url, these hrefs are stored as-is with no host prefix.
+    Urls_list = []
+    soup = BeautifulSoup(response_HTML, 'html.parser')
+    urls = soup.find(class_='sel-sec')
+
+    for url_area in urls.find_all('a'):
+        href = url_area.get('href')
+        Urls_list.append(href)
+    del Urls_list[0]
+    return Urls_list
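+
+# Names and URLs come from the same anchor list (both drop the leading
+# entry), so they stay index-aligned. A pairing sketch; the name
+# `iter_qu_name_url` is illustrative, not part of the original module:
+def iter_qu_name_url():
+    # Yield (district name, absolute URL) pairs.
+    for name, url in zip(get_qu_area(), get_area_url()):
+        yield name, url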
+
+if __name__ == '__main__':
+    # URL templates and response status values
+    print(urls_template_high, response_200)
+    print(urls_template_low, response_2002)
+    # Time span and per-area unique URLs
+    print('Time span:', get_Year())
+    # Sub-URLs: province page
+    print('Provinces/municipalities:', get_province_area())
+    print('Cities:', get_city_area())
+    # Sub-URLs: city page
+    print('Districts:', get_qu_area())
+    print('District unique URLs:', get_area_url())
+    print('Surrounding areas:', get_zhoubian_area())
+    print('Surrounding unique URLs:', get_zhoubian_url())
+    # # Raw HTML content
+    # print(response_HTML)