# -*- coding: utf-8 -*- # @Author : ChenZhaoyuchen # @Time : 2024/9/26 16:20 # @File : get_message.py from anjuke.utils.setting import * from bs4 import BeautifulSoup def get_province_area(): Area_list = [] soup = BeautifulSoup(response_HTML_province, 'html.parser') div_table = soup.find(class_='sel-content') for area_ in div_table.find_all('a'): area = area_.text Area_list.append(area) # 此处出现错误是因为del操作缩进错误,for循环里append,又del导致输出为空 del Area_list[0] return Area_list def get_city_area(): Area_list = [] soup = BeautifulSoup(response_HTML_province, 'html.parser') div_table = soup.find(class_='sel-sec') for area_ in div_table.find_all('a'): area = area_.text Area_list.append(area) del Area_list[0] return Area_list def get_qu_area(): Area_list = [] soup = BeautifulSoup(response_HTML, 'html.parser') div_table = soup.find(class_= 'sel-content') for area_ in div_table.find_all('a'): area = area_.text Area_list.append(area) del Area_list[0] return Area_list def get_zhoubian_area(): Area_list = [] soup = BeautifulSoup(response_HTML, 'html.parser') div_table = soup.find(class_= 'sel-sec') for area_ in div_table.find_all('a'): area = area_.text Area_list.append(area) del Area_list[0] return Area_list def get_Year(): Years_list = [] soup = BeautifulSoup(response_HTML, 'html.parser') years = soup.find(class_="year-list") for year_ in years.find_all('a'): year = year_.text Years_list.append(year) return Years_list def get_area_url(): Urls_list = [] soup = BeautifulSoup(response_HTML, 'html.parser') urls = soup.find(class_='sel-content') for url_area in urls.find_all('a'): url_area = url_area.get('href') Urls_list.append('https://www.anjuke.com' + url_area) del Urls_list[0] return Urls_list def get_zhoubian_url(): Urls_list = [] soup = BeautifulSoup(response_HTML, 'html.parser') urls = soup.find(class_='sel-sec') for url_area in urls.find_all('a'): url_area = url_area.get('href') Urls_list.append( url_area) del Urls_list[0] return Urls_list if __name__ == '__main__': # url和response值 print(urls_template_high,response_200) print(urls_template_low,response_2002) # 时间跨度及地区唯一url print('时间跨度:',get_Year()) # 分url print('省级/直辖市:',get_province_area()) print('市级:',get_city_area()) # 分url print('区级:',get_qu_area()) print('区级'+'唯一url:',get_area_url()) print('周边:',get_zhoubian_area()) print('周边'+'唯一url:',get_zhoubian_url()) # # html内容 # print(response_HTML)