# -*- coding: utf-8 -*-
# @Author : ChenZhaoyuchen
# @Time : 2024/9/26 16:20
# @File : get_message.py
from bs4 import BeautifulSoup

# NOTE(review): `requests`, `headers`, `url_start` and `response_origion` are
# expected to come from these wildcard imports — confirm against those modules.
from anjuke.utils.anjuke_response import *
from anjuke.utils.setting import *

# Base site prefix for the relative hrefs found on listing pages.
_SITE = 'https://www.anjuke.com'


def _scrape_links(page_url, css_class, prefix=''):
    """Fetch *page_url* and return ([names], [urls]) for every <a> inside the
    first element carrying *css_class*.

    The first link is dropped — on these listing pages it is the aggregate
    "all" entry, not a real location. *prefix* is prepended to each href
    (pass '' when the hrefs are already absolute).
    """
    html = requests.get(url=page_url, headers=headers).content.decode('utf8')
    soup = BeautifulSoup(html, 'html.parser')
    container = soup.find(class_=css_class)
    names, urls = [], []
    for anchor in container.find_all('a'):
        names.append(anchor.text)
        urls.append(prefix + anchor.get('href'))
    del names[0], urls[0]  # drop the aggregate first link
    return names, urls


# 省级 (province level)
def get_province():
    """Scrape the start page and return (province_names, province_urls)."""
    return _scrape_links(url_start, 'filter-area-wrap', prefix=_SITE)


# 市级 (city level)
def get_city():
    """Scrape every province page and return (city_names, city_urls).

    Each city name is prefixed with its province name; city hrefs are taken
    as-is (no site prefix added, matching the original behavior).
    """
    province_name_list, province_url_list = get_province()
    city_name_list = []
    city_url_list = []
    for province_name, province_url in zip(province_name_list, province_url_list):
        response_city = requests.get(url=province_url, headers=headers).content.decode('utf8')
        soup = BeautifulSoup(response_city, 'html.parser')
        container = soup.find(class_='sel-content bank')
        for city_ in container.find_all('a'):
            city_name_list.append(province_name + city_.text)
            city_url_list.append(city_.get('href'))
    # NOTE(review): as in the original, only the very first entry overall is
    # dropped, not the first entry of each province's batch — confirm intended.
    del city_name_list[0], city_url_list[0]
    return city_name_list, city_url_list


# 区级 (district level)
def get_area(url=''):
    """Return (area_names, area_urls) scraped from *url*.

    The original hard-coded an empty URL (which makes requests raise
    MissingSchema); the page URL is now a parameter. The default of ''
    preserves the original call signature for existing callers.
    """
    return _scrape_links(url, 'filter-area-wrap', prefix=_SITE)


# 周边 (periphery)
def get_periphery(url=''):
    """Return (periphery_names, periphery_urls) scraped from *url*.

    Same placeholder-URL caveat as get_area().
    """
    return _scrape_links(url, 'filter-area-wrap', prefix=_SITE)


# 获取年份 (available years)
def get_Year():
    """Parse year link texts out of *response_origion* (from wildcard import).

    Returns the years in page order as a list of strings.
    """
    soup = BeautifulSoup(response_origion, 'html.parser')
    years = soup.find(class_='year-list')
    return [year_.text for year_ in years.find_all('a')]


# 测试函数 (manual smoke test)
if __name__ == '__main__':
    # print('时间跨度:', get_Year())
    print('省级单位:', get_province())
    # print('市级单位:', get_city())
    # print('区级单位', get_area())
    # print('周边单位', get_periphery())