123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899 |
- # -*- coding: utf-8 -*-
- # @Author : ChenZhaoyuchen
- # @Time : 2024/9/26 16:20
- # @File : get_message.py
- from bs4 import BeautifulSoup
- from anjuke.utils.anjuke_response import *
- from anjuke.utils.setting import *
- # 省级
def get_province():
    """Fetch the province-level names and URLs from the start page.

    Downloads ``url_start`` using the shared ``headers``, parses the HTML and
    reads every ``<a>`` inside the element whose class is
    ``filter-area-wrap``. The first link is discarded, as in the original
    code (presumably a catch-all entry rather than a province -- confirm
    against the live page).

    Returns:
        tuple[list, list]: ``(names, urls)`` as parallel lists. Each URL is
        the link's ``href`` prefixed with ``https://www.anjuke.com``. Both
        lists are empty when the container is not present in the page.
    """
    # A timeout keeps the scraper from hanging forever on a stalled socket.
    response_province = requests.get(
        url=url_start, headers=headers, timeout=30
    ).content.decode('utf8')
    soup = BeautifulSoup(response_province, 'html.parser')

    filter_area_wrap = soup.find(class_="filter-area-wrap")
    if filter_area_wrap is None:
        # The expected container is missing (e.g. the page layout changed);
        # return empty results instead of failing on ``None.find_all``.
        return [], []

    province_name_list = []
    province_url_list = []
    for province_ in filter_area_wrap.find_all('a'):
        province_name_list.append(province_.text)
        province_url_list.append(
            'https://www.anjuke.com' + province_.get('href')
        )

    # Drop the leading entry. Slicing, unlike ``del lst[0]``, is safe when
    # no links were found at all.
    return province_name_list[1:], province_url_list[1:]
- # 市级
def get_city():
    """Fetch city-level names and URLs for every province.

    Calls ``get_province()`` and, for each province URL, downloads the page
    and reads every ``<a>`` inside the element whose class is
    ``sel-content bank``. Each city name is prefixed with its province name;
    each URL is the link's ``href`` exactly as it appears in the page.

    Returns:
        tuple[list, list]: ``(names, urls)`` as parallel lists with the very
        first collected entry removed. Provinces whose page lacks the
        expected container contribute no entries.
    """
    province_name_list, province_url_list = get_province()
    city_name_list = []
    city_url_list = []

    # Walk the two parallel lists in lockstep rather than by index.
    for province_name, province_url in zip(province_name_list,
                                           province_url_list):
        # A timeout keeps one stalled province page from blocking the crawl.
        response_city = requests.get(
            url=province_url, headers=headers, timeout=30
        ).content.decode('utf8')
        soup = BeautifulSoup(response_city, 'html.parser')

        filter_area_wrap = soup.find(class_="sel-content bank")
        if filter_area_wrap is None:
            # Container missing on this page: skip this province instead of
            # aborting the whole crawl on ``None.find_all``.
            continue

        for city_ in filter_area_wrap.find_all('a'):
            city_name_list.append(province_name + city_.text)
            city_url_list.append(city_.get('href'))

    # NOTE(review): the original removed element 0 of both lists with
    # ``del``; here that is done once, after all provinces are processed.
    # If the intent was to drop the first link of *each* province page,
    # move the drop into the loop -- confirm against the live pages.
    # Slicing is used so an empty result does not raise IndexError.
    return city_name_list[1:], city_url_list[1:]
- # 区级
def get_area(url=''):
    """Fetch district-level names and URLs from one page.

    Downloads ``url`` using the shared ``headers`` and reads every ``<a>``
    inside the element whose class is ``filter-area-wrap``. The first link
    is discarded, as in the original code.

    Args:
        url: Address of the page to scrape. The default ``''`` is the value
            the original code hard-coded; an empty URL has no scheme, so the
            request is expected to fail unless a real URL is supplied.

    Returns:
        tuple[list, list]: ``(names, urls)`` as parallel lists. Each URL is
        the link's ``href`` prefixed with ``https://www.anjuke.com``. Both
        lists are empty when the container is not present in the page.
    """
    # A timeout keeps the scraper from hanging forever on a stalled socket.
    response_area = requests.get(
        url=url, headers=headers, timeout=30
    ).content.decode('utf8')
    soup = BeautifulSoup(response_area, 'html.parser')

    filter_area_wrap = soup.find(class_="filter-area-wrap")
    if filter_area_wrap is None:
        # Expected container missing: return empty results instead of
        # failing on ``None.find_all``.
        return [], []

    area_name_list = []
    area_url_list = []
    for area_ in filter_area_wrap.find_all('a'):
        area_name_list.append(area_.text)
        area_url_list.append('https://www.anjuke.com' + area_.get('href'))

    # Drop the leading entry; slicing is safe even when no links were found.
    return area_name_list[1:], area_url_list[1:]
- # 周边
def get_periphery(url=''):
    """Fetch surrounding-area names and URLs from one page.

    Downloads ``url`` using the shared ``headers`` and reads every ``<a>``
    inside the element whose class is ``filter-area-wrap``. The first link
    is discarded, as in the original code.

    Args:
        url: Address of the page to scrape. The default ``''`` is the value
            the original code hard-coded; an empty URL has no scheme, so the
            request is expected to fail unless a real URL is supplied.

    Returns:
        tuple[list, list]: ``(names, urls)`` as parallel lists. Each URL is
        the link's ``href`` prefixed with ``https://www.anjuke.com``. Both
        lists are empty when the container is not present in the page.
    """
    # A timeout keeps the scraper from hanging forever on a stalled socket.
    response_periphery = requests.get(
        url=url, headers=headers, timeout=30
    ).content.decode('utf8')
    soup = BeautifulSoup(response_periphery, 'html.parser')

    filter_area_wrap = soup.find(class_="filter-area-wrap")
    if filter_area_wrap is None:
        # Expected container missing: return empty results instead of
        # failing on ``None.find_all``.
        return [], []

    periphery_name_list = []
    periphery_url_list = []
    for periphery_ in filter_area_wrap.find_all('a'):
        periphery_name_list.append(periphery_.text)
        periphery_url_list.append(
            'https://www.anjuke.com' + periphery_.get('href')
        )

    # Drop the leading entry; slicing is safe even when no links were found.
    return periphery_name_list[1:], periphery_url_list[1:]
- # 获取年份
def get_Year():
    """Return the text of every year link found in ``response_origion``.

    Parses ``response_origion`` (a name supplied from outside this
    function, presumably by the star imports at the top of the file) and
    collects the text of each ``<a>`` inside the element whose class is
    ``year-list``.

    Returns:
        list: Link texts in document order; empty when the ``year-list``
        element is not present in the page.
    """
    soup = BeautifulSoup(response_origion, 'html.parser')
    years = soup.find(class_="year-list")
    if years is None:
        # No year list on the page: return nothing instead of failing on
        # ``None.find_all``.
        return []
    return [year_.text for year_ in years.find_all('a')]
- # 测试函数
if __name__ == '__main__':
    # Manual smoke test: only the province scraper is enabled. Uncomment one
    # of the lines below to exercise another scraper.
    # print('时间跨度:',get_Year())
    provinces = get_province()
    print('省级单位:', provinces)
    # print('市级单位:',get_city())
    # print('区级单位',get_area())
    # print('周边单位',get_periphery())
|