Anjuke historical housing-price scraper

Cloudmistery, 7 months ago
commit
892414c84c

+ 4 - 0
__init__.py

@@ -0,0 +1,4 @@
+# -*- coding: utf-8 -*-
+# @Author  : ChenZhaoyuchen
+# @Time    : 2024/9/26 15:59
+# @File    : __init__.py

BIN
__pycache__/__init__.cpython-39.pyc


+ 6 - 0
main.py

@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+# @Author  : ChenZhaoyuchen
+# @Time    : 2024/9/26 15:59
+# @File    : main.py
+import requests
+

+ 4 - 0
utils/__init__.py

@@ -0,0 +1,4 @@
+# -*- coding: utf-8 -*-
+# @Author  : ChenZhaoyuchen
+# @Time    : 2024/9/26 16:36
+# @File    : __init__.py

BIN
utils/__pycache__/__init__.cpython-39.pyc


BIN
utils/__pycache__/get_message.cpython-39.pyc


BIN
utils/__pycache__/setting.cpython-39.pyc


+ 105 - 0
utils/get_message.py

@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+# @Author  : ChenZhaoyuchen
+# @Time    : 2024/9/26 16:20
+# @File    : get_message.py
+
+from anjuke.utils.setting import *
+from bs4 import BeautifulSoup
+
+
+def get_province_area():
+    Area_list = []
+    soup = BeautifulSoup(response_HTML_province, 'html.parser')
+    div_table = soup.find(class_='sel-content')
+
+    for area_ in div_table.find_all('a'):
+        area = area_.text
+        Area_list.append(area)
+    # earlier bug: the del was mis-indented inside the for loop, so each
+    # append was immediately deleted and the function returned an empty list
+    del Area_list[0]
+    return Area_list
+
+
+def get_city_area():
+    Area_list = []
+    soup = BeautifulSoup(response_HTML_province, 'html.parser')
+    div_table = soup.find(class_='sel-sec')
+
+    for area_ in div_table.find_all('a'):
+        area = area_.text
+        Area_list.append(area)
+    del Area_list[0]
+    return Area_list
+
+def get_qu_area():
+    Area_list = []
+    soup = BeautifulSoup(response_HTML, 'html.parser')
+    div_table = soup.find(class_='sel-content')
+
+    for area_ in div_table.find_all('a'):
+        area = area_.text
+        Area_list.append(area)
+    del Area_list[0]
+    return Area_list
+
+def get_zhoubian_area():
+    Area_list = []
+    soup = BeautifulSoup(response_HTML, 'html.parser')
+    div_table = soup.find(class_='sel-sec')
+
+    for area_ in div_table.find_all('a'):
+        area = area_.text
+        Area_list.append(area)
+    del Area_list[0]
+    return Area_list
+
+def get_Year():
+    Years_list = []
+    soup = BeautifulSoup(response_HTML, 'html.parser')
+    years = soup.find(class_="year-list")
+    for year_ in years.find_all('a'):
+        year = year_.text
+        Years_list.append(year)
+    return Years_list
+
+def get_area_url():
+    Urls_list = []
+    soup = BeautifulSoup(response_HTML, 'html.parser')
+    urls = soup.find(class_='sel-content')
+
+    for url_area in urls.find_all('a'):
+        url_area = url_area.get('href')
+        Urls_list.append('https://www.anjuke.com' + url_area)
+    del Urls_list[0]
+    return Urls_list
+
+def get_zhoubian_url():
+    Urls_list = []
+    soup = BeautifulSoup(response_HTML, 'html.parser')
+    urls = soup.find(class_='sel-sec')
+
+    for url_area in urls.find_all('a'):
+        url_area = url_area.get('href')
+        Urls_list.append(url_area)
+    del Urls_list[0]
+    return Urls_list
+
+if __name__ == '__main__':
+    # URL templates and their response objects
+    print(urls_template_high, response_200)
+    print(urls_template_low, response_2002)
+    # time span covered, plus per-level name and unique-URL lists
+    print('Time span:', get_Year())
+    # sub-URLs
+    print('Provinces/municipalities:', get_province_area())
+    print('Cities:', get_city_area())
+    # sub-URLs
+    print('Districts:', get_qu_area())
+    print('District unique URLs:', get_area_url())
+    print('Surrounding areas:', get_zhoubian_area())
+    print('Surrounding area unique URLs:', get_zhoubian_url())
+    # # page HTML
+    # print(response_HTML)
+
+
+
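Note on get_message.py: each name getter has a URL getter that scrapes the same block, so the two lists only line up positionally. A minimal usage sketch under that assumption (district level shown; the surrounding-area pair works the same way):

from anjuke.utils.get_message import get_qu_area, get_area_url

# Both lists are scraped from the same 'sel-content' block in document
# order, so zip pairs each district name with its URL.
for name, url in zip(get_qu_area(), get_area_url()):
    print(name, url)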

+ 36 - 0
utils/get_price.py

@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# @Author  : ChenZhaoyuchen
+# @Time    : 2024/9/27 16:17
+# @File    : get_price.py
+
+# requests and headers also arrive via the star imports below, but an
+# explicit import keeps the dependency visible
+import requests
+from bs4 import BeautifulSoup
+from anjuke.utils.get_message import *
+from anjuke.utils.setting import *
+
+def get_price():
+    price_list = []
+    zhoubian_name_list = get_zhoubian_area()
+    zhoubian_url_list = get_zhoubian_url()
+    # the two lists are built positionally, so zip keeps each area name
+    # aligned with its URL
+    for name, url in zip(zhoubian_name_list, zhoubian_url_list):
+        response_price = requests.get(url=url, headers=headers).content.decode('utf8')
+        soup = BeautifulSoup(response_price, 'html.parser')
+        price_ = soup.find(class_="table is-headless")
+        table_trs = price_.find_all('div', class_="table-tr")
+
+        for table_tr in table_trs:
+            # one table row = the month, price and change-rate cells
+            row = [td.text.strip() for td in table_tr.find_all('div', class_='td')]
+            # TODO: tag each row as rising or falling (sketched after this file)
+            price_list.append((name, row))
+            print(name, row)
+    return price_list
+
+if __name__ == '__main__':
+    get_price()
+
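get_price() leaves the rise/fall tagging as a TODO; a sketch of one way to do it, assuming the page marks the change cell with an 'up' or 'down' CSS class (an assumption about the live markup, not verified):

def price_direction(table_tr):
    # 'up'/'down' are assumed marker classes; rows carrying neither
    # are treated as flat.
    if table_tr.find('div', class_='up') is not None:
        return 'rising'
    if table_tr.find('div', class_='down') is not None:
        return 'falling'
    return 'flat'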

+ 11 - 0
utils/readme.txt

@@ -0,0 +1,11 @@
+## Anjuke housing prices
+### 1. Request headers
+### 2. Fix the basic record format: down to the area level, with prices; three fields: area, date, price
+e.g. (province): China - Anhui - Hefei - Baohe - Baohe Industrial Park
+e.g. (municipality): China - municipality - Beijing - Chaoyang - CBD
+
+### 3. Format of the scraped output:
+
+Each province has its own cities, each city its own districts, and each district its own surrounding areas,
+so the scraper is built to walk each level in turn (see the sketch after this file).
+
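The hierarchy described here implies a nested crawl, as sketched below. get_province_area exists in utils/get_message.py; list_cities, list_districts and list_areas are hypothetical per-level helpers that would be built the same way:

from anjuke.utils.get_message import get_province_area

# Walk the readme's hierarchy: province -> city -> district -> surrounding area.
for province in get_province_area():
    for city in list_cities(province):           # hypothetical helper
        for district in list_districts(city):    # hypothetical helper
            for area in list_areas(district):    # hypothetical helper
                print(province, city, district, area)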

+ 41 - 0
utils/setting.py

@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+# @Author  : ChenZhaoyuchen
+# @Time    : 2024/9/26 16:00
+# @File    : setting.py
+
+import requests
+
+# start by scraping Baohe Industrial Park prices for Jan-Dec 2024
+year = '2024'
+province = 'anhui'
+city = 'hf'
+area = 'baohequ'
+position = 'bhgyy'
+
+# URL templates
+urls_template_high = f'https://www.anjuke.com/fangjia/{province}/'
+urls_template_low = f'https://www.anjuke.com/fangjia/{city}{year}/{area}/'
+
+
+# province-level URL: the province name is the suffix; the page redirects after loading
+url_province = 'https://www.anjuke.com/fangjia/anhui/'
+
+# note: once a city is selected, the province no longer appears in the URL
+url_sjzxs = f'https://www.anjuke.com/fangjia/'
+url_city = f'https://www.anjuke.com/fangjia/hf/'
+
+# e.g. this URL points at Baohe District in Hefei, with no mention of Anhui Province
+url_qu = f'https://www.anjuke.com/fangjia/hf/baohequ/'
+
+headers = {
+    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+    'Accept-Encoding': 'gzip, deflate, br',
+    'Accept-Language': 'zh-CN,zh;q=0.9',
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0'
+}
+
+# note: these module-level requests fire at import time, once per session
+response_HTML_province = requests.get(url=urls_template_high, headers=headers).content.decode('utf8')
+response_HTML = requests.get(url=urls_template_low, headers=headers).content.decode('utf8')
+
+response_200 = requests.get(url=urls_template_high, headers=headers)
+response_2002 = requests.get(url=urls_template_low, headers=headers)
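For the sample parameters above, the two templates expand as follows; note that the low template joins city and year with no separator (hf2024), matching the pattern the comments describe:

>>> urls_template_high
'https://www.anjuke.com/fangjia/anhui/'
>>> urls_template_low
'https://www.anjuke.com/fangjia/hf2024/baohequ/'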