Cloudmistery · commit a8b2040c47 · 8 months ago

BIN
utils/__pycache__/get_message.cpython-39.pyc


BIN
utils/__pycache__/setting.cpython-39.pyc


+ 54 - 32
utils/get_message.py

@@ -6,22 +6,24 @@
 from bs4 import BeautifulSoup
 from anjuke.utils.anjuke_response import *
 from anjuke.utils.setting import *
-
+import time, random
 # Province level
 def get_province():
     province_name_list = []
     province_url_list = []
-    response_province = requests.get(url = url_start, headers=headers).content.decode('utf8')
+    response_province = requests.get(url = url_start, headers = headers).content.decode('utf8')
+    time.sleep(random.uniform(0.5, 1))
     soup = BeautifulSoup(response_province, 'html.parser')
-    print(soup)
+    # print(soup)
     filter_area_wrap = soup.find(class_="filter-area-wrap")
-    print(filter_area_wrap)
+    # print(filter_area_wrap)
     for province_ in filter_area_wrap.find_all('a'):
         province_name = province_.text
         province_url = province_.get('href')
         province_name_list.append(province_name)
         province_url_list.append('https://www.anjuke.com'+province_url)
     del province_name_list[0],province_url_list[0]
+    time.sleep(random.uniform(0.5, 1))
     return province_name_list,province_url_list
 
 # City level
@@ -29,53 +31,73 @@ def get_city():
     province_name_list,province_url_list = get_province()
     city_name_list = []
     city_url_list = []
-
     for i in range(len(province_url_list)):
         province_url = province_url_list[i]
         province_name = province_name_list[i]
-        response_city = requests.get(url = province_url, headers=headers).content.decode('utf8')
+        response_city = requests.get(url = province_url, headers = headers).content.decode('utf8')
+        time.sleep(random.uniform(3, 4))
         soup = BeautifulSoup(response_city, 'html.parser')
         filter_area_wrap = soup.find(class_="sel-content bank")
-
+        zhongji_name_list = []
+        zhongji_url_list = []
         for city_ in filter_area_wrap.find_all('a'):
             city_name = province_name + city_.text
             city_url = city_.get('href')
-            city_name_list.append(city_name)
-            city_url_list.append(city_url)
-        del city_name_list[0],city_url_list[0]
-        return city_name_list,city_url_list
+            zhongji_name_list.append(city_name)
+            zhongji_url_list.append(city_url)
+        del zhongji_name_list[0], zhongji_url_list[0]
+        city_name_list += zhongji_name_list
+        city_url_list += zhongji_url_list
+        print(f'Processed province-level unit {i}: {province_name}')
+    return city_name_list,city_url_list
 
 # District level
 def get_area():
+    city_name_list, city_url_list = get_city()
     area_name_list = []
     area_url_list = []
-    response_area = requests.get(url = '', headers=headers).content.decode('utf8')
-    soup = BeautifulSoup(response_area, 'html.parser')
-    filter_area_wrap = soup.find(class_="filter-area-wrap")
 
-    for area_ in filter_area_wrap.find_all('a'):
-        area_name = area_.text
-        area_url = area_.get('href')
-        area_name_list.append(area_name)
-        area_url_list.append('https://www.anjuke.com'+area_url)
-    del area_name_list[0],area_url_list[0]
+    for i in range(len(city_url_list)):
+        city_url = city_url_list[i]
+        city_name = city_name_list[i]
+        response_area = requests.get(url = city_url, headers = headers).content.decode('utf8')
+        time.sleep(random.uniform(2, 3))
+        soup = BeautifulSoup(response_area, 'html.parser')
+        filter_area_wrap = soup.find(class_="sel-content bank")
+        zhongji_name_list = []
+        zhongji_url_list = []
+        for area_ in filter_area_wrap.find_all('a'):
+            area_name = city_name + area_.text
+            area_url = area_.get('href')
+            zhongji_name_list.append(area_name)
+            zhongji_url_list.append(area_url)
+        del zhongji_name_list[0], zhongji_url_list[0]
+        area_name_list += zhongji_name_list
+        area_url_list += zhongji_url_list
     return area_name_list,area_url_list
 
+
 # Surrounding areas
 def get_periphery():
+    area_name_list, area_url_list = get_area()
     periphery_name_list = []
     periphery_url_list = []
-    response_periphery = requests.get(url = '', headers=headers).content.decode('utf8')
-    soup = BeautifulSoup(response_periphery, 'html.parser')
-    filter_area_wrap = soup.find(class_="filter-area-wrap")
 
-    for periphery_ in filter_area_wrap.find_all('a'):
-        periphery_name = periphery_.text
-        periphery_url = periphery_.get('href')
-        periphery_name_list.append(periphery_name)
-        periphery_url_list.append('https://www.anjuke.com'+periphery_url)
-    del periphery_name_list[0],periphery_url_list[0]
-    return periphery_name_list,periphery_url_list
+    for i in range(len(area_url_list)):
+        area_url = area_url_list[i]
+        area_name = area_name_list[i]
+        response_periphery = requests.get(url = area_url, headers = headers).content.decode('utf8')
+        time.sleep(random.uniform(3, 5))
+        soup = BeautifulSoup(response_periphery, 'html.parser')
+        filter_area_wrap = soup.find(class_="sel-content bank")
+        links = filter_area_wrap.find_all('a')[1:]  # skip the leading "all" link
+        for periphery_ in links:
+            periphery_name = area_name + periphery_.text
+            periphery_url = periphery_.get('href')
+            periphery_name_list.append(periphery_name)
+            periphery_url_list.append(periphery_url)
+        time.sleep(random.uniform(3, 5))
+    return periphery_name_list, periphery_url_list
 
 # Get the year range
 def get_Year():
@@ -90,8 +112,8 @@ def get_Year():
 # Test functions
 if __name__ == '__main__':
     # print('Time span:', get_Year())
-    print('Province-level units:', get_province())
-    # print('City-level units:', get_city())
+    # print('Province-level units:', get_province())
+    print('City-level units:', get_city())
     # print('District-level units:', get_area())
     # print('Surrounding units:', get_periphery())
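
The four level scrapers in get_message.py now share the same fetch, parse, and collect shape. Below is a minimal sketch of that shared pattern as one helper, assuming the markup used in the diff (the "sel-content bank" class and a leading "all" link); fetch_links is a hypothetical consolidation, not part of the commit:

import random
import time

import requests
from bs4 import BeautifulSoup

def fetch_links(url, headers, css_class="sel-content bank", delay=(2, 4)):
    """Fetch one listing page and return (names, urls) for its filter links."""
    html = requests.get(url, headers=headers, timeout=10).content.decode('utf8')
    time.sleep(random.uniform(*delay))  # polite random delay between requests
    soup = BeautifulSoup(html, 'html.parser')
    wrap = soup.find(class_=css_class)
    names, urls = [], []
    for a in wrap.find_all('a'):
        names.append(a.text)
        urls.append(a.get('href'))
    return names[1:], urls[1:]  # drop the leading "all" entry, as the del [0] calls do

With such a helper, each of get_city, get_area, and get_periphery would reduce to a loop over the previous level's URLs.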
 

+ 24 - 28
utils/get_price.py

@@ -7,48 +7,44 @@ from anjuke.utils.get_message import *
 from anjuke.utils.setting import *
 
 def get_price():
-    # # price lists
-    # price_list = []
-    # periphery_name_list = get_periphery()[0]
-    # periphery_url_list = get_periphery()[1]
-    #
-    # # main loop
-    # for i in range(len(periphery_name_list)):
-    #     name = periphery_name_list[i]
-    #     url = periphery_url_list[i]
-        url = 'https://www.anjuke.com/fangjia/hf2024/bhgyy/'
+    # Price lists
+    periphery_name_list = get_periphery()[0]
+    periphery_url_list = get_periphery()[1]
+
+    # Main loop
+    result = []
+    for i in range(len(periphery_name_list)):
+        name = periphery_name_list[i]
+        url = periphery_url_list[i]
+        # url = 'https://www.anjuke.com/fangjia/beijing2024/chaoyang/'
         response_price = requests.get(url = url, headers = headers).content.decode('utf8')
         soup = BeautifulSoup(response_price, 'html.parser')
         price_ = soup.find(class_ = "table is-headless")
         table_trs = price_.find_all('div',class_ = "table-tr")
         # extract from the html: time, unit price, trend tab, change rate Price_Rate
-        list = []
-
+        list1 = []
         for table_tr in table_trs:
             all_mes = table_tr.find_all('div',class_ = 'td')
             zhongji = []
             for td in all_mes:
                 a = td.text.strip() # guard against newlines and other stray characters
                 print('a',a)
+                # three values per row: time, unit price, change rate Price_Rate
                 zhongji.append(a)
-
-            UporDown = table_tr.find('div',class_ = "up") or table_tr.find('div',class_ = "down")
-            if UporDown.find('div',class_ = "up"):
+            UporDown = table_tr.find(
+                'div', class_="up") or table_tr.find('div', class_="down")
+            attr = UporDown['class'][0] if UporDown else ''
+            if attr == "up":
                 tab = '↑'
+            elif attr == "down":
+                tab = '↓'
             else:
-                if UporDown.find('div', class_="down"):
-                    tab = '↓'
-                else:
-                    tab = 'trend not found'
-
+                tab = '-'
             zhongji.insert(2,tab)
+            zhongji.append(name)  # tag the row with its location name
+            list1.append(zhongji)
+        result.append(list1)
+    return result
 
-            list.append(zhongji)
-            print('zhongji',zhongji)
-        print('list',list)
-        # print(list[0][2])
-
-
-            # return list
 if __name__ == '__main__':
-    print(get_price())
+    get_price()
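
The trend-arrow logic above maps each row's up/down class to a symbol. Here is a hedged sketch of the same row parsing as a standalone function (parse_row is hypothetical, assuming the table-tr markup shown in the diff); tagging rows with their location as they are parsed avoids interleaving names and rows in one flat list:

def parse_row(table_tr):
    """Return [time, price, tab, rate] for one 'table-tr' element."""
    values = [td.text.strip() for td in table_tr.find_all('div', class_='td')]
    trend = table_tr.find('div', class_='up') or table_tr.find('div', class_='down')
    tab = {'up': '↑', 'down': '↓'}.get(trend['class'][0], '-') if trend else '-'
    values.insert(2, tab)  # row becomes [time, price, tab, rate]
    return values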

+ 40 - 4
utils/setting.py

@@ -2,14 +2,50 @@
 # @Author  : ChenZhaoyuchen
 # @Time    : 2024/9/26 16:00
 # @File    : setting.py
+import random
+
+
+# User-Agent pool for simulated request headers
+agent_list = [
+	"Mozilla/5.0 (Linux; U; Android 2.3.6; en-us; Nexus S Build/GRK39F) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
+	"Avant Browser/1.2.789rel1 (http://www.avantbrowser.com)",
+	"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.0 Safari/532.5",
+	"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.9 (KHTML, like Gecko) Chrome/5.0.310.0 Safari/532.9",
+	"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.514.0 Safari/534.7",
+	"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/9.0.601.0 Safari/534.14",
+	"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/10.0.601.0 Safari/534.14",
+	"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20",
+	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.27 (KHTML, like Gecko) Chrome/12.0.712.0 Safari/534.27",
+	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24 Safari/535.1",
+	"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2",
+	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7",
+	"Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre",
+	"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10"
+]
 
 # Initial url
 url_start = 'https://www.anjuke.com/fangjia/quanguo2024/'
 
 # Request headers
+# headers = {
+#     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+#     'Accept-Encoding': 'gzip, deflate, br',
+#     'Accept-Language': 'zh-CN,zh;q=0.9',
+#     'referer':'https://www.anjuke.com/',
+#     'cookie':'xxzlxxid=pfmxpoucXXdKPZe3nePjn1oG3tEFYp6CwGDK9cSqkSE8FQ+YKsyHR+C1hZCtXLFDNP0S',
+#     'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0'
+# }
+
 headers = {
-    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
-    'Accept-Encoding': 'gzip, deflate, br',
-    'Accept-Language': 'zh-CN,zh;q=0.9',
-    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0'
+    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+    'accept-encoding': 'gzip, deflate, br, zstd',
+    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
+    'cache-control': 'max-age=0',
+    'connection': 'keep-alive',
+    'cookie': 'aQQ_ajkguid=8A58B742-7F9E-4169-9684-065B9DF9AC96; sessid=2C9914D2-0EC1-4CF8-8B9F-BB2DD2432060; ajk-appVersion=; ctid=33; obtain_by=2; twe=2; id58=CkwAb2cHJ2CSvzjwJRw2Ag==; xxzlclientid=ac98d936-a0a8-41cf-b57a-1728522083413; xxzlxxid=pfmxpoucXXdKPZe3nePjn1oG3tEFYp6CwGDK9cSqkSE8FQ+YKsyHR+C1hZCtXLFDNP0S; fzq_h=bfec261cadc68ed6a35b0159901cf584_1728609619502_56d94ea5cdbf416fac02c5b4f2d27600_47896428890875912854068920960365571880; xxzlbbid=pfmbM3wxMDM0NnwxLjEwLjF8MTcyODYxMDg4MjI2MTg3OTkyMXwvak5hTThZUkZuRzE1TkkxbnJKaVBDZnZvTUR5WjB0QXA4dUtlZDZ2VWVrPXw0ZTM5ZmI1NzdkM2QyMGM1ZGJkM2I3MDEyNDQ5ODU3N18xNzI4NjEwODgxODU5Xzk1MjY2MTViNmRhMjQ3NmQ5ZGU5MDlkNWI4OGYyNzM4XzM3MDQ0ODAwNjB8ZTc0OWUyMTUyNTMzMzUzMzM4ZGZjZmE5ODY4NWE3OGNfMTcyODYxMDg4MTg0NV8yNTU=',
+    'host': 'www.anjuke.com',
+    'if-none-match': '2db91-inlYvGp0xIvRpS6/mdxeLVOjQ9k',
+    'user-agent': random.choice(agent_list),
 }
+
+
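
One caveat worth noting: random.choice(agent_list) in the headers dict above runs once, when setting.py is imported, so every request reuses the same User-Agent. A minimal sketch of per-request rotation (build_headers is a hypothetical helper, not part of the commit):

import random

def build_headers(base_headers, agent_list):
    """Return a copy of base_headers with a freshly chosen User-Agent."""
    h = dict(base_headers)
    h['user-agent'] = random.choice(agent_list)
    return h

# usage: requests.get(url, headers=build_headers(headers, agent_list))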