@@ -11,7 +11,7 @@ import time,random
 def get_province():
     province_name_list = []
     province_url_list = []
-    response_province = requests.get(url = url_start, headers = headers).content.decode('utf8')
+    response_province = requests.get(url = url_start, headers = headers, proxies=proxies, timeout=10).content.decode('utf8')
     time.sleep(random.uniform(0.5, 1))
     soup = BeautifulSoup(response_province, 'html.parser')
     # print(soup)
@@ -23,7 +23,6 @@ def get_province():
         province_name_list.append(province_name)
         province_url_list.append('https://www.anjuke.com'+province_url)
     del province_name_list[0],province_url_list[0]
-    time.sleep(random.uniform(0.5, 1))
     return province_name_list,province_url_list
 
 # 市级 (city level)
@@ -34,7 +33,7 @@ def get_city():
     for i in range(len(province_url_list)):
         province_url = province_url_list[i]
         province_name = province_name_list[i]
-        response_city = requests.get(url = province_url, headers = headers).content.decode('utf8')
+        response_city = requests.get(url = province_url, headers = headers, proxies=proxies, timeout=10).content.decode('utf8')
         time.sleep(random.uniform(3, 4))
         soup = BeautifulSoup(response_city, 'html.parser')
         filter_area_wrap = soup.find(class_="sel-content bank")
@@ -60,7 +59,7 @@ def get_area():
     for i in range(len(city_url_list)):
         city_url = city_url_list[i]
         city_name = city_name_list[i]
-        response_area = requests.get(url = city_url, headers = headers).content.decode('utf8')
+        response_area = requests.get(url = city_url, headers = headers, proxies=proxies, timeout=10).content.decode('utf8')
         time.sleep(random.uniform(2, 3))
         soup = BeautifulSoup(response_area, 'html.parser')
         filter_area_wrap = soup.find(class_="sel-content bank")
@@ -86,7 +85,7 @@ def get_periphery():
     for i in range(len(area_url_list)):
         area_url = area_url_list[i]
         area_name = area_name_list[i]
-        response_periphery = requests.get(url = area_url, headers = headers).content.decode('utf8')
+        response_periphery = requests.get(url = area_url, headers = headers, proxies=proxies, timeout=10).content.decode('utf8')
         time.sleep(random.uniform(3, 5))
         soup = BeautifulSoup(response_periphery, 'html.parser')
         filter_area_wrap = soup.find(class_="sel-content bank")