# -*- encoding: utf-8 -*-
'''
@File    :   spider.py
@Time    :   2024/08/29 15:37:35
@Author  :   Zhangziheng
'''

import re

import requests
from bs4 import BeautifulSoup

from .conf_spider import *
from .ext_logger import logger

# Compiled once at import time: pulls the total page count out of the
# pagination JavaScript on the index page (e.g. "page_div',123,").
_PAGE_COUNT_RE = re.compile(r"page_div\',([\d]{0,4}),")


def pageDeep(default: int = 10) -> int:
    """Return the number of paginated list pages advertised by the site.

    Fetches the index page (``url`` and ``headers`` come from
    ``conf_spider``) and extracts the page count from its pagination
    script. Any failure — network error, bad HTTP status, or a missing
    pattern — is logged and *default* is returned, preserving the
    original best-effort behaviour.

    :param default: fallback value when the count cannot be determined.
    :return: the scraped page count, or ``default`` on failure.
    """
    try:
        # timeout prevents the spider from hanging forever on a dead host
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        response.encoding = "utf-8"
        match = _PAGE_COUNT_RE.search(response.text)
        if match is None:
            logger.warning(f"pageDeep: pagination pattern not found, using default {default}")
            return default
        return int(match.group(1))
    except Exception as e:
        # Keep the old silent-fallback contract, but record the cause.
        logger.error(f"pageDeep failed, falling back to {default}: {e}")
        return default


def fetch_news(urls):
    """Scrape news entries (title, link, date) from each list page.

    Each page is expected to contain ``<ul id="list">`` whose ``<li>``
    items hold an ``<a>`` (title + href) and a ``<span>`` (date).
    Failures are logged per page and never propagate, so one bad page
    does not abort the crawl.

    :param urls: iterable of list-page URLs to scrape.
    :return: list of dicts with keys ``title``, ``link`` and ``date``.
    """
    news_list = []
    # Loop variable renamed from the original ``url`` so it no longer
    # shadows the star-imported module-level ``url`` used by pageDeep().
    for page_url in urls:
        try:
            response = requests.get(page_url, headers=headers, timeout=10)
            response.encoding = "utf-8"  # fix garbled text (mojibake)
            response.raise_for_status()  # raises on HTTP error status
            soup = BeautifulSoup(response.text, 'html.parser')
            ul = soup.find("ul", id="list")
            if ul is None:
                # Layout changed or empty page: report and skip instead of
                # crashing into the generic handler below.
                logger.warning(f'No <ul id="list"> found on {page_url}')
                continue
            for li in ul.find_all('li'):
                _a = li.find("a")
                _span = li.find("span")
                if _a is None or _span is None:
                    # Malformed item: skip it, keep the rest of the page.
                    continue
                news_list.append({
                    'title': _a.text,
                    'link': _a.get("href"),
                    'date': _span.text,
                })
        except requests.RequestException as e:
            logger.error(f'Request failed for {page_url}: {e}')
        except Exception as e:
            logger.error(f'An error occurred for {page_url}: {e}')
    return news_list