Эх сурвалжийг харах

中国证券监督管理委员会——源文件

Cloudmistery 8 сар өмнө
parent
commit
ed0f3f990d

+ 14 - 0
.gitignore

@@ -0,0 +1,14 @@
+__pycache__
+.VSCodeCounter
+*.json
+venv
+migrations
+log
+test*
+mysqlBackUp
+.DS_Store
+*.xml
+Dockerfile
+.idea
+*.html
+.vscode

+ 16 - 0
app/__init__.py

@@ -0,0 +1,16 @@
# -*- encoding: utf-8 -*-
'''
@File    :   __init__.py
@Time    :   2024/08/29 15:36:35
@Author  :   Zhangziheng
'''
from flask import Flask
from flask_sqlalchemy import SQLAlchemy

from . import setting
from .route.main import api_blueprint

app = Flask(__name__)

# Load the database settings (SQLALCHEMY_DATABASE_URI etc. from setting.py)
# BEFORE binding SQLAlchemy — without this the extension has no database
# configured at all.
app.config.from_object(setting)

# Passing ``app`` to the constructor already performs init_app() internally;
# the original code then called ``db.init_app(app)`` a second time, which
# re-registers the extension (a RuntimeError in Flask-SQLAlchemy >= 3.0).
# Bind exactly once.
db = SQLAlchemy(app)

app.register_blueprint(api_blueprint)

+ 8 - 0
app/ext_db.py

@@ -0,0 +1,8 @@
# -*- encoding: utf-8 -*-
'''
@File    :   ext_db.py
@Time    :   2024/08/29 16:00:32
@Author  :   Zhangziheng 
'''

# Indirection module: re-exports the SQLAlchemy handle created in
# app/__init__.py so that model modules (e.g. app/utils/models.py) can do
# ``from ..ext_db import db`` without importing the app package root
# directly at their own top level.
from app import db

+ 32 - 0
app/route/main.py

@@ -0,0 +1,32 @@
# -*- encoding: utf-8 -*-
'''
@File    :   main.py
@Time    :   2024/08/29 15:47:26
@Author  :   Zhangziheng 
'''


from flask import Blueprint, request, jsonify
from ..utils.conf_spider import wangye_name

# All routes of this module hang off the site root ("/"); the blueprint is
# registered on the app in app/__init__.py.
api_blueprint = Blueprint('mainRoute', __name__, url_prefix='/')
+
@api_blueprint.route("/")
def shouye():
    """Landing page: greet the visitor with the configured site name."""
    greeting = "欢迎访问{}!".format(wangye_name)
    return greeting
+
+
@api_blueprint.route("/data/add", methods=['POST'])
def add_data():
    """Kick off a crawl run.

    Expects a JSON body like ``{"pageDeep": <int>}``. Uses
    ``get_json(silent=True)`` so that a missing or malformed JSON body
    yields an empty dict instead of Flask aborting the request with a 400.

    Returns a JSON envelope ``{"code": 200, "msg": "success", "data": []}``.
    """
    req_json = request.get_json(silent=True) or {}
    page_index = req_json.get("pageDeep")
    # The actual crawling logic is not implemented yet.
    # TODO: feed `page_index` into the spider (app/utils/spider.py) once wired up.

    item = {
        "code": 200,
        "msg": "success",
        "data": []
    }

    return jsonify(item)

+ 6 - 0
app/setting.py

@@ -0,0 +1,6 @@
# Database connection settings.
#
# SECURITY NOTE(review): these credentials were hard-coded in version
# control. They are kept here only as backward-compatible fallback
# defaults; each value can now be overridden through an environment
# variable of the same name, so real deployments need not ship secrets
# in source.
import os

MYSQL_HOST = os.getenv("MYSQL_HOST", "117.72.33.120")
MYSQL_PORT = int(os.getenv("MYSQL_PORT", "3306"))
MYSQL_DB = os.getenv("MYSQL_DB", "czyc_mysql")
MYSQL_USER = os.getenv("MYSQL_USER", "root")
MYSQL_PASS = os.getenv("MYSQL_PASS", "zzh9472")

# SQLAlchemy connection string assembled from the parts above
# (consumed by Flask-SQLAlchemy via app.config).
SQLALCHEMY_DATABASE_URI = (
    f"mysql+pymysql://{MYSQL_USER}:{MYSQL_PASS}"
    f"@{MYSQL_HOST}:{MYSQL_PORT}/{MYSQL_DB}?charset=utf8mb4"
)

+ 11 - 0
app/utils/conf_spider.py

@@ -0,0 +1,11 @@
# Configuration for the CSRC (China Securities Regulatory Commission)
# "current political news" list-page spider.

wangye_name = "中国证券监督管理委员会_时政要闻"

# NOTE: the original header names and values contained stray spaces
# (e.g. 'accept - encoding', 'text / html, ...'). A header NAME with
# spaces is illegal and makes `requests` raise InvalidHeader before the
# request is even sent; the values were equally mangled. Normalized here
# to valid browser-like headers.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-encoding': 'gzip, deflate',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0'
}

# Page N (N >= 2) of the paginated list, e.g. common_list_2.shtml.
urls_template = 'http://www.csrc.gov.cn/csrc/c100027/common_list_{}.shtml'
# First page of the list (no index suffix).
url = 'http://www.csrc.gov.cn/csrc/c100027/common_list.shtml'

+ 11 - 0
app/utils/ext_logger.py

@@ -0,0 +1,11 @@
# -*- encoding: utf-8 -*-
'''
@File    :   ext_logger.py
@Time    :   2024/08/29 15:40:03
@Author  :   Zhangziheng 
'''

from .logger import createLogger


# Shared module-level logger; sibling modules (e.g. spider.py) import this
# single instance instead of configuring logging themselves.
logger = createLogger(__name__)

+ 14 - 0
app/utils/logger.py

@@ -0,0 +1,14 @@
# -*- encoding: utf-8 -*-
'''
@File    :   logger.py
@Time    :   2024/08/29 15:36:48
@Author  :   Zhangziheng
'''

import logging

# Root-logger setup is process-global state: perform it once at import
# time rather than inside every createLogger() call. (basicConfig is a
# no-op after its first invocation anyway, so the original per-call
# invocation was misleading — only the first caller's config ever took
# effect.)
_LOG_FORMAT = '[%(asctime)s]-[%(name)s]-[%(levelname)s]::[%(message)s]'
_DATE_FORMAT = '%Y-%m-%d'
logging.basicConfig(level=logging.INFO, datefmt=_DATE_FORMAT, format=_LOG_FORMAT)


def createLogger(name: str) -> logging.Logger:
    """Return a named logger backed by the module-wide basicConfig setup.

    Args:
        name: logger name, conventionally the caller's ``__name__``.

    Returns:
        The ``logging.Logger`` registered under ``name``.
    """
    return logging.getLogger(name)

+ 16 - 0
app/utils/models.py

@@ -0,0 +1,16 @@
# -*- encoding: utf-8 -*-
'''
@File    :   models.py
@Time    :   2024/08/29 15:40:42
@Author  :   Zhangziheng 
'''

from ..ext_db import db


class User(db.Model):
    # NOTE(review): despite the class name, the columns (date/link/title)
    # hold scraped news-list entries, not user accounts — consider a rename.
    __tablename__ = "user_czyc"
    # Surrogate auto-increment primary key.
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    # Publication date as scraped text (a VARCHAR, not a DATE column).
    date = db.Column(db.String(1000), nullable=False)
    # Article URL from the list page; may be site-relative — TODO confirm.
    link = db.Column(db.String(1000), nullable=False)
    # Article headline text.
    title = db.Column(db.String(1000), nullable=False)

+ 51 - 0
app/utils/spider.py

@@ -0,0 +1,51 @@
+# -*- encoding: utf-8 -*-
+'''
+@File    :   spider.py
+@Time    :   2024/08/29 15:37:35
+@Author  :   Zhangziheng 
+'''
+
+import re
+
+import requests
+from bs4 import BeautifulSoup
+
+from .conf_spider import *
+from .ext_logger import logger
+
+
def pageDeep() -> int:
    """Return the number of list pages, scraped from the first list page.

    The count is embedded in the page's inline pagination script as
    ``page_div',<N>,``. Falls back to 10 when the request fails or the
    marker cannot be found/parsed (logged instead of silently swallowed).
    """
    pat_page_index = r"page_div\',(\d{1,4}),"  # {1,4}: never match an empty digit run
    try:
        # Timeout keeps a stalled server from hanging the caller forever;
        # raise_for_status turns HTTP errors into exceptions we handle below.
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
        response.encoding = "utf-8"
        match = re.search(pat_page_index, response.text)
        return int(match.group(1))
    except (requests.RequestException, AttributeError, ValueError) as e:
        # AttributeError: no regex match; ValueError: non-numeric capture.
        logger.error(f"pageDeep failed, defaulting to 10: {e}")
        return 10
+
+
def fetch_news(urls):
    """Scrape news entries from CSRC list pages.

    Args:
        urls: iterable of list-page URLs to fetch.

    Returns:
        list of dicts with keys ``'title'``, ``'link'`` and ``'date'``.
        Pages that fail to download or parse are logged and skipped, so one
        bad page never aborts the whole run (deliberate best-effort).
    """
    news_list = []
    # Renamed loop variable: ``url`` would shadow the module-level constant
    # pulled in by ``from .conf_spider import *``.
    for page_url in urls:
        try:
            response = requests.get(page_url, headers=headers, timeout=15)
            response.encoding = "utf-8"  # avoid mojibake in the decoded HTML
            response.raise_for_status()  # raise on HTTP error statuses
            soup = BeautifulSoup(response.text, 'html.parser')
            ul = soup.find("ul", id="list")
            if ul is None:
                # Layout changed or an interstitial page was served.
                logger.error(f'No <ul id="list"> found on {page_url}')
                continue

            for li in ul.find_all('li'):
                a_tag = li.find("a")
                span_tag = li.find("span")
                if a_tag is None or span_tag is None:
                    continue  # skip malformed <li> entries

                news_list.append({
                    'title': a_tag.text,
                    'link': a_tag.get("href"),
                    'date': span_tag.text,
                })

        except requests.RequestException as e:
            logger.error(f'Request failed for {page_url}: {e}')
        except Exception as e:
            # Broad catch kept on purpose: parsing surprises must not
            # terminate the crawl of the remaining pages.
            logger.error(f'An error occurred for {page_url}: {e}')
    return news_list

+ 11 - 0
run.py

@@ -0,0 +1,11 @@
# -*- encoding: utf-8 -*-
'''
@File    :   run.py
@Time    :   2024/08/29 15:41:44
@Author  :   Zhangziheng 
'''

from app import app

if __name__ == '__main__':
    # Development-server entry point; serves on http://127.0.0.1:5200.
    app.run(port=5200)