refactor: 模块化重构项目结构
This commit is contained in:
75
fetch_and_process.py
Normal file
75
fetch_and_process.py
Normal file
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
从 Confluence 获取交接班日志并保存到数据库
|
||||
"""
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from src.confluence import ConfluenceClient
|
||||
from src.extractor import HTMLTextExtractor
|
||||
from src.parser import HandoverLogParser
|
||||
from src.database import DailyLogsDatabase
|
||||
|
||||
|
||||
def run(save_db: bool = True):
    """Fetch the handover log from Confluence, dump it to a file, and optionally store it.

    The page HTML is downloaded, converted to layout text, and written to
    ``layout_output.txt`` in the current working directory. When ``save_db``
    is true the layout text is additionally parsed into log records, which
    are inserted into the database, followed by a short statistics summary.

    Args:
        save_db: When true (the default), parse the layout text and save the
            resulting records to the database. When false, stop after
            writing the text file.

    Raises:
        SystemExit: With exit code 1 when no HTML content was retrieved.
    """
    import os  # local import so this block does not depend on file-level imports

    # Configuration
    CONTENT_ID = '155764524'
    BASE_URL = 'https://confluence.westwell-lab.com/rest/api'
    # NOTE(review): a credential is committed in source. Prefer supplying it via
    # the CONFLUENCE_TOKEN environment variable; the literal is kept only as a
    # backward-compatible fallback and should be rotated and removed.
    TOKEN = os.environ.get(
        'CONFLUENCE_TOKEN',
        'NDE1NTcwMDE1ODQ0OiinqS5HLm12v2orWEYyjJcI1bl5',
    )

    print('正在从 Confluence 获取 HTML 内容...')

    # Fetch the page HTML
    client = ConfluenceClient(BASE_URL, TOKEN)
    html = client.get_html(CONTENT_ID)

    if not html:
        print('错误:未获取到 HTML 内容')
        sys.exit(1)

    print('正在提取布局文本...')

    # Extract the layout text
    extractor = HTMLTextExtractor()
    layout_text = extractor.extract(html)

    print(f'\n提取完成,共 {len(layout_text)} 字符\n')

    # Always keep a copy of the extracted text on disk
    with open('layout_output.txt', 'w', encoding='utf-8') as f:
        f.write(layout_text)
    print('布局文本已保存到 layout_output.txt')

    # Database step is skipped entirely when save_db is false
    if not save_db:
        return

    print('\n正在解析日志数据...')

    parser = HandoverLogParser()
    logs = parser.parse(layout_text)

    if not logs:
        print('未解析到任何记录')
        return

    print(f'解析到 {len(logs)} 条记录')

    db = DailyLogsDatabase()
    try:
        count = db.insert_many([log.to_dict() for log in logs])
        print(f'已保存 {count} 条记录到数据库')

        stats = db.get_stats()
        print('\n数据库统计:')
        print(f' 总记录: {stats["total"]}')
        print(f' 船次: {len(stats["ships"])}')
        print(f' 日期范围: {stats["date_range"]["start"]} ~ {stats["date_range"]["end"]}')
    finally:
        # Close the connection even if inserting or reading stats fails.
        db.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: the only option is an opt-out switch for the
    # database step.
    cli = argparse.ArgumentParser(description='从 Confluence 获取交接班日志')
    cli.add_argument('--no-db', action='store_true', help='不保存到数据库')
    options = cli.parse_args()

    # --no-db is a negative flag, so it is inverted to obtain save_db.
    should_save = not options.no_db
    run(save_db=should_save)
|
||||
Reference in New Issue
Block a user