mirror of
https://devops.liangqichi.top/qichi.liang/Orbitin.git
synced 2026-02-10 07:41:29 +08:00
76 lines
2.2 KiB
Python
76 lines
2.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
从 Confluence 获取交接班日志并保存到数据库
|
|
"""
|
|
import argparse
|
|
import sys
|
|
|
|
from src.confluence import ConfluenceClient
|
|
from src.extractor import HTMLTextExtractor
|
|
from src.parser import HandoverLogParser
|
|
from src.database import DailyLogsDatabase
|
|
|
|
|
|
def run(save_db: bool = True) -> None:
    """Fetch the handover-log page from Confluence, extract its text and optionally store it.

    Steps, in order:

    1. Download the page HTML with ``ConfluenceClient.get_html``.
    2. Convert the HTML to layout text with ``HTMLTextExtractor.extract``.
    3. Write the layout text to ``layout_output.txt`` (UTF-8) in the current
       working directory. This happens on every run, regardless of ``save_db``.
    4. When ``save_db`` is true, parse the text with ``HandoverLogParser``,
       insert the records with ``DailyLogsDatabase.insert_many`` and print
       the statistics returned by ``DailyLogsDatabase.get_stats``.

    The content ID, API base URL and API token may be overridden with the
    environment variables ``CONFLUENCE_CONTENT_ID``, ``CONFLUENCE_BASE_URL``
    and ``CONFLUENCE_TOKEN``. When a variable is unset or empty, the built-in
    default is used.

    Args:
        save_db: If False, stop after writing ``layout_output.txt``.

    Raises:
        SystemExit: With exit code 1 when no HTML content was fetched.
    """
    import os  # Local import: only this function reads the environment.

    # Configuration. Environment variables take precedence so that credentials
    # can be supplied without editing the source.
    content_id = os.environ.get('CONFLUENCE_CONTENT_ID') or '155764524'
    base_url = (
        os.environ.get('CONFLUENCE_BASE_URL')
        or 'https://confluence.westwell-lab.com/rest/api'
    )
    # SECURITY: the fallback token below is committed to the repository. It is
    # kept only for backward compatibility; rotate it and supply the
    # replacement through CONFLUENCE_TOKEN.
    token = (
        os.environ.get('CONFLUENCE_TOKEN')
        or 'NDE1NTcwMDE1ODQ0OiinqS5HLm12v2orWEYyjJcI1bl5'
    )

    print('正在从 Confluence 获取 HTML 内容...')

    # Fetch the HTML.
    client = ConfluenceClient(base_url, token)
    html = client.get_html(content_id)

    if not html:
        print('错误:未获取到 HTML 内容')
        sys.exit(1)

    print('正在提取布局文本...')

    # Extract the text.
    extractor = HTMLTextExtractor()
    layout_text = extractor.extract(html)

    print(f'\n提取完成,共 {len(layout_text)} 字符\n')

    # Save to a file (always performed).
    with open('layout_output.txt', 'w', encoding='utf-8') as f:
        f.write(layout_text)
    print('布局文本已保存到 layout_output.txt')

    # Saving to the database is optional.
    if not save_db:
        return

    print('\n正在解析日志数据...')

    parser = HandoverLogParser()
    logs = parser.parse(layout_text)

    if not logs:
        print('未解析到任何记录')
        return

    print(f'解析到 {len(logs)} 条记录')

    db = DailyLogsDatabase()
    try:
        count = db.insert_many([log.to_dict() for log in logs])
        print(f'已保存 {count} 条记录到数据库')

        stats = db.get_stats()
        print('\n数据库统计:')
        print(f' 总记录: {stats["total"]}')
        print(f' 船次: {len(stats["ships"])}')
        print(f' 日期范围: {stats["date_range"]["start"]} ~ {stats["date_range"]["end"]}')
    finally:
        # Release the database handle even if inserting or reading the
        # statistics raises.
        db.close()
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: passing --no-db skips the database step.
    cli = argparse.ArgumentParser(description='从 Confluence 获取交接班日志')
    cli.add_argument(
        '--no-db',
        action='store_true',
        help='不保存到数据库',
    )
    options = cli.parse_args()

    skip_db = options.no_db
    run(save_db=not skip_db)
|