Refactor: modular main.py with selectable functions, save debug output to debug/ dir

2026-02-10 07:41:29 +08:00 · 2025-12-29 01:09:59 +08:00
parent 283a035ab1
commit 04b7b3bb36
4 changed files with 253 additions and 61 deletions
--- a/main.py
+++ b/main.py
@@ -1,75 +1,243 @@
 #!/usr/bin/env python3
 """
+码头作业日志管理工具
 从 Confluence 获取交接班日志并保存到数据库
 """
 import argparse
 import sys
+import os
+from datetime import datetime

 from src.confluence import ConfluenceClient
 from src.extractor import HTMLTextExtractor
 from src.parser import HandoverLogParser
 from src.database import DailyLogsDatabase
+from src.report import DailyReportGenerator
+
+# 配置
+CONF_BASE_URL = 'https://confluence.westwell-lab.com/rest/api'
+CONF_TOKEN = 'NDE1NTcwMDE1ODQ0OiinqS5HLm12v2orWEYyjJcI1bl5'
+CONF_CONTENT_ID = '155764524'
+
+DEBUG_DIR = 'debug'


-def run(save_db: bool = True):
-    """运行主流程"""
-    # 配置
-    CONTENT_ID = '155764524'
-    BASE_URL = 'https://confluence.westwell-lab.com/rest/api'
-    TOKEN = 'NDE1NTcwMDE1ODQ0OiinqS5HLm12v2orWEYyjJcI1bl5'
-    
+def ensure_debug_dir():
+    """确保debug目录存在"""
+    if not os.path.exists(DEBUG_DIR):
+        os.makedirs(DEBUG_DIR)
+
+
+def get_timestamp():
+    """获取时间戳用于文件名"""
+    return datetime.now().strftime('%Y%m%d_%H%M%S')
+
+
+def fetch_html():
+    """获取HTML内容"""
    print('正在从 Confluence 获取 HTML 内容...')
-    
-    # 获取 HTML
-    client = ConfluenceClient(BASE_URL, TOKEN)
-    html = client.get_html(CONTENT_ID)
-    
+    client = ConfluenceClient(CONF_BASE_URL, CONF_TOKEN)
+    html = client.get_html(CONF_CONTENT_ID)
    if not html:
        print('错误：未获取到 HTML 内容')
        sys.exit(1)
-    
+    print(f'获取成功，共 {len(html)} 字符')
+    return html
+
+
+def extract_text(html):
+    """提取布局文本"""
    print('正在提取布局文本...')
-    
-    # 提取文本
    extractor = HTMLTextExtractor()
    layout_text = extractor.extract(html)
+    print(f'提取完成，共 {len(layout_text)} 字符')
+    return layout_text
+
+
+def save_debug_file(content, suffix=''):
+    """保存调试文件到debug目录"""
+    ensure_debug_dir()
+    filename = f'layout_output{suffix}.txt' if suffix else 'layout_output.txt'
+    filepath = os.path.join(DEBUG_DIR, filename)
+    with open(filepath, 'w', encoding='utf-8') as f:
+        f.write(content)
+    print(f'已保存到 {filepath}')
+    return filepath
+
+
+def parse_logs(text):
+    """解析日志数据"""
+    print('正在解析日志数据...')
+    parser = HandoverLogParser()
+    logs = parser.parse(text)
+    print(f'解析到 {len(logs)} 条记录')
+    return logs
+
+
+def save_to_db(logs):
+    """保存到数据库"""
+    if not logs:
+        print('没有记录可保存')
+        return 0
    
-    print(f'\n提取完成，共 {len(layout_text)} 字符\n')
+    db = DailyLogsDatabase()
+    count = db.insert_many([log.to_dict() for log in logs])
+    print(f'已保存 {count} 条记录到数据库')
    
-    # 保存到文件（可选）
-    with open('layout_output.txt', 'w', encoding='utf-8') as f:
-        f.write(layout_text)
-    print('布局文本已保存到 layout_output.txt')
+    stats = db.get_stats()
+    print(f'\n数据库统计:')
+    print(f'  总记录: {stats["total"]}')
+    print(f'  船次: {len(stats["ships"])}')
+    print(f'  日期范围: {stats["date_range"]["start"]} ~ {stats["date_range"]["end"]}')
    
-    # 保存到数据库（可选）
-    if save_db:
-        print('\n正在解析日志数据...')
-        
-        parser = HandoverLogParser()
-        logs = parser.parse(layout_text)
-        
-        if not logs:
-            print('未解析到任何记录')
-            return
-        
-        print(f'解析到 {len(logs)} 条记录')
-        
-        db = DailyLogsDatabase()
-        count = db.insert_many([log.to_dict() for log in logs])
-        print(f'已保存 {count} 条记录到数据库')
-        
-        stats = db.get_stats()
-        print(f'\n数据库统计:')
-        print(f'  总记录: {stats["total"]}')
-        print(f'  船次: {len(stats["ships"])}')
-        print(f'  日期范围: {stats["date_range"]["start"]} ~ {stats["date_range"]["end"]}')
-        
-        db.close()
+    db.close()
+    return count
+
+
+def add_unaccounted(year_month, teu, note=''):
+    """添加未统计数据"""
+    db = DailyLogsDatabase()
+    result = db.insert_unaccounted(year_month, teu, note)
+    if result:
+        print(f'已添加 {year_month} 月未统计数据: {teu}TEU')
+    else:
+        print('添加失败')
+    db.close()
+
+
+def show_stats(date):
+    """显示指定日期的统计"""
+    g = DailyReportGenerator()
+    g.print_report(date)
+    g.close()
+
+
+def run_fetch():
+    """执行：获取HTML并提取文本"""
+    html = fetch_html()
+    text = extract_text(html)
+    save_debug_file(text)
+    return text
+
+
+def run_fetch_and_save():
+    """执行：获取、提取、解析、保存到数据库"""
+    text = run_fetch()
+    logs = parse_logs(text)
+    save_to_db(logs)
+
+
+def run_fetch_save_debug():
+    """执行：获取、提取、保存到debug目录"""
+    html = fetch_html()
+    text = extract_text(html)
+    suffix = f'_{get_timestamp()}'
+    save_debug_file(text, suffix)
+    return text
+
+
+def run_report(date=None):
+    """执行：生成日报"""
+    if not date:
+        date = datetime.now().strftime('%Y-%m-%d')
+    show_stats(date)
+
+
+def run_parser_test():
+    """执行：解析测试"""
+    ensure_debug_file_path = os.path.join(DEBUG_DIR, 'layout_output.txt')
+    if os.path.exists('layout_output.txt'):
+        filepath = 'layout_output.txt'
+    elif os.path.exists(ensure_debug_file_path):
+        filepath = ensure_debug_file_path
+    else:
+        print('未找到 layout_output.txt 文件')
+        return
+    
+    print(f'使用文件: {filepath}')
+    with open(filepath, 'r', encoding='utf-8') as f:
+        text = f.read()
+    
+    parser = HandoverLogParser()
+    logs = parser.parse(text)
+    print(f'解析到 {len(logs)} 条记录')
+    for log in logs[:5]:
+        print(f'  {log.date} {log.shift} {log.ship_name}: {log.teu}TEU')
+
+
+# 功能映射
+FUNCTIONS = {
+    'fetch': run_fetch,
+    'fetch-save': run_fetch_and_save,
+    'fetch-debug': run_fetch_save_debug,
+    'report': lambda: run_report(),
+    'report-today': lambda: run_report(datetime.now().strftime('%Y-%m-%d')),
+    'parse-test': run_parser_test,
+    'stats': lambda: show_stats(datetime.now().strftime('%Y-%m-%d')),
+}
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='码头作业日志管理工具',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog='''
+可选功能:
+  fetch        获取HTML并提取文本（保存到debug目录）
+  fetch-save   获取、提取、解析并保存到数据库
+  fetch-debug  获取、提取并保存带时间戳的debug文件
+  report       生成日报（默认今天）
+  report-today 生成今天日报
+  parse-test   解析测试（使用已有的layout_output.txt）
+  stats        显示今日统计
+
+示例:
+  python3 main.py fetch
+  python3 main.py fetch-save
+  python3 main.py report 2025-12-28
+  python3 main.py parse-test
+'''
+    )
+    parser.add_argument(
+        'function',
+        nargs='?',
+        default='fetch-save',
+        choices=list(FUNCTIONS.keys()),
+        help='要执行的功能 (默认: fetch-save)'
+    )
+    parser.add_argument(
+        'date',
+        nargs='?',
+        help='日期 (格式: YYYY-MM-DD)，用于 report 功能'
+    )
+    parser.add_argument(
+        '--unaccounted',
+        '-u',
+        metavar='TEU',
+        type=int,
+        help='添加未统计数据（可用 -m 指定月份，默认当月，如 -u 118 -m 2025-12）'
+    )
+    parser.add_argument(
+        '--month',
+        '-m',
+        metavar='YEAR-MONTH',
+        help='指定月份（与 --unaccounted 配合使用）'
+    )
+    
+    args = parser.parse_args()
+    
+    # 添加未统计数据
+    if args.unaccounted:
+        year_month = args.month or datetime.now().strftime('%Y-%m')
+        add_unaccounted(year_month, args.unaccounted)
+        return
+    
+    # 执行功能
+    if args.function == 'report' and args.date:
+        run_report(args.date)
+    else:
+        FUNCTIONS[args.function]()


 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='从 Confluence 获取交接班日志')
-    parser.add_argument('--no-db', action='store_true', help='不保存到数据库')
-    args = parser.parse_args()
-    
-    run(save_db=not args.no_db)
+    main()