Orbitin/main.py

#!/usr/bin/env python3
"""
码头作业日志管理工具
从 Confluence 获取交接班日志并保存到数据库
"""
import argparse
import sys
import os
from datetime import datetime

from src.confluence import ConfluenceClient
from src.extractor import HTMLTextExtractor
from src.parser import HandoverLogParser
from src.database import DailyLogsDatabase
from src.report import DailyReportGenerator

# Load environment variables (python-dotenv). This must run before the
# os.getenv() calls below so that values supplied by a .env file are visible.
from dotenv import load_dotenv
load_dotenv()

# Confluence settings, read from the environment. Each is None when unset;
# fetch_html() refuses to run unless all three are present.
CONF_BASE_URL = os.getenv('CONFLUENCE_BASE_URL')
CONF_TOKEN = os.getenv('CONFLUENCE_TOKEN')
CONF_CONTENT_ID = os.getenv('CONFLUENCE_CONTENT_ID')

# Feishu settings (optional). Not referenced by the functions visible in
# this file; presumably consumed by other modules -- verify before removing.
FEISHU_BASE_URL = os.getenv('FEISHU_BASE_URL')
FEISHU_TOKEN = os.getenv('FEISHU_TOKEN')
FEISHU_SPREADSHEET_TOKEN = os.getenv('FEISHU_SPREADSHEET_TOKEN')

# Directory (relative to the current working directory) for debug output.
DEBUG_DIR = 'debug'


def ensure_debug_dir():
    """Create the debug output directory if it does not already exist.

    ``exist_ok=True`` makes the call idempotent and removes the window
    between an existence check and the creation in which another process
    could create the directory first.

    Raises:
        FileExistsError: if ``DEBUG_DIR`` exists but is not a directory.
    """
    os.makedirs(DEBUG_DIR, exist_ok=True)


def get_timestamp():
    """Return the current local time as ``YYYYMMDD_HHMMSS`` for file names."""
    now = datetime.now()
    return f'{now:%Y%m%d_%H%M%S}'


def fetch_html():
    """Fetch the configured Confluence content and return it.

    Prints progress messages along the way. Calls ``sys.exit(1)`` when any
    of the three Confluence settings is missing, or when the client returns
    an empty/falsy result.

    Returns:
        The non-empty value returned by ``ConfluenceClient.get_html``.
    """
    # All three settings are required; a missing one is None (falsy).
    if not all((CONF_BASE_URL, CONF_TOKEN, CONF_CONTENT_ID)):
        print('错误：未配置 Confluence 信息，请检查 .env 文件')
        sys.exit(1)

    print('正在从 Confluence 获取 HTML 内容...')
    page_html = ConfluenceClient(CONF_BASE_URL, CONF_TOKEN).get_html(CONF_CONTENT_ID)
    if page_html:
        print(f'获取成功，共 {len(page_html)} 字符')
        return page_html

    print('错误：未获取到 HTML 内容')
    sys.exit(1)


def extract_text(html):
    """Run ``HTMLTextExtractor`` over *html* and return the extracted text.

    Prints a progress line before extraction and the length of the result
    afterwards.
    """
    print('正在提取布局文本...')
    extracted = HTMLTextExtractor().extract(html)
    print(f'提取完成，共 {len(extracted)} 字符')
    return extracted


def save_debug_file(content, suffix=''):
    """Write *content* to a ``layout_output`` text file under ``DEBUG_DIR``.

    Args:
        content: Text to write (UTF-8 encoded).
        suffix: Optional text inserted between ``layout_output`` and
            ``.txt``; a falsy value produces plain ``layout_output.txt``.

    Returns:
        The path of the file that was written.
    """
    ensure_debug_dir()

    name = 'layout_output.txt'
    if suffix:
        name = f'layout_output{suffix}.txt'
    target = os.path.join(DEBUG_DIR, name)

    with open(target, mode='w', encoding='utf-8') as out:
        out.write(content)

    print(f'已保存到 {target}')
    return target


def parse_logs(text):
    """Parse *text* with ``HandoverLogParser`` and return the parsed records.

    Prints a progress line and the number of records found.
    """
    print('正在解析日志数据...')
    records = HandoverLogParser().parse(text)
    print(f'解析到 {len(records)} 条记录')
    return records


def save_to_db(logs):
    """Store parsed log records in the database and print a summary.

    Args:
        logs: Parsed records; each one must provide ``to_dict()``. An empty
            or ``None`` value is reported and nothing is written.

    Returns:
        The count returned by ``DailyLogsDatabase.insert_many``, or ``0``
        when there was nothing to save.
    """
    if not logs:
        print('没有记录可保存')
        return 0

    db = DailyLogsDatabase()
    try:
        count = db.insert_many([log.to_dict() for log in logs])
        print(f'已保存 {count} 条记录到数据库')

        stats = db.get_stats()
        date_range = stats['date_range']
        print('\n数据库统计:')
        print(f'  总记录: {stats["total"]}')
        print(f'  船次: {len(stats["ships"])}')
        print(f'  日期范围: {date_range["start"]} ~ {date_range["end"]}')
    finally:
        # Release the database handle even if the insert or the stats
        # query raises.
        db.close()
    return count


def add_unaccounted(year_month, teu, note=''):
    """Record an unaccounted TEU figure for a month and report the outcome.

    Args:
        year_month: Month identifier passed through to the database
            (``main`` supplies it in ``YYYY-MM`` form).
        teu: TEU amount to record.
        note: Optional free-text note stored with the entry.

    Prints a success message when ``insert_unaccounted`` returns a truthy
    value, otherwise a failure message.
    """
    db = DailyLogsDatabase()
    try:
        if db.insert_unaccounted(year_month, teu, note):
            print(f'已添加 {year_month} 月未统计数据: {teu}TEU')
        else:
            print('添加失败')
    finally:
        # Close the handle even when the insert raises.
        db.close()


def show_stats(date):
    """Print the daily report for *date* using ``DailyReportGenerator``.

    Args:
        date: Date passed through to ``print_report`` (callers in this file
            supply a ``YYYY-MM-DD`` string).
    """
    generator = DailyReportGenerator()
    try:
        generator.print_report(date)
    finally:
        # Always release the generator's resources, even if reporting fails.
        generator.close()


def run_fetch():
    """Command: fetch the HTML, extract its text and save it as a debug file.

    Returns:
        The extracted text.
    """
    layout_text = extract_text(fetch_html())
    save_debug_file(layout_text)
    return layout_text


def run_fetch_and_save():
    """Command: fetch, extract, parse, then save the records to the database."""
    parsed = parse_logs(run_fetch())
    save_to_db(parsed)


def run_fetch_save_debug():
    """Command: fetch and extract, then save a timestamped debug file.

    Returns:
        The extracted text.
    """
    layout_text = extract_text(fetch_html())
    # The timestamp suffix keeps earlier debug dumps from being overwritten.
    save_debug_file(layout_text, '_' + get_timestamp())
    return layout_text


def run_report(date=None):
    """Command: print the daily report for *date*.

    Args:
        date: Report date; a falsy value means today's local date formatted
            as ``YYYY-MM-DD``.
    """
    show_stats(date or datetime.now().strftime('%Y-%m-%d'))


def run_parser_test():
    """Command: parse a previously saved ``layout_output.txt`` and preview it.

    Looks for the file in the current working directory first, then under
    ``DEBUG_DIR``. Prints the record count and the first five records;
    prints a notice and returns when neither file exists.
    """
    # Search order matters: the working-directory copy wins over the debug copy.
    candidates = ('layout_output.txt', os.path.join(DEBUG_DIR, 'layout_output.txt'))
    source_path = next((path for path in candidates if os.path.exists(path)), None)
    if source_path is None:
        print('未找到 layout_output.txt 文件')
        return

    print(f'使用文件: {source_path}')
    with open(source_path, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()

    records = HandoverLogParser().parse(raw_text)
    print(f'解析到 {len(records)} 条记录')
    for record in records[:5]:
        print(f'  {record.date} {record.shift} {record.ship_name}: {record.teu}TEU')


# Dispatch table: CLI command name -> zero-argument callable.
# main() also uses the keys, in this order, as the allowed argparse choices.
FUNCTIONS = {
    'fetch': run_fetch,
    'fetch-save': run_fetch_and_save,
    'fetch-debug': run_fetch_save_debug,
    # With no date, run_report falls back to today's date itself.
    'report': lambda: run_report(date=None),
    'report-today': lambda: run_report(date=datetime.now().strftime('%Y-%m-%d')),
    'parse-test': run_parser_test,
    'stats': lambda: show_stats(date=datetime.now().strftime('%Y-%m-%d')),
}


def _build_arg_parser():
    """Build the argparse parser describing this tool's command line."""
    parser = argparse.ArgumentParser(
        description='码头作业日志管理工具',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
可选功能:
  fetch        获取HTML并提取文本（保存到debug目录）
  fetch-save   获取、提取、解析并保存到数据库
  fetch-debug  获取、提取并保存带时间戳的debug文件
  report       生成日报（默认今天）
  report-today 生成今日日报
  parse-test   解析测试（使用已有的layout_output.txt）
  stats        显示今日统计

示例:
  python3 main.py fetch
  python3 main.py fetch-save
  python3 main.py report 2025-12-28
  python3 main.py parse-test
'''
    )
    parser.add_argument(
        'function',
        nargs='?',
        default='fetch-save',
        choices=list(FUNCTIONS),
        help='要执行的功能 (默认: fetch-save)'
    )
    parser.add_argument(
        'date',
        nargs='?',
        help='日期 (格式: YYYY-MM-DD)，用于 report 功能'
    )
    # The month is supplied through --month, not as a positional value, and
    # defaults to the current month; the help text states exactly that.
    parser.add_argument(
        '--unaccounted',
        '-u',
        metavar='TEU',
        type=int,
        help='添加未统计数据（可配合 --month 指定月份，默认当月，如 -u 118 -m 2025-12）'
    )
    parser.add_argument(
        '--month',
        '-m',
        metavar='YEAR-MONTH',
        help='指定月份（与 --unaccounted 配合使用）'
    )
    return parser


def main():
    """Command-line entry point: parse arguments and run the chosen action.

    When ``--unaccounted`` is given, the TEU figure is recorded for
    ``--month`` (default: the current month) and nothing else runs.
    Otherwise the selected command is looked up in ``FUNCTIONS`` and
    executed. The positional ``date`` is only honoured by the ``report``
    command; other commands ignore it.
    """
    args = _build_arg_parser().parse_args()

    # Compare against None rather than truthiness: "-u 0" is a valid value
    # and must not silently fall through to the default 'fetch-save' command.
    if args.unaccounted is not None:
        year_month = args.month or datetime.now().strftime('%Y-%m')
        add_unaccounted(year_month, args.unaccounted)
        return

    if args.function == 'report' and args.date:
        run_report(args.date)
    else:
        FUNCTIONS[args.function]()

# Run the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
-												refactor: 模块化重构项目结构

											
										
										
											2025-12-28 23:31:22 +08:00
+								#!/usr/bin/env python3
 								"""
-												Refactor: modular main.py with selectable functions, save debug output to debug/ dir

											
										
										
											2025-12-29 01:09:59 +08:00
+								码头作业日志管理工具
-												refactor: 模块化重构项目结构

											
										
										
											2025-12-28 23:31:22 +08:00
+								从 Confluence 获取交接班日志并保存到数据库
 								"""
 								import argparse
 								import sys
-												Refactor: modular main.py with selectable functions, save debug output to debug/ dir

											
										
										
											2025-12-29 01:09:59 +08:00
+								import os
 								from datetime import datetime
-												refactor: 模块化重构项目结构

											
										
										
											2025-12-28 23:31:22 +08:00
 								from src.confluence import ConfluenceClient
 								from src.extractor import HTMLTextExtractor
 								from src.parser import HandoverLogParser
 								from src.database import DailyLogsDatabase
-												Refactor: modular main.py with selectable functions, save debug output to debug/ dir

											
										
										
											2025-12-29 01:09:59 +08:00
+								from src.report import DailyReportGenerator
-												refactor: 模块化重构项目结构

											
										
										
											2025-12-28 23:31:22 +08:00
-												Add .env configuration for Confluence settings

											
										
										
											2025-12-29 01:15:57 +08:00
+								# 加载环境变量
 								from dotenv import load_dotenv
 								load_dotenv()
 								# 配置（从环境变量读取）
 								CONF_BASE_URL = os.getenv('CONFLUENCE_BASE_URL')
 								CONF_TOKEN = os.getenv('CONFLUENCE_TOKEN')
 								CONF_CONTENT_ID = os.getenv('CONFLUENCE_CONTENT_ID')
-												refactor: 模块化重构项目结构

											
										
										
											2025-12-28 23:31:22 +08:00
-												feat: 添加飞书表格模块支持排班人员信息获取

- 新增 src/feishu_v2.py: 飞书表格API客户端，支持数据库存储和2026年全年排班表
- 新增 src/schedule_database.py: 排班信息数据库模块，用于缓存排班数据
- 新增 docs/feishu_data_flow.md: 飞书数据流文档
- 新增 plans/feishu_scheduling_plan.md: 飞书排班表模块设计文档
- 更新 src/report.py: 使用新的飞书模块获取排班人员信息
- 更新 src/gui.py: 启动时自动获取新数据，添加auto_fetch_data方法
- 更新 .env.example: 添加飞书配置示例
- 更新 AGENTS.md: 更新项目文档
- 更新 main.py: 集成飞书模块

功能特性:
1. 支持从飞书表格获取排班人员信息
2. 支持2025年月度表格和2026年全年排班表
3. 使用SQLite数据库缓存，减少API调用
4. 自动检测表格更新
5. GUI启动时自动获取最新数据
6. 日报中正确显示次日班次人员信息

											
										
										
											2025-12-31 00:03:34 +08:00
+								# 飞书配置（可选）
 								FEISHU_BASE_URL = os.getenv('FEISHU_BASE_URL')
 								FEISHU_TOKEN = os.getenv('FEISHU_TOKEN')
 								FEISHU_SPREADSHEET_TOKEN = os.getenv('FEISHU_SPREADSHEET_TOKEN')
-												Refactor: modular main.py with selectable functions, save debug output to debug/ dir

											
										
										
											2025-12-29 01:09:59 +08:00
+								DEBUG_DIR = 'debug'
 								def ensure_debug_dir():
 								    """确保debug目录存在"""
 								    if not os.path.exists(DEBUG_DIR):
 								        os.makedirs(DEBUG_DIR)
 								def get_timestamp():
 								    """获取时间戳用于文件名"""
 								    return datetime.now().strftime('%Y%m%d_%H%M%S')
 								def fetch_html():
 								    """获取HTML内容"""
-												Add .env configuration for Confluence settings

											
										
										
											2025-12-29 01:15:57 +08:00
+								    if not CONF_BASE_URL or not CONF_TOKEN or not CONF_CONTENT_ID:
 								        print('错误：未配置 Confluence 信息，请检查 .env 文件')
 								        sys.exit(1)
-												refactor: 模块化重构项目结构

											
										
										
											2025-12-28 23:31:22 +08:00
+								    print('正在从 Confluence 获取 HTML 内容...')
-												Refactor: modular main.py with selectable functions, save debug output to debug/ dir

											
										
										
											2025-12-29 01:09:59 +08:00
+								    client = ConfluenceClient(CONF_BASE_URL, CONF_TOKEN)
 								    html = client.get_html(CONF_CONTENT_ID)
-												refactor: 模块化重构项目结构

											
										
										
											2025-12-28 23:31:22 +08:00
+								    if not html:
 								        print('错误：未获取到 HTML 内容')
 								        sys.exit(1)
-												Refactor: modular main.py with selectable functions, save debug output to debug/ dir

											
										
										
											2025-12-29 01:09:59 +08:00
+								    print(f'获取成功，共 {len(html)} 字符')
 								    return html
 								def extract_text(html):
 								    """提取布局文本"""
-												refactor: 模块化重构项目结构

											
										
										
											2025-12-28 23:31:22 +08:00
+								    print('正在提取布局文本...')
 								    extractor = HTMLTextExtractor()
 								    layout_text = extractor.extract(html)
-												Refactor: modular main.py with selectable functions, save debug output to debug/ dir

											
										
										
											2025-12-29 01:09:59 +08:00
+								    print(f'提取完成，共 {len(layout_text)} 字符')
 								    return layout_text
 								def save_debug_file(content, suffix=''):
 								    """保存调试文件到debug目录"""
 								    ensure_debug_dir()
 								    filename = f'layout_output{suffix}.txt' if suffix else 'layout_output.txt'
 								    filepath = os.path.join(DEBUG_DIR, filename)
 								    with open(filepath, 'w', encoding='utf-8') as f:
 								        f.write(content)
 								    print(f'已保存到 {filepath}')
 								    return filepath
 								def parse_logs(text):
 								    """解析日志数据"""
 								    print('正在解析日志数据...')
 								    parser = HandoverLogParser()
 								    logs = parser.parse(text)
 								    print(f'解析到 {len(logs)} 条记录')
 								    return logs
 								def save_to_db(logs):
 								    """保存到数据库"""
 								    if not logs:
 								        print('没有记录可保存')
 								        return 0
-												refactor: 模块化重构项目结构

											
										
										
											2025-12-28 23:31:22 +08:00
-												Refactor: modular main.py with selectable functions, save debug output to debug/ dir

											
										
										
											2025-12-29 01:09:59 +08:00
+								    db = DailyLogsDatabase()
 								    count = db.insert_many([log.to_dict() for log in logs])
 								    print(f'已保存 {count} 条记录到数据库')
-												refactor: 模块化重构项目结构

											
										
										
											2025-12-28 23:31:22 +08:00
-												Refactor: modular main.py with selectable functions, save debug output to debug/ dir

											
										
										
											2025-12-29 01:09:59 +08:00
+								    stats = db.get_stats()
 								    print(f'\n数据库统计:')
 								    print(f'  总记录: {stats["total"]}')
 								    print(f'  船次: {len(stats["ships"])}')
 								    print(f'  日期范围: {stats["date_range"]["start"]} ~ {stats["date_range"]["end"]}')
-												refactor: 模块化重构项目结构

											
										
										
											2025-12-28 23:31:22 +08:00
-												Refactor: modular main.py with selectable functions, save debug output to debug/ dir

											
										
										
											2025-12-29 01:09:59 +08:00
+								    db.close()
 								    return count
-												refactor: 模块化重构项目结构

											
										
										
											2025-12-28 23:31:22 +08:00
-												Refactor: modular main.py with selectable functions, save debug output to debug/ dir

											
										
										
											2025-12-29 01:09:59 +08:00
+								def add_unaccounted(year_month, teu, note=''):
 								    """添加未统计数据"""
 								    db = DailyLogsDatabase()
 								    result = db.insert_unaccounted(year_month, teu, note)
 								    if result:
 								        print(f'已添加 {year_month} 月未统计数据: {teu}TEU')
 								    else:
 								        print('添加失败')
 								    db.close()
 								def show_stats(date):
 								    """显示指定日期的统计"""
 								    g = DailyReportGenerator()
 								    g.print_report(date)
 								    g.close()
 								def run_fetch():
 								    """执行：获取HTML并提取文本"""
 								    html = fetch_html()
 								    text = extract_text(html)
 								    save_debug_file(text)
 								    return text
 								def run_fetch_and_save():
 								    """执行：获取、提取、解析、保存到数据库"""
 								    text = run_fetch()
 								    logs = parse_logs(text)
 								    save_to_db(logs)
 								def run_fetch_save_debug():
 								    """执行：获取、提取、保存到debug目录"""
 								    html = fetch_html()
 								    text = extract_text(html)
 								    suffix = f'_{get_timestamp()}'
 								    save_debug_file(text, suffix)
 								    return text
 								def run_report(date=None):
 								    """执行：生成日报"""
 								    if not date:
 								        date = datetime.now().strftime('%Y-%m-%d')
 								    show_stats(date)
 								def run_parser_test():
 								    """执行：解析测试"""
 								    ensure_debug_file_path = os.path.join(DEBUG_DIR, 'layout_output.txt')
 								    if os.path.exists('layout_output.txt'):
 								        filepath = 'layout_output.txt'
 								    elif os.path.exists(ensure_debug_file_path):
 								        filepath = ensure_debug_file_path
 								    else:
 								        print('未找到 layout_output.txt 文件')
 								        return
 								    print(f'使用文件: {filepath}')
 								    with open(filepath, 'r', encoding='utf-8') as f:
 								        text = f.read()
 								    parser = HandoverLogParser()
 								    logs = parser.parse(text)
 								    print(f'解析到 {len(logs)} 条记录')
 								    for log in logs[:5]:
 								        print(f'  {log.date} {log.shift} {log.ship_name}: {log.teu}TEU')
 								# 功能映射
 								FUNCTIONS = {
 								    'fetch': run_fetch,
 								    'fetch-save': run_fetch_and_save,
 								    'fetch-debug': run_fetch_save_debug,
 								    'report': lambda: run_report(),
 								    'report-today': lambda: run_report(datetime.now().strftime('%Y-%m-%d')),
 								    'parse-test': run_parser_test,
 								    'stats': lambda: show_stats(datetime.now().strftime('%Y-%m-%d')),
 								}
 								def main():
 								    parser = argparse.ArgumentParser(
 								        description='码头作业日志管理工具',
 								        formatter_class=argparse.RawDescriptionHelpFormatter,
 								        epilog='''
 								可选功能:
 								  fetch        获取HTML并提取文本（保存到debug目录）
 								  fetch-save   获取、提取、解析并保存到数据库
 								  fetch-debug  获取、提取并保存带时间戳的debug文件
 								  report       生成日报（默认今天）
-												Add .env configuration for Confluence settings

											
										
										
											2025-12-29 01:15:57 +08:00
+								  report-today 生成今日日报
-												Refactor: modular main.py with selectable functions, save debug output to debug/ dir

											
										
										
											2025-12-29 01:09:59 +08:00
+								  parse-test   解析测试（使用已有的layout_output.txt）
 								  stats        显示今日统计
 								示例:
 								  python3 main.py fetch
 								  python3 main.py fetch-save
 								  python3 main.py report 2025-12-28
 								  python3 main.py parse-test
 								'''
 								    )
 								    parser.add_argument(
 								        'function',
 								        nargs='?',
 								        default='fetch-save',
 								        choices=list(FUNCTIONS.keys()),
 								        help='要执行的功能 (默认: fetch-save)'
 								    )
 								    parser.add_argument(
 								        'date',
 								        nargs='?',
 								        help='日期 (格式: YYYY-MM-DD)，用于 report 功能'
 								    )
 								    parser.add_argument(
 								        '--unaccounted',
 								        '-u',
 								        metavar='TEU',
 								        type=int,
 								        help='添加未统计数据（需同时指定月份，如 -u 118 2025-12）'
 								    )
 								    parser.add_argument(
 								        '--month',
 								        '-m',
 								        metavar='YEAR-MONTH',
 								        help='指定月份（与 --unaccounted 配合使用）'
 								    )
-												refactor: 模块化重构项目结构

											
										
										
											2025-12-28 23:31:22 +08:00
+								    args = parser.parse_args()
-												Refactor: modular main.py with selectable functions, save debug output to debug/ dir

											
										
										
											2025-12-29 01:09:59 +08:00
+								    # 添加未统计数据
 								    if args.unaccounted:
 								        year_month = args.month or datetime.now().strftime('%Y-%m')
 								        add_unaccounted(year_month, args.unaccounted)
 								        return
 								    # 执行功能
 								    if args.function == 'report' and args.date:
 								        run_report(args.date)
 								    else:
 								        FUNCTIONS[args.function]()
 								if __name__ == '__main__':
 								    main()