Files
Gloria/export_data.py

197 lines
7.4 KiB
Python

#!/usr/bin/env python3
"""
船舶数据导出脚本
用法:
python export_data.py --format json --output ship_data.json
python export_data.py --format csv --output ship_data.csv
python export_data.py --analysis # 打印数据分析报告
"""
import argparse
import json
import csv
import asyncio
import sys
from datetime import datetime
from pathlib import Path
# 添加项目根目录到路径
sys.path.insert(0, str(Path(__file__).parent))
from main import get_table_data, get_data_analysis
from services.cache import cache
async def export_to_json(output_path: str):
"""导出数据到JSON文件"""
print("正在获取表格数据...")
table_data = await get_table_data()
data_dict = {
"export_time": datetime.now().isoformat(),
"total_records": table_data.total_records,
"fields": table_data.fields,
"records_with_data": table_data.records_with_data,
"records_without_data": table_data.records_without_data,
"data": [row.model_dump() for row in table_data.data]
}
with open(output_path, 'w', encoding='utf-8') as f:
json.dump(data_dict, f, ensure_ascii=False, indent=2)
print(f"✓ 数据已导出到: {output_path}")
print(f" - 总记录数: {table_data.total_records}")
print(f" - 有业务数据: {table_data.records_with_data}")
print(f" - 无业务数据: {table_data.records_without_data}")
async def export_to_csv(output_path: str):
"""导出数据到CSV文件"""
print("正在获取表格数据...")
table_data = await get_table_data()
with open(output_path, 'w', newline='', encoding='utf-8-sig') as f:
writer = csv.writer(f)
# 写入表头
headers = [
"船舶代码", "船名", "报告日期", "月份", "作业时间", "作业类型",
"TEU", "Moves", "毛效率", "净效率",
"故障次数", "故障率", "人工介入次数", "人工介入率",
"页面ID", "页面URL", "是否有完整数据"
]
writer.writerow(headers)
# 写入数据行
for row in table_data.data:
writer.writerow([
row.ship_code,
row.ship_name or "",
row.report_date or "",
row.month or "",
row.operation_time or "",
row.operation_type or "",
row.teu if row.teu is not None else "",
row.moves if row.moves is not None else "",
row.gross_efficiency if row.gross_efficiency is not None else "",
row.net_efficiency if row.net_efficiency is not None else "",
row.fault_count if row.fault_count is not None else "",
row.fault_rate if row.fault_rate is not None else "",
row.manual_intervention_count if row.manual_intervention_count is not None else "",
row.manual_intervention_rate if row.manual_intervention_rate is not None else "",
row.page_id,
row.page_url,
"" if row.has_complete_data else ""
])
print(f"✓ 数据已导出到: {output_path}")
print(f" - 总记录数: {table_data.total_records}")
print(f" - 有业务数据: {table_data.records_with_data}")
print(f" - 无业务数据: {table_data.records_without_data}")
async def print_analysis():
"""打印数据分析报告"""
print("正在生成数据分析报告...\n")
analysis = await get_data_analysis()
print("=" * 60)
print("船舶数据分析报告")
print("=" * 60)
print(f"\n📊 总体统计:")
print(f" - 船舶总数: {analysis.total_ships}")
print(f" - 有TEU数据: {analysis.ships_with_teu} ({analysis.ships_with_teu/analysis.total_ships*100:.1f}%)")
print(f" - 有Moves数据: {analysis.ships_with_moves} ({analysis.ships_with_moves/analysis.total_ships*100:.1f}%)")
print(f" - 有效率数据: {analysis.ships_with_efficiency} ({analysis.ships_with_efficiency/analysis.total_ships*100:.1f}%)")
print(f" - 有故障数据: {analysis.ships_with_faults} ({analysis.ships_with_faults/analysis.total_ships*100:.1f}%)")
print(f" - 有人工介入数据: {analysis.ships_with_manual} ({analysis.ships_with_manual/analysis.total_ships*100:.1f}%)")
print(f"\n📅 月份分布:")
for month, count in sorted(analysis.monthly_distribution.items()):
print(f" - {month}: {count} 条记录")
print(f"\n⚓ 作业类型分布:")
for op_type, count in analysis.operation_type_distribution.items():
print(f" - {op_type}: {count} 条记录")
if analysis.teu_stats.get('total'):
print(f"\n📦 TEU统计:")
print(f" - 最小值: {analysis.teu_stats['min']:.2f}")
print(f" - 最大值: {analysis.teu_stats['max']:.2f}")
print(f" - 平均值: {analysis.teu_stats['avg']:.2f}")
print(f" - 总计: {analysis.teu_stats['total']:.2f}")
if analysis.moves_stats.get('total'):
print(f"\n🔄 Moves统计:")
print(f" - 最小值: {analysis.moves_stats['min']:.2f}")
print(f" - 最大值: {analysis.moves_stats['max']:.2f}")
print(f" - 平均值: {analysis.moves_stats['avg']:.2f}")
print(f" - 总计: {analysis.moves_stats['total']:.2f}")
if analysis.efficiency_stats.get('avg'):
print(f"\n⚡ 效率统计:")
print(f" - 最小值: {analysis.efficiency_stats['min']:.2f}")
print(f" - 最大值: {analysis.efficiency_stats['max']:.2f}")
print(f" - 平均值: {analysis.efficiency_stats['avg']:.2f}")
print("\n" + "=" * 60)
print("可用字段列表:")
print("=" * 60)
fields = [
("ship_code", "船舶代码"),
("ship_name", "船名"),
("report_date", "报告日期"),
("month", "所属月份"),
("operation_time", "作业时间"),
("operation_type", "作业类型"),
("teu", "作业箱量 (TEU)"),
("moves", "作业循环 (move)"),
("gross_efficiency", "作业毛效率 (move/车/小时)"),
("net_efficiency", "作业净效率 (move/车/小时)"),
("fault_count", "故障次数"),
("fault_rate", "故障率 (%)"),
("manual_intervention_count", "人工介入次数"),
("manual_intervention_rate", "人工介入率 (%)"),
("page_id", "Confluence页面ID"),
("page_url", "Confluence页面URL"),
]
for field, desc in fields:
print(f" - {field}: {desc}")
print("\n")
async def main():
parser = argparse.ArgumentParser(description="船舶数据导出工具")
parser.add_argument("--format", choices=["json", "csv"], default="json",
help="导出格式 (默认: json)")
parser.add_argument("--output", type=str, default=None,
help="输出文件路径")
parser.add_argument("--analysis", action="store_true",
help="打印数据分析报告")
args = parser.parse_args()
if args.analysis:
await print_analysis()
return
# 设置默认输出文件名
if not args.output:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
args.output = f"ship_data_{timestamp}.{args.format}"
# 执行导出
if args.format == "json":
await export_to_json(args.output)
else:
await export_to_csv(args.output)
if __name__ == "__main__":
asyncio.run(main())