Files
Orbitin/src/parser.py

173 lines
5.6 KiB
Python
Raw Normal View History

2025-12-28 23:31:22 +08:00
#!/usr/bin/env python3
"""
日志解析模块
"""
import re
from typing import List, Dict, Optional
from dataclasses import dataclass
@dataclass
class ShipLog:
    """One ship-operation record extracted from a shift-handover log.

    Attributes:
        date: Operation date string; the parser supplies it as
            ``YYYY-MM-DD``.
        shift: Shift label the record was found under (``白班`` or ``夜班``
            when produced by the parser).
        ship_name: Ship name with the berth/voyage number prefix removed.
        teu: Handled volume in TEU, or ``None`` when the log did not
            contain one.
        efficiency: Handling efficiency, or ``None`` when not available.
        vehicles: Number of vehicles on site, or ``None`` when the log did
            not contain one.
    """

    date: str
    shift: str
    ship_name: str
    teu: Optional[int] = None
    efficiency: Optional[float] = None
    vehicles: Optional[int] = None

    def to_dict(self) -> Dict:
        """Return the record as a plain dict keyed by field name.

        Keys appear in field-declaration order.
        """
        return {
            'date': self.date,
            'shift': self.shift,
            'ship_name': self.ship_name,
            'teu': self.teu,
            'efficiency': self.efficiency,
            'vehicles': self.vehicles,
        }
class HandoverLogParser:
    """Parser for shift-handover log text.

    Expected layout, as far as this parser relies on it:

    * The text is a sequence of blocks separated by ``SEPARATOR``.
    * A block is only considered when it contains ``日期:YYYY.MM.DD``.
    * Inside a block, the text following a shift label (``白班`` / ``夜班``)
      up to the next *other* shift label belongs to that shift.
    * Inside a shift, ship sections are introduced by ``实船作业:``; each
      section is searched for ``<digits>#<ship name>``, ``上场车辆数:<n>`` and
      ``作业量/效率:<n>TEU``.

    Field labels are accepted with either an ASCII (``:``) or a full-width
    (``:``) colon.
    """

    SEPARATOR = '———————————————————————————————————————————————'

    # Shift labels, in the order they are searched for inside a block.
    _SHIFTS = ('白班', '夜班')

    # Patterns are compiled once here instead of on every loop iteration.
    _DATE_RE = re.compile(r'日期[::](\d{4}\.\d{2}\.\d{2})')
    _SHIP_SPLIT_RE = re.compile(r'实船作业[::]')
    # "<number># <name>": group 1 is the number, group 2 the ship name.
    _SHIP_RE = re.compile(r'(\d+)#\s*(\S+)')
    _VEHICLES_RE = re.compile(r'上场车辆数[::](\d+)')
    _TEU_RE = re.compile(r'作业量/效率[::](\d+)TEU')
    # Re-berthing annotations. The parenthesised forms (full-width and
    # ASCII) must come before the bare form so the brackets are removed
    # together with the text instead of being left behind.
    _BERTHING_NOTE_RE = re.compile(
        r'((?:二次|再次)靠泊)'
        r'|\((?:二次|再次)靠泊\)'
        r'|(?:二次|再次)靠泊'
    )

    def __init__(self):
        """Create a parser; it holds no per-instance state."""
        pass

    @staticmethod
    def parse_date(date_str: str) -> str:
        """Convert ``YYYY.MM.DD`` into ``YYYY-MM-DD``.

        Args:
            date_str: Date text using ``.`` as the separator.

        Returns:
            The three dot-separated parts joined with ``-``. The input is
            returned unchanged when it does not split into exactly three
            parts, or when it is not a ``str``.
        """
        try:
            parts = date_str.split('.')
        except (AttributeError, TypeError):
            # Non-str input: keep the original best-effort pass-through.
            return date_str
        if len(parts) == 3:
            return '-'.join(parts)
        return date_str

    @staticmethod
    def _add_optional(current: Optional[int], extra: Optional[int]) -> Optional[int]:
        """Add two optional counters, treating ``None`` as "no value"."""
        if extra is None:
            return current
        if current is None:
            return extra
        return current + extra

    @classmethod
    def _strip_berthing_notes(cls, ship_name: str) -> str:
        """Remove "二次靠泊" / "再次靠泊" annotations from a ship name."""
        return cls._BERTHING_NOTE_RE.sub('', ship_name).strip()

    def parse(self, text: str) -> List["ShipLog"]:
        """Parse handover log text into ship records.

        Args:
            text: Full log text.

        Returns:
            Ship records in first-seen order. Records sharing the same
            date, shift and ship name are merged into one, with their TEU
            and vehicle counts summed.
        """
        logs: List["ShipLog"] = []
        for block in text.split(self.SEPARATOR):
            # Blocks without a date line (including blank ones) are skipped.
            date_match = self._DATE_RE.search(block)
            if date_match is None:
                continue
            date = self.parse_date(date_match.group(1))
            self._parse_block(block, date, logs)
        return self._merge(logs)

    @classmethod
    def _merge(cls, logs: List["ShipLog"]) -> List["ShipLog"]:
        """Combine records with the same (date, shift, ship name) key.

        TEU and vehicle counts are summed; ``efficiency`` is taken from the
        first record of each group. The input records are not modified.
        """
        merged: Dict[tuple, "ShipLog"] = {}
        for log in logs:
            key = (log.date, log.shift, log.ship_name)
            existing = merged.get(key)
            if existing is None:
                # Store a copy so summing never mutates the caller's records.
                merged[key] = ShipLog(
                    date=log.date,
                    shift=log.shift,
                    ship_name=log.ship_name,
                    teu=log.teu,
                    efficiency=log.efficiency,
                    vehicles=log.vehicles,
                )
                continue
            # "is None" checks (rather than truthiness) keep a parsed 0
            # distinct from a missing value regardless of record order.
            existing.teu = cls._add_optional(existing.teu, log.teu)
            existing.vehicles = cls._add_optional(existing.vehicles, log.vehicles)
        return list(merged.values())

    def _parse_block(self, block: str, date: str, logs: List["ShipLog"]) -> None:
        """Split one dated block by shift and parse each shift's ships.

        Args:
            block: Text of a single dated block.
            date: Already formatted date of the block.
            logs: Output list; parsed records are appended to it.
        """
        for shift in self._SHIFTS:
            label_pos = block.find(shift)
            if label_pos == -1:
                continue
            start = label_pos + len(shift)
            # The shift's text ends at the next occurrence of a different
            # shift label, or at the end of the block; nothing else (such
            # as a notes section) acts as a boundary.
            end = len(block)
            for other in self._SHIFTS:
                if other == shift:
                    continue
                pos = block.find(other, start)
                if pos != -1 and pos < end:
                    end = pos
            self._parse_ships(block[start:end], date, shift, logs)

    def _parse_ships(self, content: str, date: str, shift: str, logs: List["ShipLog"]) -> None:
        """Extract ship records from the text of one shift.

        Args:
            content: Text belonging to a single shift.
            date: Date to stamp on each record.
            shift: Shift label to stamp on each record.
            logs: Output list; one record is appended per section that
                contains a ``<digits>#<name>`` ship header.
        """
        for part in self._SHIP_SPLIT_RE.split(content):
            # Non-breaking spaces are normalised to plain spaces first.
            cleaned = part.replace('\xa0', ' ').strip()
            if not cleaned:
                continue
            ship_match = self._SHIP_RE.search(cleaned)
            if ship_match is None:
                continue
            # Keep only the name: the "<digits>#" prefix is outside group 2
            # and re-berthing annotations are stripped.
            ship_name = self._strip_berthing_notes(ship_match.group(2))
            vehicles_match = self._VEHICLES_RE.search(cleaned)
            teu_match = self._TEU_RE.search(cleaned)
            logs.append(
                ShipLog(
                    date=date,
                    shift=shift,
                    ship_name=ship_name,
                    teu=int(teu_match.group(1)) if teu_match else None,
                    # Efficiency is not extracted from the log text.
                    efficiency=None,
                    vehicles=int(vehicles_match.group(1)) if vehicles_match else None,
                )
            )
def main() -> None:
    """Manual smoke test: parse ``layout_output.txt`` and print a summary.

    Reads the file from the current working directory as UTF-8, prints the
    number of parsed records, then prints the first five records.
    """
    with open('layout_output.txt', 'r', encoding='utf-8') as f:
        text = f.read()

    logs = HandoverLogParser().parse(text)
    print(f'解析到 {len(logs)} 条记录')
    for log in logs[:5]:
        print(f'{log.date} {log.shift} {log.ship_name}: {log.teu}TEU')


if __name__ == '__main__':
    main()