"""Vessel report manager.

Extracts vessel report data from Confluence, including failure counts,
manual-intervention counts and related operation statistics.
"""

import html
import os
import re
from datetime import datetime, timedelta
from typing import Any, Dict, Iterator, List, Optional

from .client import ConfluenceClient


class VesselReportManager:
    """Reads per-vessel operation reports from Confluence.

    Reports are child pages of a monthly statistics page, which in turn is a
    child of ``PARENT_PAGE_ID``. Page bodies are parsed from the Confluence
    storage format with regular expressions.
    """

    # ID of the parent page of the monthly statistics pages
    # ("福州江阴实船作业统计").
    PARENT_PAGE_ID = "137446574"

    # Mapping "YYYY.MM" -> monthly statistics page ID. May be pre-populated to
    # match the actual page structure; it is also filled lazily as a cache.
    # This is a class attribute, so the cache is shared by every instance.
    MONTH_PAGE_MAPPING: Dict[str, str] = {}

    # Storage-format markup of an embedded Jira macro and its JQL parameter.
    _JIRA_MACRO_RE = re.compile(
        r'<ac:structured-macro[^>]*ac:name="jira"[^>]*>(.*?)</ac:structured-macro>',
        re.DOTALL,
    )
    _JQL_PARAM_RE = re.compile(
        r'<ac:parameter[^>]*name="jqlQuery"[^>]*>(.*?)</ac:parameter>',
        re.DOTALL,
    )

    def __init__(self, client: Optional[ConfluenceClient] = None):
        """Initialise the manager.

        Args:
            client: Confluence client instance. When ``None`` a client is
                created from the ``CONFLUENCE_URL`` and ``CONFLUENCE_TOKEN``
                environment variables.

        Raises:
            ValueError: If no client is given and ``CONFLUENCE_TOKEN`` is not
                set (or empty).
        """
        if client is None:
            base_url = os.getenv(
                "CONFLUENCE_URL", "https://confluence.westwell-lab.com"
            )
            token = os.getenv("CONFLUENCE_TOKEN")
            if not token:
                raise ValueError("未设置 CONFLUENCE_TOKEN 环境变量")
            self.client = ConfluenceClient(base_url, token)
        else:
            self.client = client
        # Optional Jira client; see set_jira_client().
        self.jira_client = None

    def set_jira_client(self, jira_client: Any) -> None:
        """Attach a Jira client used as a fallback source for failure counts.

        Args:
            jira_client: Object exposing ``count_issues(jql)``.
        """
        self.jira_client = jira_client

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _find_cell(body: str, label: str, value_pattern: str) -> Optional[str]:
        """Return the first captured value in a table cell after ``label``.

        Args:
            body: Page body in Confluence storage format.
            label: Literal label text to look for (escaped before use).
            value_pattern: Regex fragment with exactly one capturing group,
                matched right after the opening ``<td ...>`` tag.

        Returns:
            The captured text, or ``None`` when nothing matches.
        """
        # NOTE(review): assumes the value sits at the very start of a <td>
        # cell following the label — confirm against real page markup.
        match = re.search(
            rf"{re.escape(label)}.*?<td[^>]*>{value_pattern}", body, re.DOTALL
        )
        return match.group(1) if match else None

    @staticmethod
    def _year_months_between(start: datetime, end: datetime) -> List[str]:
        """Return the "YYYY.MM" key of every month touched by [start, end].

        Returns an empty list when ``start`` is after ``end``.
        """
        keys = []
        year, month = start.year, start.month
        while (year, month) <= (end.year, end.month):
            keys.append(f"{year:04d}.{month:02d}")
            year, month = (year + 1, 1) if month == 12 else (year, month + 1)
        return keys

    @staticmethod
    def _recent_year_months(now: datetime, count: int) -> Iterator[str]:
        """Yield ``count`` "YYYY.MM" keys, newest first, starting at ``now``.

        Steps by calendar month; subtracting multiples of 30 days can visit
        one month twice and skip another (e.g. February from March 31).
        """
        year, month = now.year, now.month
        for _ in range(count):
            yield f"{year:04d}.{month:02d}"
            year, month = (year - 1, 12) if month == 1 else (year, month - 1)

    def _extract_jira_jqls(self, body: str, vessel_number: str) -> List[str]:
        """Collect the JQL queries of Jira macros that target one vessel.

        Args:
            body: Page body in Confluence storage format.
            vessel_number: Vessel voyage number the JQL must filter on.

        Returns:
            Entity-decoded JQL strings containing
            ``实船船次 = "<vessel_number>"``.
        """
        jqls = []
        for macro in self._JIRA_MACRO_RE.findall(body):
            jql_match = self._JQL_PARAM_RE.search(macro)
            if not jql_match:
                continue
            # The storage format is XML, so quotes and angle brackets in the
            # JQL arrive as entities (&quot;, &lt;, &gt;, &amp;).
            jql = html.unescape(jql_match.group(1))
            if f'实船船次 = "{vessel_number}"' in jql:
                jqls.append(jql)
        return jqls

    def _count_issues_from_jira(self, body: str, vessel_number: Optional[str]) -> int:
        """Count issues returned by the page's Jira macros for one vessel.

        Best effort: returns 0 when no Jira client is attached, when
        ``vessel_number`` is empty, or when any query fails.
        """
        if not self.jira_client or not vessel_number:
            return 0
        try:
            return sum(
                self.jira_client.count_issues(jql)
                for jql in self._extract_jira_jqls(body, vessel_number)
            )
        except Exception as e:
            # Deliberately broad: the Jira count is only a fallback figure.
            print(f"从 Jira 查询故障数量失败: {e}")
            return 0

    def _get_monthly_page_id(self, year_month: str) -> Optional[str]:
        """Return the ID of the statistics page for a given month.

        Args:
            year_month: Month in "YYYY.MM" format.

        Returns:
            The page ID, or ``None`` when no matching page is found.
        """
        # Check the cache first.
        if year_month in self.MONTH_PAGE_MAPPING:
            return self.MONTH_PAGE_MAPPING[year_month]

        # Otherwise scan the children of the parent page.
        children = self.client.get_child_pages(self.PARENT_PAGE_ID, limit=100)
        for child in children:
            title = child.get("title", "")
            # Title format: "2025.06 实船作业统计"
            if f"{year_month} 实船作业统计" in title:
                page_id = child.get("id")
                if page_id:
                    self.MONTH_PAGE_MAPPING[year_month] = str(page_id)
                    return str(page_id)
        return None

    def _parse_vessel_page(self, page_data: Dict) -> Optional[Dict[str, Any]]:
        """Parse one vessel report page into a flat dictionary.

        Args:
            page_data: Page dictionary; ``title``, ``id`` and
                ``body.storage.value`` are read from it.

        Returns:
            The parsed vessel data, or ``None`` when the page has no body or
            parsing raises.
        """
        try:
            title = page_data.get("title", "")
            body = page_data.get("body", {}).get("storage", {}).get("value", "")
            if not body:
                return None

            def cell(label: str, value_pattern: str) -> Optional[str]:
                return self._find_cell(body, label, value_pattern)

            def cell_int(label: str) -> int:
                text = cell(label, r"(\d+)")
                return int(text) if text else 0

            def cell_percent(label: str) -> Optional[float]:
                text = cell(label, r"([\d.]+)%")
                return float(text) if text else None

            def cell_text(label: str) -> str:
                text = cell(label, r"([^<]+)")
                return text.strip() if text else ""

            # Vessel voyage number: from the body, falling back to the title.
            vessel_number = cell("船次", r"(\d+)#")
            if not vessel_number:
                title_match = re.search(r"(\d+)#", title)
                if title_match:
                    vessel_number = title_match.group(1)

            vessel_name = cell_text("船名")

            # Operation time: drop nested markup, decode entities and turn
            # non-breaking spaces into plain spaces.
            operation_time = ""
            raw_time = cell("作业时间", r"(.*?)</td>")
            if raw_time:
                operation_time = re.sub(r"<[^>]+>", "", raw_time)
                operation_time = (
                    html.unescape(operation_time).replace("\xa0", " ").strip()
                )

            teu = cell_int("作业箱量 (TEU)")

            # Failure count; when the page reports none, fall back to
            # counting the issues of the embedded Jira macros.
            failures = cell_int("故障次数")
            if failures == 0 and vessel_number:
                failures = self._count_issues_from_jira(body, vessel_number)

            failure_rate = cell_percent("故障率")  # optional on the page
            interventions = cell_int("人工介入次数")
            intervention_rate = cell_percent("人工介入率")  # optional
            vehicles = cell_text("上线车辆")
            moves = cell_int("作业循环")

            efficiency_text = cell("作业净效率", r"([\d.]+)")
            efficiency = float(efficiency_text) if efficiency_text else 0.0

            operation_type = cell_text("作业类型")

            # Operation date from the title, e.g. "FZ 433#实船报告2026.03.01".
            operation_date = None
            date_match = re.search(r"(\d{4})\.(\d{2})\.(\d{2})", title)
            if date_match:
                operation_date = "-".join(date_match.groups())

            # Derive the rates when the page does not state them.
            if failure_rate is None and teu > 0:
                failure_rate = round((failures / (teu / 2)) * 100, 2)
            if intervention_rate is None and teu > 0:
                intervention_rate = round((interventions / (teu / 2)) * 100, 2)

            return {
                "vessel_number": vessel_number,
                "vessel_name": vessel_name,
                "vessel_code": vessel_number,  # kept for existing templates
                "operation_date": operation_date,
                "operation_time": operation_time,
                "teu": teu,
                "failures": failures,
                "failure_rate": failure_rate if failure_rate is not None else 0.0,
                "interventions": interventions,
                "intervention_rate": (
                    intervention_rate if intervention_rate is not None else 0.0
                ),
                "vehicles": vehicles,
                "vehicle_count": len(vehicles.split("、")) if vehicles else 0,
                "moves": moves,
                "efficiency": efficiency,
                "operation_type": operation_type,
                "page_id": page_data.get("id"),
                "page_title": title,
            }
        except Exception as e:
            # Deliberately broad: one malformed page must not abort a listing.
            print(f"解析页面数据失败: {e}")
            return None

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def get_vessel_reports_by_month(self, year_month: str) -> List[Dict[str, Any]]:
        """Return all vessel reports of one month.

        Args:
            year_month: Month in "YYYY.MM" format.

        Returns:
            Parsed reports sorted by vessel number; empty when the monthly
            page is not found.
        """
        month_page_id = self._get_monthly_page_id(year_month)
        if not month_page_id:
            print(f"未找到 {year_month} 的统计页面")
            return []

        # Fetch every vessel report page below the monthly page.
        vessel_pages = self.client.get_child_pages(
            month_page_id, limit=100, expand="body.storage"
        )
        reports = []
        for page in vessel_pages:
            report = self._parse_vessel_page(page)
            if report:
                reports.append(report)

        # Sort numerically by vessel number; reports without one come first.
        reports.sort(key=lambda r: int(r.get("vessel_number") or 0))
        return reports

    def get_vessel_report_by_number(
        self, vessel_number: str, lookback_months: int = 3
    ) -> Optional[Dict[str, Any]]:
        """Find a vessel report by voyage number in the most recent months.

        Args:
            vessel_number: Voyage number, e.g. "433".
            lookback_months: Number of calendar months to search, counting
                the current one.

        Returns:
            The first matching report, or ``None`` when not found.
        """
        wanted = str(vessel_number)
        for year_month in self._recent_year_months(datetime.now(), lookback_months):
            for report in self.get_vessel_reports_by_month(year_month):
                if report.get("vessel_number") == wanted:
                    return report
        return None

    def get_vessel_reports_in_range(
        self, start_date: str, end_date: str
    ) -> List[Dict[str, Any]]:
        """Return all vessel reports whose operation date is within a range.

        Args:
            start_date: First day, "YYYY-MM-DD" (inclusive).
            end_date: Last day, "YYYY-MM-DD" (inclusive).

        Returns:
            Matching reports ordered by month, then vessel number; empty when
            either date cannot be parsed.
        """
        try:
            start = datetime.strptime(start_date, "%Y-%m-%d")
            end = datetime.strptime(end_date, "%Y-%m-%d")
        except ValueError as e:
            print(f"日期解析错误: {e}")
            return []

        filtered_reports = []
        for year_month in self._year_months_between(start, end):
            for report in self.get_vessel_reports_by_month(year_month):
                report_date = report.get("operation_date")
                if not report_date:
                    continue
                try:
                    report_dt = datetime.strptime(report_date, "%Y-%m-%d")
                except ValueError:
                    # The title held an impossible date such as 2026.13.45.
                    continue
                if start <= report_dt <= end:
                    filtered_reports.append(report)
        return filtered_reports

    def get_vessel_reports_by_date(self, date_str: str) -> List[Dict[str, Any]]:
        """Return the vessel reports of a single day.

        Args:
            date_str: Day in "YYYY-MM-DD" format.

        Returns:
            Reports whose operation date equals ``date_str``.
        """
        return self.get_vessel_reports_in_range(date_str, date_str)

    def get_vessel_page_id(self, vessel_number: str, year_month: str) -> Optional[str]:
        """Return the page ID of a vessel's report within one month.

        Args:
            vessel_number: Voyage number.
            year_month: Month in "YYYY.MM" format.

        Returns:
            The page ID, or ``None`` when not found.
        """
        month_page_id = self._get_monthly_page_id(year_month)
        if not month_page_id:
            return None

        # Title format: "FZ 433#实船报告...". The look-behind stops "33" from
        # matching inside "433#".
        number_re = re.compile(rf"(?<!\d){re.escape(str(vessel_number))}#")
        vessel_pages = self.client.get_child_pages(month_page_id, limit=100)
        for page in vessel_pages:
            if number_re.search(page.get("title", "")):
                return page.get("id")
        return None