"""日期解析工具""" import re from datetime import datetime, timedelta from typing import Optional def parse_aae397(date_str: str) -> Optional[datetime]: """ 解析发布日期字段 aae397 支持格式: - "今天" - "1月13日" - "12月31日" """ if not date_str: return None date_str = date_str.strip() today = datetime.now() # 处理 "今天" if date_str == "今天": return today # 处理 "X月X日" 格式 pattern = r"(\d{1,2})月(\d{1,2})日" match = re.match(pattern, date_str) if match: month = int(match.group(1)) day = int(match.group(2)) year = today.year try: parsed_date = datetime(year, month, day) if parsed_date > today: parsed_date = datetime(year - 1, month, day) return parsed_date except ValueError: return None return None def parse_collect_time(date_str: str) -> Optional[datetime]: """ 解析采集时间字段 Collect_time 格式: "2026-01-15" """ if not date_str: return None try: return datetime.strptime(date_str.strip(), "%Y-%m-%d") except ValueError: return None def is_within_days(date_str: str, collect_time_str: str, days: int = 7) -> bool: """ 判断数据是否在指定天数内 条件: 发布日期 AND 采集时间 都在指定天数内 """ today = datetime.now() cutoff_date = today - timedelta(days=days) publish_date = parse_aae397(date_str) if publish_date is None: return False collect_date = parse_collect_time(collect_time_str) if collect_date is None: return False return publish_date >= cutoff_date and collect_date >= cutoff_date