75 lines
1.8 KiB
Python
75 lines
1.8 KiB
Python
|
|
"""日期解析工具"""
|
||
|
|
import re
|
||
|
|
from datetime import datetime, timedelta
|
||
|
|
from typing import Optional
|
||
|
|
|
||
|
|
|
||
|
|
def parse_aae397(date_str: str, *, now: Optional[datetime] = None) -> Optional[datetime]:
    """Parse the publish-date field ``aae397``.

    Supported inputs (surrounding whitespace is ignored):

    - ``"今天"`` ("today"): returns the reference time itself, including its
      time-of-day component.
    - ``"X月X日"`` (month/day without a year), e.g. ``"1月13日"`` or
      ``"12月31日"``: returns midnight of the most recent such calendar date
      that is not later than the reference time.

    The field carries no year, so the year is inferred: the reference year is
    tried first and, if that date is invalid in that year or lies in the
    future, the previous year is tried. Trying the previous year even when
    the current-year date is invalid matters for ``"2月29日"`` read during a
    non-leap year that follows a leap year.

    Args:
        date_str: Raw field value. Empty or ``None`` yields ``None``.
        now: Reference "current" time used for ``"今天"`` and for year
            inference. Defaults to ``datetime.now()``.

    Returns:
        The parsed ``datetime``, or ``None`` when the input is empty, does
        not start with a recognised format, or names a date that exists in
        neither the reference year nor the year before it.
    """
    if not date_str:
        return None

    date_str = date_str.strip()
    today = datetime.now() if now is None else now

    # Handle the literal "today" marker.
    if date_str == "今天":
        return today

    # Handle the "<month>月<day>日" format (matched at the start of the string).
    match = re.match(r"(\d{1,2})月(\d{1,2})日", date_str)
    if match is None:
        return None

    month = int(match.group(1))
    day = int(match.group(2))

    # Prefer the reference year; fall back to the previous year when the
    # date is in the future or does not exist in the reference year.
    for year in (today.year, today.year - 1):
        try:
            # Carry over tzinfo so the comparison below also works when the
            # caller supplies a timezone-aware reference time.
            candidate = datetime(year, month, day, tzinfo=today.tzinfo)
        except ValueError:
            continue
        if candidate <= today:
            return candidate

    return None
|
||
|
|
|
||
|
|
|
||
|
|
def parse_collect_time(date_str: str) -> Optional[datetime]:
    """Parse the collection-time field ``Collect_time``.

    The expected format is ``YYYY-MM-DD`` (for example ``"2026-01-15"``);
    leading and trailing whitespace is ignored.

    Args:
        date_str: Raw field value.

    Returns:
        The parsed ``datetime`` at midnight, or ``None`` when the value is
        empty or does not match the expected format.
    """
    result = None
    if date_str:
        cleaned = date_str.strip()
        try:
            result = datetime.strptime(cleaned, "%Y-%m-%d")
        except ValueError:
            # Malformed or out-of-range date: report as unparseable.
            result = None
    return result
|
||
|
|
|
||
|
|
|
||
|
|
def is_within_days(date_str: str, collect_time_str: str, days: int = 7) -> bool:
    """Tell whether a record falls inside the last ``days`` days.

    Both the publish date (parsed by ``parse_aae397``) and the collection
    time (parsed by ``parse_collect_time``) must be on or after the cutoff,
    which is the current local time minus ``days`` days.

    Args:
        date_str: Raw publish-date field value.
        collect_time_str: Raw collection-time field value.
        days: Size of the look-back window in days.

    Returns:
        ``True`` only when both values parse and neither is earlier than the
        cutoff; ``False`` otherwise.
    """
    # The cutoff keeps the current time-of-day, so it is compared as a full
    # datetime rather than as a calendar date.
    threshold = datetime.now() - timedelta(days=days)

    published = parse_aae397(date_str)
    if published is None:
        return False

    collected = parse_collect_time(collect_time_str)
    if collected is None:
        return False

    # Both timestamps pass exactly when the earlier one does.
    return min(published, collected) >= threshold
|