Files
ocups-kafka/job_crawler/app/utils/date_parser.py
李顺东 ae681575b9 feat(job_crawler): initialize job crawler service with kafka integration
- Add technical documentation (技术方案.md) with system architecture and design details
- Create FastAPI application structure with modular organization (api, core, models, services, utils)
- Implement job data crawler service with incremental collection from third-party API
- Add Kafka service integration with Docker Compose configuration for message queue
- Create data models for job listings, progress tracking, and API responses
- Implement REST API endpoints for data consumption (/consume, /status) and task management
- Add progress persistence layer using SQLite for tracking collection offsets
- Implement date filtering logic to extract data published within 7 days
- Create API client service for third-party data source integration
- Add configuration management with environment-based settings
- Include Docker support with Dockerfile and docker-compose.yml for containerized deployment
- Add logging configuration and utility functions for date parsing
- Include requirements.txt with all Python dependencies and README documentation
2026-01-15 17:09:43 +08:00

75 lines
1.8 KiB
Python

"""日期解析工具"""
import re
from datetime import datetime, timedelta
from typing import Optional
def parse_aae397(date_str: str) -> Optional[datetime]:
    """Parse the publish-date field ``aae397`` into a datetime.

    Supported formats:
        - "今天" (today): returns the current local date and time.
        - "X月X日" (month/day, e.g. "1月13日", "12月31日"): returns midnight
          of the most recent such calendar day that is not in the future,
          considering the current year first and then the previous year.

    Args:
        date_str: Raw field value; surrounding whitespace is ignored.

    Returns:
        The parsed datetime, or None when the value is empty, is in an
        unsupported format, or names a day that is not a valid past date
        in either the current or the previous year.
    """
    if not date_str:
        return None

    text = date_str.strip()
    now = datetime.now()

    if text == "今天":
        return now

    found = re.match(r"(\d{1,2})月(\d{1,2})日", text)
    if found is None:
        return None

    month = int(found.group(1))
    day = int(found.group(2))

    # The field carries no year, so pick the latest year (current, then
    # previous) in which the day exists and is not after "now".
    for year in (now.year, now.year - 1):
        try:
            candidate = datetime(year, month, day)
        except ValueError:
            # Out-of-range month/day, or Feb 29 in a non-leap year. Each
            # year is tried on its own so that an invalid day in the
            # current year does not prevent trying the previous one.
            continue
        if candidate <= now:
            return candidate
    return None
def parse_collect_time(date_str: str) -> Optional[datetime]:
    """Parse the collection-time field ``Collect_time``.

    The expected format is ``YYYY-MM-DD`` (e.g. "2026-01-15"); surrounding
    whitespace is ignored.

    Args:
        date_str: Raw field value.

    Returns:
        A datetime at midnight of the given day, or None when the value is
        empty or does not match the expected format.
    """
    if date_str:
        trimmed = date_str.strip()
        try:
            parsed = datetime.strptime(trimmed, "%Y-%m-%d")
        except ValueError:
            parsed = None
        return parsed
    return None
def is_within_days(date_str: str, collect_time_str: str, days: int = 7) -> bool:
    """Tell whether a record falls inside the recent time window.

    A record qualifies only when BOTH its publish date and its collection
    time are no earlier than the current local time minus ``days`` days.
    The cutoff keeps the current time of day.

    Args:
        date_str: Publish-date field, parsed with ``parse_aae397``.
        collect_time_str: Collection-time field, parsed with
            ``parse_collect_time``.
        days: Size of the window in days.

    Returns:
        True when both values parse and neither is older than the cutoff;
        False otherwise, including when either value cannot be parsed.
    """
    threshold = datetime.now() - timedelta(days=days)

    published = parse_aae397(date_str)
    if published is None:
        return False

    collected = parse_collect_time(collect_time_str)
    if collected is None:
        return False

    return all(moment >= threshold for moment in (published, collected))