- Add technical documentation (技术方案.md) with system architecture and design details
- Create FastAPI application structure with modular organization (api, core, models, services, utils)
- Implement job data crawler service with incremental collection from third-party API
- Add Kafka service integration with Docker Compose configuration for message queue
- Create data models for job listings, progress tracking, and API responses
- Implement REST API endpoints for data consumption (/consume, /status) and task management
- Add progress persistence layer using SQLite for tracking collection offsets
- Implement date filtering logic to extract data published within 7 days (see the sketch below)
- Create API client service for third-party data source integration
- Add configuration management with environment-based settings
- Include Docker support with Dockerfile and docker-compose.yml for containerized deployment
- Add logging configuration and utility functions for date parsing
- Include requirements.txt with all Python dependencies and README documentation
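The 7-day window mentioned above is driven by crawler.filter_days in the configuration module shown further down. The following is a minimal, hypothetical sketch of that kind of filter; the field name "publish_time" and its timestamp format are assumptions for illustration, not the project's actual schema:

# Illustrative sketch only: a 7-day publish-date filter of the kind described
# in the commit message. "publish_time" and its format are assumed.
from datetime import datetime, timedelta
from typing import Iterable, List


def filter_recent(records: Iterable[dict], filter_days: int = 7) -> List[dict]:
    """Keep only records published within the last `filter_days` days."""
    cutoff = datetime.now() - timedelta(days=filter_days)
    recent = []
    for record in records:
        raw = record.get("publish_time")
        if not raw:
            continue  # skip records without a publish date
        try:
            published = datetime.strptime(raw, "%Y-%m-%d %H:%M:%S")
        except ValueError:
            continue  # skip unparseable dates rather than failing the batch
        if published >= cutoff:
            recent.append(record)
    return recent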
"""配置管理"""
|
|
import os
|
|
import yaml
|
|
from typing import Optional, List
|
|
from pydantic import BaseModel
|
|
from functools import lru_cache
|
|
|
|
|
|
class AppConfig(BaseModel):
|
|
name: str = "job-crawler"
|
|
version: str = "1.0.0"
|
|
debug: bool = False
|
|
|
|
|
|
class TaskConfig(BaseModel):
|
|
"""单个任务配置"""
|
|
id: str
|
|
name: str = ""
|
|
enabled: bool = True
|
|
|
|
|
|
class ApiConfig(BaseModel):
|
|
base_url: str = "https://openapi.bazhuayu.com"
|
|
username: str = ""
|
|
password: str = ""
|
|
batch_size: int = 100
|
|
tasks: List[TaskConfig] = []
|
|
|
|
|
|
class KafkaConfig(BaseModel):
|
|
bootstrap_servers: str = "localhost:9092"
|
|
topic: str = "job_data"
|
|
consumer_group: str = "job_consumer_group"
|
|
|
|
|
|
class CrawlerConfig(BaseModel):
|
|
interval: int = 300
|
|
filter_days: int = 7
|
|
max_workers: int = 5
|
|
|
|
|
|
class DatabaseConfig(BaseModel):
|
|
path: str = "data/crawl_progress.db"
|
|
|
|
|
|
class Settings(BaseModel):
|
|
"""应用配置"""
|
|
app: AppConfig = AppConfig()
|
|
api: ApiConfig = ApiConfig()
|
|
kafka: KafkaConfig = KafkaConfig()
|
|
crawler: CrawlerConfig = CrawlerConfig()
|
|
database: DatabaseConfig = DatabaseConfig()
|
|
|
|
@classmethod
|
|
def from_yaml(cls, config_path: str) -> "Settings":
|
|
"""从YAML文件加载配置"""
|
|
if not os.path.exists(config_path):
|
|
return cls()
|
|
|
|
with open(config_path, 'r', encoding='utf-8') as f:
|
|
data = yaml.safe_load(f) or {}
|
|
|
|
# 解析tasks
|
|
api_data = data.get('api', {})
|
|
tasks_data = api_data.pop('tasks', [])
|
|
tasks = [TaskConfig(**t) for t in tasks_data]
|
|
api_config = ApiConfig(**api_data, tasks=tasks)
|
|
|
|
return cls(
|
|
app=AppConfig(**data.get('app', {})),
|
|
api=api_config,
|
|
kafka=KafkaConfig(**data.get('kafka', {})),
|
|
crawler=CrawlerConfig(**data.get('crawler', {})),
|
|
database=DatabaseConfig(**data.get('database', {}))
|
|
)
|
|
|
|
def get_enabled_tasks(self) -> List[TaskConfig]:
|
|
"""获取启用的任务列表"""
|
|
return [t for t in self.api.tasks if t.enabled]
|
|
|
|
|
|
@lru_cache()
|
|
def get_settings() -> Settings:
|
|
"""获取配置"""
|
|
config_path = os.environ.get("CONFIG_PATH", "config/config.yml")
|
|
return Settings.from_yaml(config_path)
|
|
|
|
|
|
settings = get_settings()
|
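A short usage sketch of the module above; the import path `core.config` is an assumption about the package layout, not taken from the repository:

# Usage sketch; the import path is assumed, adjust it to the actual package layout.
from core.config import Settings, get_settings

# Load from an explicit path...
settings = Settings.from_yaml("config/config.yml")

# ...or through the cached accessor, which honours the CONFIG_PATH env var.
settings = get_settings()

for task in settings.get_enabled_tasks():
    print(task.id, task.name or "(unnamed)")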