ocups-kafka/job_crawler/app/core/config.py
李顺东 ae681575b9 feat(job_crawler): initialize job crawler service with kafka integration
- Add technical documentation (技术方案.md) with system architecture and design details
- Create FastAPI application structure with modular organization (api, core, models, services, utils)
- Implement job data crawler service with incremental collection from third-party API
- Add Kafka service integration with Docker Compose configuration for message queue
- Create data models for job listings, progress tracking, and API responses
- Implement REST API endpoints for data consumption (/consume, /status) and task management
- Add progress persistence layer using SQLite for tracking collection offsets
- Implement date filtering logic to keep only data published within the last 7 days (see the sketch below the commit metadata)
- Create API client service for third-party data source integration
- Add configuration management with environment-based settings
- Include Docker support with Dockerfile and docker-compose.yml for containerized deployment
- Add logging configuration and utility functions for date parsing
- Include requirements.txt with all Python dependencies and README documentation
2026-01-15 17:09:43 +08:00
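The 7-day filter called out in the bullets above reduces to a small timestamp predicate. A minimal sketch of that idea follows; the helper name is_within_days, its signature, and the UTC normalization are hypothetical assumptions, not the repository's actual utility code.

# Hypothetical sketch of the 7-day publication filter described above;
# the helper name and timezone handling are assumptions, not the repo's code.
from datetime import datetime, timedelta, timezone

def is_within_days(published_at: datetime, days: int = 7) -> bool:
    """Return True if published_at falls within the last `days` days."""
    # Assumption: treat naive timestamps as UTC so the comparison is well-defined.
    if published_at.tzinfo is None:
        published_at = published_at.replace(tzinfo=timezone.utc)
    cutoff = datetime.now(timezone.utc) - timedelta(days=days)
    return published_at >= cutoff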


"""配置管理"""
import os
import yaml
from typing import Optional, List
from pydantic import BaseModel
from functools import lru_cache
class AppConfig(BaseModel):
name: str = "job-crawler"
version: str = "1.0.0"
debug: bool = False
class TaskConfig(BaseModel):
"""单个任务配置"""
id: str
name: str = ""
enabled: bool = True
class ApiConfig(BaseModel):
base_url: str = "https://openapi.bazhuayu.com"
username: str = ""
password: str = ""
batch_size: int = 100
tasks: List[TaskConfig] = []
class KafkaConfig(BaseModel):
bootstrap_servers: str = "localhost:9092"
topic: str = "job_data"
consumer_group: str = "job_consumer_group"
class CrawlerConfig(BaseModel):
interval: int = 300
filter_days: int = 7
max_workers: int = 5
class DatabaseConfig(BaseModel):
path: str = "data/crawl_progress.db"
class Settings(BaseModel):
"""应用配置"""
app: AppConfig = AppConfig()
api: ApiConfig = ApiConfig()
kafka: KafkaConfig = KafkaConfig()
crawler: CrawlerConfig = CrawlerConfig()
database: DatabaseConfig = DatabaseConfig()
@classmethod
def from_yaml(cls, config_path: str) -> "Settings":
"""从YAML文件加载配置"""
if not os.path.exists(config_path):
return cls()
with open(config_path, 'r', encoding='utf-8') as f:
data = yaml.safe_load(f) or {}
# 解析tasks
api_data = data.get('api', {})
tasks_data = api_data.pop('tasks', [])
tasks = [TaskConfig(**t) for t in tasks_data]
api_config = ApiConfig(**api_data, tasks=tasks)
return cls(
app=AppConfig(**data.get('app', {})),
api=api_config,
kafka=KafkaConfig(**data.get('kafka', {})),
crawler=CrawlerConfig(**data.get('crawler', {})),
database=DatabaseConfig(**data.get('database', {}))
)
def get_enabled_tasks(self) -> List[TaskConfig]:
"""获取启用的任务列表"""
return [t for t in self.api.tasks if t.enabled]
@lru_cache()
def get_settings() -> Settings:
"""获取配置"""
config_path = os.environ.get("CONFIG_PATH", "config/config.yml")
return Settings.from_yaml(config_path)
settings = get_settings()
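
For reference, here is what a config.yml accepted by Settings.from_yaml can look like, exercised end to end through a temporary file. This is a usage sketch: the import path app.core.config is inferred from the file path above, and the credentials and task ids are illustrative placeholders.

# Usage sketch for Settings.from_yaml; all concrete values are placeholders.
import os
import tempfile

from app.core.config import Settings  # assumed import path, per the file location above

SAMPLE_YAML = """\
app:
  name: job-crawler
  debug: true
api:
  base_url: https://openapi.bazhuayu.com
  username: demo-user
  password: demo-pass
  batch_size: 50
  tasks:
    - id: task-001
      name: example task
      enabled: true
    - id: task-002
      enabled: false
kafka:
  bootstrap_servers: localhost:9092
  topic: job_data
crawler:
  interval: 600
  filter_days: 7
database:
  path: data/crawl_progress.db
"""

with tempfile.NamedTemporaryFile("w", suffix=".yml", delete=False, encoding="utf-8") as f:
    f.write(SAMPLE_YAML)
    path = f.name

try:
    cfg = Settings.from_yaml(path)
    assert cfg.api.batch_size == 50
    # Only task-001 is enabled, so get_enabled_tasks filters task-002 out.
    assert [t.id for t in cfg.get_enabled_tasks()] == ["task-001"]
finally:
    os.unlink(path)

Loading through a throwaway file keeps the example self-contained; in the service itself the path comes from the CONFIG_PATH environment variable handled by get_settings.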