diff --git a/job_crawler/app/services/crawler.py b/job_crawler/app/services/crawler.py index bbc06ed..f8c88f3 100644 --- a/job_crawler/app/services/crawler.py +++ b/job_crawler/app/services/crawler.py @@ -11,11 +11,14 @@ from app.core.config import settings logger = logging.getLogger(__name__) +# 任务完成后重新执行的延迟时间(秒) +RESCHEDULE_DELAY_SECONDS = 3600 # 1小时 + class TaskCrawler: """单个任务采集器 - 从后往前采集""" - def __init__(self, task_config): + def __init__(self, task_config, on_complete_callback=None): self.task_id = task_config.id self.task_name = task_config.name or task_config.id self.batch_size = settings.api.batch_size @@ -24,6 +27,8 @@ class TaskCrawler: self._running = False self._total_filtered = 0 self._total_produced = 0 + self._on_complete_callback = on_complete_callback # 完成回调 + self._scheduled_task: Optional[asyncio.Task] = None # 延迟调度任务 @property def is_running(self) -> bool: