diff --git a/job_crawler/app/services/crawler.py b/job_crawler/app/services/crawler.py
index 70dbdbd..6e8bf5b 100644
--- a/job_crawler/app/services/crawler.py
+++ b/job_crawler/app/services/crawler.py
@@ -125,6 +125,7 @@ class TaskCrawler:
         result = await api_client.fetch_data(self.task_id, offset, self.batch_size)
         data_list = result.get("data", {}).get("data", [])
         if not data_list:
+            logger.info(f"[{self.task_name}] offset={offset}, no data returned")
             return 0
 
         # Filter the data
@@ -138,13 +139,16 @@ class TaskCrawler:
                 filtered_jobs.append(job)
 
         valid_count = len(filtered_jobs)
+        expired_count = len(data_list) - valid_count
         self._total_filtered += valid_count
 
         # Send to Kafka immediately
+        produced = 0
         if filtered_jobs:
             produced = kafka_service.produce_batch(filtered_jobs)
             self._total_produced += produced
-            logger.debug(f"[{self.task_name}] offset={offset}, filtered={valid_count}, sent={produced}")
+
+        logger.info(f"[{self.task_name}] offset={offset}, fetched={len(data_list)}, valid={valid_count}, expired={expired_count}, sent_to_kafka={produced}")
 
         return valid_count