@@ -1,12 +1,12 @@
 """Service modules"""
 from .api_client import api_client, BazhuayuClient
-from .rabbitmq_service import rabbitmq_service, RabbitMQService
+from .kafka_service import kafka_service, KafkaService
 from .progress_store import progress_store, ProgressStore
 from .crawler import crawler_manager, CrawlerManager, TaskCrawler
 
 __all__ = [
     "api_client", "BazhuayuClient",
-    "rabbitmq_service", "RabbitMQService",
+    "kafka_service", "KafkaService",
     "progress_store", "ProgressStore",
     "crawler_manager", "CrawlerManager", "TaskCrawler"
 ]

@@ -4,7 +4,7 @@ import logging
 from typing import Dict, Optional
 from concurrent.futures import ThreadPoolExecutor
 from app.services.api_client import api_client
-from app.services.rabbitmq_service import rabbitmq_service
+from app.services.kafka_service import kafka_service
 from app.services.progress_store import progress_store
 from app.utils import is_within_days
 from app.models import JobData

@@ -134,20 +134,21 @@ class TaskCrawler:
             aae397 = raw.get("aae397", "")
             collect_time = raw.get("Collect_time", "")
             if is_within_days(aae397, collect_time, self.filter_days):
-                job = JobData.from_raw(raw, self.task_id)
+                job = JobData.from_raw(raw)
+                job.task_id = self.task_id
                 filtered_jobs.append(job)
 
         valid_count = len(filtered_jobs)
         expired_count = len(data_list) - valid_count
         self._total_filtered += valid_count
 
-        # Send to RabbitMQ immediately
+        # Send to Kafka immediately
         produced = 0
         if filtered_jobs:
-            produced = rabbitmq_service.produce_batch(filtered_jobs)
+            produced = kafka_service.produce_batch(filtered_jobs)
         self._total_produced += produced
 
-        logger.info(f"[{self.task_name}] offset={offset}, fetched={len(data_list)}, valid={valid_count}, expired={expired_count}, sent to MQ={produced}")
+        logger.info(f"[{self.task_name}] offset={offset}, fetched={len(data_list)}, valid={valid_count}, expired={expired_count}, sent to Kafka={produced}")
 
         return valid_count

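For context on the `from_raw` change above: `task_id` now appears to be an ordinary field assigned after construction. A minimal sketch of what `JobData` might look like, assuming a Pydantic v2 model (suggested by the `model_dump()` call in the KafkaService hunk below); fields other than `id`/`task_id` are illustrative, not taken from the diff.

# Hypothetical sketch of app.models.JobData, assuming a Pydantic v2 model.
# Fields other than id/task_id are illustrative, not taken from the diff.
from typing import Optional
from pydantic import BaseModel


class JobData(BaseModel):
    id: str
    task_id: Optional[str] = None
    aae397: str = ""          # publish-date field checked by is_within_days
    collect_time: str = ""

    @classmethod
    def from_raw(cls, raw: dict) -> "JobData":
        # task_id is no longer passed in; the caller sets it after construction
        return cls(
            id=str(raw.get("_id", "")),
            aae397=raw.get("aae397", ""),
            collect_time=raw.get("Collect_time", ""),
        )
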
@@ -235,7 +236,7 @@ class CrawlerManager:
             return crawler.get_status() if crawler else {}
         return {
             "tasks": [c.get_status() for c in self._crawlers.values()],
-            "queue_size": rabbitmq_service.get_queue_size(),
+            "kafka_lag": kafka_service.get_lag(),
             "running_count": sum(1 for c in self._crawlers.values() if c.is_running)
         }

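`kafka_service.get_lag()` is not shown in this diff. One way a consumer-group lag figure could be computed with kafka-python; the topic, group id, and bootstrap servers here are assumed parameters, not taken from the repo.

# Hypothetical helper that could back kafka_service.get_lag(), using kafka-python.
from kafka import KafkaConsumer, TopicPartition
from kafka.admin import KafkaAdminClient


def consumer_group_lag(bootstrap_servers: str, topic: str, group_id: str) -> int:
    consumer = KafkaConsumer(bootstrap_servers=bootstrap_servers)
    admin = KafkaAdminClient(bootstrap_servers=bootstrap_servers)
    try:
        partitions = consumer.partitions_for_topic(topic) or set()
        tps = [TopicPartition(topic, p) for p in partitions]
        end_offsets = consumer.end_offsets(tps)                  # latest offset per partition
        committed = admin.list_consumer_group_offsets(group_id)  # committed offset per partition
        lag = 0
        for tp in tps:
            meta = committed.get(tp)
            if meta is not None:
                lag += max(end_offsets[tp] - meta.offset, 0)
        return lag
    finally:
        consumer.close()
        admin.close()
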
@@ -66,8 +66,7 @@ class KafkaService:
     def produce(self, job_data: JobData) -> bool:
         """Send a message to Kafka"""
         try:
-            data = job_data.to_dict()
-            future = self.producer.send(self.topic, key=data.get("_id"), value=data)
+            future = self.producer.send(self.topic, key=job_data.id, value=job_data.model_dump())
             future.get(timeout=10)
             return True
         except KafkaError as e:
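
A minimal sketch of how the producer side might be wired with kafka-python, consistent with the calls visible above (`producer.send`, `future.get(timeout=10)`, `KafkaError`); the JSON serializers, constructor arguments, and the `produce_batch` body are assumptions rather than the repo's actual implementation.

# Hypothetical minimal producer wiring; serializer choices, constructor
# arguments, and produce_batch are assumptions, not taken from the diff.
import json
import logging
from typing import List

from kafka import KafkaProducer
from kafka.errors import KafkaError

logger = logging.getLogger(__name__)


class KafkaService:
    def __init__(self, bootstrap_servers: str, topic: str):
        self.topic = topic
        self.producer = KafkaProducer(
            bootstrap_servers=bootstrap_servers,
            key_serializer=lambda k: k.encode("utf-8") if k else None,
            value_serializer=lambda v: json.dumps(v, ensure_ascii=False).encode("utf-8"),
        )

    def produce(self, job_data) -> bool:
        """Send one job record to Kafka, keyed by its id."""
        try:
            future = self.producer.send(self.topic, key=job_data.id, value=job_data.model_dump())
            future.get(timeout=10)  # block until the broker acks (or raise)
            return True
        except KafkaError as e:
            logger.error(f"Kafka produce failed: {e}")
            return False

    def produce_batch(self, jobs: List) -> int:
        """Send a batch of jobs; return how many were acknowledged."""
        sent = sum(1 for job in jobs if self.produce(job))
        self.producer.flush()  # flush any records still buffered in the client
        return sent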