rabbitmq
@@ -1,60 +1,24 @@
 """Recruitment data models"""
 from pydantic import BaseModel
 from datetime import datetime
 import uuid
 
 
-class JobData(BaseModel):
-    """Recruitment data model"""
-    id: str = ""
-    task_id: str = ""  # task ID
-    job_category: str = ""  # Std_class - job category
-    job_title: str = ""  # aca112 - job title
-    company: str = ""  # AAB004 - company name
-    company_type: str = ""  # AAB019 - company type
-    salary: str = ""  # acb241 - salary range
-    location: str = ""  # aab302 - work location
-    address: str = ""  # AAE006 - detailed address
-    publish_date: str = ""  # aae397 - publish date
-    collect_time: str = ""  # Collect_time - collection time
-    url: str = ""  # ACE760 - job posting URL
-    description: str = ""  # acb22a - job description
-    experience: str = ""  # Experience - experience requirement
-    education: str = ""  # aac011 - education requirement
-    headcount: str = ""  # acb240 - number of openings
-    industry: str = ""  # AAB022 - industry
-    company_size: str = ""  # Num_employers - company size
-    contact: str = ""  # AAE004 - contact person
-    company_intro: str = ""  # AAB092 - company introduction
-    crawl_time: str = ""  # time stored in the database
+class JobData:
+    """Recruitment data - keeps the raw data format"""
 
-    def __init__(self, **data):
-        super().__init__(**data)
-        if not self.id:
-            self.id = str(uuid.uuid4())
-        if not self.crawl_time:
-            self.crawl_time = datetime.now().isoformat()
+    def __init__(self, raw_data: dict, task_id: str = ""):
+        self.raw_data = raw_data
+        self.task_id = task_id
+        # attach metadata
+        self.raw_data["_id"] = str(uuid.uuid4())
+        self.raw_data["_task_id"] = task_id
+        self.raw_data["_crawl_time"] = datetime.now().isoformat()
+
+    def to_dict(self) -> dict:
+        """Convert to a dict (raw data + metadata)"""
+        return self.raw_data
 
     @classmethod
-    def from_raw(cls, raw: dict) -> "JobData":
-        """Convert from raw API data"""
-        return cls(
-            job_category=raw.get("Std_class", ""),
-            job_title=raw.get("aca112", ""),
-            company=raw.get("AAB004", ""),
-            company_type=raw.get("AAB019", "").strip(),
-            salary=raw.get("acb241", ""),
-            location=raw.get("aab302", ""),
-            address=raw.get("AAE006", ""),
-            publish_date=raw.get("aae397", ""),
-            collect_time=raw.get("Collect_time", ""),
-            url=raw.get("ACE760", ""),
-            description=raw.get("acb22a", ""),
-            experience=raw.get("Experience", ""),
-            education=raw.get("aac011", ""),
-            headcount=raw.get("acb240", ""),
-            industry=raw.get("AAB022", ""),
-            company_size=raw.get("Num_employers", ""),
-            contact=raw.get("AAE004", ""),
-            company_intro=raw.get("AAB092", ""),
-        )
+    def from_raw(cls, raw: dict, task_id: str = "") -> "JobData":
+        """Create from raw API data"""
+        return cls(raw.copy(), task_id)
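For reference, a minimal usage sketch of the refactored JobData (not part of the commit): from_raw copies the raw payload, and __init__ injects the _id, _task_id and _crawl_time metadata keys before to_dict hands the raw dict back. The sample raw keys (aca112, AAB004, acb241) are taken from the field comments of the removed Pydantic model, and the models import path is an assumption.

from models import JobData  # assumed module path

# Illustrative raw payload; key names follow the comments in the old model.
raw = {
    "aca112": "Python Developer",  # job title
    "AAB004": "Example Co.",       # company name
    "acb241": "15k-25k",           # salary range
}

job = JobData.from_raw(raw, task_id="task-001")
doc = job.to_dict()

print(doc["aca112"])       # original field, untouched
print(doc["_id"])          # generated UUID
print(doc["_task_id"])     # "task-001"
print(doc["_crawl_time"])  # ISO-8601 crawl timestamp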
@@ -18,7 +18,7 @@ class CrawlStatus(BaseModel):
     total: int
     last_start_offset: Optional[int] = None
     progress: str
-    kafka_lag: int = 0
+    queue_size: int = 0
     status: str
     last_update: str
     filtered_count: int = 0
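In the CrawlStatus hunk, the Kafka-specific kafka_lag counter gives way to a generic queue_size, in line with the switch to RabbitMQ. Below is a hedged sketch of how queue_size could be filled from a RabbitMQ queue depth using pika; the queue name, connection parameters, module path, and the sample counter values are assumptions, and only the CrawlStatus fields come from the hunk above.

from datetime import datetime

import pika

from models import CrawlStatus  # assumed module path

connection = pika.BlockingConnection(pika.ConnectionParameters(host="localhost"))
channel = connection.channel()

# passive=True only inspects the existing queue; the DeclareOk frame reports its depth.
declare_ok = channel.queue_declare(queue="job_data", passive=True)

status = CrawlStatus(
    total=1000,                                  # sample values for illustration
    progress="350/1000",
    queue_size=declare_ok.method.message_count,  # messages waiting in RabbitMQ
    status="running",
    last_update=datetime.now().isoformat(),
)
connection.close()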