# ocups-kafka/job_crawler/app/models/job.py

"""招聘数据模型"""
from pydantic import BaseModel
from datetime import datetime
import uuid


class JobData(BaseModel):
    """Normalized job-posting record.

    Every field is a string defaulting to ``""``. The trailing comment on
    each field names the key of the raw API payload that ``from_raw`` reads
    it from. ``id`` and ``crawl_time`` are filled in automatically at
    construction time when they are left empty.
    """
    id: str = ""                    # record ID; a UUID4 string is generated when empty
    task_id: str = ""               # task ID (not populated by from_raw)
    job_category: str = ""          # Std_class - job category
    job_title: str = ""             # aca112 - job title
    company: str = ""               # AAB004 - company name
    company_type: str = ""          # AAB019 - company type
    salary: str = ""                # acb241 - salary range
    location: str = ""              # aab302 - work location
    address: str = ""               # AAE006 - detailed address
    publish_date: str = ""          # aae397 - publish date
    collect_time: str = ""          # Collect_time - collection time
    url: str = ""                   # ACE760 - job posting link
    description: str = ""           # acb22a - job description
    experience: str = ""            # Experience - experience requirement
    education: str = ""             # aac011 - education requirement
    headcount: str = ""             # acb240 - number of openings
    industry: str = ""              # AAB022 - industry
    company_size: str = ""          # Num_employers - company size
    contact: str = ""               # AAE004 - contact person
    company_intro: str = ""         # AAB092 - company introduction
    crawl_time: str = ""            # ingestion time; set at construction when empty

    def __init__(self, **data):
        """Build the model, then fill in ``id`` and ``crawl_time`` if empty.

        An empty/missing ``id`` is replaced by a fresh UUID4 string, and an
        empty/missing ``crawl_time`` by ``datetime.now().isoformat()``
        (naive local time).
        """
        super().__init__(**data)
        if not self.id:
            self.id = str(uuid.uuid4())
        if not self.crawl_time:
            self.crawl_time = datetime.now().isoformat()

    @classmethod
    def from_raw(cls, raw: dict) -> "JobData":
        """Build a ``JobData`` from a raw API record.

        Args:
            raw: Mapping keyed by the raw API field names (``Std_class``,
                ``aca112``, ...). Missing keys and ``None`` values both
                become ``""``; non-string values are converted with
                ``str()``.

        Returns:
            A new ``JobData``; ``task_id`` is left at its default, while
            ``id`` and ``crawl_time`` are generated by ``__init__``.
        """
        def text(key: str) -> str:
            # ``raw.get(key, "")`` only covers a *missing* key; a key that is
            # present with a null value would still yield None and break
            # ``.strip()`` / string validation, so normalize explicitly.
            value = raw.get(key)
            if value is None:
                return ""
            return value if isinstance(value, str) else str(value)

        return cls(
            job_category=text("Std_class"),
            job_title=text("aca112"),
            company=text("AAB004"),
            # Only the company type is stripped of surrounding whitespace.
            company_type=text("AAB019").strip(),
            salary=text("acb241"),
            location=text("aab302"),
            address=text("AAE006"),
            publish_date=text("aae397"),
            collect_time=text("Collect_time"),
            url=text("ACE760"),
            description=text("acb22a"),
            experience=text("Experience"),
            education=text("aac011"),
            headcount=text("acb240"),
            industry=text("AAB022"),
            company_size=text("Num_employers"),
            contact=text("AAE004"),
            company_intro=text("AAB092"),
        )