Files
wechat_crawler/view_jobs.py

117 lines
3.3 KiB
Python
Raw Permalink Normal View History

# -*- coding: utf-8 -*-
"""
查看提取的岗位数据
"""
import json
import os
from collections import Counter
from datetime import datetime
def format_salary(salary_min, salary_max):
    """Return a display string for a salary range.

    Either bound may be missing. A bound counts as missing when it is falsy
    (``None``, ``0`` or an empty string), so a zero bound is treated the same
    as an absent one.

    Args:
        salary_min: Lower bound of the salary, or a falsy value if not given.
        salary_max: Upper bound of the salary, or a falsy value if not given.

    Returns:
        ``"<min>-<max>"`` when both bounds are given and differ,
        ``"<value>元"`` when both bounds are given and equal,
        ``"<min>元起"`` when only the lower bound is given,
        ``"<max>元以下"`` when only the upper bound is given,
        ``"面议"`` (negotiable) when neither is given.
    """
    if salary_min and salary_max:
        # A fixed salary is stored as min == max; show it once rather than
        # as a degenerate range such as "8000-8000".
        if salary_min == salary_max:
            return f"{salary_min}元"
        return f"{salary_min}-{salary_max}"
    if salary_min:
        return f"{salary_min}元起"
    if salary_max:
        return f"{salary_max}元以下"
    return "面议"
def display_job(job, index):
    """Print a human-readable summary of a single job record to stdout.

    Fields that are missing, null or empty are shown as "未知". Optional
    sections (description, contact details, source metadata, original
    message) are printed only when they have content.

    Args:
        job: Mapping describing one job. Keys read here: ``job_name``,
            ``company_name``, ``job_location``, ``salary_min``,
            ``salary_max``, ``job_description``, ``contact_person``,
            ``contact_info`` and the optional ``_metadata`` mapping
            (``group_name``, ``sender``, ``extract_time``,
            ``original_message``).
        index: Number shown in the job's header line.
    """
    unknown = '未知'
    separator = '=' * 80

    print(f"\n{separator}")
    print(f"岗位 #{index}")
    print(separator)
    # ``or unknown`` instead of a ``.get`` default: the default only applies
    # when the key is absent, so a key stored as null would print "None".
    print(f"工作名称: {job.get('job_name') or unknown}")
    print(f"公司名称: {job.get('company_name') or unknown}")
    print(f"工作地点: {job.get('job_location') or unknown}")
    print(f"薪资范围: {format_salary(job.get('salary_min'), job.get('salary_max'))}")

    description = job.get('job_description')
    if description:
        print("\n工作描述:")
        print(f" {description}")

    contact_person = job.get('contact_person')
    contact_info = job.get('contact_info')
    if contact_person or contact_info:
        print("\n联系方式:")
        if contact_person:
            print(f" 联系人: {contact_person}")
        if contact_info:
            print(f" 联系方式: {contact_info}")

    # Source metadata. Skipped unless it is a mapping, so a null or malformed
    # ``_metadata`` value does not raise on ``.get``.
    meta = job.get('_metadata')
    if isinstance(meta, dict):
        print("\n来源信息:")
        print(f" 群组: {meta.get('group_name') or unknown}")
        print(f" 发送者: {meta.get('sender') or unknown}")
        print(f" 提取时间: {meta.get('extract_time') or unknown}")

        original = meta.get('original_message')
        if original:
            print("\n原始消息:")
            # Coerce to text so a non-string payload cannot break len()/slicing.
            text = str(original)
            # Keep the output readable: show at most the first 200 characters.
            if len(text) > 200:
                text = text[:200] + "..."
            print(f" {text}")
def _load_jobs(data_file):
    """Load the job list from *data_file*, or print why it cannot be used.

    Returns:
        The decoded, non-empty list of job dicts, or ``None`` after printing
        a message when the file is missing, unreadable, not valid JSON,
        empty, or not a list of objects.
    """
    try:
        with open(data_file, "r", encoding="utf-8") as f:
            jobs = json.load(f)
    except FileNotFoundError:
        print(f"数据文件不存在: {data_file}")
        print("请先运行 job_extractor.py 提取岗位数据")
        return None
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"读取数据文件失败: {e}")
        return None

    if not jobs:
        print("暂无岗位数据")
        return None
    # Everything downstream calls ``.get`` on each entry, so reject any other
    # shape here with a clear message instead of failing later.
    if not isinstance(jobs, list) or not all(isinstance(job, dict) for job in jobs):
        print(f"数据格式错误: {data_file} 应为岗位对象列表")
        return None
    return jobs


def _print_distribution(title, jobs, field, limit=5):
    """Print the *limit* most frequent values of *field* across *jobs*."""
    # Missing, null and empty values are all counted under "未知".
    counts = Counter(job.get(field) or '未知' for job in jobs)
    print(f"\n{title}:")
    for value, count in counts.most_common(limit):
        print(f" {value}: {count}个岗位")


def main(data_file="jobs_data.json"):
    """Print an overview and the details of every job stored in *data_file*.

    The output consists of a header with the file name and job count, the
    five most common locations and companies, and then each job rendered by
    ``display_job``. If the data cannot be loaded, a message is printed and
    the function returns without raising.

    Args:
        data_file: Path of the JSON file holding a list of job objects.
            Defaults to ``jobs_data.json`` in the current working directory.
    """
    jobs = _load_jobs(data_file)
    if jobs is None:
        return

    separator = "=" * 80
    print(separator)
    print("岗位数据查看器")
    print(separator)
    print(f"数据文件: {data_file}")
    print(f"岗位总数: {len(jobs)}")

    # Summary statistics.
    _print_distribution("地点分布", jobs, 'job_location')
    _print_distribution("公司分布", jobs, 'company_name')

    # Per-job details.
    print(f"\n{separator}")
    print("岗位详情")
    for i, job in enumerate(jobs, 1):
        display_job(job, i)

    print(f"\n{separator}")
    print(f"共显示 {len(jobs)} 个岗位")
    print(separator)
# Run the viewer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()