# -*- coding: utf-8 -*-
"""
Pull messages from a specified WeChat group and extract structured job data
using the Bailian API.

Before running, make sure that:
  1) dependencies are installed: pip install -e .
  2) WeChat 3.9 is logged in on this computer and its main window is open
  3) the config.json file has been configured
"""
|
|||
|
|
import sys
|
|||
|
|
import os
|
|||
|
|
import json
|
|||
|
|
import time
|
|||
|
|
import requests
|
|||
|
|
from datetime import datetime
|
|||
|
|
|
|||
|
|
# Absolute path of the directory that contains this script. It is also the
# base directory for config.json and for the output file.
_script_dir = os.path.dirname(os.path.abspath(__file__))
# Put the script directory at the front of the import search path.
# NOTE(review): presumably so the `wxauto` import below resolves to a copy
# that lives next to this script — confirm.
if _script_dir not in sys.path:
    sys.path.insert(0, _script_dir)
|
|||
|
|
|
|||
|
|
from wxauto import WeChat
|
|||
|
|
|
|||
|
|
|
|||
|
|
def load_config():
    """Load ``config.json`` from the directory that contains this script.

    Returns:
        The parsed configuration dict, or ``None`` when the file is missing,
        cannot be read, is not valid JSON, or its top level is not a JSON
        object. A message describing the problem is printed in each of
        those cases.
    """
    config_path = os.path.join(_script_dir, "config.json")
    if not os.path.exists(config_path):
        print(f"配置文件不存在: {config_path}")
        return None

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"配置文件读取失败: {config_path} ({e})")
        return None

    # Callers use config.get(...) / config[...], so anything but a dict
    # would only fail later with a less helpful error.
    if not isinstance(config, dict):
        print(f"配置文件格式错误,顶层必须是JSON对象: {config_path}")
        return None

    return config
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _parse_job_json(content):
    """Parse the model reply as JSON, tolerating text around the JSON object.

    The reply is first parsed as-is. If that fails, the span from the first
    ``{`` to the last ``}`` is parsed instead, which covers replies wrapped in
    Markdown code fences or surrounded by explanatory text.

    Raises:
        json.JSONDecodeError: if neither attempt yields valid JSON.
    """
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        start, end = content.find("{"), content.rfind("}")
        if start == -1 or end < start:
            raise
        return json.loads(content[start:end + 1])


def extract_job_info(message_content, api_url, api_key):
    """Ask the Bailian API to extract structured job fields from a message.

    The message is embedded in a fixed prompt and POSTed to ``api_url`` with
    model ``qwen-plus`` and a 30 second timeout.

    Args:
        message_content: Text of the chat message to analyse.
        api_url: URL the request is POSTed to.
        api_key: Key sent in the ``Authorization: Bearer`` header.

    Returns:
        A non-empty dict of extracted fields, or ``None`` when the request
        fails, the response has an unexpected shape, the reply cannot be
        parsed as a JSON object, or the parsed object is empty. Every
        failure path prints a message; no exception is propagated.
    """
    prompt = f"""请从以下消息中提取招聘岗位信息,并以JSON格式返回。如果消息不包含招聘信息,返回空对象。

要提取的字段:
- job_name: 工作名称
- job_description: 工作描述
- job_location: 工作地点
- salary_min: 月薪最低(数字,单位:元)
- salary_max: 月薪最高(数字,单位:元)
- company_name: 公司名称
- contact_person: 联系人
- contact_info: 联系方式(电话/微信等)

消息内容:
{message_content}

请直接返回JSON格式,不要包含其他说明文字。"""

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    payload = {
        "model": "qwen-plus",
        "input": {
            "messages": [
                {
                    "role": "system",
                    "content": "你是一个专业的招聘信息提取助手,擅长从文本中提取结构化的岗位信息。",
                },
                {
                    "role": "user",
                    "content": prompt,
                },
            ]
        },
        "parameters": {
            "result_format": "message",
        },
    }

    try:
        response = requests.post(api_url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        result = response.json()

        # An absent or empty "choices" list is reported as a malformed
        # response instead of surfacing as an IndexError below.
        choices = None
        if isinstance(result, dict) and isinstance(result.get("output"), dict):
            choices = result["output"].get("choices")
        if not choices:
            print(f"API返回格式异常: {result}")
            return None

        content = choices[0]["message"]["content"]
        try:
            job_data = _parse_job_json(content)
        except json.JSONDecodeError:
            print(f"API返回内容无法解析为JSON: {content}")
            return None

        # Callers treat the result as a dict of fields; reject lists,
        # strings, numbers and other valid-but-unusable JSON values.
        if not isinstance(job_data, dict):
            print(f"API返回内容不是JSON对象: {content}")
            return None

        return job_data if job_data else None

    except requests.exceptions.RequestException as e:
        print(f"API请求失败: {e}")
        return None
    except Exception as e:
        # Last-resort boundary: this runs inside a message callback, so an
        # unexpected response shape must not take the listener down.
        print(f"提取岗位信息时发生错误: {e}")
        return None
|
|||
|
|
|
|||
|
|
|
|||
|
|
def save_job_data(job_data, output_file):
    """Append one job record to the JSON array stored in ``output_file``.

    Args:
        job_data: JSON-serialisable record to append.
        output_file: File name, joined onto the script directory with
            ``os.path.join``.

    If the existing file cannot be read or parsed, a warning is printed and
    the file is recreated containing only the new record. If it holds valid
    JSON that is not a list, that value is kept as the first element.
    """
    output_path = os.path.join(_script_dir, output_file)

    # Load what has been saved so far.
    existing_data = []
    if os.path.exists(output_path):
        try:
            with open(output_path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            # Narrower than a bare except, so Ctrl+C is no longer swallowed.
            print(f"读取现有数据失败,将重新创建文件: {e}")
        else:
            # Keep a non-list payload instead of crashing on .append().
            existing_data = loaded if isinstance(loaded, list) else [loaded]

    existing_data.append(job_data)

    # Write to a temporary file first and then swap it into place, so a
    # failed or interrupted dump leaves the previous output file untouched.
    tmp_path = output_path + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(existing_data, f, ensure_ascii=False, indent=2)
    os.replace(tmp_path, output_path)

    print(f"岗位数据已保存到: {output_path}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def on_message(msg, chat, config):
    """Handle one incoming message: log it, extract job info, and save it.

    Args:
        msg: Message object; its ``sender``, ``type`` and ``content``
            attributes are read.
        chat: Second callback argument; not used by this function.
        config: Configuration dict. ``bailian_api_url``, ``api_key`` and
            ``target_group`` are required; ``output_file`` is optional and
            defaults to ``jobs_data.json``.

    Only messages whose ``type`` is ``"text"`` and whose content is
    non-empty are analysed; everything else is skipped.
    """
    print(f"\n[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 收到新消息")
    print(f"发送者: {msg.sender}")
    print(f"消息类型: {msg.type}")
    print(f"消息内容: {msg.content}")

    # Only text messages are processed.
    if msg.type != "text" or not msg.content:
        print("跳过非文本消息")
        return

    # Extract the job fields through the Bailian API.
    print("正在分析消息内容...")
    job_info = extract_job_info(
        msg.content,
        config["bailian_api_url"],
        config["api_key"],
    )

    # Require a dict with at least one truthy field; the isinstance check
    # keeps a non-dict result from crashing on .values().
    if not (isinstance(job_info, dict) and any(job_info.values())):
        print("× 未提取到有效岗位信息")
        return

    print("✓ 提取到岗位信息:")
    print(json.dumps(job_info, ensure_ascii=False, indent=2))

    # Attach provenance metadata to the record.
    job_info["_metadata"] = {
        "source": "wechat_group",
        "group_name": config["target_group"],
        "sender": msg.sender,
        "extract_time": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        "original_message": msg.content,
    }

    # Same default file name that main() reports in its configuration
    # summary, so a config without "output_file" no longer raises KeyError.
    save_job_data(job_info, config.get("output_file", "jobs_data.json"))
|
|||
|
|
|
|||
|
|
|
|||
|
|
def main():
    """Run the tool: load config, connect to WeChat, and listen to one group.

    Steps:
      1. Load the configuration and check the required keys
         (``target_group``, ``bailian_api_url``, ``api_key``).
      2. Create the ``WeChat`` client; on failure print hints and return.
      3. Register ``on_message`` as the listener callback for the group.
      4. Block in ``wx.KeepRunning()`` until Ctrl+C, then stop listening.

    Returns early, after printing the reason, whenever a step fails.
    """
    print("=" * 60)
    print("微信群岗位信息提取工具")
    print("=" * 60)

    # Load the configuration.
    config = load_config()
    if not config:
        return

    target_group = config.get("target_group", "")
    if not target_group:
        print("错误: 配置文件中未指定 target_group")
        return

    # The message callback indexes these keys directly, so a missing one
    # would otherwise only surface as a KeyError when the first message
    # arrives. Fail early with a clear message instead.
    missing_keys = [
        key for key in ("bailian_api_url", "api_key") if not config.get(key)
    ]
    if missing_keys:
        print(f"错误: 配置文件中未指定 {', '.join(missing_keys)}")
        return

    print("\n配置信息:")
    print(f"  目标群组: {target_group}")
    print(f"  输出文件: {config.get('output_file', 'jobs_data.json')}")
    print(f"  检查间隔: {config.get('check_interval', 5)}秒")

    # Connect to WeChat.
    print("\n正在连接微信...")
    try:
        wx = WeChat()
        print(f"✓ 已连接微信,当前用户: {wx.nickname}")
    except Exception as e:
        print(f"× 连接失败: {e}")
        print("请确保:")
        print("  1. 已安装依赖: pip install -e .")
        print("  2. 微信 3.9 已登录并保持主窗口打开")
        return

    # Register the listener for the target group.
    print(f"\n正在添加监听: {target_group}")
    result = wx.AddListenChat(
        nickname=target_group,
        callback=lambda msg, chat: on_message(msg, chat, config),
    )

    # A string result containing "失败" is treated as a failed registration.
    if isinstance(result, str) and "失败" in result:
        print(f"× 添加监听失败: {result}")
        print(f"提示: 请确保群名称 '{target_group}' 正确")
        return

    print(f"✓ 成功监听群组: {target_group}")
    print("\n开始监听消息...")
    print("按 Ctrl+C 停止监听\n")
    print("-" * 60)

    # Keep the process alive until the user interrupts it.
    try:
        wx.KeepRunning()
    except KeyboardInterrupt:
        print("\n\n正在停止监听...")
        wx.StopListening()
        print("程序已退出")
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|