- Add wxauto package with WeChat UI automation and message handling capabilities
- Implement job_extractor.py for automated job posting extraction from WeChat groups
- Add job_extractor_gui.py providing graphical interface for job extraction tool
- Create comprehensive documentation in Chinese covering GUI usage, multi-group support, and quick start guides
- Add build configuration files (build_exe.py, build_exe.spec) for packaging as standalone executable
- Include utility scripts for WeChat interaction (auto_send_msg.py, get_history.py, receive_file_transfer.py)
- Add project configuration files (pyproject.toml, setup.cfg, requirements.txt)
- Include test files (test_api.py, test_com_fix.py) for API and compatibility validation
- Add Apache 2.0 LICENSE and comprehensive README documentation
- Configure .gitignore to exclude build artifacts, logs, and temporary files
224 lines
6.6 KiB
Python
224 lines
6.6 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
从指定微信群拉取消息,使用百炼API提取岗位结构化数据
|
||
运行前请确保:1) 已安装依赖 pip install -e .
|
||
2) 电脑已登录微信 3.9 版本,且主窗口已打开
|
||
3) 已配置 config.json 文件
|
||
"""
|
||
import sys
|
||
import os
|
||
import json
|
||
import time
|
||
import requests
|
||
from datetime import datetime
|
||
|
||
# Absolute path of the directory that contains this script.
_script_dir = os.path.dirname(os.path.abspath(__file__))
# Put that directory at the front of sys.path (once) before the `wxauto`
# import that follows, so modules located next to this script are searched
# first. NOTE(review): presumably this is for a `wxauto` package shipped
# alongside the script - confirm.
if _script_dir not in sys.path:
    sys.path.insert(0, _script_dir)
|
||
|
||
from wxauto import WeChat
|
||
|
||
|
||
def load_config(config_path=None):
    """Load the tool's JSON configuration file.

    Args:
        config_path: Optional path of the configuration file. When omitted,
            ``config.json`` in the script directory is used.

    Returns:
        The parsed configuration as a dict, or ``None`` when the file is
        missing, cannot be read, is not valid JSON, or does not contain a
        JSON object. A message describing the problem is printed in each
        failure case.
    """
    if config_path is None:
        config_path = os.path.join(_script_dir, "config.json")

    # Open directly instead of checking os.path.exists first: this avoids the
    # check-then-open race and lets one place handle every read failure.
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
    except FileNotFoundError:
        print(f"配置文件不存在: {config_path}")
        return None
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
        print(f"配置文件读取失败: {config_path} ({e})")
        return None

    # Callers use dict methods on the result, so reject any other
    # top-level JSON value (list, string, number, null).
    if not isinstance(config, dict):
        print(f"配置文件格式错误(应为JSON对象): {config_path}")
        return None

    return config
|
||
|
||
|
||
def _parse_job_json(content):
    """Decode the model's reply text into Python data.

    The reply is first parsed as-is. If that fails, the text between the
    first ``{`` and the last ``}`` is parsed instead, which copes with a
    reply wrapped in a Markdown code fence or surrounded by extra text.

    Raises:
        json.JSONDecodeError: If neither attempt yields valid JSON.
    """
    text = content.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end < start:
            raise
        return json.loads(text[start:end + 1])


def extract_job_info(message_content, api_url, api_key):
    """Ask the Bailian API to extract structured job data from a message.

    Sends a POST request to ``api_url`` (model ``qwen-plus``, 30 second
    timeout, ``Authorization: Bearer <api_key>``) with a prompt that asks for
    the job fields as JSON, then decodes the reply.

    Args:
        message_content: Text of the chat message to analyse.
        api_url: URL the request is posted to.
        api_key: API key sent as a bearer token.

    Returns:
        A non-empty dict with the extracted fields, or ``None`` when the
        message is blank, the request fails, the response has an unexpected
        shape, or the reply is not a non-empty JSON object. Failures are
        printed rather than raised.
    """
    # Nothing to analyse: skip the network round trip entirely.
    if not message_content or not str(message_content).strip():
        return None

    prompt = f"""请从以下消息中提取招聘岗位信息,并以JSON格式返回。如果消息不包含招聘信息,返回空对象。

要提取的字段:
- job_name: 工作名称
- job_description: 工作描述
- job_location: 工作地点
- salary_min: 月薪最低(数字,单位:元)
- salary_max: 月薪最高(数字,单位:元)
- company_name: 公司名称
- contact_person: 联系人
- contact_info: 联系方式(电话/微信等)

消息内容:
{message_content}

请直接返回JSON格式,不要包含其他说明文字。"""

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    payload = {
        "model": "qwen-plus",
        "input": {
            "messages": [
                {
                    "role": "system",
                    "content": "你是一个专业的招聘信息提取助手,擅长从文本中提取结构化的岗位信息。",
                },
                {
                    "role": "user",
                    "content": prompt,
                },
            ]
        },
        "parameters": {
            "result_format": "message",
        },
    }

    try:
        response = requests.post(api_url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        result = response.json()

        choices = None
        if isinstance(result, dict) and isinstance(result.get("output"), dict):
            choices = result["output"].get("choices")
        # Covers a missing "output"/"choices" key as well as an empty list.
        if not choices:
            print(f"API返回格式异常: {result}")
            return None

        content = choices[0]["message"]["content"]
        try:
            job_data = _parse_job_json(content)
        except json.JSONDecodeError:
            print(f"API返回内容无法解析为JSON: {content}")
            return None

        # Callers treat the result as a dict; a JSON list/string/number is
        # not usable job data.
        if not isinstance(job_data, dict):
            print(f"API返回内容不是JSON对象: {content}")
            return None

        return job_data or None

    except requests.exceptions.RequestException as e:
        print(f"API请求失败: {e}")
        return None
    except Exception as e:
        # Boundary handler: this function reports failures instead of
        # raising, so a malformed response cannot propagate to the caller.
        print(f"提取岗位信息时发生错误: {e}")
        return None
|
||
|
||
|
||
def save_job_data(job_data, output_file):
    """Append one job record to a JSON file holding a list of records.

    Args:
        job_data: JSON-serialisable record to append.
        output_file: Output file name or path. A relative value is resolved
            against the script directory; an absolute path is used as given.

    The existing file content is loaded, the record is appended, and the
    whole list is written back as UTF-8 JSON (``ensure_ascii=False``,
    ``indent=2``). A missing file starts a new list. A file that cannot be
    read or parsed is reported and replaced by a new list. Existing content
    that is valid JSON but not a list is kept as the first element.
    """
    # os.path.join already discards the base directory for an absolute
    # second argument; the branch just makes that explicit.
    if os.path.isabs(output_file):
        output_path = output_file
    else:
        output_path = os.path.join(_script_dir, output_file)

    existing_data = []
    try:
        with open(output_path, "r", encoding="utf-8") as f:
            existing_data = json.load(f)
    except FileNotFoundError:
        # First record: nothing to load yet.
        pass
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
        # Keep the best-effort behaviour (start over) but no longer silently.
        print(f"警告: 无法读取现有数据文件,将重新创建: {output_path} ({e})")
        existing_data = []

    if existing_data is None:
        existing_data = []
    elif not isinstance(existing_data, list):
        # Preserve unexpected-but-valid content instead of failing on append.
        existing_data = [existing_data]

    existing_data.append(job_data)

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(existing_data, f, ensure_ascii=False, indent=2)

    print(f"岗位数据已保存到: {output_path}")
|
||
|
||
|
||
def on_message(msg, chat, config):
    """Handle one incoming message: log it, extract job data, save it.

    Args:
        msg: Message object; its ``sender``, ``type`` and ``content``
            attributes are read.
        chat: Chat the message belongs to. Not used here; it is part of the
            callback signature this function is invoked with.
        config: Configuration mapping. ``bailian_api_url`` and ``api_key``
            are required; ``target_group`` and ``output_file`` are optional
            (``output_file`` defaults to ``jobs_data.json``).

    Only non-blank text messages are analysed. When extraction yields a dict
    with at least one truthy value, a ``_metadata`` entry is attached and the
    record is passed to ``save_job_data``.
    """
    print(f"\n[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 收到新消息")
    print(f"发送者: {msg.sender}")
    print(f"消息类型: {msg.type}")
    print(f"消息内容: {msg.content}")

    content = msg.content
    # Skip non-text messages and text that is empty or whitespace only.
    if msg.type != "text" or not content or not str(content).strip():
        print("跳过非文本消息")
        return

    # Report missing settings instead of raising KeyError inside the callback.
    api_url = config.get("bailian_api_url")
    api_key = config.get("api_key")
    if not api_url or not api_key:
        print("错误: 配置文件中缺少 bailian_api_url 或 api_key")
        return

    print("正在分析消息内容...")
    job_info = extract_job_info(content, api_url, api_key)

    # Guard against a non-dict result before calling .values() on it.
    if not isinstance(job_info, dict) or not any(job_info.values()):
        print("× 未提取到有效岗位信息")
        return

    print("✓ 提取到岗位信息:")
    print(json.dumps(job_info, ensure_ascii=False, indent=2))

    # Record where and when the job posting was captured.
    job_info["_metadata"] = {
        "source": "wechat_group",
        "group_name": config.get("target_group", ""),
        "sender": msg.sender,
        "extract_time": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        "original_message": content,
    }

    # Same default file name that the startup banner in main() displays.
    save_job_data(job_info, config.get("output_file", "jobs_data.json"))
|
||
|
||
|
||
def main():
    """Run the extractor: load config, connect to WeChat, listen to a group.

    Steps:
        1. Load the configuration and check that ``target_group``,
           ``bailian_api_url`` and ``api_key`` are present.
        2. Create the ``WeChat`` client.
        3. Register ``on_message`` as listener callback for the target group.
        4. Block in ``wx.KeepRunning()`` until Ctrl+C, then stop listening.

    Every failure is printed and ends the function early; nothing is raised
    for configuration or connection problems.
    """
    print("=" * 60)
    print("微信群岗位信息提取工具")
    print("=" * 60)

    config = load_config()
    if not config:
        return

    target_group = config.get("target_group", "")
    if not target_group:
        print("错误: 配置文件中未指定 target_group")
        return

    # The message callback needs these settings for every message, so fail
    # at startup instead of on the first incoming message.
    missing = [key for key in ("bailian_api_url", "api_key") if not config.get(key)]
    if missing:
        print(f"错误: 配置文件中未指定 {', '.join(missing)}")
        return

    # Make the default shown below the value the callback actually uses.
    config.setdefault("output_file", "jobs_data.json")

    print("\n配置信息:")
    print(f" 目标群组: {target_group}")
    print(f" 输出文件: {config.get('output_file', 'jobs_data.json')}")
    print(f" 检查间隔: {config.get('check_interval', 5)}秒")

    print("\n正在连接微信...")
    try:
        wx = WeChat()
        print(f"✓ 已连接微信,当前用户: {wx.nickname}")
    except Exception as e:
        # Top-level boundary: any connection problem becomes a readable hint.
        print(f"× 连接失败: {e}")
        print("请确保:")
        print(" 1. 已安装依赖: pip install -e .")
        print(" 2. 微信 3.9 已登录并保持主窗口打开")
        return

    print(f"\n正在添加监听: {target_group}")
    result = wx.AddListenChat(
        nickname=target_group,
        callback=lambda msg, chat: on_message(msg, chat, config),
    )

    # A string result containing "失败" is treated as a failure report.
    if isinstance(result, str) and "失败" in result:
        print(f"× 添加监听失败: {result}")
        print(f"提示: 请确保群名称 '{target_group}' 正确")
        return

    print(f"✓ 成功监听群组: {target_group}")
    print("\n开始监听消息...")
    print("按 Ctrl+C 停止监听\n")
    print("-" * 60)

    try:
        wx.KeepRunning()
    except KeyboardInterrupt:
        print("\n\n正在停止监听...")
        wx.StopListening()
        print("程序已退出")
|
||
|
||
|
||
# Start the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|