feat: Initialize wxauto WeChat automation project with job extraction tools
- Add wxauto package with WeChat UI automation and message handling capabilities
- Implement job_extractor.py for automated job posting extraction from WeChat groups
- Add job_extractor_gui.py providing graphical interface for job extraction tool
- Create comprehensive documentation in Chinese covering GUI usage, multi-group support, and quick start guides
- Add build configuration files (build_exe.py, build_exe.spec) for packaging as standalone executable
- Include utility scripts for WeChat interaction (auto_send_msg.py, get_history.py, receive_file_transfer.py)
- Add project configuration files (pyproject.toml, setup.cfg, requirements.txt)
- Include test files (test_api.py, test_com_fix.py) for API and compatibility validation
- Add Apache 2.0 LICENSE and comprehensive README documentation
- Configure .gitignore to exclude build artifacts, logs, and temporary files
This commit is contained in:
223
job_extractor.py
Normal file
223
job_extractor.py
Normal file
@@ -0,0 +1,223 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
从指定微信群拉取消息,使用百炼API提取岗位结构化数据
|
||||
运行前请确保:1) 已安装依赖 pip install -e .
|
||||
2) 电脑已登录微信 3.9 版本,且主窗口已打开
|
||||
3) 已配置 config.json 文件
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import requests
|
||||
from datetime import datetime
|
||||
|
||||
# Directory that contains this script. It is also used as the base directory
# for config.json and the output file.
_script_dir = os.path.dirname(os.path.abspath(__file__))
# Put the script directory first on the import path (only once) --
# presumably so the wxauto package imported right after this resolves
# regardless of the current working directory.
if _script_dir not in sys.path:
    sys.path.insert(0, _script_dir)
|
||||
|
||||
from wxauto import WeChat
|
||||
|
||||
|
||||
def load_config():
    """Load ``config.json`` from the directory that contains this script.

    Returns:
        The parsed JSON content, or ``None`` (after printing a notice) when
        the file does not exist.
    """
    cfg_file = os.path.join(_script_dir, "config.json")

    if os.path.exists(cfg_file):
        with open(cfg_file, "r", encoding="utf-8") as handle:
            parsed = json.load(handle)
        return parsed

    print(f"配置文件不存在: {cfg_file}")
    return None
|
||||
|
||||
|
||||
def _parse_job_json(content):
    """Decode the model reply into a Python value, tolerating wrappers.

    The reply is first parsed as-is. If that fails, the text between the
    first ``{`` and the last ``}`` is parsed instead, which recovers a JSON
    object that the model wrapped in a Markdown code fence or surrounded
    with explanatory prose.

    Raises:
        json.JSONDecodeError: if neither attempt yields valid JSON.
    """
    text = content.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end < start:
            # Nothing that looks like a JSON object: surface the original error.
            raise
        return json.loads(text[start:end + 1])


def extract_job_info(message_content, api_url, api_key):
    """Ask the Bailian API to extract structured job fields from a message.

    Args:
        message_content: Raw text of the chat message to analyse.
        api_url: Endpoint the request is POSTed to.
        api_key: Token sent as a ``Bearer`` credential.

    Returns:
        A non-empty ``dict`` with the extracted fields, or ``None`` when the
        request fails, the response has an unexpected shape, the reply is not
        valid JSON, or the reply is empty / not a JSON object. Every failure
        is reported with ``print``; no exception propagates to the caller.
    """
    prompt = f"""请从以下消息中提取招聘岗位信息,并以JSON格式返回。如果消息不包含招聘信息,返回空对象。

要提取的字段:
- job_name: 工作名称
- job_description: 工作描述
- job_location: 工作地点
- salary_min: 月薪最低(数字,单位:元)
- salary_max: 月薪最高(数字,单位:元)
- company_name: 公司名称
- contact_person: 联系人
- contact_info: 联系方式(电话/微信等)

消息内容:
{message_content}

请直接返回JSON格式,不要包含其他说明文字。"""

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "qwen-plus",
        "input": {
            "messages": [
                {
                    "role": "system",
                    "content": "你是一个专业的招聘信息提取助手,擅长从文本中提取结构化的岗位信息。"
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        },
        "parameters": {
            "result_format": "message"
        }
    }

    try:
        response = requests.post(api_url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        result = response.json()

        # Require a non-empty "choices" list so indexing [0] below cannot fail.
        has_choices = (
            "output" in result
            and "choices" in result["output"]
            and result["output"]["choices"]
        )
        if not has_choices:
            print(f"API返回格式异常: {result}")
            return None

        content = result["output"]["choices"][0]["message"]["content"]
        try:
            job_data = _parse_job_json(content)
        except json.JSONDecodeError:
            print(f"API返回内容无法解析为JSON: {content}")
            return None

        # An empty object (or null) means "no job posting in this message".
        if not job_data:
            return None
        # Callers treat the result as a mapping; reject lists/scalars here.
        if not isinstance(job_data, dict):
            print(f"API返回格式异常: {job_data}")
            return None
        return job_data

    except requests.exceptions.RequestException as e:
        print(f"API请求失败: {e}")
        return None
    except Exception as e:
        # Last-resort boundary: unexpected response shapes (KeyError,
        # TypeError, ...) must not escape to the caller.
        print(f"提取岗位信息时发生错误: {e}")
        return None
|
||||
|
||||
|
||||
def save_job_data(job_data, output_file):
    """Append one job record to the JSON array stored in ``output_file``.

    Args:
        job_data: JSON-serialisable record to append.
        output_file: File name, joined onto the script directory.

    The existing file content is loaded first. If the file cannot be read or
    does not contain valid JSON, the function starts from an empty list
    (best-effort, as before). The whole list is then rewritten with the new
    record at the end.
    """
    output_path = os.path.join(_script_dir, output_file)

    # Load whatever has been saved so far.
    existing_data = []
    if os.path.exists(output_path):
        try:
            with open(output_path, "r", encoding="utf-8") as f:
                existing_data = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            existing_data = []

    # The file must hold a list for append() to work; keep any other
    # top-level value as the first entry instead of crashing on it.
    if not isinstance(existing_data, list):
        existing_data = [] if existing_data is None else [existing_data]

    existing_data.append(job_data)

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(existing_data, f, ensure_ascii=False, indent=2)

    print(f"岗位数据已保存到: {output_path}")
|
||||
|
||||
|
||||
def on_message(msg, chat, config):
    """Handle one incoming chat message: log it, extract job info, save it.

    Args:
        msg: Message object; ``sender``, ``type`` and ``content`` are read.
        chat: Chat object supplied by the listener (not used here).
        config: Settings mapping. ``bailian_api_url``, ``api_key`` and
            ``target_group`` are required; ``output_file`` falls back to
            ``"jobs_data.json"``, the same default ``main`` displays.

    Only non-empty text messages are analysed. A record is saved only when
    the extractor returns a dict with at least one truthy value.
    """
    print(f"\n[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 收到新消息")
    print(f"发送者: {msg.sender}")
    print(f"消息类型: {msg.type}")
    print(f"消息内容: {msg.content}")

    # Only text messages with content are worth sending to the API.
    if msg.type != "text" or not msg.content:
        print("跳过非文本消息")
        return

    print("正在分析消息内容...")
    job_info = extract_job_info(
        msg.content,
        config["bailian_api_url"],
        config["api_key"]
    )

    # Guard against a non-dict result before calling .values() on it.
    if not (isinstance(job_info, dict) and any(job_info.values())):
        print("× 未提取到有效岗位信息")
        return

    print("✓ 提取到岗位信息:")
    print(json.dumps(job_info, ensure_ascii=False, indent=2))

    # Attach provenance so each saved record can be traced to its message.
    job_info["_metadata"] = {
        "source": "wechat_group",
        "group_name": config["target_group"],
        "sender": msg.sender,
        "extract_time": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        "original_message": msg.content
    }

    # Use the same default file name that main() reports to the user, so a
    # config without "output_file" no longer raises KeyError here.
    save_job_data(job_info, config.get("output_file", "jobs_data.json"))
|
||||
|
||||
|
||||
def main():
    """Run the extractor: load config, attach to WeChat, listen to one group.

    Returns early (after printing the reason) when the config is missing or
    empty, ``target_group`` is not set, connecting to WeChat raises, or
    adding the listener reports a failure string. Otherwise it blocks in
    ``wx.KeepRunning()`` until interrupted with Ctrl+C.
    """
    banner = "=" * 60
    for line in (banner, "微信群岗位信息提取工具", banner):
        print(line)

    # Load settings.
    settings = load_config()
    if not settings:
        return

    group = settings.get("target_group", "")
    if not group:
        print("错误: 配置文件中未指定 target_group")
        return

    output_name = settings.get('output_file', 'jobs_data.json')
    interval = settings.get('check_interval', 5)
    print("\n配置信息:")
    print(f" 目标群组: {group}")
    print(f" 输出文件: {output_name}")
    print(f" 检查间隔: {interval}秒")

    # Connect to the running WeChat client.
    print("\n正在连接微信...")
    try:
        wx = WeChat()
        print(f"✓ 已连接微信,当前用户: {wx.nickname}")
    except Exception as err:
        print(f"× 连接失败: {err}")
        for hint in (
            "请确保:",
            " 1. 已安装依赖: pip install -e .",
            " 2. 微信 3.9 已登录并保持主窗口打开",
        ):
            print(hint)
        return

    # Forward every message from the listener to on_message with the settings.
    def _dispatch(msg, chat):
        return on_message(msg, chat, settings)

    print(f"\n正在添加监听: {group}")
    outcome = wx.AddListenChat(nickname=group, callback=_dispatch)

    # A string result containing "失败" is treated as a failure report.
    if isinstance(outcome, str) and "失败" in outcome:
        print(f"× 添加监听失败: {outcome}")
        print(f"提示: 请确保群名称 '{group}' 正确")
        return

    print(f"✓ 成功监听群组: {group}")
    print("\n开始监听消息...")
    print("按 Ctrl+C 停止监听\n")
    print("-" * 60)

    # Block until the user interrupts.
    try:
        wx.KeepRunning()
    except KeyboardInterrupt:
        print("\n\n正在停止监听...")
        wx.StopListening()
        print("程序已退出")
|
||||
|
||||
|
||||
# Run the extractor only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user