feat: Initialize wxauto WeChat automation project with job extraction tools

- Add wxauto package with WeChat UI automation and message handling capabilities
- Implement job_extractor.py for automated job posting extraction from WeChat groups
- Add job_extractor_gui.py providing graphical interface for job extraction tool
- Create comprehensive documentation in Chinese covering GUI usage, multi-group support, and quick start guides
- Add build configuration files (build_exe.py, build_exe.spec) for packaging as standalone executable
- Include utility scripts for WeChat interaction (auto_send_msg.py, get_history.py, receive_file_transfer.py)
- Add project configuration files (pyproject.toml, setup.cfg, requirements.txt)
- Include test files (test_api.py, test_com_fix.py) for API and compatibility validation
- Add Apache 2.0 LICENSE and comprehensive README documentation
- Configure .gitignore to exclude build artifacts, logs, and temporary files
2026-02-11 14:49:38 +08:00
commit b66bac7ca8
52 changed files with 15318 additions and 0 deletions
--- a/job_extractor.py
+++ b/job_extractor.py
@@ -0,0 +1,223 @@
+# -*- coding: utf-8 -*-
+"""
+从指定微信群拉取消息，使用百炼API提取岗位结构化数据
+运行前请确保：1) 已安装依赖 pip install -e .
+              2) 电脑已登录微信 3.9 版本，且主窗口已打开
+              3) 已配置 config.json 文件
+"""
+import sys
+import os
+import json
+import time
+import requests
+from datetime import datetime
+
+# Absolute directory of this script; config.json and the output file are
+# resolved relative to it further down.
+_script_dir = os.path.dirname(os.path.abspath(__file__))
+# Put that directory at the front of sys.path before the wxauto import below
+# (presumably so the wxauto package shipped next to this script is importable
+# -- confirm against the repository layout).
+if _script_dir not in sys.path:
+    sys.path.insert(0, _script_dir)
+
+from wxauto import WeChat
+
+
+def load_config():
+    """Load ``config.json`` from the script directory.
+
+    Returns:
+        The parsed configuration dict, or ``None`` (after printing a
+        message) when the file is missing, cannot be read, is not valid
+        JSON, or does not hold a JSON object at the top level.
+    """
+    config_path = os.path.join(_script_dir, "config.json")
+
+    try:
+        with open(config_path, "r", encoding="utf-8") as f:
+            config = json.load(f)
+    except FileNotFoundError:
+        print(f"配置文件不存在: {config_path}")
+        return None
+    except (OSError, ValueError) as e:
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError, so a
+        # damaged file is reported the same way as a missing one instead of
+        # aborting the program with a traceback.
+        print(f"配置文件读取失败: {config_path} ({e})")
+        return None
+
+    # Callers use dict methods such as ``config.get``; reject other JSON types.
+    if not isinstance(config, dict):
+        print(f"配置文件格式错误，顶层必须是JSON对象: {config_path}")
+        return None
+
+    return config
+
+
+def _strip_code_fence(text):
+    """Return *text* without a surrounding Markdown code fence, if present."""
+    stripped = text.strip()
+    if not stripped.startswith("```"):
+        return stripped
+    # Drop the opening fence line (it may carry a language tag such as
+    # ```json) and everything from the closing fence onwards.
+    inner = stripped.partition("\n")[2]
+    return inner.rsplit("```", 1)[0].strip()
+
+
+def extract_job_info(message_content, api_url, api_key):
+    """Ask the Bailian API to extract structured job data from a message.
+
+    Args:
+        message_content: Raw message text to analyse.
+        api_url: URL the request is POSTed to.
+        api_key: Key sent as a ``Bearer`` token in the Authorization header.
+
+    Returns:
+        A non-empty dict with the extracted fields, or ``None`` when the
+        request fails, the response has an unexpected shape, the reply is
+        not a JSON object, or the object is empty. Failures are printed,
+        never raised.
+    """
+    prompt = f"""请从以下消息中提取招聘岗位信息，并以JSON格式返回。如果消息不包含招聘信息，返回空对象。
+
+要提取的字段：
+- job_name: 工作名称
+- job_description: 工作描述
+- job_location: 工作地点
+- salary_min: 月薪最低（数字，单位：元）
+- salary_max: 月薪最高（数字，单位：元）
+- company_name: 公司名称
+- contact_person: 联系人
+- contact_info: 联系方式（电话/微信等）
+
+消息内容：
+{message_content}
+
+请直接返回JSON格式，不要包含其他说明文字。"""
+
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}",
+    }
+
+    payload = {
+        "model": "qwen-plus",
+        "input": {
+            "messages": [
+                {
+                    "role": "system",
+                    "content": "你是一个专业的招聘信息提取助手，擅长从文本中提取结构化的岗位信息。",
+                },
+                {
+                    "role": "user",
+                    "content": prompt,
+                },
+            ]
+        },
+        "parameters": {
+            "result_format": "message",
+        },
+    }
+
+    try:
+        response = requests.post(api_url, headers=headers, json=payload, timeout=30)
+        response.raise_for_status()
+        result = response.json()
+
+        output = result.get("output") if isinstance(result, dict) else None
+        choices = output.get("choices") if isinstance(output, dict) else None
+        if not choices:
+            # Also covers an empty ``choices`` list, which would otherwise
+            # surface as an IndexError below.
+            print(f"API返回格式异常: {result}")
+            return None
+
+        content = choices[0]["message"]["content"]
+
+        # Models often wrap the JSON in a Markdown fence despite the prompt;
+        # strip it so an otherwise valid reply is not thrown away.
+        try:
+            job_data = json.loads(_strip_code_fence(content))
+        except json.JSONDecodeError:
+            print(f"API返回内容无法解析为JSON: {content}")
+            return None
+
+        # Callers treat the result as a dict (``.values()``, item assignment).
+        if not isinstance(job_data, dict):
+            print(f"API返回内容不是JSON对象: {content}")
+            return None
+
+        return job_data if job_data else None
+
+    except requests.exceptions.RequestException as e:
+        print(f"API请求失败: {e}")
+        return None
+    except Exception as e:
+        # Last-resort boundary: a malformed response must not kill the
+        # caller, so report and carry on.
+        print(f"提取岗位信息时发生错误: {e}")
+        return None
+
+
+def save_job_data(job_data, output_file):
+    """Append one job record to the JSON array stored in *output_file*.
+
+    Args:
+        job_data: JSON-serialisable record to append.
+        output_file: File name, joined onto the script directory.
+
+    The file is rewritten in full on every call. The new content is written
+    to a temporary sibling file first and then moved over the target, so a
+    failure while serialising cannot truncate previously saved records.
+    """
+    output_path = os.path.join(_script_dir, output_file)
+
+    # Load what is already stored; a missing file simply means "no records".
+    existing_data = []
+    try:
+        with open(output_path, "r", encoding="utf-8") as f:
+            existing_data = json.load(f)
+    except FileNotFoundError:
+        pass
+    except (OSError, ValueError) as e:
+        # Best effort, as before: start over with an empty list, but say so
+        # instead of silently dropping the old content.
+        print(f"现有数据文件无法读取，将重新创建: {output_path} ({e})")
+        existing_data = []
+
+    if not isinstance(existing_data, list):
+        # Keep whatever was stored rather than failing on ``.append``.
+        print(f"现有数据文件不是JSON数组，已转换为数组: {output_path}")
+        existing_data = [existing_data]
+
+    existing_data.append(job_data)
+
+    tmp_path = output_path + ".tmp"
+    with open(tmp_path, "w", encoding="utf-8") as f:
+        json.dump(existing_data, f, ensure_ascii=False, indent=2)
+    os.replace(tmp_path, output_path)
+
+    print(f"岗位数据已保存到: {output_path}")
+
+
+def on_message(msg, chat, config):
+    """Handle one incoming message: extract job data and persist it.
+
+    Args:
+        msg: Message object; its ``sender``, ``type`` and ``content``
+            attributes are read.
+        chat: Second callback argument; not used here.
+        config: Configuration dict. ``bailian_api_url`` and ``api_key`` are
+            required; ``target_group`` is recorded in the metadata and
+            ``output_file`` defaults to ``jobs_data.json``.
+
+    Returns:
+        None. Progress and results are printed.
+    """
+    print(f"\n[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 收到新消息")
+    print(f"发送者: {msg.sender}")
+    print(f"消息类型: {msg.type}")
+    print(f"消息内容: {msg.content}")
+
+    # Only non-empty text messages are analysed.
+    if msg.type != "text" or not msg.content:
+        print("跳过非文本消息")
+        return
+
+    # Report missing API settings instead of raising KeyError from inside
+    # the listener callback.
+    api_url = config.get("bailian_api_url")
+    api_key = config.get("api_key")
+    if not api_url or not api_key:
+        print("错误: 配置文件中缺少 bailian_api_url 或 api_key")
+        return
+
+    print("正在分析消息内容...")
+    job_info = extract_job_info(msg.content, api_url, api_key)
+
+    # Guard the type: ``.values()`` and the metadata assignment need a dict.
+    if isinstance(job_info, dict) and any(job_info.values()):
+        print("✓ 提取到岗位信息:")
+        print(json.dumps(job_info, ensure_ascii=False, indent=2))
+
+        # Record where and when the posting was seen.
+        job_info["_metadata"] = {
+            "source": "wechat_group",
+            "group_name": config.get("target_group", ""),
+            "sender": msg.sender,
+            "extract_time": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            "original_message": msg.content,
+        }
+
+        # Same default as the one main() prints for a missing output_file.
+        save_job_data(job_info, config.get("output_file", "jobs_data.json"))
+    else:
+        print("× 未提取到有效岗位信息")
+
+
+def main():
+    """Load the config, connect to WeChat and listen on the target group.
+
+    Returns early (after printing the reason) when the configuration is
+    missing or incomplete, when the WeChat connection fails, or when the
+    listener cannot be added. Otherwise blocks in ``wx.KeepRunning()`` until
+    interrupted with Ctrl+C.
+    """
+    banner = "=" * 60
+    print(banner)
+    print("微信群岗位信息提取工具")
+    print(banner)
+
+    config = load_config()
+    if not config:
+        return
+
+    # Fail fast on every key the message callback needs, so a missing API
+    # setting is reported here rather than as a KeyError inside the listener.
+    for key in ("target_group", "bailian_api_url", "api_key"):
+        if not config.get(key):
+            print(f"错误: 配置文件中未指定 {key}")
+            return
+    target_group = config["target_group"]
+
+    print("\n配置信息:")
+    print(f"  目标群组: {target_group}")
+    print(f"  输出文件: {config.get('output_file', 'jobs_data.json')}")
+    print(f"  检查间隔: {config.get('check_interval', 5)}秒")
+
+    print("\n正在连接微信...")
+    try:
+        wx = WeChat()
+        print(f"✓ 已连接微信，当前用户: {wx.nickname}")
+    except Exception as e:
+        print(f"× 连接失败: {e}")
+        print("请确保：")
+        print("  1. 已安装依赖: pip install -e .")
+        print("  2. 微信 3.9 已登录并保持主窗口打开")
+        return
+
+    print(f"\n正在添加监听: {target_group}")
+    result = wx.AddListenChat(
+        nickname=target_group,
+        callback=lambda msg, chat: on_message(msg, chat, config),
+    )
+
+    # A string result containing "失败" is treated as a failure report.
+    if isinstance(result, str) and "失败" in result:
+        print(f"× 添加监听失败: {result}")
+        print(f"提示: 请确保群名称 '{target_group}' 正确")
+        return
+
+    print(f"✓ 成功监听群组: {target_group}")
+    print("\n开始监听消息...")
+    print("按 Ctrl+C 停止监听\n")
+    print("-" * 60)
+
+    # Block here until the user interrupts.
+    try:
+        wx.KeepRunning()
+    except KeyboardInterrupt:
+        print("\n\n正在停止监听...")
+        wx.StopListening()
+        print("程序已退出")
+
+
+# Run the extractor only when this file is executed as a script, not when it
+# is imported.
+if __name__ == "__main__":
+    main()