feat: Initialize wxauto WeChat automation project with job extraction tools

- Add wxauto package with WeChat UI automation and message handling capabilities
- Implement job_extractor.py for automated job posting extraction from WeChat groups
- Add job_extractor_gui.py providing graphical interface for job extraction tool
- Create comprehensive documentation in Chinese covering GUI usage, multi-group support, and quick start guides
- Add build configuration files (build_exe.py, build_exe.spec) for packaging as standalone executable
- Include utility scripts for WeChat interaction (auto_send_msg.py, get_history.py, receive_file_transfer.py)
- Add project configuration files (pyproject.toml, setup.cfg, requirements.txt)
- Include test files (test_api.py, test_com_fix.py) for API and compatibility validation
- Add Apache 2.0 LICENSE and comprehensive README documentation
- Configure .gitignore to exclude build artifacts, logs, and temporary files
This commit is contained in:
2026-02-11 14:49:38 +08:00
commit b66bac7ca8
52 changed files with 15318 additions and 0 deletions

622
job_extractor_gui.py Normal file
View File

@@ -0,0 +1,622 @@
# -*- coding: utf-8 -*-
"""
微信群岗位信息提取工具 - GUI版本
支持多群组监听使用UUID作为岗位ID
"""
import json
import os
import re
import sys
import threading
import time
import tkinter as tk
import uuid
from datetime import datetime
from pathlib import Path
from tkinter import ttk, scrolledtext, messagebox

# pythoncom provides per-thread COM initialisation (fixes errors in the packaged build)
import pythoncom
import requests

# Put the script directory on sys.path before importing wxauto from it.
_script_dir = os.path.dirname(os.path.abspath(__file__))
if _script_dir not in sys.path:
    sys.path.insert(0, _script_dir)

from wxauto import WeChat
class JobExtractorGUI:
    """Tkinter window that listens to WeChat groups and extracts job postings.

    Incoming text messages are sent to the configured text-generation API,
    the JSON answer is shown in a tree view and appended to a JSON file.
    Listening runs on a daemon worker thread; every widget update is routed
    back to the Tk thread.
    """

    # Separators accepted between group names: ASCII and full-width
    # comma/semicolon, plus newline.
    _GROUP_SPLIT_RE = re.compile(r"[,;，；\n]+")
    # Optional Markdown code fence wrapped around the model's JSON answer.
    _CODE_FENCE_RE = re.compile(r"^```[A-Za-z]*\s*(.*?)\s*```$", re.DOTALL)
    # (column id, heading text, width) for the job list, in display order.
    _JOB_COLUMNS = (
        ("group", "来源群组", 120),
        ("job_name", "岗位名称", 150),
        ("company", "公司", 130),
        ("location", "地点", 100),
        ("salary", "薪资", 100),
        ("contact", "联系方式", 130),
        ("time", "提取时间", 140),
    )

    def __init__(self, root):
        """Set up state, load the configuration and build the window.

        Args:
            root: the Tk root window the interface is built in.
        """
        self.root = root
        self.root.title("微信群岗位信息提取工具 v1.1")
        self.root.geometry("1200x700")

        self.wx = None
        self.is_running = False
        self.job_count = 0
        self.config_file = "config.json"
        # NOTE(review): the "output_file" config key is never read; the path
        # below is always used. Confirm whether the key should override it.
        self.output_file = "jobs_data.json"
        self.active_groups = {}  # group name -> value returned by AddListenChat

        self.load_config()
        self.create_widgets()
        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

    # ------------------------------------------------------------------
    # Configuration
    # ------------------------------------------------------------------
    def load_config(self):
        """Load ``self.config`` from ``self.config_file``.

        Values found in the file are laid over the defaults, so a partial
        file can no longer cause a ``KeyError`` later. A missing, unreadable
        or non-object file yields the plain defaults.
        """
        loaded = {}
        if os.path.exists(self.config_file):
            try:
                with open(self.config_file, "r", encoding="utf-8") as f:
                    loaded = json.load(f)
            except (OSError, ValueError):
                # Unreadable or malformed JSON: fall back to the defaults.
                loaded = {}
        if not isinstance(loaded, dict):
            loaded = {}
        self.config = {**self.get_default_config(), **loaded}

    def get_default_config(self):
        """Return a fresh dict holding the default configuration."""
        return {
            "target_groups": [],
            "bailian_api_url": "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation",
            "api_key": "",
            "output_file": "jobs_data.json"
        }

    def save_config(self):
        """Write ``self.config`` to ``self.config_file``.

        Returns:
            True on success; False after showing an error dialog on failure.
        """
        try:
            with open(self.config_file, "w", encoding="utf-8") as f:
                json.dump(self.config, f, ensure_ascii=False, indent=2)
        except (OSError, TypeError, ValueError) as e:
            messagebox.showerror("错误", f"保存配置失败: {e}")
            return False
        return True

    @classmethod
    def _parse_groups(cls, text):
        """Split a separator-delimited string into trimmed, non-empty group names."""
        return [name.strip() for name in cls._GROUP_SPLIT_RE.split(text) if name.strip()]

    # ------------------------------------------------------------------
    # Widget construction
    # ------------------------------------------------------------------
    def create_widgets(self):
        """Build the window top to bottom, then load previously saved jobs."""
        self._build_config_area()
        self._build_control_area()
        self._build_status_bar()
        self._build_job_list()
        self._build_log_area()
        # Uses both the tree view and the log widget, so it has to run last.
        self.load_existing_jobs()

    def _build_config_area(self):
        """Create the group entry, the API key display and the save button."""
        config_frame = ttk.LabelFrame(self.root, text="配置", padding=10)
        config_frame.pack(fill=tk.X, padx=10, pady=5)

        ttk.Label(config_frame, text="目标群组:").grid(row=0, column=0, sticky=tk.W, pady=5)
        ttk.Label(
            config_frame, text="(多个群组用逗号分隔)", font=("", 8), foreground="gray"
        ).grid(row=0, column=1, sticky=tk.W, padx=5, pady=5)

        self.group_entry = ttk.Entry(config_frame, width=60)
        self.group_entry.grid(row=1, column=0, columnspan=2, sticky=tk.W, padx=5, pady=5)
        groups = self.config.get("target_groups", [])
        if groups:
            self.group_entry.insert(0, ", ".join(groups))

        # The API key is display-only; it is not editable in the window.
        ttk.Label(config_frame, text="API密钥:").grid(row=2, column=0, sticky=tk.W, pady=5)
        api_key = self.config.get("api_key", "")
        if api_key:
            # Keys longer than 20 characters show only their first and last 10.
            key_text = api_key[:10] + "..." + api_key[-10:] if len(api_key) > 20 else api_key
            key_color = "green"
        else:
            key_text, key_color = "未配置", "red"
        ttk.Label(config_frame, text=key_text, foreground=key_color).grid(
            row=2, column=1, sticky=tk.W, padx=5, pady=5
        )

        ttk.Button(config_frame, text="保存群组配置", command=self.save_config_click).grid(
            row=1, column=2, padx=10
        )

    def _build_control_area(self):
        """Create the start / stop / clear / export buttons."""
        control_frame = ttk.Frame(self.root, padding=10)
        control_frame.pack(fill=tk.X, padx=10)

        self.start_btn = ttk.Button(control_frame, text="开始任务", command=self.start_task, width=15)
        self.start_btn.pack(side=tk.LEFT, padx=5)
        self.stop_btn = ttk.Button(control_frame, text="停止任务", command=self.stop_task,
                                   width=15, state=tk.DISABLED)
        self.stop_btn.pack(side=tk.LEFT, padx=5)
        ttk.Button(control_frame, text="清空列表", command=self.clear_jobs, width=15).pack(
            side=tk.LEFT, padx=5
        )
        ttk.Button(control_frame, text="导出数据", command=self.export_data, width=15).pack(
            side=tk.LEFT, padx=5
        )

    def _build_status_bar(self):
        """Create the status text and the extracted-job counter."""
        status_frame = ttk.Frame(self.root)
        status_frame.pack(fill=tk.X, padx=10, pady=5)

        ttk.Label(status_frame, text="状态:").pack(side=tk.LEFT)
        self.status_label = ttk.Label(status_frame, text="未启动", foreground="gray")
        self.status_label.pack(side=tk.LEFT, padx=5)
        ttk.Label(status_frame, text="已提取岗位:").pack(side=tk.LEFT, padx=(20, 0))
        self.count_label = ttk.Label(status_frame, text="0", foreground="blue")
        self.count_label.pack(side=tk.LEFT, padx=5)

    def _build_job_list(self):
        """Create the tree view listing extracted jobs, with its scrollbar."""
        list_frame = ttk.LabelFrame(self.root, text="提取的岗位信息", padding=10)
        list_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

        column_ids = tuple(column_id for column_id, _, _ in self._JOB_COLUMNS)
        self.tree = ttk.Treeview(list_frame, columns=column_ids, show="tree headings", height=15)
        self.tree.heading("#0", text="序号")
        self.tree.column("#0", width=50)
        for column_id, heading, width in self._JOB_COLUMNS:
            self.tree.heading(column_id, text=heading)
            self.tree.column(column_id, width=width)

        scrollbar = ttk.Scrollbar(list_frame, orient=tk.VERTICAL, command=self.tree.yview)
        self.tree.configure(yscrollcommand=scrollbar.set)
        self.tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        # Double-click opens the detail window for the selected row.
        self.tree.bind("<Double-1>", self.show_job_detail)

    def _build_log_area(self):
        """Create the scrolling run-log text box."""
        log_frame = ttk.LabelFrame(self.root, text="运行日志", padding=10)
        log_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
        self.log_text = scrolledtext.ScrolledText(log_frame, height=8, wrap=tk.WORD)
        self.log_text.pack(fill=tk.BOTH, expand=True)

    # ------------------------------------------------------------------
    # Logging
    # ------------------------------------------------------------------
    def log(self, message):
        """Append a timestamped line to the log box; callable from any thread.

        Tk widgets may only be touched from the thread running the event
        loop, so calls from other threads are queued with ``root.after``.
        """
        timestamp = datetime.now().strftime("%H:%M:%S")
        log_msg = f"[{timestamp}] {message}\n"
        if threading.current_thread() is threading.main_thread():
            self._append_log(log_msg)
            return
        try:
            self.root.after(0, self._append_log, log_msg)
        except (RuntimeError, tk.TclError):
            # The window is already gone; there is nowhere left to log to.
            pass

    def _append_log(self, log_msg):
        """Insert one formatted line into the log box (Tk thread only)."""
        self.log_text.insert(tk.END, log_msg)
        self.log_text.see(tk.END)
        # Redraw without dispatching user events; a full update() here could
        # re-enter button callbacks in the middle of another handler.
        self.root.update_idletasks()

    # ------------------------------------------------------------------
    # Button handlers
    # ------------------------------------------------------------------
    def save_config_click(self):
        """Validate the group entry and persist it as ``target_groups``."""
        groups_text = self.group_entry.get().strip()
        if not groups_text:
            messagebox.showwarning("警告", "请输入至少一个目标群组名称")
            return

        groups = self._parse_groups(groups_text)
        if not groups:
            messagebox.showwarning("警告", "请输入至少一个有效的群组名称")
            return

        self.config["target_groups"] = groups
        if self.save_config():
            messagebox.showinfo("成功", f"已保存 {len(groups)} 个群组配置:\n" + "\n".join(f"- {g}" for g in groups))
            self.log(f"配置已保存: {len(groups)} 个群组")

    def start_task(self):
        """Validate the inputs, save the groups and start the listener thread."""
        groups_text = self.group_entry.get().strip()
        api_key = self.config.get("api_key", "")
        if not groups_text:
            messagebox.showwarning("警告", "请输入目标群组名称")
            return
        if not api_key:
            messagebox.showerror("错误", "API密钥未配置请在config.json中配置后重新打包")
            return

        groups = self._parse_groups(groups_text)
        if not groups:
            messagebox.showwarning("警告", "请输入至少一个有效的群组名称")
            return

        self.config["target_groups"] = groups
        self.save_config()

        self.is_running = True
        self.start_btn.config(state=tk.DISABLED)
        self.stop_btn.config(state=tk.NORMAL)
        self.status_label.config(text="正在启动...", foreground="orange")
        threading.Thread(target=self.run_task, daemon=True).start()

    # ------------------------------------------------------------------
    # Worker thread
    # ------------------------------------------------------------------
    def run_task(self):
        """Connect to WeChat and register a listener per configured group.

        Runs on the worker thread until ``is_running`` becomes False. Any
        failure is reported through an error dialog scheduled on the Tk
        thread, followed by ``stop_task``.
        """
        # COM has to be initialised on every thread that uses it.
        pythoncom.CoInitialize()
        try:
            self.log("正在连接微信...")
            # Work on a local reference: stop_task() may reset self.wx from
            # the Tk thread while this loop is still running.
            wx = WeChat()
            self.wx = wx
            self.log(f"✓ 已连接微信,当前用户: {wx.nickname}")

            groups = self.config.get("target_groups", [])
            if not groups:
                self.log("× 错误: 未配置目标群组")
                self._abort_task("未配置目标群组")
                return

            success_count = 0
            for group_name in groups:
                self.log(f"正在添加监听: {group_name}")
                result = wx.AddListenChat(
                    nickname=group_name,
                    callback=self._make_group_callback(group_name),
                )
                # A string result containing "失败" is treated as a failure.
                if isinstance(result, str) and "失败" in result:
                    self.log(f"× 添加监听失败: {group_name} - {result}")
                else:
                    self.log(f"✓ 成功监听群组: {group_name}")
                    self.active_groups[group_name] = result
                    success_count += 1

            if success_count == 0:
                self.log("× 错误: 所有群组监听都失败")
                self._abort_task("所有群组监听都失败,请检查群组名称")
                return

            self.log(f"✓ 成功监听 {success_count}/{len(groups)} 个群组")
            status_text = f"运行中 ({success_count}个群组)"
            self.root.after(0, lambda: self.status_label.config(
                text=status_text, foreground="green"
            ))

            # Keep the thread alive until stop_task() clears the flag.
            while self.is_running:
                time.sleep(1)
        except Exception as e:
            # Thread boundary: report everything. The text is formatted now
            # because ``e`` is unbound once this except block ends, long
            # before the scheduled dialog callback runs.
            self.log(f"× 错误: {e}")
            self._abort_task(f"任务执行失败: {e}")
        finally:
            pythoncom.CoUninitialize()

    def _abort_task(self, dialog_text):
        """Schedule an error dialog and ``stop_task`` on the Tk thread."""
        self.root.after(0, messagebox.showerror, "错误", dialog_text)
        self.root.after(0, self.stop_task)

    def _make_group_callback(self, group_name):
        """Return a message callback bound to one specific group name."""
        def callback(msg, chat):
            self.on_message(msg, chat, group_name)
        return callback

    def on_message(self, msg, chat, group_name):
        """Handle one incoming message from a listened group.

        Non-text or empty messages are ignored. Text is passed to
        ``extract_job_info``; a non-empty result gets a UUID and metadata,
        is shown in the list and is appended to the data file.
        """
        try:
            self.log(f"[{group_name}] 收到消息 - 发送者: {msg.sender}, 类型: {msg.type}")
            if msg.type != "text" or not msg.content:
                return

            self.log(f"[{group_name}] 正在分析消息内容...")
            job_info = self.extract_job_info(msg.content)
            if not (job_info and any(job_info.values())):
                self.log(f"[{group_name}] × 未提取到有效岗位信息")
                return

            self.log(f"[{group_name}] ✓ 提取到岗位信息")
            job_info["_id"] = str(uuid.uuid4())
            job_info["_metadata"] = {
                "source": "wechat_group",
                "group_name": group_name,
                "sender": msg.sender,
                "extract_time": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                "original_message": msg.content
            }
            # The tree view belongs to the Tk thread; the file write can
            # happen right here.
            self.root.after(0, self.add_job_to_list, job_info)
            self.save_job_data(job_info)
        except Exception as e:
            # Callback boundary: never let an error escape into the listener.
            self.log(f"[{group_name}] × 处理消息时出错: {e}")

    # ------------------------------------------------------------------
    # Extraction
    # ------------------------------------------------------------------
    def extract_job_info(self, message_content):
        """Ask the configured API to extract job fields from a message.

        Args:
            message_content: raw message text.

        Returns:
            A non-empty dict of extracted fields, or None when the call
            fails or the answer is not a non-empty JSON object.
        """
        prompt = f"""请从以下消息中提取招聘岗位信息并以JSON格式返回。如果消息不包含招聘信息返回空对象。
要提取的字段:
- job_name: 工作名称
- job_description: 工作描述
- job_location: 工作地点
- salary_min: 月薪最低(数字,单位:元)
- salary_max: 月薪最高(数字,单位:元)
- company_name: 公司名称
- contact_person: 联系人
- contact_info: 联系方式(电话/微信等)
消息内容:
{message_content}
请直接返回JSON格式不要包含其他说明文字。"""
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.config['api_key']}"
        }
        payload = {
            "model": "qwen-plus",
            "input": {
                "messages": [
                    {
                        "role": "system",
                        "content": "你是一个专业的招聘信息提取助手,擅长从文本中提取结构化的岗位信息。"
                    },
                    {
                        "role": "user",
                        "content": prompt
                    }
                ]
            },
            "parameters": {
                "result_format": "message"
            }
        }
        try:
            response = requests.post(
                self.config["bailian_api_url"],
                headers=headers,
                json=payload,
                timeout=30
            )
            response.raise_for_status()
            result = response.json()
            if "output" in result and "choices" in result["output"]:
                content = result["output"]["choices"][0]["message"]["content"]
                return self._parse_job_json(content)
            return None
        except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
            # Network/HTTP errors or an unexpectedly shaped response body.
            self.log(f"API调用失败: {e}")
            return None

    @classmethod
    def _parse_job_json(cls, content):
        """Parse the model's answer into a non-empty dict, or return None.

        A surrounding Markdown code fence is removed first, because models
        often wrap JSON in one even when asked not to.
        """
        if not isinstance(content, str):
            return None
        text = content.strip()
        fenced = cls._CODE_FENCE_RE.match(text)
        if fenced:
            text = fenced.group(1)
        try:
            job_data = json.loads(text)
        except json.JSONDecodeError:
            return None
        # Callers use dict methods on the result, so reject lists/scalars.
        if isinstance(job_data, dict) and job_data:
            return job_data
        return None

    @staticmethod
    def _format_salary(salary_min, salary_max):
        """Render the salary bounds as text; missing bounds give an open range."""
        if salary_min and salary_max:
            return f"{salary_min}-{salary_max}"
        if salary_min:
            return f"{salary_min}+"
        if salary_max:
            return f"<{salary_max}"
        return "面议"

    # ------------------------------------------------------------------
    # Job list and persistence
    # ------------------------------------------------------------------
    def add_job_to_list(self, job_info):
        """Insert a job at the top of the tree view and bump the counter.

        Missing or null fields show a placeholder instead of ``None``.
        """
        self.job_count += 1
        metadata = job_info.get("_metadata") or {}

        # Join only the parts that are present; a null contact_person used
        # to raise TypeError on string concatenation.
        contact = " ".join(
            str(part)
            for part in (job_info.get("contact_person"), job_info.get("contact_info"))
            if part
        )
        values = (
            metadata.get("group_name") or "未知",
            job_info.get("job_name") or "未知",
            job_info.get("company_name") or "未知",
            job_info.get("job_location") or "未知",
            self._format_salary(job_info.get("salary_min"), job_info.get("salary_max")),
            contact,
            metadata.get("extract_time") or "",
        )
        # The complete record rides along as the row's only tag so that
        # show_job_detail can recover it.
        self.tree.insert(
            "", 0,
            text=str(self.job_count),
            values=values,
            tags=(json.dumps(job_info, ensure_ascii=False),),
        )
        self.count_label.config(text=str(self.job_count))

    def save_job_data(self, job_data):
        """Append one job record to the JSON array in ``self.output_file``.

        The new content is written to a temporary file and swapped in with
        ``os.replace``, so a failed dump cannot truncate existing history.
        Failures are logged, not raised.
        """
        tmp_path = self.output_file + ".tmp"
        try:
            existing_data = []
            if os.path.exists(self.output_file):
                with open(self.output_file, "r", encoding="utf-8") as f:
                    existing_data = json.load(f)
            if not isinstance(existing_data, list):
                raise ValueError("data file does not contain a JSON array")
            existing_data.append(job_data)
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(existing_data, f, ensure_ascii=False, indent=2)
            os.replace(tmp_path, self.output_file)
        except (OSError, TypeError, ValueError) as e:
            self.log(f"保存数据失败: {e}")

    def load_existing_jobs(self):
        """Populate the tree view from ``self.output_file`` if it exists.

        Entries that are not JSON objects are skipped instead of aborting
        the whole load.
        """
        if not os.path.exists(self.output_file):
            return
        try:
            with open(self.output_file, "r", encoding="utf-8") as f:
                jobs = json.load(f)
            if not isinstance(jobs, list):
                raise ValueError("data file does not contain a JSON array")
        except (OSError, ValueError) as e:
            self.log(f"加载历史数据失败: {e}")
            return

        loaded = 0
        for job in jobs:
            if isinstance(job, dict):
                self.add_job_to_list(job)
                loaded += 1
        if loaded:
            self.log(f"已加载 {loaded} 条历史岗位数据")

    def show_job_detail(self, event):
        """Open a read-only window with every field of the selected job.

        Args:
            event: the Tk double-click event (unused).
        """
        selection = self.tree.selection()
        if not selection:
            return
        tags = self.tree.item(selection[0]).get("tags", ())
        if not tags:
            return
        try:
            job_info = json.loads(tags[0])

            detail_win = tk.Toplevel(self.root)
            detail_win.title("岗位详情")
            detail_win.geometry("600x550")
            text = scrolledtext.ScrolledText(detail_win, wrap=tk.WORD, padx=10, pady=10)
            text.pack(fill=tk.BOTH, expand=True)

            text.insert(tk.END, f"岗位ID: {job_info.get('_id') or '未知'}\n\n")
            text.insert(tk.END, f"岗位名称: {job_info.get('job_name') or '未知'}\n\n")
            text.insert(tk.END, f"公司名称: {job_info.get('company_name') or '未知'}\n\n")
            text.insert(tk.END, f"工作地点: {job_info.get('job_location') or '未知'}\n\n")

            salary_min = job_info.get("salary_min")
            salary_max = job_info.get("salary_max")
            if salary_min or salary_max:
                # Same rendering as the list column, so a missing bound is
                # not printed as "None".
                text.insert(tk.END, f"薪资范围: {self._format_salary(salary_min, salary_max)}\n\n")
            if job_info.get("job_description"):
                text.insert(tk.END, f"工作描述:\n{job_info['job_description']}\n\n")
            if job_info.get("contact_person"):
                text.insert(tk.END, f"联系人: {job_info['contact_person']}\n")
            if job_info.get("contact_info"):
                text.insert(tk.END, f"联系方式: {job_info['contact_info']}\n\n")

            meta = job_info.get("_metadata")
            if meta:
                text.insert(tk.END, "=" * 50 + "\n")
                text.insert(tk.END, f"来源群组: {meta.get('group_name', '未知')}\n")
                text.insert(tk.END, f"发送者: {meta.get('sender', '未知')}\n")
                text.insert(tk.END, f"提取时间: {meta.get('extract_time', '未知')}\n\n")
                if meta.get("original_message"):
                    text.insert(tk.END, "原始消息:\n")
                    text.insert(tk.END, meta["original_message"])
            text.config(state=tk.DISABLED)
        except Exception as e:
            # Event-handler boundary: tell the user instead of failing silently.
            messagebox.showerror("错误", f"显示详情失败: {e}")

    # ------------------------------------------------------------------
    # Stop / clear / export / close
    # ------------------------------------------------------------------
    def stop_task(self):
        """Stop listening and return the controls to the idle state.

        The running flag is always cleared, including when the WeChat
        connection was never established, so the window cannot stay stuck
        in the "running" state after a failed start.
        """
        was_running = self.is_running
        self.is_running = False
        wx, self.wx = self.wx, None
        if wx is not None and was_running:
            self.log("正在停止监听...")
            try:
                wx.StopListening()
            except Exception as e:
                # Best effort: stopping must not prevent the UI reset below.
                self.log(f"× 停止监听时出错: {e}")
        self.active_groups.clear()

        self.start_btn.config(state=tk.NORMAL)
        self.stop_btn.config(state=tk.DISABLED)
        self.status_label.config(text="已停止", foreground="gray")
        self.log("任务已停止")

    def clear_jobs(self):
        """After confirmation, empty the list and delete the data file."""
        if not messagebox.askyesno("确认", "确定要清空所有岗位数据吗?"):
            return
        if os.path.exists(self.output_file):
            try:
                os.remove(self.output_file)
            except OSError as e:
                # Leave the list alone so it still matches the file on disk.
                messagebox.showerror("错误", f"清空失败: {e}")
                return
        for item in self.tree.get_children():
            self.tree.delete(item)
        self.job_count = 0
        self.count_label.config(text="0")
        self.log("已清空所有岗位数据")

    def export_data(self):
        """Copy the data file to a timestamped ``jobs_export_*.json`` file."""
        if not os.path.exists(self.output_file):
            messagebox.showinfo("提示", "暂无数据可导出")
            return
        try:
            export_file = f"jobs_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            with open(self.output_file, "r", encoding="utf-8") as f:
                data = f.read()
            with open(export_file, "w", encoding="utf-8") as f:
                f.write(data)
        except (OSError, ValueError) as e:
            messagebox.showerror("错误", f"导出失败: {e}")
            return
        messagebox.showinfo("成功", f"数据已导出到: {export_file}")
        self.log(f"数据已导出到: {export_file}")

    def on_closing(self):
        """Handle the window close button, confirming first if a task runs."""
        if self.is_running:
            if not messagebox.askyesno("确认", "任务正在运行,确定要退出吗?"):
                return
            self.stop_task()
        self.root.destroy()
def main():
    """Create the Tk root window, attach the extractor GUI and run the event loop."""
    window = tk.Tk()
    gui = JobExtractorGUI(window)
    window.mainloop()


if __name__ == "__main__":
    main()