Files
ks-app-employment-service/utils/markdownParser.js

157 lines
6.1 KiB
JavaScript
Raw Permalink Normal View History

2025-03-28 15:19:42 +08:00
import MarkdownIt from '@/lib/markdown-it.min.js';
import hljs from "@/lib/highlight/highlight-uni.min.js";
import parseHtml from '@/lib/html-parser.js';
// import DOMPurify from '@/lib/dompurify@3.2.4es.js';
// Raw source text of every code block rendered by the latest parseMarkdown() call.
// The position in this array is written to the copy button's `data-copy-index`
// attribute; the list is reset at the start of each parseMarkdown() render.
export let codeDataList = []
2025-08-20 13:38:47 +08:00
// Maps a job id (taken from a `job-json` block's appJobUrl) to that block's `data`
// payload, which backs the "view more jobs" link. Emptied by clearJobMoreMap().
export let jobMoreMap = new Map()
2025-03-28 15:19:42 +08:00
/**
 * Escapes a value for safe interpolation into HTML text or a double-quoted attribute.
 * `null`/`undefined` become an empty string instead of the literal text "undefined".
 *
 * @param {*} value - Any value taken from untrusted (model-generated) content.
 * @returns {string} HTML-escaped string.
 */
function escapeHtmlValue(value) {
    const text = value === null || value === undefined ? '' : String(value);
    return md.utils.escapeHtml(text);
}

/**
 * Builds the job card markup for a parsed `job-json` block.
 * Side effect: when the payload carries `data`, it is stored in `jobMoreMap`
 * under the job id and a "view more jobs" link is appended.
 *
 * @param {Object} job - Object parsed from the fenced block.
 * @returns {string} HTML for the card (and optional "more" link).
 */
function renderJobCard(job) {
    // A missing / non-string appJobUrl must not throw here: an exception inside
    // the highlight callback would abort the whole md.render() call.
    const idPart = typeof job.appJobUrl === 'string' ? job.appJobUrl.split('jobId=')[1] : undefined;
    // NOTE(review): everything after "jobId=" is kept, including any further
    // query parameters — confirm that appJobUrl always ends with the id.
    const jobId = idPart === undefined ? '' : idPart;
    const jobIdAttr = escapeHtmlValue(jobId);

    let cardHtml = [
        `<a class="custom-card" data-job-id="${jobIdAttr}">`,
        '<div class="card-title">',
        `<span class="title-text">${escapeHtmlValue(job.jobTitle)}</span>`,
        `<div class="card-salary">${escapeHtmlValue(job.salary)}</div>`,
        '</div>',
        `<div class="card-company">${escapeHtmlValue(job.location)}·${escapeHtmlValue(job.companyName)}</div>`,
        '<div class="card-info">',
        '<div class="info-item">',
        `<div class="card-tag">${escapeHtmlValue(job.education)}</div>`,
        `<div class="card-tag">${escapeHtmlValue(job.experience)}</div>`,
        '</div>',
        '<div class="info-item">查看详情<div class="position-nav"></div></div>',
        '</div>',
        '</a>'
    ].join('');

    if (job.data) {
        // The map is keyed by the raw id; the attribute holds its escaped form.
        jobMoreMap.set(jobId, job.data);
        cardHtml += `<a class="custom-more" data-job-id="${jobIdAttr}">查看更多岗位<div class="more-icon"></div></a>`;
    }
    return cardHtml;
}

/**
 * Renders a fenced code block as a numbered list with a header that shows the
 * language label and a copy button.
 * Side effect: pushes the raw source onto `codeDataList`; its index is written
 * to the copy button's `data-copy-index` attribute.
 *
 * @param {string} str - Raw code block content.
 * @param {string} lang - Language name from the fence info string (may be empty).
 * @returns {string} HTML for the code container.
 */
function renderCodeBlock(str, lang) {
    let highlighted = '';
    try {
        highlighted = hljs.highlightAuto(str).value;
    } catch (err) {
        // Highlighting is best-effort: fall back to the escaped plain source.
        highlighted = md.utils.escapeHtml(str);
    }
    // Split into lines; the segment after the final newline is dropped.
    const lines = highlighted.split(/\n/).slice(0, -1);
    const html = lines
        .map((line, index) =>
            line ?
            `<li><span class="line-num" data-line="${index + 1}"></span>${line}</li>` :
            '<li></li>'
        )
        .join('');
    // Cache the raw source so the copy button can look it up by index.
    const cacheIndex = codeDataList.length;
    codeDataList.push(str);
    // The language name comes from the document being rendered, so escape it.
    const langLabel = escapeHtmlValue(lang) || 'plaintext';
    return `
<div class="code-container">
<div class="code-header">
<span class="lang-label">${langLabel}</span>
<a class="copy-btn" data-copy-index="${cacheIndex}">复制代码</a>
</div>
<pre class="hljs"><code><ol>${html}</ol></code></pre>
</div>
`;
}

// Shared markdown-it instance used by parseMarkdown().
const md = new MarkdownIt({
    html: true, // allow HTML tags in the source
    linkify: true, // turn URL-like text into links
    typographer: true, // prettify punctuation
    tables: true,
    breaks: true, // render "\n" as a line break
    langPrefix: 'language-', // CSS class prefix for fenced code blocks
    // If the result starts with "<pre", markdown-it skips its own wrapper.
    highlight: function(str, lang) {
        if (lang === 'job-json') {
            const job = safeExtractJson(str);
            if (job) { // JSON parsed successfully: render a job card instead of code
                return renderJobCard(job);
            }
        }
        // Regular code block (also used when a job-json block has no parsable JSON).
        return renderCodeBlock(str, lang);
    }
})
2025-08-20 13:38:47 +08:00
/**
 * Finds the first balanced `{ ... }` span in `text` and parses it as JSON.
 *
 * Braces inside JSON string values (including strings with escaped quotes) are
 * ignored while matching, and a stray `}` that appears before any `{` is skipped
 * instead of terminating the scan.
 *
 * @param {string} text - Text that may contain a JSON object.
 * @returns {Object|null} The parsed object, or null when `text` is not a string,
 *   no balanced object is found, or the candidate span is not valid JSON.
 */
function extractFirstJson(text) {
    if (typeof text !== 'string') {
        return null;
    }

    let depth = 0; // nesting level of structural braces
    let startIndex = -1;
    let endIndex = -1;
    let inString = false; // inside a double-quoted string of the candidate object
    let escaped = false; // previous character was a backslash inside a string

    for (let i = 0; i < text.length; i++) {
        const char = text[i];

        if (inString) {
            if (escaped) {
                escaped = false;
            } else if (char === '\\') {
                escaped = true;
            } else if (char === '"') {
                inString = false;
            }
            continue;
        }

        if (char === '"') {
            // Quotes in the prose before the object are irrelevant; only track
            // strings once a candidate object has been opened.
            if (depth > 0) {
                inString = true;
            }
        } else if (char === '{') {
            if (depth === 0) {
                startIndex = i; // position of the outermost '{'
            }
            depth++;
        } else if (char === '}' && depth > 0) {
            depth--;
            if (depth === 0) {
                endIndex = i; // matching outermost '}'
                break;
            }
        }
    }

    if (endIndex === -1) {
        return null; // no balanced object found
    }

    try {
        return JSON.parse(text.slice(startIndex, endIndex + 1));
    } catch (e) {
        return null; // balanced braces, but not valid JSON
    }
}
2025-04-10 10:59:25 +08:00
/**
 * Non-throwing wrapper around extractFirstJson().
 *
 * @param {string} text - Text that may contain a JSON object.
 * @returns {Object|null} Whatever extractFirstJson() returns, or null if it throws
 *   (the error is logged to the console).
 */
function safeExtractJson(text) {
    let parsed = null;
    try {
        parsed = extractFirstJson(text);
    } catch (e) {
        console.error('JSON 解析失败:', e);
    }
    return parsed;
}
2025-08-20 13:38:47 +08:00
/**
 * Removes every entry from `jobMoreMap`.
 * Intended to be called when the user switches to another conversation.
 */
export function clearJobMoreMap() {
    jobMoreMap.clear();
}
2025-03-28 15:19:42 +08:00
/**
 * Renders markdown to displayable output, with `<think>` sections removed.
 *
 * `<think>...</think>` holds the model's internal reasoning and must not reach
 * the user, so it is stripped both before and after markdown rendering.
 * Side effect: resets `codeDataList` before rendering.
 *
 * @param {string} content - Markdown source.
 * @returns {Array|string} An empty array when `content` is falsy. On the
 *   `mp-weixin` platform, the result of parseHtml() (or an empty array if it
 *   throws). On every other platform, the filtered HTML string.
 */
export function parseMarkdown(content) {
    // Special case: e.g. a network failure left the response content empty.
    if (!content) {
        return [];
    }

    // Applied in order, both before and after rendering:
    // raw pair (multi-line), HTML-encoded pair, then half-encoded open/close tags.
    const thinkBlockPatterns = [
        /<\s*think\s*>[\s\S]*?<\s*\/\s*think\s*>/gi,
        /&lt;\s*think\s*&gt;[\s\S]*?&lt;\s*\/\s*think\s*&gt;/gi,
        /&lt;\s*think\s*>/gi,
        /<\s*\/\s*think\s*&gt;/gi,
    ];
    // Applied only after rendering: leftover unpaired tags.
    const strayTagPatterns = [
        /&lt;think&gt;/gi,
        /&lt;\/think&gt;/gi,
        /<think>/gi,
        /<\/think>/gi,
    ];
    const stripAll = (text, patterns) =>
        patterns.reduce((remaining, pattern) => remaining.replace(pattern, ''), text);

    const visibleMarkdown = stripAll(content, thinkBlockPatterns);

    codeDataList = [];
    const unsafeHtml = md.render(visibleMarkdown || '');

    // Filter again after rendering so nothing slips through.
    const filteredHtml = stripAll(stripAll(unsafeHtml, thinkBlockPatterns), strayTagPatterns);

    // The output format depends on the platform: H5 (and every other platform)
    // gets the HTML string directly, avoiding HTML parsing errors.
    if (process.env.UNI_PLATFORM !== 'mp-weixin') {
        return filteredHtml;
    }

    // WeChat mini program: return the nodes format supported by rich-text.
    try {
        return parseHtml(filteredHtml);
    } catch (error) {
        console.error('HTML解析失败:', error);
        // Return an empty array on failure so the page does not crash.
        return [];
    }
}