Files
ks-app-employment-service/utils/markdownParser.js
2026-01-22 14:05:22 +08:00

157 lines
6.1 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import MarkdownIt from '@/lib/markdown-it.min.js';
import hljs from "@/lib/highlight/highlight-uni.min.js";
import parseHtml from '@/lib/html-parser.js';
// import DOMPurify from '@/lib/dompurify@3.2.4es.js';
export let codeDataList = []
export let jobMoreMap = new Map()
const md = new MarkdownIt({
html: true, // 允许 HTML 标签
linkify: true, // 自动解析 URL
typographer: true, // 美化标点符号
tables: true,
breaks: true, // 让 \n 自动换行
langPrefix: 'language-', // 代码高亮前缀
// 如果结果以 <pre ... 开头,内部包装器则会跳过。
highlight: function(str, lang) {
if (lang === 'job-json') {
const result = safeExtractJson(str);
if (result) { // json解析成功
const jobId = result.appJobUrl.split('jobId=')[1]
let domContext = `<a class="custom-card" data-job-id="${jobId}"><div class="card-title"><span class="title-text">${result.jobTitle}</span><div class="card-salary">${result.salary}</div></div><div class="card-company">${result.location}·${result.companyName}</div><div class="card-info"><div class="info-item"><div class="card-tag">${result.education}</div><div class="card-tag">${result.experience}</div></div><div class="info-item">查看详情<div class="position-nav"></div></div></div></a>`
if (result.data) {
jobMoreMap.set(jobId, result.data)
domContext += `<a class="custom-more" data-job-id="${jobId}">查看更多岗位<div class="more-icon"></div></a>`
}
return domContext
}
}
// 代码块
let preCode = ""
try {
preCode = hljs.highlightAuto(str).value
} catch (err) {
preCode = md.utils.escapeHtml(str);
}
// 以换行进行分割 , 按行拆分代码
const lines = preCode.split(/\n/).slice(0, -1);
const html = lines
.map((line, index) =>
line ?
`<li><span class="line-num" data-line="${index + 1}"></span>${line}</li>` :
'<li></li>'
)
.join('');
// 代码复制功能
const cacheIndex = codeDataList.length;
codeDataList.push(str);
return `
<div class="code-container">
<div class="code-header">
<span class="lang-label">${lang || 'plaintext'}</span>
<a class="copy-btn" data-copy-index="${cacheIndex}">复制代码</a>
</div>
<pre class="hljs"><code><ol>${html}</ol></code></pre>
</div>
`;
}
})
function extractFirstJson(text) {
let stack = [];
let startIndex = -1;
let endIndex = -1;
for (let i = 0; i < text.length; i++) {
const char = text[i];
if (char === '{') {
if (stack.length === 0) startIndex = i; // 记录第一个 '{' 的位置
stack.push(char);
} else if (char === '}') {
stack.pop();
if (stack.length === 0) {
endIndex = i; // 找到配对的 '}'
break;
}
}
}
if (startIndex !== -1 && endIndex !== -1) {
const jsonString = text.slice(startIndex, endIndex + 1);
try {
const jsonObject = JSON.parse(jsonString);
return jsonObject;
} catch (e) {
return null; // 如果不是有效的 JSON
}
}
return null; // 如果没有找到有效的 JSON 对象
}
function safeExtractJson(text) {
try {
const jsonObject = extractFirstJson(text);
return jsonObject
} catch (e) {
console.error('JSON 解析失败:', e);
}
return null;
}
export function clearJobMoreMap() { // 切换对话清空
jobMoreMap.clear()
}
export function parseMarkdown(content) {
if (!content) {
return [] //处理特殊情况,比如网络异常导致的响应的 content 的值为空
}
// 过滤掉<think>标签及其内容这些是AI内部思考过程不应该显示给用户
// 1. 处理原始标签(支持多行)
content = content.replace(/<\s*think\s*>[\s\S]*?<\s*\/\s*think\s*>/gi, '')
// 2. 处理HTML编码的标签
content = content.replace(/&lt;\s*think\s*&gt;[\s\S]*?&lt;\s*\/\s*think\s*&gt;/gi, '')
// 3. 处理部分编码的标签
content = content.replace(/&lt;\s*think\s*>/gi, '')
content = content.replace(/<\s*\/\s*think\s*&gt;/gi, '')
codeDataList = []
const unsafeHtml = md.render(content || '')
// 在markdown渲染后再次过滤确保没有遗漏
let filteredHtml = unsafeHtml
// 1. 处理原始标签(支持多行)
filteredHtml = filteredHtml.replace(/<\s*think\s*>[\s\S]*?<\s*\/\s*think\s*>/gi, '')
// 2. 处理HTML编码的标签
filteredHtml = filteredHtml.replace(/&lt;\s*think\s*&gt;[\s\S]*?&lt;\s*\/\s*think\s*&gt;/gi, '')
// 3. 处理部分编码的标签
filteredHtml = filteredHtml.replace(/&lt;\s*think\s*>/gi, '')
filteredHtml = filteredHtml.replace(/<\s*\/\s*think\s*&gt;/gi, '')
// 4. 单独处理剩余的think标签对
filteredHtml = filteredHtml.replace(/&lt;think&gt;/gi, '')
filteredHtml = filteredHtml.replace(/&lt;\/think&gt;/gi, '')
filteredHtml = filteredHtml.replace(/<think>/gi, '')
filteredHtml = filteredHtml.replace(/<\/think>/gi, '')
// 根据平台返回不同的内容格式
// 微信小程序返回rich-text组件支持的nodes格式
// H5直接返回HTML字符串避免HTML解析错误
if (process.env.UNI_PLATFORM === 'mp-weixin') {
try {
return parseHtml(filteredHtml)
} catch (error) {
console.error('HTML解析失败:', error)
// 解析失败时返回空数组,避免页面崩溃
return []
}
} else {
// H5端直接返回HTML字符串
return filteredHtml
}
}