2025-12-19 10:25:10 +08:00
|
|
|
|
/**
 * FileValidator.js
 * A self-contained class for client-side file safety validation
 * (size limit, extension whitelist, magic-number and text-content checks).
 */
|
|
|
|
|
|
|
|
|
|
|
|
// ==========================================
|
|
|
|
|
|
// 1. 预定义:已知文件类型的魔数 (Signature Database)
|
|
|
|
|
|
// ==========================================
|
|
|
|
|
|
/**
 * Signature database: maps a lower-case file extension to the upper-case hex
 * string its content must start with, or to the marker 'TYPE_TEXT' for formats
 * that have no fixed magic number and are validated by content instead.
 *
 * The table is indexed with extension strings that ultimately come from user
 * input, so it is built without a prototype (a lookup such as
 * KNOWN_SIGNATURES['constructor'] yields undefined instead of an inherited
 * member) and frozen (it is shared by every validator instance).
 */
const KNOWN_SIGNATURES = Object.freeze(
  Object.assign(Object.create(null), {
    // === Images ===
    png: '89504E470D0A1A0A',
    jpg: 'FFD8FF',
    jpeg: 'FFD8FF',
    gif: '47494638',
    // "RIFF" container header only (bytes 0-3); the prefix by itself is shared
    // with other RIFF-based formats.
    webp: '52494646',

    // === Documents (modern Office - ZIP container, "PK\x03\x04") ===
    docx: '504B0304',
    xlsx: '504B0304',
    pptx: '504B0304',

    // === Documents (legacy Office - OLE compound file) ===
    doc: 'D0CF11E0',
    xls: 'D0CF11E0',
    ppt: 'D0CF11E0',

    // === Other ===
    pdf: '25504446', // "%PDF"

    // === Plain text (no fixed magic number; checked by a dedicated routine) ===
    txt: 'TYPE_TEXT',
    csv: 'TYPE_TEXT',
    md: 'TYPE_TEXT',
    json: 'TYPE_TEXT',
  })
);
|
|
|
|
|
|
|
|
|
|
|
|
// ==========================================
|
|
|
|
|
|
// 2. 核心类定义
|
|
|
|
|
|
// ==========================================
|
|
|
|
|
|
/**
 * Client-side file validator.
 *
 * Checks, in order: that a file object with a name was supplied, the size
 * limit, the extension whitelist, and finally the content itself (magic number
 * for binary formats; UTF-8 / NUL / JSON-syntax / CSV-row-count checks for
 * text formats).
 *
 * `validate()` rejects with a plain string message (not an Error) — existing
 * callers interpolate the rejection value directly, so that contract is kept.
 */
export class FileValidator {
  /** Version string of this validator. */
  version = '1.0.0';

  /**
   * @param {Object} [options] Configuration.
   * @param {number} [options.maxSizeMB=10] Maximum file size in MB. Values that
   *   are not a positive number fall back to 10.
   * @param {string[]} [options.allowedExtensions] Whitelisted extensions
   *   (e.g. ['jpg', 'png']; case, surrounding whitespace and a leading dot are
   *   ignored). Entries missing from KNOWN_SIGNATURES are skipped with a
   *   warning. When omitted, every known type is allowed.
   * @param {number} [options.csvMaxRows=0] Maximum number of CSV rows, header
   *   included. 0 (the default) disables the row check.
   */
  constructor(options = {}) {
    const { maxSizeMB, allowedExtensions, csvMaxRows } = options ?? {};

    const sizeLimit = Number(maxSizeMB);
    this.maxSizeMB = sizeLimit > 0 ? sizeLimit : 10;

    // validate() reads this limit, so it must always be initialised here.
    const rowLimit = Number(csvMaxRows);
    this.csvMaxRows = rowLimit > 0 ? rowLimit : 0;

    if (Array.isArray(allowedExtensions)) {
      this.allowedConfig = {};
      for (const ext of allowedExtensions) {
        const key = String(ext).trim().toLowerCase().replace(/^\.+/, '');
        // Own-property test: names such as "constructor" must not resolve to
        // members inherited from Object.prototype.
        if (Object.prototype.hasOwnProperty.call(KNOWN_SIGNATURES, key)) {
          this.allowedConfig[key] = KNOWN_SIGNATURES[key];
        } else {
          console.warn(`[FileValidator] 未知的文件类型: .${key},已忽略`);
        }
      }
    } else {
      this.allowedConfig = { ...KNOWN_SIGNATURES };
    }
  }

  /**
   * Reports whether the buffer decodes as strictly valid UTF-8.
   *
   * @param {ArrayBuffer|ArrayBufferView} buffer Bytes to check.
   * @returns {boolean} true when decoding succeeds, false otherwise.
   */
  _isValidUTF8(buffer) {
    try {
      // fatal: true makes invalid sequences throw instead of being replaced
      // with U+FFFD.
      new TextDecoder('utf-8', { fatal: true }).decode(buffer);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Converts bytes to an upper-case hex string, two digits per byte.
   *
   * @param {ArrayBuffer|ArrayBufferView} buffer Bytes to convert.
   * @returns {string} Hex representation, e.g. "89504E47".
   */
  _bufferToHex(buffer) {
    return Array.from(new Uint8Array(buffer), (byte) => byte.toString(16).padStart(2, '0'))
      .join('')
      .toUpperCase();
  }

  /**
   * Counts CSV rows, ignoring line feeds that occur inside double quotes.
   * An escaped quote ("") toggles the quote state twice and therefore leaves
   * it unchanged. A trailing row without a final line feed is counted.
   *
   * @param {ArrayBuffer|ArrayBufferView} buffer UTF-8 encoded CSV content.
   * @returns {number} Number of rows (0 for empty content).
   */
  _countCSVRows(buffer) {
    const text = new TextDecoder('utf-8').decode(buffer);

    let rowCount = 0;
    let inQuote = false;
    // True while characters have been seen since the last row terminator.
    let hasPendingRow = false;

    for (let i = 0; i < text.length; i++) {
      const ch = text[i];
      if (ch === '\n' && !inQuote) {
        rowCount++;
        hasPendingRow = false;
        continue;
      }
      if (ch === '"') {
        inQuote = !inQuote;
      }
      hasPendingRow = true;
    }

    if (hasPendingRow) {
      rowCount++;
    }
    return rowCount;
  }

  /**
   * Validates plain-text content:
   *   1. it must be valid UTF-8,
   *   2. it must not contain any NUL character,
   *   3. for the "json" extension it must parse as JSON.
   *
   * @param {ArrayBuffer|ArrayBufferView} buffer Bytes to validate.
   * @param {string} extension Lower-case extension without the dot.
   * @param {boolean} [isPartial=false] Set when `buffer` is only a leading
   *   sample of a larger file. The sample may end in the middle of a
   *   multi-byte character, so it is decoded in streaming mode, which does not
   *   treat an incomplete trailing sequence as an error.
   * @returns {boolean} true when the content passes every check.
   */
  _validateTextContent(buffer, extension, isPartial = false) {
    let contentStr;
    try {
      const decoder = new TextDecoder('utf-8', { fatal: true });
      contentStr = decoder.decode(buffer, { stream: isPartial });
    } catch (e) {
      // Decoding failed: the data is not valid UTF-8 text.
      console.warn('UTF-8 解码失败', e);
      return false;
    }

    // Binary data can happen to be valid UTF-8; a NUL character gives it away.
    if (contentStr.includes('\u0000')) {
      return false;
    }

    if (extension === 'json') {
      try {
        JSON.parse(contentStr);
      } catch {
        console.warn('无效的 JSON 格式');
        return false;
      }
    }

    return true;
  }

  /**
   * Reads a Blob/File into an ArrayBuffer, preferring Blob.arrayBuffer() and
   * falling back to FileReader where that method is missing.
   *
   * @param {Blob} blob Data to read.
   * @returns {Promise<ArrayBuffer>}
   */
  _readBytes(blob) {
    if (typeof blob.arrayBuffer === 'function') {
      return blob.arrayBuffer();
    }
    return new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => resolve(reader.result);
      reader.onerror = () => reject(reader.error);
      reader.readAsArrayBuffer(blob);
    });
  }

  /**
   * Runs every check against a file.
   *
   * @param {File} file File to validate.
   * @returns {Promise<boolean>} Resolves with true when the file passes.
   *   Rejects with a string message describing the first failed check.
   */
  async validate(file) {
    // Number of leading bytes sampled when the whole file is not required.
    const SAMPLE_BYTES = 2048;

    // 1. Basic object check.
    if (!file || !file.name) {
      return Promise.reject('无效的文件对象');
    }

    // 2. Size check.
    if (file.size > this.maxSizeMB * 1024 * 1024) {
      return Promise.reject(`文件大小超出限制 (最大 ${this.maxSizeMB}MB)`);
    }

    // 3. Extension check. A name without a dot has no extension at all.
    const fileName = String(file.name).toLowerCase();
    const dotIndex = fileName.lastIndexOf('.');
    const extension = dotIndex === -1 ? '' : fileName.slice(dotIndex + 1);

    const isAllowed = Object.prototype.hasOwnProperty.call(this.allowedConfig, extension);
    const expectedMagic = isAllowed ? this.allowedConfig[extension] : undefined;
    if (!expectedMagic) {
      return Promise.reject(`不支持的文件格式: .${extension}`);
    }

    // 4. Read the content. JSON must be parsed in full and a CSV row limit
    //    must see every row; everything else only needs a leading sample.
    const isText = expectedMagic === 'TYPE_TEXT';
    const needsRowCount = extension === 'csv' && this.csvMaxRows > 0;
    const readWhole = isText && (extension === 'json' || needsRowCount);
    const isPartial = !readWhole && file.size > SAMPLE_BYTES;

    let buffer;
    try {
      buffer = await this._readBytes(readWhole ? file : file.slice(0, SAMPLE_BYTES));
    } catch {
      return Promise.reject('文件读取失败,无法校验');
    }

    // 5a. Text formats.
    if (isText) {
      if (!this._validateTextContent(buffer, extension, isPartial)) {
        if (extension === 'json') {
          return Promise.reject(`文件异常:不是有效的 JSON 文件`);
        }
        return Promise.reject(`文件异常:.${extension} 包含非法二进制内容或编码错误`);
      }

      if (needsRowCount) {
        // The header line counts as a row.
        const rows = this._countCSVRows(buffer);
        if (rows > this.csvMaxRows) {
          return Promise.reject(`CSV 行数超出限制 (当前 ${rows} 行,最大允许 ${this.csvMaxRows} 行)`);
        }
      }
      return true;
    }

    // 5b. Binary formats: compare the leading bytes with the expected magic
    //     number. 12 bytes cover the longest signature (PNG, 8 bytes) plus the
    //     RIFF form type at bytes 8-11.
    const fileHeader = this._bufferToHex(buffer.slice(0, 12));
    let matches = fileHeader.startsWith(expectedMagic);
    if (matches && extension === 'webp') {
      // "RIFF" alone also matches WAV/AVI; a WebP file carries the form type
      // "WEBP" (57 45 42 50) right after the 4-byte chunk size.
      matches = fileHeader.slice(16, 24) === '57454250';
    }
    if (!matches) {
      return Promise.reject(`文件可能已被篡改 (真实类型与 .${extension} 不符)`);
    }
    return true;
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
// 【demo】
|
|
|
|
|
|
// 如果传入了 allowedExtensions,则只使用传入的;否则使用全部 KNOWN_SIGNATURES
|
|
|
|
|
|
// const imageValidator = new FileValidator({
|
|
|
|
|
|
// maxSizeMB: 5,
|
|
|
|
|
|
// allowedExtensions: ['png', 'jpg', 'jpeg'],
|
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
|
|
// imageValidator
|
|
|
|
|
|
// .validate(file)
|
|
|
|
|
|
// .then(() => {
|
2025-12-19 10:37:41 +08:00
|
|
|
|
// statusDiv.textContent = `检测通过: ${file.name}`;
|
2025-12-19 10:25:10 +08:00
|
|
|
|
// statusDiv.style.color = 'green';
|
|
|
|
|
|
// console.log('图片校验通过,开始上传...');
|
|
|
|
|
|
// // upload(file)...
|
|
|
|
|
|
// })
|
|
|
|
|
|
// .catch((err) => {
|
2025-12-19 10:37:41 +08:00
|
|
|
|
// statusDiv.textContent = `检测失败: ${err}`;
|
2025-12-19 10:25:10 +08:00
|
|
|
|
// statusDiv.style.color = 'red';
|
|
|
|
|
|
// });
|