当前季度和当月招聘会详情
This commit is contained in:
@@ -77,6 +77,13 @@
|
||||
<artifactId>poi-ooxml</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- .doc file support (poi-scratchpad provides the HWPF classes) -->
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-scratchpad</artifactId>
|
||||
<version>4.1.2</version>
|
||||
</dependency>
|
||||
|
||||
<!-- yml解析器 -->
|
||||
<dependency>
|
||||
<groupId>org.yaml</groupId>
|
||||
|
||||
@@ -0,0 +1,107 @@
|
||||
package com.ruoyi.common.utils.file;
|
||||
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.*;
|
||||
|
||||
/**
|
||||
* Doc/Docx 文件转换工具
|
||||
* 将 doc/docx 文件批量转换为 txt 文件
|
||||
*/
|
||||
public class DocConverter {
|
||||
|
||||
/**
|
||||
* 批量转换目录下的 doc/docx 文件为 txt
|
||||
* @param inputDir 输入目录
|
||||
* @param outputDir 输出目录
|
||||
*/
|
||||
public static void convertDir(String inputDir, String outputDir) {
|
||||
File outDir = new File(outputDir);
|
||||
if (!outDir.exists()) {
|
||||
outDir.mkdirs();
|
||||
}
|
||||
|
||||
File dir = new File(inputDir);
|
||||
File[] files = dir.listFiles((d, name) ->
|
||||
name.toLowerCase().endsWith(".doc") || name.toLowerCase().endsWith(".docx"));
|
||||
|
||||
if (files == null || files.length == 0) {
|
||||
System.out.println("No doc/docx files found in: " + inputDir);
|
||||
return;
|
||||
}
|
||||
|
||||
System.out.println("Found " + files.length + " files to convert");
|
||||
|
||||
int success = 0;
|
||||
int fail = 0;
|
||||
for (File file : files) {
|
||||
try {
|
||||
String content;
|
||||
if (file.getName().toLowerCase().endsWith(".docx")) {
|
||||
content = readDocx(file);
|
||||
} else {
|
||||
content = readDoc(file);
|
||||
}
|
||||
|
||||
String outputName = file.getName().replaceAll("\\.(doc|docx)$", ".txt");
|
||||
Path outputPath = Paths.get(outputDir, outputName);
|
||||
Files.write(outputPath, content.getBytes(StandardCharsets.UTF_8));
|
||||
System.out.println("OK: " + file.getName());
|
||||
success++;
|
||||
} catch (Exception e) {
|
||||
System.out.println("FAIL: " + file.getName() + " - " + e.getMessage());
|
||||
fail++;
|
||||
}
|
||||
}
|
||||
System.out.println("Done! Success: " + success + ", Failed: " + fail);
|
||||
}
|
||||
|
||||
/**
|
||||
* 读取 docx 文件内容
|
||||
*/
|
||||
public static String readDocx(File file) throws IOException {
|
||||
try (FileInputStream fis = new FileInputStream(file);
|
||||
XWPFDocument doc = new XWPFDocument(fis)) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (XWPFParagraph para : doc.getParagraphs()) {
|
||||
sb.append(para.getText()).append("\n");
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 读取 doc 文件内容
|
||||
*/
|
||||
public static String readDoc(File file) throws IOException {
|
||||
try (FileInputStream fis = new FileInputStream(file);
|
||||
HWPFDocument doc = new HWPFDocument(fis);
|
||||
WordExtractor extractor = new WordExtractor(doc)) {
|
||||
return extractor.getText();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 读取 doc 或 docx 文件内容
|
||||
*/
|
||||
public static String readWord(File file) throws IOException {
|
||||
if (file.getName().toLowerCase().endsWith(".docx")) {
|
||||
return readDocx(file);
|
||||
} else {
|
||||
return readDoc(file);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 命令行入口
|
||||
* 用法: java -cp xxx.jar com.ruoyi.common.utils.file.DocConverter <inputDir> [outputDir]
|
||||
*/
|
||||
public static void main(String[] args) {
|
||||
convertDir("D:\\new_to_here\\code\\shz-backend\\政策文件1", "D:\\new_to_here\\code\\shz-backend\\政策文件1\\outputDir");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user