769 lines
17 KiB
Markdown
769 lines
17 KiB
Markdown
|
|
# OCR服务 Docker 部署文档
|
|||
|
|
|
|||
|
|
## 项目概述
|
|||
|
|
|
|||
|
|
这是一个基于FastAPI的OCR文字识别服务,支持多种文件格式:
|
|||
|
|
- 图片格式:JPG, PNG, BMP等
|
|||
|
|
- PDF文档
|
|||
|
|
- Excel文件:.xlsx, .xls
|
|||
|
|
- PowerPoint文件:.pptx, .ppt
|
|||
|
|
|
|||
|
|
## 快速开始
|
|||
|
|
|
|||
|
|
### 1. 基础构建和启动
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 构建镜像
|
|||
|
|
docker build -t my-ocr-service:v1.0 .
|
|||
|
|
|
|||
|
|
# 启动容器
|
|||
|
|
docker run -d -p 9000:9000 --name ocr-server my-ocr-service:v1.0
|
|||
|
|
|
|||
|
|
# 或使用docker-compose(推荐)
|
|||
|
|
docker-compose up -d --build
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 2. 验证服务
|
|||
|
|
|
|||
|
|
#### 基础健康检查
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 健康检查
|
|||
|
|
curl http://localhost:9000/health
|
|||
|
|
|
|||
|
|
# 预期返回
|
|||
|
|
{"status":"ok"}
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 完整功能测试
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 测试PDF文件OCR(使用项目测试文件)
|
|||
|
|
curl -X POST \
|
|||
|
|
-F "file=@example/test_resume.pdf" \
|
|||
|
|
http://localhost:9000/ocr
|
|||
|
|
|
|||
|
|
# 预期返回格式
|
|||
|
|
{
|
|||
|
|
"code": 200,
|
|||
|
|
"data": "--- Page 1 ---\n识别出的文字内容...",
|
|||
|
|
"cost_time_ms": 1234.56
|
|||
|
|
}
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 详细验证脚本
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 创建完整的验证脚本
|
|||
|
|
cat > verify-service.sh << 'EOF'
|
|||
|
|
#!/bin/bash
|
|||
|
|
|
|||
|
|
BASE_URL="http://localhost:9000"
|
|||
|
|
TEST_FILE="example/test_resume.pdf"
|
|||
|
|
|
|||
|
|
echo "=== OCR服务验证测试 ==="
|
|||
|
|
echo "服务地址: $BASE_URL"
|
|||
|
|
echo "测试时间: $(date)"
|
|||
|
|
echo
|
|||
|
|
|
|||
|
|
# 1. 健康检查
|
|||
|
|
echo "1. 健康检查测试..."
|
|||
|
|
HEALTH_RESPONSE=$(curl -s -w "%{http_code}" -o /tmp/health_response.json $BASE_URL/health)
|
|||
|
|
HTTP_CODE=${HEALTH_RESPONSE: -3}
|
|||
|
|
|
|||
|
|
if [ "$HTTP_CODE" = "200" ]; then
|
|||
|
|
echo "✅ 健康检查通过 (HTTP $HTTP_CODE)"
|
|||
|
|
echo " 响应: $(cat /tmp/health_response.json)"
|
|||
|
|
else
|
|||
|
|
echo "❌ 健康检查失败 (HTTP $HTTP_CODE)"
|
|||
|
|
exit 1
|
|||
|
|
fi
|
|||
|
|
echo
|
|||
|
|
|
|||
|
|
# 2. 检查测试文件
|
|||
|
|
echo "2. 检查测试文件..."
|
|||
|
|
if [ -f "$TEST_FILE" ]; then
|
|||
|
|
echo "✅ 测试文件存在: $TEST_FILE"
|
|||
|
|
echo " 文件大小: $(ls -lh $TEST_FILE | awk '{print $5}')"
|
|||
|
|
else
|
|||
|
|
echo "❌ 测试文件不存在: $TEST_FILE"
|
|||
|
|
echo " 请确保example/test_resume.pdf文件存在"
|
|||
|
|
exit 1
|
|||
|
|
fi
|
|||
|
|
echo
|
|||
|
|
|
|||
|
|
# 3. PDF OCR测试
|
|||
|
|
echo "3. PDF OCR功能测试..."
|
|||
|
|
START_TIME=$(date +%s.%N)
|
|||
|
|
|
|||
|
|
OCR_RESPONSE=$(curl -s -w "%{http_code}" \
|
|||
|
|
-X POST \
|
|||
|
|
-F "file=@$TEST_FILE" \
|
|||
|
|
-o /tmp/ocr_response.json \
|
|||
|
|
$BASE_URL/ocr)
|
|||
|
|
|
|||
|
|
END_TIME=$(date +%s.%N)
|
|||
|
|
HTTP_CODE=${OCR_RESPONSE: -3}
|
|||
|
|
DURATION=$(echo "$END_TIME - $START_TIME" | bc)
|
|||
|
|
|
|||
|
|
if [ "$HTTP_CODE" = "200" ]; then
|
|||
|
|
echo "✅ OCR测试通过 (HTTP $HTTP_CODE)"
|
|||
|
|
echo " 请求耗时: ${DURATION}s"
|
|||
|
|
|
|||
|
|
# 解析响应
|
|||
|
|
CODE=$(cat /tmp/ocr_response.json | python3 -c "import sys, json; print(json.load(sys.stdin).get('code', 'N/A'))")
|
|||
|
|
COST_TIME=$(cat /tmp/ocr_response.json | python3 -c "import sys, json; print(json.load(sys.stdin).get('cost_time_ms', 'N/A'))")
|
|||
|
|
DATA_LENGTH=$(cat /tmp/ocr_response.json | python3 -c "import sys, json; print(len(json.load(sys.stdin).get('data', '')))")
|
|||
|
|
|
|||
|
|
echo " 响应码: $CODE"
|
|||
|
|
echo " 服务耗时: ${COST_TIME}ms"
|
|||
|
|
echo " 识别文本长度: ${DATA_LENGTH}字符"
|
|||
|
|
|
|||
|
|
if [ "$CODE" = "200" ] && [ "$DATA_LENGTH" -gt "0" ]; then
|
|||
|
|
echo "✅ OCR识别成功,提取到文本内容"
|
|||
|
|
else
|
|||
|
|
echo "⚠️ OCR响应异常,请检查日志"
|
|||
|
|
fi
|
|||
|
|
else
|
|||
|
|
echo "❌ OCR测试失败 (HTTP $HTTP_CODE)"
|
|||
|
|
echo " 响应内容: $(cat /tmp/ocr_response.json)"
|
|||
|
|
exit 1
|
|||
|
|
fi
|
|||
|
|
echo
|
|||
|
|
|
|||
|
|
# 4. URL OCR测试(可选)
|
|||
|
|
echo "4. URL OCR功能测试(可选)..."
|
|||
|
|
URL_TEST="https://httpbin.org/status/200"
|
|||
|
|
echo " 跳过URL测试,如需测试请提供有效的文档URL"
|
|||
|
|
echo
|
|||
|
|
|
|||
|
|
# 5. 性能基准测试
|
|||
|
|
echo "5. 性能基准测试..."
|
|||
|
|
echo " 执行3次OCR测试,计算平均耗时..."
|
|||
|
|
|
|||
|
|
TOTAL_TIME=0
|
|||
|
|
SUCCESS_COUNT=0
|
|||
|
|
|
|||
|
|
for i in {1..3}; do
|
|||
|
|
echo -n " 测试 $i/3: "
|
|||
|
|
START_TIME=$(date +%s.%N)
|
|||
|
|
|
|||
|
|
RESPONSE=$(curl -s -w "%{http_code}" \
|
|||
|
|
-X POST \
|
|||
|
|
-F "file=@$TEST_FILE" \
|
|||
|
|
-o /tmp/perf_test_$i.json \
|
|||
|
|
$BASE_URL/ocr)
|
|||
|
|
|
|||
|
|
END_TIME=$(date +%s.%N)
|
|||
|
|
HTTP_CODE=${RESPONSE: -3}
|
|||
|
|
DURATION=$(echo "$END_TIME - $START_TIME" | bc)
|
|||
|
|
|
|||
|
|
if [ "$HTTP_CODE" = "200" ]; then
|
|||
|
|
echo "${DURATION}s ✅"
|
|||
|
|
TOTAL_TIME=$(echo "$TOTAL_TIME + $DURATION" | bc)
|
|||
|
|
SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
|
|||
|
|
else
|
|||
|
|
echo "失败 (HTTP $HTTP_CODE) ❌"
|
|||
|
|
fi
|
|||
|
|
done
|
|||
|
|
|
|||
|
|
if [ "$SUCCESS_COUNT" -gt "0" ]; then
|
|||
|
|
AVG_TIME=$(echo "scale=3; $TOTAL_TIME / $SUCCESS_COUNT" | bc)
|
|||
|
|
echo " 平均耗时: ${AVG_TIME}s"
|
|||
|
|
echo " 成功率: $SUCCESS_COUNT/3"
|
|||
|
|
fi
|
|||
|
|
echo
|
|||
|
|
|
|||
|
|
# 清理临时文件
|
|||
|
|
rm -f /tmp/health_response.json /tmp/ocr_response.json /tmp/perf_test_*.json
|
|||
|
|
|
|||
|
|
echo "=== 验证测试完成 ==="
|
|||
|
|
echo "如果所有测试都通过,服务已正常运行!"
|
|||
|
|
EOF
|
|||
|
|
|
|||
|
|
chmod +x verify-service.sh
|
|||
|
|
./verify-service.sh
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 快速验证命令
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 一键验证所有功能
|
|||
|
|
curl -s http://localhost:9000/health && \
|
|||
|
|
curl -X POST -F "file=@example/test_resume.pdf" http://localhost:9000/ocr | \
|
|||
|
|
python3 -m json.tool
|
|||
|
|
|
|||
|
|
# 简化版验证
|
|||
|
|
echo "健康检查:" && curl -s http://localhost:9000/health && echo
|
|||
|
|
echo "OCR测试:" && curl -X POST -F "file=@example/test_resume.pdf" http://localhost:9000/ocr
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 其他文件格式测试
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 如果有其他测试文件,可以测试更多格式
|
|||
|
|
|
|||
|
|
# 测试图片文件(需要准备测试图片)
|
|||
|
|
curl -X POST -F "file=@test_image.jpg" http://localhost:9000/ocr
|
|||
|
|
|
|||
|
|
# 测试Excel文件(需要准备测试文件)
|
|||
|
|
curl -X POST -F "file=@test_document.xlsx" http://localhost:9000/ocr
|
|||
|
|
|
|||
|
|
# 测试PowerPoint文件(需要准备测试文件)
|
|||
|
|
curl -X POST -F "file=@test_presentation.pptx" http://localhost:9000/ocr
|
|||
|
|
|
|||
|
|
# 测试URL识别
|
|||
|
|
curl -X POST \
|
|||
|
|
-H "Content-Type: application/json" \
|
|||
|
|
-d '{"url": "https://example.com/document.pdf"}' \
|
|||
|
|
http://localhost:9000/ocr/url
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
## 详细部署指南
|
|||
|
|
|
|||
|
|
### 环境要求
|
|||
|
|
|
|||
|
|
- Docker 20.10+
|
|||
|
|
- Docker Compose 2.0+
|
|||
|
|
- 系统内存:建议4GB以上
|
|||
|
|
- 磁盘空间:至少2GB可用空间
|
|||
|
|
|
|||
|
|
### 构建选项
|
|||
|
|
|
|||
|
|
#### 选项1:Docker Compose(推荐)
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 停止旧服务
|
|||
|
|
docker-compose down
|
|||
|
|
|
|||
|
|
# 构建并启动
|
|||
|
|
docker-compose up -d --build
|
|||
|
|
|
|||
|
|
# 查看日志
|
|||
|
|
docker-compose logs -f ocr-server
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 选项2:手动构建
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 构建镜像
|
|||
|
|
docker build -t my-ocr-service:v1.0 .
|
|||
|
|
|
|||
|
|
# 启动容器
|
|||
|
|
docker run -d \
|
|||
|
|
--name ocr-server \
|
|||
|
|
-p 9000:9000 \
|
|||
|
|
--restart always \
|
|||
|
|
-e TZ=Asia/Shanghai \
|
|||
|
|
my-ocr-service:v1.0
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 多架构支持
|
|||
|
|
|
|||
|
|
#### AMD64平台(x86_64)
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 使用默认Dockerfile
|
|||
|
|
docker build -t my-ocr-service:v1.0-amd64 .
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### ARM64平台(Apple Silicon, ARM服务器)
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 使用ARM优化版Dockerfile
|
|||
|
|
docker build -f Dockerfile.arm -t my-ocr-service:v1.0-arm64 .
|
|||
|
|
|
|||
|
|
# 或使用ARM版docker-compose
|
|||
|
|
docker-compose -f docker-compose.arm.yml up -d --build
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 多架构构建(跨平台)
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 启用buildx
|
|||
|
|
docker buildx create --name multiarch --use
|
|||
|
|
|
|||
|
|
# 构建多架构镜像
|
|||
|
|
docker buildx build \
|
|||
|
|
--platform linux/amd64,linux/arm64 \
|
|||
|
|
-t my-ocr-service:v1.0 \
|
|||
|
|
--push .
|
|||
|
|
|
|||
|
|
# 或使用提供的脚本
|
|||
|
|
chmod +x build-multiarch.sh
|
|||
|
|
./build-multiarch.sh
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
## 配置说明
|
|||
|
|
|
|||
|
|
### 环境变量
|
|||
|
|
|
|||
|
|
| 变量名 | 默认值 | 说明 |
|
|||
|
|
|--------|--------|------|
|
|||
|
|
| TZ | Asia/Shanghai | 时区设置 |
|
|||
|
|
| OMP_NUM_THREADS | 4 | OpenMP线程数(ARM平台) |
|
|||
|
|
| OPENBLAS_NUM_THREADS | 4 | OpenBLAS线程数(ARM平台) |
|
|||
|
|
|
|||
|
|
### 端口配置
|
|||
|
|
|
|||
|
|
- 服务端口:9000
|
|||
|
|
- 健康检查:GET /health
|
|||
|
|
- OCR接口:POST /ocr
|
|||
|
|
- URL识别:POST /ocr/url
|
|||
|
|
|
|||
|
|
### 资源限制
|
|||
|
|
|
|||
|
|
```yaml
# docker-compose.yml中的资源配置
deploy:
  resources:
    limits:
      memory: 2G
      cpus: '2.0'
    reservations:
      memory: 1G
      cpus: '1.0'
```
|
|||
|
|
|
|||
|
|
## 镜像管理
|
|||
|
|
|
|||
|
|
### 架构检测和选择
|
|||
|
|
|
|||
|
|
#### 检测当前系统架构
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 查看系统架构
|
|||
|
|
uname -m
|
|||
|
|
# 输出示例:
|
|||
|
|
# x86_64 -> AMD64架构
|
|||
|
|
# aarch64 -> ARM64架构
|
|||
|
|
# arm64 -> ARM64架构(macOS)
|
|||
|
|
|
|||
|
|
# 查看Docker服务端(守护进程)的运行架构
|
|||
|
|
docker version --format '{{.Server.Arch}}'
|
|||
|
|
|
|||
|
|
# 检查镜像支持的架构
|
|||
|
|
docker buildx imagetools inspect my-ocr-service:v1.0
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 自动选择合适的镜像
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 创建架构检测脚本
|
|||
|
|
cat > select-image.sh << 'EOF'
|
|||
|
|
#!/bin/bash
|
|||
|
|
ARCH=$(uname -m)
|
|||
|
|
IMAGE_NAME="my-ocr-service"
|
|||
|
|
VERSION="v1.0"
|
|||
|
|
|
|||
|
|
case $ARCH in
|
|||
|
|
x86_64)
|
|||
|
|
DOCKER_ARCH="amd64"
|
|||
|
|
;;
|
|||
|
|
aarch64|arm64)
|
|||
|
|
DOCKER_ARCH="arm64"
|
|||
|
|
;;
|
|||
|
|
*)
|
|||
|
|
echo "不支持的架构: $ARCH"
|
|||
|
|
exit 1
|
|||
|
|
;;
|
|||
|
|
esac
|
|||
|
|
|
|||
|
|
echo "检测到架构: $ARCH -> Docker架构: $DOCKER_ARCH"
|
|||
|
|
echo "使用镜像: ${IMAGE_NAME}:${VERSION}-${DOCKER_ARCH}"
|
|||
|
|
|
|||
|
|
# 检查镜像是否存在
|
|||
|
|
if docker image inspect ${IMAGE_NAME}:${VERSION}-${DOCKER_ARCH} >/dev/null 2>&1; then
|
|||
|
|
echo "镜像已存在,启动容器..."
|
|||
|
|
docker run -d -p 9000:9000 --name ocr-server ${IMAGE_NAME}:${VERSION}-${DOCKER_ARCH}
|
|||
|
|
else
|
|||
|
|
echo "镜像不存在,请先导入对应架构的镜像文件"
|
|||
|
|
echo "AMD64: docker load -i ocr-server-${VERSION}-amd64.tar"
|
|||
|
|
echo "ARM64: docker load -i ocr-server-${VERSION}-arm64.tar"
|
|||
|
|
fi
|
|||
|
|
EOF
|
|||
|
|
|
|||
|
|
chmod +x select-image.sh
|
|||
|
|
./select-image.sh
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 导出镜像
|
|||
|
|
|
|||
|
|
#### AMD64架构导出
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 导出AMD64镜像为tar文件
|
|||
|
|
docker save -o ocr-server-v1.0-amd64.tar my-ocr-service:v1.0-amd64
|
|||
|
|
|
|||
|
|
# 压缩导出AMD64镜像
|
|||
|
|
docker save my-ocr-service:v1.0-amd64 | gzip > ocr-server-v1.0-amd64.tar.gz
|
|||
|
|
|
|||
|
|
# 查看导出文件大小
|
|||
|
|
ls -lh ocr-server-v1.0-amd64.tar*
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### ARM64架构导出
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 导出ARM64镜像为tar文件
|
|||
|
|
docker save -o ocr-server-v1.0-arm64.tar my-ocr-service:v1.0-arm64
|
|||
|
|
|
|||
|
|
# 压缩导出ARM64镜像
|
|||
|
|
docker save my-ocr-service:v1.0-arm64 | gzip > ocr-server-v1.0-arm64.tar.gz
|
|||
|
|
|
|||
|
|
# 查看导出文件大小
|
|||
|
|
ls -lh ocr-server-v1.0-arm64.tar*
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 多架构镜像导出
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 导出包含多架构的镜像
|
|||
|
|
docker save -o ocr-server-v1.0-multiarch.tar my-ocr-service:v1.0
|
|||
|
|
|
|||
|
|
# 批量导出所有架构版本
|
|||
|
|
docker save -o ocr-server-v1.0-all.tar \
|
|||
|
|
my-ocr-service:v1.0-amd64 \
|
|||
|
|
my-ocr-service:v1.0-arm64
|
|||
|
|
|
|||
|
|
# 压缩多架构镜像
|
|||
|
|
docker save my-ocr-service:v1.0-amd64 my-ocr-service:v1.0-arm64 | \
|
|||
|
|
gzip > ocr-server-v1.0-multiarch.tar.gz
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 自动化导出脚本
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 创建导出脚本
|
|||
|
|
cat > export-images.sh << 'EOF'
|
|||
|
|
#!/bin/bash
|
|||
|
|
VERSION=${1:-v1.0}
|
|||
|
|
IMAGE_NAME="my-ocr-service"
|
|||
|
|
|
|||
|
|
echo "导出镜像版本: $VERSION"
|
|||
|
|
|
|||
|
|
# 检查镜像是否存在
|
|||
|
|
if docker image inspect ${IMAGE_NAME}:${VERSION}-amd64 >/dev/null 2>&1; then
|
|||
|
|
echo "导出AMD64镜像..."
|
|||
|
|
docker save -o ${IMAGE_NAME}-${VERSION}-amd64.tar ${IMAGE_NAME}:${VERSION}-amd64
|
|||
|
|
gzip ${IMAGE_NAME}-${VERSION}-amd64.tar
|
|||
|
|
fi
|
|||
|
|
|
|||
|
|
if docker image inspect ${IMAGE_NAME}:${VERSION}-arm64 >/dev/null 2>&1; then
|
|||
|
|
echo "导出ARM64镜像..."
|
|||
|
|
docker save -o ${IMAGE_NAME}-${VERSION}-arm64.tar ${IMAGE_NAME}:${VERSION}-arm64
|
|||
|
|
gzip ${IMAGE_NAME}-${VERSION}-arm64.tar
|
|||
|
|
fi
|
|||
|
|
|
|||
|
|
echo "导出完成!"
|
|||
|
|
ls -lh ${IMAGE_NAME}-${VERSION}-*.tar.gz
|
|||
|
|
EOF
|
|||
|
|
|
|||
|
|
chmod +x export-images.sh
|
|||
|
|
./export-images.sh v1.0
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 导入镜像
|
|||
|
|
|
|||
|
|
#### AMD64架构(x86_64)
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 从tar文件导入AMD64镜像
|
|||
|
|
docker load -i ocr-server-v1.0-amd64.tar
|
|||
|
|
|
|||
|
|
# 从压缩文件导入AMD64镜像
|
|||
|
|
gunzip -c ocr-server-v1.0-amd64.tar.gz | docker load
|
|||
|
|
|
|||
|
|
# 验证导入的AMD64镜像
|
|||
|
|
docker images | grep my-ocr-service
|
|||
|
|
docker inspect my-ocr-service:v1.0-amd64 | grep Architecture
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### ARM64架构(Apple Silicon, ARM服务器)
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 从tar文件导入ARM64镜像
|
|||
|
|
docker load -i ocr-server-v1.0-arm64.tar
|
|||
|
|
|
|||
|
|
# 从压缩文件导入ARM64镜像
|
|||
|
|
gunzip -c ocr-server-v1.0-arm64.tar.gz | docker load
|
|||
|
|
|
|||
|
|
# 验证导入的ARM64镜像
|
|||
|
|
docker images | grep my-ocr-service
|
|||
|
|
docker inspect my-ocr-service:v1.0-arm64 | grep Architecture
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 通用导入(自动检测架构)
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 导入通用镜像(包含多架构)
|
|||
|
|
docker load -i ocr-server-v1.0-multiarch.tar
|
|||
|
|
|
|||
|
|
# 查看支持的架构
|
|||
|
|
docker buildx imagetools inspect my-ocr-service:v1.0
|
|||
|
|
|
|||
|
|
# Docker会自动选择匹配当前系统的架构
|
|||
|
|
docker run --rm my-ocr-service:v1.0 uname -m
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 跨架构导入和使用
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 在AMD64系统上导入ARM64镜像(需要启用实验性功能)
|
|||
|
|
export DOCKER_CLI_EXPERIMENTAL=enabled
|
|||
|
|
docker load -i ocr-server-v1.0-arm64.tar
|
|||
|
|
|
|||
|
|
# 在ARM64系统上导入AMD64镜像
|
|||
|
|
docker load -i ocr-server-v1.0-amd64.tar
|
|||
|
|
|
|||
|
|
# 强制运行非原生架构镜像(性能会下降)
|
|||
|
|
docker run --platform linux/arm64 my-ocr-service:v1.0-arm64
|
|||
|
|
docker run --platform linux/amd64 my-ocr-service:v1.0-amd64
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 推送到仓库
|
|||
|
|
|
|||
|
|
#### 单架构推送
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 推送AMD64镜像
|
|||
|
|
docker tag my-ocr-service:v1.0-amd64 your-registry.com/ocr-service:v1.0-amd64
|
|||
|
|
docker push your-registry.com/ocr-service:v1.0-amd64
|
|||
|
|
|
|||
|
|
# 推送ARM64镜像
|
|||
|
|
docker tag my-ocr-service:v1.0-arm64 your-registry.com/ocr-service:v1.0-arm64
|
|||
|
|
docker push your-registry.com/ocr-service:v1.0-arm64
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 多架构推送(推荐)
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 创建并启用buildx构建器(用于多架构构建)
|
|||
|
|
docker buildx create --name multiarch --use
|
|||
|
|
|
|||
|
|
# 构建并推送多架构镜像
|
|||
|
|
docker buildx build \
|
|||
|
|
--platform linux/amd64,linux/arm64 \
|
|||
|
|
-t your-registry.com/ocr-service:v1.0 \
|
|||
|
|
--push .
|
|||
|
|
|
|||
|
|
# 验证多架构镜像
|
|||
|
|
docker buildx imagetools inspect your-registry.com/ocr-service:v1.0
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
#### 从本地镜像创建多架构推送
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 推送各架构镜像
|
|||
|
|
docker push your-registry.com/ocr-service:v1.0-amd64
|
|||
|
|
docker push your-registry.com/ocr-service:v1.0-arm64
|
|||
|
|
|
|||
|
|
# 创建多架构manifest
|
|||
|
|
docker manifest create your-registry.com/ocr-service:v1.0 \
|
|||
|
|
your-registry.com/ocr-service:v1.0-amd64 \
|
|||
|
|
your-registry.com/ocr-service:v1.0-arm64
|
|||
|
|
|
|||
|
|
# 设置架构信息
|
|||
|
|
docker manifest annotate your-registry.com/ocr-service:v1.0 \
|
|||
|
|
your-registry.com/ocr-service:v1.0-amd64 --arch amd64
|
|||
|
|
|
|||
|
|
docker manifest annotate your-registry.com/ocr-service:v1.0 \
|
|||
|
|
your-registry.com/ocr-service:v1.0-arm64 --arch arm64
|
|||
|
|
|
|||
|
|
# 推送manifest
|
|||
|
|
docker manifest push your-registry.com/ocr-service:v1.0
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
## 生产环境部署
|
|||
|
|
|
|||
|
|
### 1. 服务器部署
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 上传镜像到服务器
|
|||
|
|
scp ocr-server-v1.0.tar root@your-server:/opt/docker-apps/
|
|||
|
|
|
|||
|
|
# 在服务器上导入
|
|||
|
|
ssh root@your-server
|
|||
|
|
cd /opt/docker-apps/
|
|||
|
|
docker load -i ocr-server-v1.0.tar
|
|||
|
|
|
|||
|
|
# 启动服务
|
|||
|
|
docker-compose up -d --force-recreate
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 2. 反向代理配置(Nginx)
|
|||
|
|
|
|||
|
|
```nginx
|
|||
|
|
server {
|
|||
|
|
listen 80;
|
|||
|
|
server_name ocr.yourdomain.com;
|
|||
|
|
|
|||
|
|
location / {
|
|||
|
|
proxy_pass http://localhost:9000;
|
|||
|
|
proxy_set_header Host $host;
|
|||
|
|
proxy_set_header X-Real-IP $remote_addr;
|
|||
|
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
|||
|
|
|
|||
|
|
# 文件上传大小限制
|
|||
|
|
client_max_body_size 50M;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 3. 监控和日志
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 查看容器状态
|
|||
|
|
docker ps
|
|||
|
|
docker stats ocr-server
|
|||
|
|
|
|||
|
|
# 查看日志
|
|||
|
|
docker logs -f ocr-server
|
|||
|
|
docker-compose logs -f
|
|||
|
|
|
|||
|
|
# 健康检查
|
|||
|
|
curl -f http://localhost:9000/health
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
## 故障排除
|
|||
|
|
|
|||
|
|
### 常见问题
|
|||
|
|
|
|||
|
|
1. **内存不足**
|
|||
|
|
```bash
|
|||
|
|
# 增加内存限制
|
|||
|
|
docker run --memory=4g my-ocr-service:v1.0
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
2. **端口冲突**
|
|||
|
|
```bash
|
|||
|
|
# 检查端口占用
|
|||
|
|
netstat -tlnp | grep 9000
|
|||
|
|
# 更换端口
|
|||
|
|
docker run -p 9001:9000 my-ocr-service:v1.0
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
3. **ARM平台性能问题**
|
|||
|
|
```bash
|
|||
|
|
# 使用ARM优化版本
|
|||
|
|
docker-compose -f docker-compose.arm.yml up -d
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
4. **依赖库问题**
|
|||
|
|
```bash
|
|||
|
|
# 重新构建镜像
|
|||
|
|
docker build --no-cache -t my-ocr-service:v1.0 .
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 调试命令
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 进入容器调试
|
|||
|
|
docker exec -it ocr-server bash
|
|||
|
|
|
|||
|
|
# 查看容器资源使用
|
|||
|
|
docker stats ocr-server
|
|||
|
|
|
|||
|
|
# 查看镜像层信息
|
|||
|
|
docker history my-ocr-service:v1.0
|
|||
|
|
|
|||
|
|
# 检查容器健康状态
|
|||
|
|
docker inspect ocr-server | grep Health -A 10
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
## API使用示例
|
|||
|
|
|
|||
|
|
### 文件上传识别
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 上传PDF文件
|
|||
|
|
curl -X POST \
|
|||
|
|
-F "file=@document.pdf" \
|
|||
|
|
http://localhost:9000/ocr
|
|||
|
|
|
|||
|
|
# 上传图片
|
|||
|
|
curl -X POST \
|
|||
|
|
-F "file=@image.jpg" \
|
|||
|
|
http://localhost:9000/ocr
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### URL识别
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 识别网络文件
|
|||
|
|
curl -X POST \
|
|||
|
|
-H "Content-Type: application/json" \
|
|||
|
|
-d '{"url": "https://example.com/document.pdf"}' \
|
|||
|
|
http://localhost:9000/ocr/url
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
## 性能优化
|
|||
|
|
|
|||
|
|
### 1. 镜像优化
|
|||
|
|
|
|||
|
|
- 使用多阶段构建减小镜像大小
|
|||
|
|
- 清理不必要的依赖和缓存
|
|||
|
|
- 使用.dockerignore排除无关文件
|
|||
|
|
|
|||
|
|
### 2. 运行时优化
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 设置合适的线程数
|
|||
|
|
docker run -e OMP_NUM_THREADS=4 my-ocr-service:v1.0
|
|||
|
|
|
|||
|
|
# 增大共享内存(/dev/shm)大小
|
|||
|
|
docker run --shm-size=1g my-ocr-service:v1.0
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 3. 集群部署
|
|||
|
|
|
|||
|
|
```yaml
# docker-compose.yml 扩展配置
services:
  ocr-server:
    deploy:
      replicas: 3
      update_config:
        parallelism: 1
        delay: 10s
      restart_policy:
        condition: on-failure
```
|
|||
|
|
|
|||
|
|
## 版本管理
|
|||
|
|
|
|||
|
|
### 版本标记策略
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 开发版本
|
|||
|
|
docker tag my-ocr-service:latest my-ocr-service:dev
|
|||
|
|
|
|||
|
|
# 测试版本
|
|||
|
|
docker tag my-ocr-service:latest my-ocr-service:test
|
|||
|
|
|
|||
|
|
# 生产版本
|
|||
|
|
docker tag my-ocr-service:latest my-ocr-service:v1.0
|
|||
|
|
docker tag my-ocr-service:latest my-ocr-service:stable
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
### 回滚策略
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 保存当前版本
|
|||
|
|
docker tag my-ocr-service:latest my-ocr-service:backup-$(date +%Y%m%d)
|
|||
|
|
|
|||
|
|
# 回滚到上一版本
|
|||
|
|
docker-compose down
|
|||
|
|
docker tag my-ocr-service:v0.9 my-ocr-service:latest
|
|||
|
|
docker-compose up -d
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
---
|
|||
|
|
|
|||
|
|
## 联系支持
|
|||
|
|
|
|||
|
|
如遇到部署问题,请提供以下信息:
|
|||
|
|
- 操作系统和架构
|
|||
|
|
- Docker版本
|
|||
|
|
- 错误日志
|
|||
|
|
- 部署配置文件
|