690601:1929 ADR-032-232 #08
This commit is contained in:
@@ -8,6 +8,8 @@
|
||||
// - 2026-05-30: เพิ่ม VRAM insufficiency guard สำหรับ Typhoon OCR engine (T016a, ADR-032)
|
||||
// - 2026-05-30: ปรับปรุงสำหรับ Dynamic OCR Engine selection, Caching, และ Graceful Fallback (T013, T014, T016, T022, T023, US1)
|
||||
// - 2026-06-01: ปรับปรุง remapPath ให้รองรับ Windows absolute และ relative path ได้แม่นยำ 100%
|
||||
// - 2026-06-01: เปลี่ยน processWithTesseract/processWithTyphoon ให้ส่ง file content ผ่าน multipart
|
||||
// ไปยัง /ocr-upload แทนการส่ง path (แก้ปัญหา Docker WSL2 mount ไม่ได้)
|
||||
|
||||
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
@@ -16,6 +18,7 @@ import Redis from 'ioredis';
|
||||
import { InjectRepository } from '@nestjs/typeorm';
|
||||
import { Repository, EntityManager } from 'typeorm';
|
||||
import axios from 'axios';
|
||||
import * as fs from 'fs';
|
||||
import {
|
||||
OcrEngineConfiguration,
|
||||
OcrEngineType,
|
||||
@@ -96,8 +99,6 @@ export class OcrService {
|
||||
private readonly logger = new Logger(OcrService.name);
|
||||
private readonly threshold: number;
|
||||
private readonly ocrApiUrl: string;
|
||||
private readonly localUploadBase: string;
|
||||
private readonly sidecarUploadBase: string;
|
||||
|
||||
constructor(
|
||||
private readonly configService: ConfigService,
|
||||
@@ -114,13 +115,6 @@ export class OcrService {
|
||||
'OCR_API_URL',
|
||||
'http://localhost:8765'
|
||||
);
|
||||
this.localUploadBase = this.configService
|
||||
.get<string>('UPLOAD_PERMANENT_DIR', '/app/uploads/permanent')
|
||||
.replace(/\/permanent$/, '');
|
||||
this.sidecarUploadBase = this.configService.get<string>(
|
||||
'OCR_SIDECAR_UPLOAD_BASE',
|
||||
'/mnt/uploads'
|
||||
);
|
||||
}
|
||||
|
||||
/** ดึงรายการ OCR Engines ทั้งหมด พร้อมตรวจสอบตัวที่กำลัง Active */
|
||||
@@ -198,57 +192,6 @@ export class OcrService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Translates a backend-local upload path into the path at which the OCR
 * sidecar sees the same file under its own upload mount.
 *
 * Backslashes are converted to forward slashes and runs of slashes are
 * collapsed before matching. Rules are tried in order; the first match wins:
 *  1. everything after the first `/uploads/` segment (case-insensitive);
 *  2. a relative path that begins with `uploads/` (case-insensitive);
 *  3. everything after the configured local upload base, when that base
 *     occurs in the path and ends on a path-segment boundary.
 * If no rule matches, the normalized path is returned unchanged.
 *
 * @param localPath - Path as known to the backend (Windows or POSIX style).
 * @returns The sidecar-visible path, or `localPath` itself when it is empty.
 */
private remapPath(localPath: string): string {
  if (!localPath) return localPath;

  // Normalize separators so the rules below only deal with single '/'.
  const normalizedPath = localPath.replace(/\\/g, '/').replace(/\/+/g, '/');
  const sidecarBase = this.sidecarUploadBase.replace(/\/+$/, '');

  // Joins a path tail onto the sidecar base with exactly one separator.
  const toSidecarPath = (tail: string): string =>
    `${sidecarBase}/${tail.replace(/^\/+/, '')}`;

  // Rule 1: absolute path containing an "/uploads/" segment.
  const uploadsMatch = normalizedPath.match(/\/uploads\/(.+)$/i);
  if (uploadsMatch && uploadsMatch[1]) {
    const mappedPath = toSidecarPath(uploadsMatch[1]);
    this.logger.debug(
      `Mapped Windows path "${localPath}" to Sidecar path "${mappedPath}"`
    );
    return mappedPath;
  }

  // Rule 2: relative path such as "uploads/temp/xxx.pdf". Compared
  // case-insensitively so it agrees with rule 1.
  const relativePrefix = 'uploads/';
  if (normalizedPath.toLowerCase().startsWith(relativePrefix)) {
    const mappedPath = toSidecarPath(
      normalizedPath.slice(relativePrefix.length)
    );
    this.logger.debug(
      `Mapped relative path "${localPath}" to "${mappedPath}"`
    );
    return mappedPath;
  }

  // Rule 3: fallback on the configured local upload base.
  const normalizedLocalBase = this.localUploadBase
    .replace(/\\/g, '/')
    .replace(/\/+/g, '/');
  if (normalizedLocalBase) {
    let at = normalizedPath.indexOf(normalizedLocalBase);
    while (at !== -1) {
      const end = at + normalizedLocalBase.length;
      // Accept only matches that end on a segment boundary, so a base of
      // "/data/files" does not match inside "/data/files-old/x.pdf".
      const endsOnBoundary =
        normalizedLocalBase.endsWith('/') ||
        end === normalizedPath.length ||
        normalizedPath.charAt(end) === '/';
      if (endsOnBoundary) {
        const mappedPath = toSidecarPath(normalizedPath.slice(end));
        this.logger.debug(
          `Mapped fallback path "${localPath}" to "${mappedPath}"`
        );
        return mappedPath;
      }
      at = normalizedPath.indexOf(normalizedLocalBase, at + 1);
    }
  }

  return normalizedPath;
}
|
||||
|
||||
/** ตรวจสอบสุขภาพและ latency ของ OCR sidecar (Tesseract) ผ่าน GET /health */
|
||||
async checkHealth(): Promise<OcrHealthResult> {
|
||||
const startTime = Date.now();
|
||||
try {
|
||||
@@ -295,26 +238,28 @@ export class OcrService {
|
||||
}
|
||||
}
|
||||
|
||||
/** ประมวลผลผ่าน Tesseract OCR */
|
||||
/** ประมวลผลผ่าน Tesseract OCR โดยส่ง file content ผ่าน multipart */
|
||||
private async processWithTesseract(
|
||||
input: OcrDetectionInput
|
||||
): Promise<OcrDetectionResult> {
|
||||
const startTime = Date.now();
|
||||
const sidecarPath = this.remapPath(input.pdfPath!);
|
||||
|
||||
try {
|
||||
this.logger.debug(
|
||||
`Tesseract OCR processing: ${input.pdfPath} → ${sidecarPath}`
|
||||
this.logger.debug(`Tesseract OCR processing: ${input.pdfPath}`);
|
||||
const fileBuffer = fs.readFileSync(input.pdfPath!);
|
||||
const form = new FormData();
|
||||
form.append(
|
||||
'file',
|
||||
new Blob([fileBuffer], { type: 'application/pdf' }),
|
||||
'upload.pdf'
|
||||
);
|
||||
form.append('engine', 'auto');
|
||||
const response = await axios.post<OcrSidecarResponse>(
|
||||
`${this.ocrApiUrl}/ocr`,
|
||||
{ pdfPath: sidecarPath },
|
||||
`${this.ocrApiUrl}/ocr-upload`,
|
||||
form,
|
||||
{ timeout: 90000 }
|
||||
);
|
||||
|
||||
const text = response.data.text ?? '';
|
||||
const durationMs = Date.now() - startTime;
|
||||
|
||||
await this.writeAuditLog({
|
||||
documentPublicId: input.documentPublicId,
|
||||
aiModel: 'tesseract',
|
||||
@@ -324,26 +269,9 @@ export class OcrService {
|
||||
processingTimeMs: durationMs,
|
||||
cacheHit: false,
|
||||
});
|
||||
|
||||
return {
|
||||
text,
|
||||
ocrUsed: true,
|
||||
};
|
||||
return { text, ocrUsed: true };
|
||||
} catch (err: unknown) {
|
||||
const durationMs = Date.now() - startTime;
|
||||
// ดึง axios response body detail ออกมาด้วย (เช่น ไม่พบไฟล์: /mnt/uploads/...)
|
||||
const axiosDetail =
|
||||
err !== null &&
|
||||
typeof err === 'object' &&
|
||||
'response' in err &&
|
||||
err.response !== null &&
|
||||
typeof err.response === 'object' &&
|
||||
'data' in err.response &&
|
||||
err.response.data !== null &&
|
||||
typeof err.response.data === 'object' &&
|
||||
'detail' in err.response.data
|
||||
? String((err.response.data as { detail: unknown }).detail)
|
||||
: null;
|
||||
const cause =
|
||||
err instanceof AggregateError && err.errors?.length
|
||||
? err.errors
|
||||
@@ -352,10 +280,6 @@ export class OcrService {
|
||||
: err instanceof Error
|
||||
? err.message
|
||||
: String(err);
|
||||
const fullCause = axiosDetail
|
||||
? `${cause} — sidecar detail: ${axiosDetail} (sidecarPath: ${sidecarPath})`
|
||||
: `${cause} (sidecarPath: ${sidecarPath})`;
|
||||
|
||||
await this.writeAuditLog({
|
||||
documentPublicId: input.documentPublicId,
|
||||
aiModel: 'tesseract',
|
||||
@@ -363,11 +287,10 @@ export class OcrService {
|
||||
modelType: 'tesseract',
|
||||
status: AiAuditStatus.FAILED,
|
||||
processingTimeMs: durationMs,
|
||||
errorMessage: fullCause,
|
||||
errorMessage: cause,
|
||||
cacheHit: false,
|
||||
});
|
||||
|
||||
throw new Error(`Tesseract OCR Sidecar failed: ${fullCause}`);
|
||||
throw new Error(`Tesseract OCR Sidecar failed: ${cause}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -376,8 +299,6 @@ export class OcrService {
|
||||
input: OcrDetectionInput
|
||||
): Promise<OcrDetectionResult> {
|
||||
const startTime = Date.now();
|
||||
const sidecarPath = this.remapPath(input.pdfPath!);
|
||||
|
||||
try {
|
||||
// 1. ตรวจสอบ VRAM insufficiency guard
|
||||
const hasCapacity = await this.vramMonitorService.hasVramCapacity(
|
||||
@@ -390,15 +311,18 @@ export class OcrService {
|
||||
return this.processWithTesseract(input);
|
||||
}
|
||||
|
||||
this.logger.debug(
|
||||
`Typhoon OCR processing: ${input.pdfPath} → ${sidecarPath}`
|
||||
this.logger.debug(`Typhoon OCR processing: ${input.pdfPath}`);
|
||||
const fileBuffer = fs.readFileSync(input.pdfPath!);
|
||||
const form = new FormData();
|
||||
form.append(
|
||||
'file',
|
||||
new Blob([fileBuffer], { type: 'application/pdf' }),
|
||||
'upload.pdf'
|
||||
);
|
||||
form.append('engine', 'typhoon-ocr-3b');
|
||||
const response = await axios.post<OcrSidecarResponse>(
|
||||
`${this.ocrApiUrl}/ocr`,
|
||||
{
|
||||
pdfPath: sidecarPath,
|
||||
engine: 'typhoon-ocr-3b',
|
||||
},
|
||||
`${this.ocrApiUrl}/ocr-upload`,
|
||||
form,
|
||||
{ timeout: 120000 }
|
||||
);
|
||||
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
// File: src/modules/ai/services/sandbox-ocr-engine.service.ts
|
||||
// Change Log
|
||||
// - 2026-05-30: แยก SandboxOcrEngineService ออกจาก OcrService เพื่อรองรับการเลือก Typhoon OCR เฉพาะ sandbox โดยไม่กระทบ core OCR flow
|
||||
// - 2026-06-01: ปรับปรุง remapPath ให้รองรับ Windows absolute และ relative path ได้แม่นยำ 100%
|
||||
// - 2026-06-01: เปลี่ยนจาก remapPath + pdfPath ไปเป็น multipart file upload ไปยัง /ocr-upload (แก้ปัญหา Docker WSL2 mount)
|
||||
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import axios from 'axios';
|
||||
import * as fs from 'fs';
|
||||
import { OcrService } from './ocr.service';
|
||||
|
||||
export type SandboxOcrEngineType = 'auto' | 'tesseract' | 'typhoon-ocr-3b';
|
||||
@@ -28,8 +29,6 @@ export interface SandboxOcrResult {
|
||||
export class SandboxOcrEngineService {
|
||||
private readonly logger = new Logger(SandboxOcrEngineService.name);
|
||||
private readonly ocrApiUrl: string;
|
||||
private readonly localUploadBase: string;
|
||||
private readonly sidecarUploadBase: string;
|
||||
|
||||
constructor(
|
||||
private readonly configService: ConfigService,
|
||||
@@ -39,63 +38,6 @@ export class SandboxOcrEngineService {
|
||||
'OCR_API_URL',
|
||||
'http://localhost:8765'
|
||||
);
|
||||
this.localUploadBase = this.configService
|
||||
.get<string>('UPLOAD_PERMANENT_DIR', '/app/uploads/permanent')
|
||||
.replace(/\/permanent$/, '');
|
||||
this.sidecarUploadBase = this.configService.get<string>(
|
||||
'OCR_SIDECAR_UPLOAD_BASE',
|
||||
'/mnt/uploads'
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Translates a backend-local upload path into the path at which the OCR
 * sidecar sees the same file under its own upload mount.
 *
 * Backslashes are converted to forward slashes and runs of slashes are
 * collapsed before matching. Rules are tried in order; the first match wins:
 *  1. everything after the first `/uploads/` segment (case-insensitive);
 *  2. a relative path that begins with `uploads/` (case-insensitive);
 *  3. everything after the configured local upload base, when that base
 *     occurs in the path and ends on a path-segment boundary.
 * If no rule matches, the normalized path is returned unchanged.
 *
 * @param localPath - Path as known to the backend (Windows or POSIX style).
 * @returns The sidecar-visible path, or `localPath` itself when it is empty.
 */
private remapPath(localPath: string): string {
  if (!localPath) return localPath;

  // Normalize separators so the rules below only deal with single '/'.
  const normalizedPath = localPath.replace(/\\/g, '/').replace(/\/+/g, '/');
  const sidecarBase = this.sidecarUploadBase.replace(/\/+$/, '');

  // Joins a path tail onto the sidecar base with exactly one separator.
  const toSidecarPath = (tail: string): string =>
    `${sidecarBase}/${tail.replace(/^\/+/, '')}`;

  // Rule 1: absolute path containing an "/uploads/" segment.
  const uploadsMatch = normalizedPath.match(/\/uploads\/(.+)$/i);
  if (uploadsMatch && uploadsMatch[1]) {
    const mappedPath = toSidecarPath(uploadsMatch[1]);
    this.logger.debug(
      `Mapped Windows path "${localPath}" to Sidecar path "${mappedPath}"`
    );
    return mappedPath;
  }

  // Rule 2: relative path such as "uploads/temp/xxx.pdf". Compared
  // case-insensitively so it agrees with rule 1.
  const relativePrefix = 'uploads/';
  if (normalizedPath.toLowerCase().startsWith(relativePrefix)) {
    const mappedPath = toSidecarPath(
      normalizedPath.slice(relativePrefix.length)
    );
    this.logger.debug(
      `Mapped relative path "${localPath}" to "${mappedPath}"`
    );
    return mappedPath;
  }

  // Rule 3: fallback on the configured local upload base.
  const normalizedLocalBase = this.localUploadBase
    .replace(/\\/g, '/')
    .replace(/\/+/g, '/');
  if (normalizedLocalBase) {
    let at = normalizedPath.indexOf(normalizedLocalBase);
    while (at !== -1) {
      const end = at + normalizedLocalBase.length;
      // Accept only matches that end on a segment boundary, so a base of
      // "/data/files" does not match inside "/data/files-old/x.pdf".
      const endsOnBoundary =
        normalizedLocalBase.endsWith('/') ||
        end === normalizedPath.length ||
        normalizedPath.charAt(end) === '/';
      if (endsOnBoundary) {
        const mappedPath = toSidecarPath(normalizedPath.slice(end));
        this.logger.debug(
          `Mapped fallback path "${localPath}" to "${mappedPath}"`
        );
        return mappedPath;
      }
      at = normalizedPath.indexOf(normalizedLocalBase, at + 1);
    }
  }

  return normalizedPath;
}
|
||||
|
||||
/** รัน OCR ตาม engine ที่เลือก โดย fallback กลับไป Tesseract baseline เมื่อ Typhoon ล้มเหลว */
|
||||
@@ -114,12 +56,17 @@ export class SandboxOcrEngineService {
|
||||
}
|
||||
|
||||
try {
|
||||
const fileBuffer = fs.readFileSync(pdfPath);
|
||||
const form = new FormData();
|
||||
form.append(
|
||||
'file',
|
||||
new Blob([fileBuffer], { type: 'application/pdf' }),
|
||||
'upload.pdf'
|
||||
);
|
||||
form.append('engine', engineType);
|
||||
const response = await axios.post<SandboxOcrSidecarResponse>(
|
||||
`${this.ocrApiUrl}/ocr`,
|
||||
{
|
||||
pdfPath: this.remapPath(pdfPath),
|
||||
engine: engineType,
|
||||
},
|
||||
`${this.ocrApiUrl}/ocr-upload`,
|
||||
form,
|
||||
{ timeout: 120000 }
|
||||
);
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
# - 2026-05-30: เปลี่ยน lang='en' เป็น lang='ch' (CTJK) เพื่อรองรับภาษาไทย
|
||||
# - 2026-05-30: เปลี่ยนจาก PaddleOCR เป็น Tesseract OCR เพื่อความเข้ากันได้กับ CPU เก่า
|
||||
# - 2026-05-30: เพิ่ม OpenCV preprocessing (threshold, denoise) และ DPI 300 เพื่อเพิ่มความแม่นยำ
|
||||
# - 2026-06-01: เพิ่ม POST /ocr-upload รับ multipart file โดยตรง ไม่ต้องพึ่ง shared volume mount
|
||||
|
||||
import os
|
||||
import logging
|
||||
@@ -21,7 +22,7 @@ import io
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi import FastAPI, HTTPException, UploadFile, File, Form
|
||||
from pydantic import BaseModel
|
||||
from pythainlp.tokenize import word_tokenize
|
||||
from pythainlp.util import normalize as thai_normalize
|
||||
@@ -122,6 +123,71 @@ def health():
|
||||
return {"status": "ok", "engine": "tesseract"}
|
||||
|
||||
|
||||
def _render_page_for_ocr(page):
    """Rasterize one PDF page at 300 DPI, crop header/footer, and return the preprocessed image."""
    pix = page.get_pixmap(dpi=300)
    img = Image.open(io.BytesIO(pix.tobytes("png")))
    cropped_img = crop_header_footer(img, CROP_TOP_RATIO, CROP_BOTTOM_RATIO)
    return preprocess_image(cropped_img)


def _process_pdf_doc(doc: fitz.Document, selected_engine: str, max_pages: int) -> OcrResponse:
    """Run the selected engine over an open PDF document — shared logic for /ocr and /ocr-upload.

    Args:
        doc: An open document. It is closed before this function returns or
            raises, so the caller must not use it afterwards.
        selected_engine: "auto" first tries the embedded text layer and falls
            back to Tesseract when too little text is found; "typhoon-ocr-3b"
            uses Typhoon OCR; any other value goes straight to Tesseract.
        max_pages: Maximum number of pages to process, counted from the first
            page. A value <= 0 means all pages.

    Returns:
        OcrResponse with the extracted text, whether OCR was used, the number
        of pages processed, the character count, and the engine that produced
        the text.
    """
    try:
        page_limit = min(len(doc), max_pages) if max_pages > 0 else len(doc)
        pages_to_process = list(range(page_limit))
        page_count = len(pages_to_process)

        total_chars = 0
        if selected_engine == "auto":
            # Fast path: use the PDF's own text layer when it holds enough text.
            fast_text = "\n".join(doc[i].get_text() for i in pages_to_process).strip()
            total_chars = len(fast_text)
            if total_chars > OCR_CHAR_THRESHOLD:
                logger.info(f"Fast path: {total_chars} chars extracted")
                return OcrResponse(
                    text=fast_text,
                    ocrUsed=False,
                    pageCount=page_count,
                    charCount=total_chars,
                    engineUsed="fast-path",
                )

        if selected_engine == "typhoon-ocr-3b":
            typhoon_text_parts = [
                process_with_typhoon_ocr(_render_page_for_ocr(doc[i]))
                for i in pages_to_process
            ]
            typhoon_text = filter_ocr_noise("\n".join(typhoon_text_parts).strip())
            return OcrResponse(
                text=typhoon_text,
                ocrUsed=True,
                pageCount=page_count,
                charCount=len(typhoon_text),
                engineUsed="typhoon-ocr-3b",
            )

        # Slow path: Tesseract, either requested explicitly or as the "auto" fallback.
        logger.info(f"Slow path (Tesseract): {total_chars} chars too few")
        ocr_text_parts = []
        for i in pages_to_process:
            text = pytesseract.image_to_string(
                _render_page_for_ocr(doc[i]), lang=OCR_LANG, config=TESSERACT_CONFIG
            )
            ocr_text_parts.append(text.strip())

        ocr_text = filter_ocr_noise("\n".join(ocr_text_parts).strip())
        logger.info(f"Tesseract extracted {len(ocr_text)} chars")
        return OcrResponse(
            text=ocr_text,
            ocrUsed=True,
            pageCount=page_count,
            charCount=len(ocr_text),
            engineUsed="tesseract",
        )
    finally:
        # Release the document on every exit path instead of waiting for
        # garbage collection.
        doc.close()
|
||||
|
||||
|
||||
def process_with_typhoon_ocr(pil_image: Image.Image) -> str:
|
||||
"""เรียก Typhoon OCR ผ่าน Ollama สำหรับ sandbox option โดยไม่แตะ backend DB/storage"""
|
||||
img_buffer = io.BytesIO()
|
||||
@@ -148,92 +214,35 @@ def process_with_typhoon_ocr(pil_image: Image.Image) -> str:
|
||||
|
||||
@app.post("/ocr", response_model=OcrResponse)
|
||||
def ocr_extract(req: OcrRequest):
|
||||
"""OCR จาก path (legacy — ใช้เมื่อ sidecar และ backend เข้าถึง storage เดียวกัน)"""
|
||||
pdf_path = Path(req.pdfPath)
|
||||
if not pdf_path.exists():
|
||||
raise HTTPException(status_code=404, detail=f"ไม่พบไฟล์: {req.pdfPath}")
|
||||
|
||||
selected_engine = (req.engine or "auto").strip().lower()
|
||||
max_pages = req.maxPages or MAX_PAGES
|
||||
|
||||
try:
|
||||
doc = fitz.open(str(pdf_path))
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=422, detail=f"เปิดไฟล์ PDF ล้มเหลว: {e}")
|
||||
return _process_pdf_doc(doc, selected_engine, max_pages)
|
||||
|
||||
pages_to_process = list(range(min(len(doc), max_pages) if max_pages > 0 else len(doc)))
|
||||
page_count = len(pages_to_process)
|
||||
|
||||
fast_text_parts = []
|
||||
total_chars = 0
|
||||
if selected_engine == "auto":
|
||||
# Fast path: ลอง extract text layer ก่อน
|
||||
for i in pages_to_process:
|
||||
page = doc[i]
|
||||
fast_text_parts.append(page.get_text())
|
||||
fast_text = "\n".join(fast_text_parts).strip()
|
||||
total_chars = len(fast_text)
|
||||
if total_chars > OCR_CHAR_THRESHOLD:
|
||||
logger.info(f"Fast path: {total_chars} chars extracted from {pdf_path.name}")
|
||||
return OcrResponse(
|
||||
text=fast_text,
|
||||
ocrUsed=False,
|
||||
pageCount=page_count,
|
||||
charCount=total_chars,
|
||||
engineUsed="fast-path",
|
||||
)
|
||||
|
||||
if selected_engine == "typhoon-ocr-3b":
|
||||
logger.info(f"Typhoon OCR path: {pdf_path.name}")
|
||||
typhoon_text_parts = []
|
||||
for i in pages_to_process:
|
||||
page = doc[i]
|
||||
pix = page.get_pixmap(dpi=300)
|
||||
img_bytes = pix.tobytes("png")
|
||||
img = Image.open(io.BytesIO(img_bytes))
|
||||
cropped_img = crop_header_footer(img, CROP_TOP_RATIO, CROP_BOTTOM_RATIO)
|
||||
processed_img = preprocess_image(cropped_img)
|
||||
typhoon_text_parts.append(process_with_typhoon_ocr(processed_img))
|
||||
typhoon_text = filter_ocr_noise("\n".join(typhoon_text_parts).strip())
|
||||
return OcrResponse(
|
||||
text=typhoon_text,
|
||||
ocrUsed=True,
|
||||
pageCount=page_count,
|
||||
charCount=len(typhoon_text),
|
||||
engineUsed="typhoon-ocr-3b",
|
||||
)
|
||||
|
||||
logger.info(f"Slow path (Tesseract): {total_chars} chars too few for {pdf_path.name}")
|
||||
ocr_text_parts = []
|
||||
for i in pages_to_process:
|
||||
page = doc[i]
|
||||
pix = page.get_pixmap(dpi=300) # เพิ่ม DPI เป็น 300 เพื่อความชัด
|
||||
img_bytes = pix.tobytes("png")
|
||||
img = Image.open(io.BytesIO(img_bytes))
|
||||
|
||||
# Crop header/footer ก่อนเพื่อลบข้อความที่ไม่จำเป็น
|
||||
cropped_img = crop_header_footer(img, CROP_TOP_RATIO, CROP_BOTTOM_RATIO)
|
||||
|
||||
# Preprocess ด้วย OpenCV เพื่อเพิ่มความแม่นยำ
|
||||
processed_img = preprocess_image(cropped_img)
|
||||
|
||||
# OCR ด้วย Tesseract โดยใช้ PSM 6 และ OEM 1
|
||||
text = pytesseract.image_to_string(processed_img, lang=OCR_LANG, config=TESSERACT_CONFIG)
|
||||
ocr_text_parts.append(text.strip())
|
||||
|
||||
ocr_text = "\n".join(ocr_text_parts).strip()
|
||||
|
||||
# Filter ขยะ OCR หลังจากสกัดข้อความแล้ว
|
||||
ocr_text = filter_ocr_noise(ocr_text)
|
||||
|
||||
logger.info(f"Tesseract extracted {len(ocr_text)} chars from {pdf_path.name}")
|
||||
|
||||
return OcrResponse(
|
||||
text=ocr_text,
|
||||
ocrUsed=True,
|
||||
pageCount=page_count,
|
||||
charCount=len(ocr_text),
|
||||
engineUsed="tesseract",
|
||||
)
|
||||
@app.post("/ocr-upload", response_model=OcrResponse)
def ocr_upload(
    file: UploadFile = File(...),
    engine: str = Form(default="auto"),
    maxPages: int = Form(default=0),
):
    """OCR a PDF received as a multipart file upload — no shared volume mount is required.

    Form fields:
        file: The PDF to process.
        engine: Engine name; surrounding whitespace is stripped and the value lower-cased.
        maxPages: Page limit; a falsy value (0) falls back to MAX_PAGES.

    Raises:
        HTTPException: 422 when the uploaded bytes cannot be opened as a PDF.
    """
    engine_name = engine.strip().lower()
    page_limit = maxPages if maxPages else MAX_PAGES
    payload = file.file.read()

    try:
        document = fitz.open(stream=payload, filetype="pdf")
    except Exception as exc:
        raise HTTPException(status_code=422, detail=f"เปิดไฟล์ PDF ล้มเหลว: {exc}")
    else:
        logger.info(f"OCR upload: {file.filename} engine={engine_name}")
        return _process_pdf_doc(document, engine_name, page_limit)
|
||||
|
||||
|
||||
class NormalizeRequest(BaseModel):
|
||||
|
||||
+2
-10
@@ -7,9 +7,8 @@
|
||||
# - 2026-05-30: เพิ่ม Typhoon OCR environment variables (T009b, ADR-032)
|
||||
# OLLAMA_API_URL ชี้ไปที่ http://192.168.10.100:11434 (Admin Desktop LAN IP)
|
||||
# - 2026-05-30: Revert volumes กลับไปใช้ Windows Z: drive bind mount (แทน CIFS volume driver ที่พัง)
|
||||
# - 2026-06-01: แก้ volumes เปลี่ยนจาก Z: drive bind mount (ไม่ทำงานบน WSL2)
|
||||
# เป็น CIFS named volume ชี้ตรงไปที่ UNC path \\192.168.10.8\np-dms-as\data\uploads
|
||||
# ต้องสร้างไฟล์ .env ที่ Desk-5439 (ดูตัวอย่างใน .env.example)
|
||||
# - 2026-06-01: ลบ volumes ออกทั้งหมด — backend ส่ง file content ผ่าน multipart /ocr-upload แทน
|
||||
# ไม่ต้องการ shared storage อีกต่อไป
|
||||
#
|
||||
# วิธีรัน:
|
||||
# docker compose up -d --build
|
||||
@@ -40,13 +39,6 @@ services:
|
||||
TYPHOON_OCR_MODEL: "scb10x/typhoon-ocr-3b"
|
||||
# Timeout 120 วินาที/หน้า (budget สำหรับ 3B model บน RTX 2060 Super)
|
||||
TYPHOON_OCR_TIMEOUT: "120"
|
||||
volumes:
|
||||
# Uploads จาก QNAP NAS ผ่าน WSL2 mount path
|
||||
# Z: = \\192.168.10.8\np-dms-as → WSL2 เห็นเป็น /mnt/z
|
||||
# Docker Desktop bind mount จาก Windows path ใช้ //wsl.localhost/ ไม่ได้
|
||||
# แต่ใช้ Windows absolute path ของ Z: ได้ผ่าน Docker Desktop settings
|
||||
# วิธีที่ใช้งานได้: ระบุ source เป็น Windows UNC path โดยตรง
|
||||
- //192.168.10.8/np-dms-as/data/uploads:/mnt/uploads:ro
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
|
||||
Reference in New Issue
Block a user