From 2cc07ee2e5a94aeed7440ae90511bf6b40661636 Mon Sep 17 00:00:00 2001 From: admin Date: Mon, 1 Jun 2026 21:43:19 +0700 Subject: [PATCH] 690601:2143 ADR-032-232 #09 --- backend/src/modules/ai/ai.controller.ts | 21 ++++++++++++------- .../ai/processors/typhoon-ocr.processor.ts | 2 +- .../src/modules/ai/services/ocr.service.ts | 4 ++-- .../ai/services/sandbox-ocr-engine.service.ts | 6 +++++- .../Desk-5439/ocr-sidecar/app.py | 7 ++++--- .../Desk-5439/ocr-sidecar/docker-compose.yml | 3 ++- 6 files changed, 27 insertions(+), 16 deletions(-) diff --git a/backend/src/modules/ai/ai.controller.ts b/backend/src/modules/ai/ai.controller.ts index 174917fe..125efa54 100644 --- a/backend/src/modules/ai/ai.controller.ts +++ b/backend/src/modules/ai/ai.controller.ts @@ -86,6 +86,7 @@ import { AiEnabledGuard } from './guards/ai-enabled.guard'; import { InjectRedis } from '@nestjs-modules/ioredis'; import Redis from 'ioredis'; import { FileStorageService } from '../../common/file-storage/file-storage.service'; +import { SandboxOcrEngineType } from './services/sandbox-ocr-engine.service'; import { AiMigrationCheckpointService } from './ai-migration-checkpoint.service'; import { MigrationErrorLogDto, @@ -538,7 +539,7 @@ export class AiController { }, engineType: { type: 'string', - enum: ['auto', 'tesseract', 'typhoon-ocr-3b'], + enum: ['auto', 'tesseract', 'typhoon-ocr-3b', 'typhoon-ocr1.5-3b'], description: 'OCR engine ที่ต้องการใช้ (default: auto)', }, }, @@ -560,13 +561,17 @@ export class AiController { const attachment = await this.fileStorageService.upload(file, user.user_id); const requestPublicId = uuidv7(); // ตรวจสอบและ normalize engineType ให้เป็นค่าที่ valid - const validEngineTypes = ['auto', 'tesseract', 'typhoon-ocr-3b'] as const; - const resolvedEngineType: 'auto' | 'tesseract' | 'typhoon-ocr-3b' = - validEngineTypes.includes( - engineType as 'auto' | 'tesseract' | 'typhoon-ocr-3b' - ) - ? (engineType as 'auto' | 'tesseract' | 'typhoon-ocr-3b') - : 'auto'; + const validEngineTypes = [ + 'auto', + 'tesseract', + 'typhoon-ocr-3b', + 'typhoon-ocr1.5-3b', + ] as const; + const resolvedEngineType: SandboxOcrEngineType = validEngineTypes.includes( + engineType as SandboxOcrEngineType + ) + ? (engineType as SandboxOcrEngineType) + : 'auto'; const jobId = await this.aiQueueService.enqueueSandboxJob( 'sandbox-ocr-only', { diff --git a/backend/src/modules/ai/processors/typhoon-ocr.processor.ts b/backend/src/modules/ai/processors/typhoon-ocr.processor.ts index eb45c95d..d18d9fe6 100644 --- a/backend/src/modules/ai/processors/typhoon-ocr.processor.ts +++ b/backend/src/modules/ai/processors/typhoon-ocr.processor.ts @@ -184,7 +184,7 @@ export class TyphoonOcrProcessor extends WorkerHost { const log = this.auditLogRepo.create({ documentPublicId: params.documentPublicId, aiModel: 'typhoon-ocr', - modelName: 'scb10x/typhoon-ocr-3b', + modelName: 'scb10x/typhoon-ocr1.5-3b', modelType: params.engineType, status: params.status, processingTimeMs: params.processingTimeMs, diff --git a/backend/src/modules/ai/services/ocr.service.ts b/backend/src/modules/ai/services/ocr.service.ts index a2575d97..5984cf44 100644 --- a/backend/src/modules/ai/services/ocr.service.ts +++ b/backend/src/modules/ai/services/ocr.service.ts @@ -319,7 +319,7 @@ export class OcrService { new Blob([fileBuffer], { type: 'application/pdf' }), 'upload.pdf' ); - form.append('engine', 'typhoon-ocr-3b'); + form.append('engine', 'typhoon-ocr1.5-3b'); const response = await axios.post( `${this.ocrApiUrl}/ocr-upload`, form, @@ -332,7 +332,7 @@ export class OcrService { await this.writeAuditLog({ documentPublicId: input.documentPublicId, aiModel: 'typhoon-ocr', - modelName: 'typhoon-ocr-3b', + modelName: 'typhoon-ocr1.5-3b', modelType: 'typhoon-ocr', status: AiAuditStatus.SUCCESS, processingTimeMs: durationMs, diff --git a/backend/src/modules/ai/services/sandbox-ocr-engine.service.ts b/backend/src/modules/ai/services/sandbox-ocr-engine.service.ts index a577e4dd..7f324455 100644 --- a/backend/src/modules/ai/services/sandbox-ocr-engine.service.ts +++ b/backend/src/modules/ai/services/sandbox-ocr-engine.service.ts @@ -9,7 +9,11 @@ import axios from 'axios'; import * as fs from 'fs'; import { OcrService } from './ocr.service'; -export type SandboxOcrEngineType = 'auto' | 'tesseract' | 'typhoon-ocr-3b'; +export type SandboxOcrEngineType = + | 'auto' + | 'tesseract' + | 'typhoon-ocr-3b' + | 'typhoon-ocr1.5-3b'; interface SandboxOcrSidecarResponse { text?: string; diff --git a/specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/app.py b/specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/app.py index b6662ae4..6014cb88 100644 --- a/specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/app.py +++ b/specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/app.py @@ -7,6 +7,7 @@ # - 2026-05-30: เปลี่ยนจาก PaddleOCR เป็น Tesseract OCR เพื่อความเข้ากันได้กับ CPU เก่า # - 2026-05-30: เพิ่ม OpenCV preprocessing (threshold, denoise) และ DPI 300 เพื่อเพิ่มความแม่นยำ # - 2026-06-01: เพิ่ม POST /ocr-upload รับ multipart file โดยตรง ไม่ต้องพึ่ง shared volume mount +# - 2026-06-01: เปลี่ยน TYPHOON_OCR_MODEL default เป็น scb10x/typhoon-ocr1.5-3b import os import logging @@ -37,7 +38,7 @@ OCR_CHAR_THRESHOLD = int(os.getenv("OCR_CHAR_THRESHOLD", "100")) MAX_PAGES = int(os.getenv("OCR_MAX_PAGES", "0")) # 0 = ทุกหน้า OCR_LANG = os.getenv("OCR_LANG", "tha+eng") # Tesseract language code (tha+eng = Thai + English) OLLAMA_API_URL = os.getenv("OLLAMA_API_URL", "http://host.docker.internal:11434") -TYPHOON_OCR_MODEL = os.getenv("TYPHOON_OCR_MODEL", "scb10x/typhoon-ocr-3b") +TYPHOON_OCR_MODEL = os.getenv("TYPHOON_OCR_MODEL", "scb10x/typhoon-ocr1.5-3b") TYPHOON_OCR_TIMEOUT = int(os.getenv("TYPHOON_OCR_TIMEOUT", "120")) # PSM 3 = Fully automatic page segmentation (เหมาะกับเอกสารที่มี layout หลายส่วน เช่น วันที่/เลขที่) # OEM 1 = LSTM only (ดีกว่า legacy engine) @@ -146,7 +147,7 @@ def _process_pdf_doc(doc: fitz.Document, selected_engine: str, max_pages: int) - engineUsed="fast-path", ) - if selected_engine == "typhoon-ocr-3b": + if selected_engine in ("typhoon-ocr-3b", "typhoon-ocr1.5-3b"): typhoon_text_parts = [] for i in pages_to_process: page = doc[i] @@ -162,7 +163,7 @@ def _process_pdf_doc(doc: fitz.Document, selected_engine: str, max_pages: int) - ocrUsed=True, pageCount=page_count, charCount=len(typhoon_text), - engineUsed="typhoon-ocr-3b", + engineUsed="typhoon-ocr1.5-3b", ) logger.info(f"Slow path (Tesseract): {total_chars} chars too few") diff --git a/specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/docker-compose.yml b/specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/docker-compose.yml index a180f884..4a034d96 100644 --- a/specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/docker-compose.yml +++ b/specs/04-Infrastructure-OPS/04-00-docker-compose/Desk-5439/ocr-sidecar/docker-compose.yml @@ -9,6 +9,7 @@ # - 2026-05-30: Revert volumes กลับไปใช้ Windows Z: drive bind mount (แทน CIFS volume driver ที่พัง) # - 2026-06-01: ลบ volumes ออกทั้งหมด — backend ส่ง file content ผ่าน multipart /ocr-upload แทน # ไม่ต้องการ shared storage อีกต่อไป +# - 2026-06-01: เปลี่ยน TYPHOON_OCR_MODEL เป็น scb10x/typhoon-ocr1.5-3b # # วิธีรัน: # docker compose up -d --build @@ -36,7 +37,7 @@ services: # ─── Typhoon OCR via Ollama (ADR-032) ─────────────────────────────────── # ชี้ไปที่ Ollama ที่รันบน Desk-5439 ผ่าน LAN IP (ไม่ใช่ host.docker.internal) OLLAMA_API_URL: "http://192.168.10.100:11434" - TYPHOON_OCR_MODEL: "scb10x/typhoon-ocr-3b" + TYPHOON_OCR_MODEL: "scb10x/typhoon-ocr1.5-3b" # Timeout 120 วินาที/หน้า (budget สำหรับ 3B model บน RTX 2060 Super) TYPHOON_OCR_TIMEOUT: "120" logging: