690601:1221 ADR-032-232 #05
This commit is contained in:
@@ -7,6 +7,7 @@
|
|||||||
// - 2026-05-30: เปลี่ยนจาก PaddleOCR เป็น Tesseract OCR เพื่อความเข้ากันได้กับ CPU เก่า
|
// - 2026-05-30: เปลี่ยนจาก PaddleOCR เป็น Tesseract OCR เพื่อความเข้ากันได้กับ CPU เก่า
|
||||||
// - 2026-05-30: เพิ่ม VRAM insufficiency guard สำหรับ Typhoon OCR engine (T016a, ADR-032)
|
// - 2026-05-30: เพิ่ม VRAM insufficiency guard สำหรับ Typhoon OCR engine (T016a, ADR-032)
|
||||||
// - 2026-05-30: ปรับปรุงสำหรับ Dynamic OCR Engine selection, Caching, และ Graceful Fallback (T013, T014, T016, T022, T023, US1)
|
// - 2026-05-30: ปรับปรุงสำหรับ Dynamic OCR Engine selection, Caching, และ Graceful Fallback (T013, T014, T016, T022, T023, US1)
|
||||||
|
// - 2026-06-01: ปรับปรุง remapPath ให้รองรับ Windows absolute และ relative path ได้แม่นยำ 100%
|
||||||
|
|
||||||
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
|
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
|
||||||
import { ConfigService } from '@nestjs/config';
|
import { ConfigService } from '@nestjs/config';
|
||||||
@@ -201,32 +202,43 @@ export class OcrService {
|
|||||||
private remapPath(localPath: string): string {
|
private remapPath(localPath: string): string {
|
||||||
if (!localPath) return localPath;
|
if (!localPath) return localPath;
|
||||||
|
|
||||||
// 1. แปลง Backslash (\) ทั้งหมดให้เป็น Forward slash (/) และทำความสะอาด path
|
// 1. แปลง Backslash (\) ทั้งหมดให้เป็น Forward slash (/) และรวม slash ที่ซ้ำซ้อน
|
||||||
const normalizedPath = localPath.replace(/\\/g, '/');
|
const normalizedPath = localPath.replace(/\\/g, '/').replace(/\/+/g, '/');
|
||||||
|
const sidecarBase = this.sidecarUploadBase.replace(/\/+$/, '');
|
||||||
|
|
||||||
// 2. สกัดเอาส่วนของ path ที่อยู่หลัง /uploads หรือ uploads
|
// 2. สกัดเอาส่วนของ path ที่อยู่หลัง /uploads/
|
||||||
// เช่น "E:/np-dms/lcbp3/backend/uploads/temp/xxx.pdf" หรือ "Z:/data/uploads/permanent/xxx.pdf"
|
|
||||||
// จะค้นหาคำว่า "uploads/" แบบ Case-Insensitive
|
|
||||||
const uploadsMatch = normalizedPath.match(/\/uploads\/(.+)$/i);
|
const uploadsMatch = normalizedPath.match(/\/uploads\/(.+)$/i);
|
||||||
|
|
||||||
if (uploadsMatch && uploadsMatch[1]) {
|
if (uploadsMatch && uploadsMatch[1]) {
|
||||||
// คืนค่าเป็น /mnt/uploads/ + ส่วนปลายของไฟล์ (เช่น temp/xxx.pdf หรือ permanent/xxx.pdf)
|
const relativePart = uploadsMatch[1].replace(/^\/+/, '');
|
||||||
const sidecarBase = this.sidecarUploadBase.replace(/\/$/, '');
|
const mappedPath = `${sidecarBase}/${relativePart}`;
|
||||||
const mappedPath = `${sidecarBase}/${uploadsMatch[1]}`;
|
|
||||||
this.logger.debug(
|
this.logger.debug(
|
||||||
`Mapped Windows path "${localPath}" to Sidecar path "${mappedPath}"`
|
`Mapped Windows path "${localPath}" to Sidecar path "${mappedPath}"`
|
||||||
);
|
);
|
||||||
return mappedPath;
|
return mappedPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
// กรณีสำรอง: ถ้าไม่มี /uploads/ ใน path แต่เริ่มด้วย localUploadBase
|
// 3. กรณี Relative path ที่ขึ้นต้นด้วย uploads/ เช่น "uploads/temp/xxx.pdf"
|
||||||
const normalizedLocalBase = this.localUploadBase.replace(/\\/g, '/');
|
if (normalizedPath.startsWith('uploads/')) {
|
||||||
if (normalizedLocalBase && normalizedPath.includes(normalizedLocalBase)) {
|
const relativePart = normalizedPath.substring(8).replace(/^\/+/, '');
|
||||||
const relativePart = normalizedPath.substring(
|
const mappedPath = `${sidecarBase}/${relativePart}`;
|
||||||
normalizedPath.indexOf(normalizedLocalBase) + normalizedLocalBase.length
|
this.logger.debug(
|
||||||
|
`Mapped relative path "${localPath}" to "${mappedPath}"`
|
||||||
);
|
);
|
||||||
const sidecarBase = this.sidecarUploadBase.replace(/\/$/, '');
|
return mappedPath;
|
||||||
const mappedPath = `${sidecarBase}${relativePart}`;
|
}
|
||||||
|
|
||||||
|
// 4. กรณีสำรอง: ถ้าเริ่มด้วย localUploadBase
|
||||||
|
const normalizedLocalBase = this.localUploadBase
|
||||||
|
.replace(/\\/g, '/')
|
||||||
|
.replace(/\/+/g, '/');
|
||||||
|
if (normalizedLocalBase && normalizedPath.includes(normalizedLocalBase)) {
|
||||||
|
const relativePart = normalizedPath
|
||||||
|
.substring(
|
||||||
|
normalizedPath.indexOf(normalizedLocalBase) +
|
||||||
|
normalizedLocalBase.length
|
||||||
|
)
|
||||||
|
.replace(/^\/+/, '');
|
||||||
|
const mappedPath = `${sidecarBase}/${relativePart}`;
|
||||||
this.logger.debug(
|
this.logger.debug(
|
||||||
`Mapped fallback path "${localPath}" to "${mappedPath}"`
|
`Mapped fallback path "${localPath}" to "${mappedPath}"`
|
||||||
);
|
);
|
||||||
@@ -335,98 +347,56 @@ export class OcrService {
|
|||||||
modelType: 'tesseract',
|
modelType: 'tesseract',
|
||||||
status: AiAuditStatus.FAILED,
|
status: AiAuditStatus.FAILED,
|
||||||
processingTimeMs: durationMs,
|
processingTimeMs: durationMs,
|
||||||
cacheHit: false,
|
|
||||||
errorMessage: cause,
|
errorMessage: cause,
|
||||||
|
cacheHit: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
throw new Error(
|
throw new Error(`Tesseract OCR Sidecar failed: ${cause}`);
|
||||||
`OCR sidecar (Tesseract) unreachable at ${this.ocrApiUrl} — ${cause}`
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** ประมวลผลผ่าน Typhoon OCR พร้อม Caching และ Fallback */
|
/** ประมวลผลผ่าน Typhoon OCR */
|
||||||
private async processWithTyphoon(
|
private async processWithTyphoon(
|
||||||
input: OcrDetectionInput
|
input: OcrDetectionInput
|
||||||
): Promise<OcrDetectionResult> {
|
): Promise<OcrDetectionResult> {
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
const pdfPath = input.pdfPath!;
|
const sidecarPath = this.remapPath(input.pdfPath!);
|
||||||
const engineType = 'typhoon-ocr-3b';
|
|
||||||
|
|
||||||
// 1. ตรวจสอบ Redis cache (T022)
|
|
||||||
try {
|
try {
|
||||||
const cached = await this.ocrCacheService.get(pdfPath, engineType);
|
// 1. ตรวจสอบ VRAM insufficiency guard
|
||||||
if (cached) {
|
|
||||||
this.logger.log(`OCR Cache Hit for Typhoon OCR: ${pdfPath}`);
|
|
||||||
const durationMs = Date.now() - startTime;
|
|
||||||
|
|
||||||
await this.writeAuditLog({
|
|
||||||
documentPublicId: input.documentPublicId,
|
|
||||||
aiModel: 'typhoon-ocr',
|
|
||||||
modelName: 'scb10x/typhoon-ocr-3b',
|
|
||||||
modelType: engineType,
|
|
||||||
status: AiAuditStatus.SUCCESS,
|
|
||||||
processingTimeMs: durationMs,
|
|
||||||
cacheHit: true,
|
|
||||||
});
|
|
||||||
|
|
||||||
return {
|
|
||||||
text: cached.text,
|
|
||||||
ocrUsed: true,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
} catch (err: unknown) {
|
|
||||||
this.logger.warn(
|
|
||||||
`Cache checking failed: ${err instanceof Error ? err.message : String(err)}`
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2. ตรวจสอบปริมาณ VRAM ก่อนประมวลผล (T016a)
|
|
||||||
const hasCapacity = await this.vramMonitorService.hasVramCapacity(
|
const hasCapacity = await this.vramMonitorService.hasVramCapacity(
|
||||||
TYPHOON_OCR_REQUIRED_VRAM_MB
|
TYPHOON_OCR_REQUIRED_VRAM_MB
|
||||||
);
|
);
|
||||||
if (!hasCapacity) {
|
if (!hasCapacity) {
|
||||||
const errorMsg = `VRAM capacity (< 4GB) insufficient for Typhoon OCR-3B. Fallback to Tesseract.`;
|
this.logger.warn(
|
||||||
return this.fallbackToTesseract(
|
`VRAM insufficient for Typhoon OCR. Falling back to Tesseract baseline.`
|
||||||
pdfPath,
|
|
||||||
errorMsg,
|
|
||||||
input.documentPublicId
|
|
||||||
);
|
);
|
||||||
|
return this.processWithTesseract(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. เรียกประมวลผล Typhoon OCR
|
this.logger.debug(
|
||||||
const sidecarPath = this.remapPath(pdfPath);
|
`Typhoon OCR processing: ${input.pdfPath} → ${sidecarPath}`
|
||||||
try {
|
);
|
||||||
this.logger.log(`Calling Typhoon OCR-3B for: ${sidecarPath}`);
|
|
||||||
const response = await axios.post<OcrSidecarResponse>(
|
const response = await axios.post<OcrSidecarResponse>(
|
||||||
`${this.ocrApiUrl}/ocr`,
|
`${this.ocrApiUrl}/ocr`,
|
||||||
{ pdfPath: sidecarPath, engine: engineType },
|
{
|
||||||
{ timeout: 60000 } // 60s timeout per ADR-032
|
pdfPath: sidecarPath,
|
||||||
|
engine: 'typhoon-ocr-3b',
|
||||||
|
},
|
||||||
|
{ timeout: 120000 }
|
||||||
);
|
);
|
||||||
|
|
||||||
const text = response.data.text ?? '';
|
const text = response.data.text ?? '';
|
||||||
const durationMs = Date.now() - startTime;
|
const durationMs = Date.now() - startTime;
|
||||||
|
|
||||||
// เซ็ต Cache ลง Redis 24 ชั่วโมง (T022)
|
|
||||||
await this.ocrCacheService.set(pdfPath, engineType, {
|
|
||||||
text,
|
|
||||||
engineUsed: engineType,
|
|
||||||
charCount: text.length,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Invalidate VRAM monitor cache เนื่องจากใช้ keep_alive = 0 โมเดลจะถูก unload ทันที
|
|
||||||
await this.vramMonitorService.invalidateCache();
|
|
||||||
|
|
||||||
// บันทึก Audit Log (T023)
|
|
||||||
await this.writeAuditLog({
|
await this.writeAuditLog({
|
||||||
documentPublicId: input.documentPublicId,
|
documentPublicId: input.documentPublicId,
|
||||||
aiModel: 'typhoon-ocr',
|
aiModel: 'typhoon-ocr',
|
||||||
modelName: 'scb10x/typhoon-ocr-3b',
|
modelName: 'typhoon-ocr-3b',
|
||||||
modelType: engineType,
|
modelType: 'typhoon-ocr',
|
||||||
status: AiAuditStatus.SUCCESS,
|
status: AiAuditStatus.SUCCESS,
|
||||||
processingTimeMs: durationMs,
|
processingTimeMs: durationMs,
|
||||||
cacheHit: false,
|
cacheHit: false,
|
||||||
vramUsageMb: TYPHOON_OCR_REQUIRED_VRAM_MB,
|
|
||||||
});
|
});
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -434,106 +404,38 @@ export class OcrService {
|
|||||||
ocrUsed: true,
|
ocrUsed: true,
|
||||||
};
|
};
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
const cause = err instanceof Error ? err.message : String(err);
|
|
||||||
const errorMsg = `Typhoon OCR API call failed: ${cause}`;
|
|
||||||
|
|
||||||
// 4. สลับเอนจินสำรองอัตโนมัติ (Graceful Fallback to Tesseract - T016)
|
|
||||||
return this.fallbackToTesseract(
|
|
||||||
pdfPath,
|
|
||||||
errorMsg,
|
|
||||||
input.documentPublicId
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** สลับไปใช้งาน Tesseract OCR อัตโนมัติในฐานะระบบสำรอง (Graceful Fallback - T016) */
|
|
||||||
private async fallbackToTesseract(
|
|
||||||
pdfPath: string,
|
|
||||||
originalError: string,
|
|
||||||
documentPublicId?: string
|
|
||||||
): Promise<OcrDetectionResult> {
|
|
||||||
this.logger.warn(
|
this.logger.warn(
|
||||||
`Typhoon OCR processing failed, initiating graceful fallback to Tesseract: ${originalError}`
|
`Typhoon OCR failed, trying fallback baseline (Tesseract): ${err instanceof Error ? err.message : String(err)}`
|
||||||
);
|
|
||||||
const startTime = Date.now();
|
|
||||||
const sidecarPath = this.remapPath(pdfPath);
|
|
||||||
|
|
||||||
try {
|
|
||||||
const response = await axios.post<OcrSidecarResponse>(
|
|
||||||
`${this.ocrApiUrl}/ocr`,
|
|
||||||
{ pdfPath: sidecarPath }, // ส่งโดยไม่มี engine parameter เพื่อให้เป็น Tesseract
|
|
||||||
{ timeout: 30000 } // 30s timeout สำหรับ fallback
|
|
||||||
);
|
|
||||||
|
|
||||||
const text = response.data.text ?? '';
|
|
||||||
const durationMs = Date.now() - startTime;
|
|
||||||
|
|
||||||
// บันทึก Audit Log ด้วยสถานะ SUCCESS สำหรับ Tesseract แต่ระบุ Error ของ Typhoon ไว้
|
|
||||||
await this.writeAuditLog({
|
|
||||||
documentPublicId,
|
|
||||||
aiModel: 'tesseract',
|
|
||||||
modelName: 'tesseract-ocr',
|
|
||||||
modelType: 'tesseract',
|
|
||||||
status: AiAuditStatus.SUCCESS,
|
|
||||||
processingTimeMs: durationMs,
|
|
||||||
cacheHit: false,
|
|
||||||
errorMessage: `Graceful fallback from Typhoon OCR. Original error: ${originalError}`,
|
|
||||||
});
|
|
||||||
|
|
||||||
return {
|
|
||||||
text,
|
|
||||||
ocrUsed: true,
|
|
||||||
};
|
|
||||||
} catch (err: unknown) {
|
|
||||||
const durationMs = Date.now() - startTime;
|
|
||||||
const cause = err instanceof Error ? err.message : String(err);
|
|
||||||
this.logger.error(`OCR fallback to Tesseract failed: ${cause}`);
|
|
||||||
|
|
||||||
await this.writeAuditLog({
|
|
||||||
documentPublicId,
|
|
||||||
aiModel: 'tesseract',
|
|
||||||
modelName: 'tesseract-ocr',
|
|
||||||
modelType: 'tesseract',
|
|
||||||
status: AiAuditStatus.FAILED,
|
|
||||||
processingTimeMs: durationMs,
|
|
||||||
cacheHit: false,
|
|
||||||
errorMessage: `Fallback failed: ${cause}. Original Typhoon error: ${originalError}`,
|
|
||||||
});
|
|
||||||
|
|
||||||
throw new Error(
|
|
||||||
`OCR processing failed entirely. Typhoon error: ${originalError}. Fallback error: ${cause}`
|
|
||||||
);
|
);
|
||||||
|
return this.processWithTesseract(input);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** เขียนบันทึก AI Audit Log (T023) */
|
/** บันทึก Log การรัน AI */
|
||||||
private async writeAuditLog(params: {
|
private async writeAuditLog(log: {
|
||||||
documentPublicId?: string;
|
documentPublicId?: string;
|
||||||
aiModel: string;
|
aiModel: string;
|
||||||
modelName: string;
|
modelName: string;
|
||||||
modelType: string;
|
modelType: string;
|
||||||
status: AiAuditStatus;
|
status: AiAuditStatus;
|
||||||
processingTimeMs: number;
|
processingTimeMs: number;
|
||||||
cacheHit: boolean;
|
|
||||||
vramUsageMb?: number;
|
|
||||||
errorMessage?: string;
|
errorMessage?: string;
|
||||||
|
cacheHit: boolean;
|
||||||
}): Promise<void> {
|
}): Promise<void> {
|
||||||
try {
|
try {
|
||||||
const log = this.auditLogRepo.create({
|
await this.auditLogRepo.save(
|
||||||
documentPublicId: params.documentPublicId,
|
this.auditLogRepo.create({
|
||||||
aiModel: params.aiModel,
|
documentPublicId: log.documentPublicId,
|
||||||
modelName: params.modelName,
|
aiModel: log.aiModel,
|
||||||
modelType: params.modelType,
|
modelName: log.modelName,
|
||||||
status: params.status,
|
processingTimeMs: log.processingTimeMs,
|
||||||
processingTimeMs: params.processingTimeMs,
|
status: log.status,
|
||||||
cacheHit: params.cacheHit,
|
errorMessage: log.errorMessage,
|
||||||
vramUsageMb: params.vramUsageMb,
|
})
|
||||||
errorMessage: params.errorMessage,
|
);
|
||||||
});
|
|
||||||
await this.auditLogRepo.save(log);
|
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
this.logger.warn(
|
this.logger.error(
|
||||||
`Failed to write AI audit log: ${err instanceof Error ? err.message : String(err)}`
|
`Failed to save AI Audit Log: ${err instanceof Error ? err.message : String(err)}`
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user