690604:1139 ADR-034-134 #07
This commit is contained in:
@@ -60,7 +60,13 @@ export class SandboxOcrEngineService {
|
|||||||
engineType: SandboxOcrEngineType = 'auto',
|
engineType: SandboxOcrEngineType = 'auto',
|
||||||
typhoonOptions?: OcrTyphoonOptions
|
typhoonOptions?: OcrTyphoonOptions
|
||||||
): Promise<SandboxOcrResult> {
|
): Promise<SandboxOcrResult> {
|
||||||
|
this.logger.log(
|
||||||
|
`detectAndExtract called — engine="${engineType}" pdfPath="${pdfPath}" typhoonOptions=${JSON.stringify(typhoonOptions ?? null)}`
|
||||||
|
);
|
||||||
if (engineType === 'auto' || engineType === 'tesseract') {
|
if (engineType === 'auto' || engineType === 'tesseract') {
|
||||||
|
this.logger.log(
|
||||||
|
`engine="${engineType}" → routing to Tesseract/fast-path`
|
||||||
|
);
|
||||||
const result = await this.ocrService.detectAndExtract({ pdfPath });
|
const result = await this.ocrService.detectAndExtract({ pdfPath });
|
||||||
return {
|
return {
|
||||||
text: result.text,
|
text: result.text,
|
||||||
@@ -70,12 +76,27 @@ export class SandboxOcrEngineService {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.logger.log(
|
||||||
|
`engine="typhoon-np-dms-ocr" → calling sidecar at ${this.ocrApiUrl}/ocr-upload`
|
||||||
|
);
|
||||||
try {
|
try {
|
||||||
const fileBuffer = fs.readFileSync(pdfPath);
|
let fileBuffer: Buffer;
|
||||||
|
try {
|
||||||
|
fileBuffer = fs.readFileSync(pdfPath);
|
||||||
|
this.logger.log(
|
||||||
|
`File read OK — ${fileBuffer.length} bytes from "${pdfPath}"`
|
||||||
|
);
|
||||||
|
} catch (fsErr: unknown) {
|
||||||
|
const fsMsg = fsErr instanceof Error ? fsErr.message : String(fsErr);
|
||||||
|
this.logger.error(
|
||||||
|
`[DIAG] fs.readFileSync FAILED — "${pdfPath}": ${fsMsg}`
|
||||||
|
);
|
||||||
|
throw fsErr;
|
||||||
|
}
|
||||||
const form = new FormData();
|
const form = new FormData();
|
||||||
form.append(
|
form.append(
|
||||||
'file',
|
'file',
|
||||||
new Blob([fileBuffer], { type: 'application/pdf' }),
|
new Blob([new Uint8Array(fileBuffer)], { type: 'application/pdf' }),
|
||||||
'upload.pdf'
|
'upload.pdf'
|
||||||
);
|
);
|
||||||
form.append('engine', engineType);
|
form.append('engine', engineType);
|
||||||
@@ -88,6 +109,9 @@ export class SandboxOcrEngineService {
|
|||||||
if (typhoonOptions?.repeatPenalty !== undefined) {
|
if (typhoonOptions?.repeatPenalty !== undefined) {
|
||||||
form.append('repeatPenalty', String(typhoonOptions.repeatPenalty));
|
form.append('repeatPenalty', String(typhoonOptions.repeatPenalty));
|
||||||
}
|
}
|
||||||
|
this.logger.log(
|
||||||
|
`Sending to sidecar — engine=${engineType} options=${JSON.stringify(typhoonOptions ?? {})}`
|
||||||
|
);
|
||||||
const response = await axios.post<SandboxOcrSidecarResponse>(
|
const response = await axios.post<SandboxOcrSidecarResponse>(
|
||||||
`${this.ocrApiUrl}/ocr-upload`,
|
`${this.ocrApiUrl}/ocr-upload`,
|
||||||
form,
|
form,
|
||||||
@@ -96,6 +120,9 @@ export class SandboxOcrEngineService {
|
|||||||
headers: { 'X-API-Key': this.ocrSidecarApiKey },
|
headers: { 'X-API-Key': this.ocrSidecarApiKey },
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
this.logger.log(
|
||||||
|
`Sidecar response OK — engineUsed="${response.data.engineUsed}" ocrUsed=${String(response.data.ocrUsed)} textLen=${String(response.data.text?.length ?? 0)}`
|
||||||
|
);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
text: response.data.text ?? '',
|
text: response.data.text ?? '',
|
||||||
@@ -104,7 +131,16 @@ export class SandboxOcrEngineService {
|
|||||||
fallbackUsed: false,
|
fallbackUsed: false,
|
||||||
};
|
};
|
||||||
} catch (error: unknown) {
|
} catch (error: unknown) {
|
||||||
// ดึง axios response body detail ออกมาด้วย (เช่น ไม่พบไฟล์: /mnt/uploads/...)
|
// ดึง HTTP status + body detail จาก axios error
|
||||||
|
const axiosStatus =
|
||||||
|
error !== null &&
|
||||||
|
typeof error === 'object' &&
|
||||||
|
'response' in error &&
|
||||||
|
error.response !== null &&
|
||||||
|
typeof error.response === 'object' &&
|
||||||
|
'status' in error.response
|
||||||
|
? String((error.response as { status: unknown }).status)
|
||||||
|
: 'N/A';
|
||||||
const axiosDetail =
|
const axiosDetail =
|
||||||
error !== null &&
|
error !== null &&
|
||||||
typeof error === 'object' &&
|
typeof error === 'object' &&
|
||||||
@@ -119,11 +155,12 @@ export class SandboxOcrEngineService {
|
|||||||
: null;
|
: null;
|
||||||
const cause = error instanceof Error ? error.message : String(error);
|
const cause = error instanceof Error ? error.message : String(error);
|
||||||
const fullCause = axiosDetail
|
const fullCause = axiosDetail
|
||||||
? `${cause} — sidecar detail: ${axiosDetail}`
|
? `HTTP ${axiosStatus} — ${cause} — sidecar detail: ${axiosDetail}`
|
||||||
: cause;
|
: `HTTP ${axiosStatus} — ${cause}`;
|
||||||
this.logger.warn(
|
this.logger.error(
|
||||||
`Typhoon OCR failed in sandbox, falling back to Tesseract: ${fullCause}`
|
`[DIAG] Typhoon OCR FAILED — engine="${engineType}" url="${this.ocrApiUrl}/ocr-upload" error: ${fullCause}`
|
||||||
);
|
);
|
||||||
|
this.logger.warn(`Falling back to Tesseract due to: ${fullCause}`);
|
||||||
|
|
||||||
const fallbackResult = await this.ocrService.detectAndExtract({
|
const fallbackResult = await this.ocrService.detectAndExtract({
|
||||||
pdfPath,
|
pdfPath,
|
||||||
|
|||||||
Reference in New Issue
Block a user