690421:1628 Update RAG Module #01
This commit is contained in:
@@ -13,7 +13,7 @@ import { UserModule } from '../../modules/user/user.module';
|
|||||||
TypeOrmModule.forFeature([Attachment]),
|
TypeOrmModule.forFeature([Attachment]),
|
||||||
ScheduleModule.forRoot(), // ✅ เปิดใช้งาน Cron Job],
|
ScheduleModule.forRoot(), // ✅ เปิดใช้งาน Cron Job],
|
||||||
UserModule,
|
UserModule,
|
||||||
BullModule.registerQueue({ name: 'rag:ocr' }),
|
BullModule.registerQueue({ name: 'rag-ocr' }),
|
||||||
],
|
],
|
||||||
controllers: [FileStorageController],
|
controllers: [FileStorageController],
|
||||||
providers: [
|
providers: [
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ export class FileStorageService {
|
|||||||
@InjectRepository(Attachment)
|
@InjectRepository(Attachment)
|
||||||
private attachmentRepository: Repository<Attachment>,
|
private attachmentRepository: Repository<Attachment>,
|
||||||
private configService: ConfigService,
|
private configService: ConfigService,
|
||||||
@Optional() @InjectQueue('rag:ocr') private readonly ragOcrQueue?: Queue
|
@Optional() @InjectQueue('rag-ocr') private readonly ragOcrQueue?: Queue
|
||||||
) {
|
) {
|
||||||
// ใช้ env vars จาก docker-compose สำหรับ Production
|
// ใช้ env vars จาก docker-compose สำหรับ Production
|
||||||
// ถ้าไม่ได้กำหนดจะ fallback เป็น ./uploads/temp และ ./uploads/permanent
|
// ถ้าไม่ได้กำหนดจะ fallback เป็น ./uploads/temp และ ./uploads/permanent
|
||||||
@@ -180,7 +180,7 @@ export class FileStorageService {
|
|||||||
)
|
)
|
||||||
.catch((err: unknown) => {
|
.catch((err: unknown) => {
|
||||||
this.logger.error(
|
this.logger.error(
|
||||||
`Failed to enqueue rag:ocr for ${saved.publicId}`,
|
`Failed to enqueue rag-ocr for ${saved.publicId}`,
|
||||||
err instanceof Error ? err.stack : String(err)
|
err instanceof Error ? err.stack : String(err)
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import { Test, TestingModule } from '@nestjs/testing';
|
|||||||
|
|
||||||
import { IngestionService } from '../ingestion.service';
|
import { IngestionService } from '../ingestion.service';
|
||||||
|
|
||||||
const QUEUE_TOKEN = 'BullQueue_rag:ocr';
|
const QUEUE_TOKEN = 'BullQueue_rag-ocr';
|
||||||
|
|
||||||
const mockOcrQueue = {
|
const mockOcrQueue = {
|
||||||
getJob: jest.fn(),
|
getJob: jest.fn(),
|
||||||
@@ -35,7 +35,7 @@ describe('IngestionService', () => {
|
|||||||
jest.clearAllMocks();
|
jest.clearAllMocks();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should enqueue rag:ocr job with attachmentPublicId as jobId', async () => {
|
it('should enqueue rag-ocr job with attachmentPublicId as jobId', async () => {
|
||||||
mockOcrQueue.getJob.mockResolvedValue(null);
|
mockOcrQueue.getJob.mockResolvedValue(null);
|
||||||
mockOcrQueue.add.mockResolvedValue({ id: baseJobData.attachmentPublicId });
|
mockOcrQueue.add.mockResolvedValue({ id: baseJobData.attachmentPublicId });
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import { OcrJobData } from './processors/ocr.processor';
|
|||||||
export class IngestionService {
|
export class IngestionService {
|
||||||
private readonly logger = new Logger(IngestionService.name);
|
private readonly logger = new Logger(IngestionService.name);
|
||||||
|
|
||||||
constructor(@InjectQueue('rag:ocr') private readonly ocrQueue: Queue) {}
|
constructor(@InjectQueue('rag-ocr') private readonly ocrQueue: Queue) {}
|
||||||
|
|
||||||
async enqueue(data: OcrJobData): Promise<void> {
|
async enqueue(data: OcrJobData): Promise<void> {
|
||||||
const jobId = data.attachmentPublicId;
|
const jobId = data.attachmentPublicId;
|
||||||
@@ -18,13 +18,13 @@ export class IngestionService {
|
|||||||
const state = await existing.getState();
|
const state = await existing.getState();
|
||||||
if (state === 'active' || state === 'waiting' || state === 'delayed') {
|
if (state === 'active' || state === 'waiting' || state === 'delayed') {
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`rag:ocr job already queued for ${jobId} (state: ${state})`
|
`rag-ocr job already queued for ${jobId} (state: ${state})`
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
await this.ocrQueue.add('ocr', data, { jobId });
|
await this.ocrQueue.add('ocr', data, { jobId });
|
||||||
this.logger.log(`Enqueued rag:ocr for attachment ${jobId}`);
|
this.logger.log(`Enqueued rag-ocr for attachment ${jobId}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ import { EmbeddingJobData } from './thai-preprocess.processor';
|
|||||||
const CHUNK_SIZE = 512;
|
const CHUNK_SIZE = 512;
|
||||||
const CHUNK_OVERLAP = 50;
|
const CHUNK_OVERLAP = 50;
|
||||||
|
|
||||||
@Processor('rag:embedding')
|
@Processor('rag-embedding')
|
||||||
export class EmbeddingProcessor extends WorkerHost {
|
export class EmbeddingProcessor extends WorkerHost {
|
||||||
private readonly logger = new Logger(EmbeddingProcessor.name);
|
private readonly logger = new Logger(EmbeddingProcessor.name);
|
||||||
|
|
||||||
|
|||||||
@@ -20,12 +20,12 @@ export interface OcrJobData {
|
|||||||
classification: 'PUBLIC' | 'INTERNAL' | 'CONFIDENTIAL';
|
classification: 'PUBLIC' | 'INTERNAL' | 'CONFIDENTIAL';
|
||||||
}
|
}
|
||||||
|
|
||||||
@Processor('rag:ocr')
|
@Processor('rag-ocr')
|
||||||
export class OcrProcessor extends WorkerHost {
|
export class OcrProcessor extends WorkerHost {
|
||||||
private readonly logger = new Logger(OcrProcessor.name);
|
private readonly logger = new Logger(OcrProcessor.name);
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
@InjectQueue('rag:thai-preprocess') private readonly thaiQueue: Queue,
|
@InjectQueue('rag-thai-preprocess') private readonly thaiQueue: Queue,
|
||||||
@InjectRepository(DocumentChunk)
|
@InjectRepository(DocumentChunk)
|
||||||
private readonly chunkRepo: Repository<DocumentChunk>
|
private readonly chunkRepo: Repository<DocumentChunk>
|
||||||
) {
|
) {
|
||||||
@@ -40,7 +40,7 @@ export class OcrProcessor extends WorkerHost {
|
|||||||
});
|
});
|
||||||
if (existing > 0) {
|
if (existing > 0) {
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`rag:ocr job already indexed for ${attachmentPublicId}, skipping`
|
`rag-ocr job already indexed for ${attachmentPublicId}, skipping`
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,14 +14,14 @@ export interface EmbeddingJobData extends ThaiPreprocessJobData {
|
|||||||
normalizedText: string;
|
normalizedText: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Processor('rag:thai-preprocess')
|
@Processor('rag-thai-preprocess')
|
||||||
export class ThaiPreprocessProcessor extends WorkerHost {
|
export class ThaiPreprocessProcessor extends WorkerHost {
|
||||||
private readonly logger = new Logger(ThaiPreprocessProcessor.name);
|
private readonly logger = new Logger(ThaiPreprocessProcessor.name);
|
||||||
private readonly thaiUrl: string;
|
private readonly thaiUrl: string;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private readonly configService: ConfigService,
|
private readonly configService: ConfigService,
|
||||||
@InjectQueue('rag:embedding') private readonly embeddingQueue: Queue
|
@InjectQueue('rag-embedding') private readonly embeddingQueue: Queue
|
||||||
) {
|
) {
|
||||||
super();
|
super();
|
||||||
this.thaiUrl = this.configService.get<string>(
|
this.thaiUrl = this.configService.get<string>(
|
||||||
|
|||||||
@@ -28,9 +28,9 @@ const DLQ_DEFAULTS = {
|
|||||||
UserModule,
|
UserModule,
|
||||||
TypeOrmModule.forFeature([DocumentChunk]),
|
TypeOrmModule.forFeature([DocumentChunk]),
|
||||||
BullModule.registerQueue(
|
BullModule.registerQueue(
|
||||||
{ name: 'rag:ocr', defaultJobOptions: DLQ_DEFAULTS },
|
{ name: 'rag-ocr', defaultJobOptions: DLQ_DEFAULTS },
|
||||||
{ name: 'rag:thai-preprocess', defaultJobOptions: DLQ_DEFAULTS },
|
{ name: 'rag-thai-preprocess', defaultJobOptions: DLQ_DEFAULTS },
|
||||||
{ name: 'rag:embedding', defaultJobOptions: DLQ_DEFAULTS }
|
{ name: 'rag-embedding', defaultJobOptions: DLQ_DEFAULTS }
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
controllers: [RagController],
|
controllers: [RagController],
|
||||||
|
|||||||
Reference in New Issue
Block a user