260306:1709 20260306:1707 fix n8n workflow - file name, timeout, parse node
All checks were successful
Build and Deploy / deploy (push) Successful in 47s

This commit is contained in:
admin
2026-03-06 17:09:03 +07:00
parent 040629aa07
commit 65a758298a

View File

@@ -166,7 +166,7 @@
},
{
"parameters": {
"jsCode": "const cpJson = $input.first()?.json || {};\nconst startIndex = cpJson.last_processed_index || 0;\nconst config = $('Set Configuration').first().json.config;\n\nconst allItems = $('Read Excel').all().map(i => i.json);\nconst remaining = allItems.slice(startIndex);\nconst currentBatch = remaining.slice(0, config.BATCH_SIZE);\n\n// Encoding Normalization\nconst normalize = (str) => {\n if (!str) return '';\n return String(str).normalize('NFC').trim();\n};\n\nreturn currentBatch.map((item, i) => {\n const docNum = item.document_number || item['Document Number'] || item['Corr. No.'];\n // Read file_name from Excel if available, otherwise generate from document_number\n const excelFileName = item.file_name || item['File Name'] || item.filename || item['FileName'] || item.pdf_file || item['PDF File'];\n const fileName = excelFileName ? normalize(excelFileName) : `${normalize(docNum)}.pdf`;\n return {\n json: {\n document_number: normalize(docNum),\n title: normalize(item.title || item.Title || item['Subject']),\n legacy_number: normalize(item.legacy_number || item['Legacy Number'] || item['Response Doc.'] || ''),\n excel_revision: item.revision || item.Revision || item.rev || 1,\n original_index: startIndex + i,\n batch_id: config.BATCH_ID,\n file_name: fileName\n }\n };\n});"
"jsCode": "const cpJson = $input.first()?.json || {};\nconst startIndex = cpJson.last_processed_index || 0;\nconst config = $('Set Configuration').first().json.config;\n\nconst allItems = $('Read Excel').all().map(i => i.json);\nconst remaining = allItems.slice(startIndex);\nconst currentBatch = remaining.slice(0, config.BATCH_SIZE);\n\n// Encoding Normalization\nconst normalize = (str) => {\n if (!str) return '';\n return String(str).normalize('NFC').trim();\n};\n\nreturn currentBatch.map((item, i) => {\n const docNum = item.document_number || item['Document Number'] || item['Corr. No.'];\n // Use File name from Excel directly - must exist\n const excelFileName = item['File name'] || item.file_name || item['File Name'] || item.filename;\n if (!excelFileName) {\n throw new Error(`Missing 'File name' column for row ${i + startIndex + 1}, document: ${docNum}`);\n }\n const fileName = normalize(excelFileName);\n return {\n json: {\n document_number: normalize(docNum),\n title: normalize(item.title || item.Title || item['Subject']),\n legacy_number: normalize(item.legacy_number || item['Legacy Number'] || item['Response Doc.'] || ''),\n excel_revision: item.revision || item.Revision || item.rev || 1,\n original_index: startIndex + i,\n batch_id: config.BATCH_ID,\n file_name: fileName\n }\n };\n});"
},
"id": "49c98c75-456b-4a1d-a203-a5b2bf19fd15",
"name": "Process Batch + Encoding",
@@ -181,7 +181,7 @@
},
{
"parameters": {
"jsCode": "const fs = require('fs');\nconst path = require('path');\nconst config = $('Set Configuration').first().json.config;\n\nconst items = $input.all();\nif (!items || items.length === 0) return [];\n\nconst validated = [];\nconst errors = [];\n\n// Cache directory listing for case-insensitive matching\nlet dirFiles = [];\nlet dirFilesLower = [];\ntry {\n dirFiles = fs.readdirSync(config.SOURCE_PDF_DIR);\n dirFilesLower = dirFiles.map(f => f.toLowerCase());\n} catch (e) {\n return [{ json: { error: `Cannot read directory: ${config.SOURCE_PDF_DIR}`, error_type: 'DIR_ERROR' }}];\n}\n\nfor (const item of items) {\n const fileName = item.json?.file_name;\n if (!fileName) {\n errors.push({\n ...item,\n json: { ...item.json, file_valid: false, error: 'file_name is missing from Excel data', error_type: 'MISSING_FILENAME', file_exists: false }\n });\n continue;\n }\n \n // Sanitize filename from Excel\n const safeName = path.basename(String(fileName).replace(/[^a-zA-Z0-9\\-_.]/g, '_')).normalize('NFC');\n const fileNameLower = safeName.toLowerCase();\n \n // Case-insensitive lookup in directory\n const matchedIndex = dirFilesLower.indexOf(fileNameLower);\n const actualFileName = matchedIndex >= 0 ? 
dirFiles[matchedIndex] : safeName;\n const filePath = path.resolve(config.SOURCE_PDF_DIR, actualFileName);\n \n // Path traversal check\n if (!filePath.startsWith(path.resolve(config.SOURCE_PDF_DIR))) {\n errors.push({\n ...item,\n json: { ...item.json, file_valid: false, error: 'Path traversal detected in Source PDF path', error_type: 'SECURITY', file_exists: false }\n });\n continue;\n }\n \n try {\n if (matchedIndex >= 0 && fs.existsSync(filePath)) {\n const stats = fs.statSync(filePath);\n validated.push({\n ...item,\n json: { ...item.json, file_valid: true, file_exists: true, file_size: stats.size, file_path: filePath, original_filename: fileName, matched_filename: actualFileName }\n });\n } else {\n // List available files for debugging\n const availableFiles = dirFiles.filter(f => f.toLowerCase().includes(safeName.toLowerCase().slice(0, 5))).slice(0, 10);\n errors.push({\n ...item,\n json: { ...item.json, file_valid: false, error: `File not found: ${safeName} in ${config.SOURCE_PDF_DIR}`, error_type: 'FILE_NOT_FOUND', file_exists: false, similar_files: availableFiles, total_files_in_dir: dirFiles.length }\n });\n }\n } catch (err) {\n errors.push({\n ...item,\n json: { ...item.json, file_valid: false, error: err.message, error_type: 'FILE_ERROR', file_exists: false }\n });\n }\n}\n\nreturn [...validated, ...errors];"
"jsCode": "const fs = require('fs');\nconst path = require('path');\nconst config = $('Set Configuration').first().json.config;\n\nconst items = $input.all();\nif (!items || items.length === 0) return [];\n\nconst validated = [];\nconst errors = [];\n\nfor (const item of items) {\n const fileName = item.json?.file_name;\n if (!fileName) {\n errors.push({\n ...item,\n json: { ...item.json, file_valid: false, error: 'file_name is missing', error_type: 'MISSING_FILENAME', file_exists: false }\n });\n continue;\n }\n \n // Use file name from Excel directly, add .pdf if missing\n let safeName = path.basename(String(fileName)).normalize('NFC');\n if (!safeName.toLowerCase().endsWith('.pdf')) {\n safeName += '.pdf';\n }\n const filePath = path.resolve(config.SOURCE_PDF_DIR, safeName);\n \n // Path traversal check\n if (!filePath.startsWith(path.resolve(config.SOURCE_PDF_DIR))) {\n errors.push({\n ...item,\n json: { ...item.json, file_valid: false, error: 'Path traversal detected', error_type: 'SECURITY', file_exists: false }\n });\n continue;\n }\n \n try {\n if (fs.existsSync(filePath)) {\n const stats = fs.statSync(filePath);\n validated.push({\n ...item,\n json: { ...item.json, file_valid: true, file_exists: true, file_size: stats.size, file_path: filePath }\n });\n } else {\n errors.push({\n ...item,\n json: { ...item.json, file_valid: false, error: `File not found: ${safeName}`, error_type: 'FILE_NOT_FOUND', file_exists: false }\n });\n }\n } catch (err) {\n errors.push({\n ...item,\n json: { ...item.json, file_valid: false, error: err.message, error_type: 'FILE_ERROR', file_exists: false }\n });\n }\n}\n\nreturn [...validated, ...errors];"
},
"id": "51e91c88-98cd-4df4-81ac-e452b25e5c06",
"name": "File Validator",
@@ -239,7 +239,7 @@
"specifyBody": "json",
"jsonBody": "={{ $json.ollama_payload }}",
"options": {
"timeout": 30000
"timeout": 120000
}
},
"id": "ae9b6be5-284c-44db-b7f0-b4839a59230e",
@@ -254,7 +254,7 @@
},
{
"parameters": {
"jsCode": "const items = $input.all();\nconst parsed = [];\nconst parseErrors = [];\n\nfor (const item of items) {\n try {\n let raw = item.json.response || '';\n \n // Clean markdown\n raw = raw.replace(/```json/gi, '').replace(/```/g, '').trim();\n const result = JSON.parse(raw);\n \n // Schema Validation\n if (typeof result.is_valid !== 'boolean') throw new Error('is_valid must be boolean');\n if (typeof result.confidence !== 'number' || result.confidence < 0 || result.confidence > 1) {\n throw new Error('confidence must be float 0.0-1.0');\n }\n if (!Array.isArray(result.detected_issues)) throw new Error('detected_issues must be array');\n \n // Tag Validation - ensure suggested_tags is an array\n if (!Array.isArray(result.suggested_tags)) {\n result.suggested_tags = [];\n }\n // Normalize tags: stringify, trim, drop empty entries, remove duplicates (case is preserved)\n result.suggested_tags = [...new Set(result.suggested_tags.map(t => String(t).trim()).filter(t => t.length > 0))];\n \n // Tag confidence validation\n if (typeof result.tag_confidence !== 'number' || result.tag_confidence < 0 || result.tag_confidence > 1) {\n result.tag_confidence = 0.5; // default if missing or invalid\n }\n \n // Enum Validation\n const systemCategories = item.json.system_categories || [];\n if (!systemCategories.includes(result.suggested_category)) {\n throw new Error(`Category \"${result.suggested_category}\" not in system enum`);\n }\n \n parsed.push({\n ...item,\n json: { ...item.json, ai_result: result, parse_error: null }\n });\n } catch (err) {\n parseErrors.push({\n ...item,\n json: {\n ...item.json,\n ai_result: null,\n parse_error: err.message,\n raw_ai_response: item.json.response,\n error_type: 'AI_PARSE_ERROR'\n }\n });\n }\n}\n\nreturn [parsed, parseErrors];"
"jsCode": "const items = $input.all();\nconst parsed = [];\nconst parseErrors = [];\n\nfor (const item of items) {\n try {\n let raw = item.json.response || '';\n \n // Clean markdown\n raw = raw.replace(/```json/gi, '').replace(/```/g, '').trim();\n const result = JSON.parse(raw);\n \n // Schema Validation\n if (typeof result.is_valid !== 'boolean') throw new Error('is_valid must be boolean');\n if (typeof result.confidence !== 'number' || result.confidence < 0 || result.confidence > 1) {\n throw new Error('confidence must be float 0.0-1.0');\n }\n if (!Array.isArray(result.detected_issues)) throw new Error('detected_issues must be array');\n \n // Tag Validation - ensure suggested_tags is an array\n if (!Array.isArray(result.suggested_tags)) {\n result.suggested_tags = [];\n }\n // Normalize tags: stringify, trim, drop empty entries, remove duplicates (case is preserved)\n result.suggested_tags = [...new Set(result.suggested_tags.map(t => String(t).trim()).filter(t => t.length > 0))];\n \n // Tag confidence validation\n if (typeof result.tag_confidence !== 'number' || result.tag_confidence < 0 || result.tag_confidence > 1) {\n result.tag_confidence = 0.5; // default if missing or invalid\n }\n \n // Enum Validation\n const systemCategories = item.json.system_categories || [];\n if (!systemCategories.includes(result.suggested_category)) {\n throw new Error(`Category \"${result.suggested_category}\" not in system enum`);\n }\n \n parsed.push({\n ...item,\n json: { ...item.json, ai_result: result, parse_error: null }\n });\n } catch (err) {\n parseErrors.push({\n ...item,\n json: {\n ...item.json,\n ai_result: null,\n parse_error: err.message,\n raw_ai_response: item.json.response,\n error_type: 'AI_PARSE_ERROR'\n }\n });\n }\n}\n\nreturn [parsed.length > 0 ? parsed : [{ json: { _placeholder: true } }], parseErrors.length > 0 ? parseErrors : [{ json: { _placeholder: true } }]];"
},
"id": "281dc950-a3b6-4412-a0b4-76663b8c37ea",
"name": "Parse & Validate AI Response",