Files
admin 11984bfa29
CI Pipeline / build (push) Failing after 12m41s
Build and Deploy / deploy (push) Failing after 2m44s
260322:1648 Correct Correspondence / Doing RFA / Correct CI
2026-03-22 16:48:12 +07:00

2380 lines
191 KiB
JSON

{
"data": {
"edges": [
{
"animated": false,
"className": "",
"data": {
"sourceHandle": {
"dataType": "File",
"id": "File-5V2fL",
"name": "dataframe",
"output_types": ["DataFrame"]
},
"targetHandle": {
"fieldName": "data",
"id": "LoopComponent-5vFOr",
"inputTypes": ["DataFrame"],
"type": "other"
}
},
"id": "xy-edge__File-5V2fL{œdataTypeœ:œFileœ,œidœ:œFile-5V2fLœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-LoopComponent-5vFOr{œfieldNameœ:œdataœ,œidœ:œLoopComponent-5vFOrœ,œinputTypesœ:[œDataFrameœ],œtypeœ:œotherœ}",
"selected": false,
"source": "File-5V2fL",
"sourceHandle": "{œdataTypeœ:œFileœ,œidœ:œFile-5V2fLœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}",
"target": "LoopComponent-5vFOr",
"targetHandle": "{œfieldNameœ:œdataœ,œidœ:œLoopComponent-5vFOrœ,œinputTypesœ:[œDataFrameœ],œtypeœ:œotherœ}"
},
{
"animated": false,
"className": "",
"data": {
"sourceHandle": {
"dataType": "Prompt Template",
"id": "Prompt Template-dKwcS",
"name": "prompt",
"output_types": ["Message"]
},
"targetHandle": {
"fieldName": "input_value",
"id": "OllamaModel-xJSnu",
"inputTypes": ["Message"],
"type": "str"
}
},
"id": "xy-edge__Prompt Template-dKwcS{œdataTypeœ:œPrompt Templateœ,œidœ:œPrompt Template-dKwcSœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-OllamaModel-xJSnu{œfieldNameœ:œinput_valueœ,œidœ:œOllamaModel-xJSnuœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}",
"selected": false,
"source": "Prompt Template-dKwcS",
"sourceHandle": "{œdataTypeœ:œPrompt Templateœ,œidœ:œPrompt Template-dKwcSœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}",
"target": "OllamaModel-xJSnu",
"targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œOllamaModel-xJSnuœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}"
},
{
"animated": false,
"data": {
"sourceHandle": {
"dataType": "LoopComponent",
"id": "LoopComponent-5vFOr",
"name": "item",
"output_types": ["Data"]
},
"targetHandle": {
"fieldName": "input_data",
"id": "ParserComponent-Xspgr",
"inputTypes": ["DataFrame", "Data"],
"type": "other"
}
},
"id": "xy-edge__LoopComponent-5vFOr{œdataTypeœ:œLoopComponentœ,œidœ:œLoopComponent-5vFOrœ,œnameœ:œitemœ,œoutput_typesœ:[œDataœ]}-ParserComponent-Xspgr{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-Xspgrœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}",
"selected": false,
"source": "LoopComponent-5vFOr",
"sourceHandle": "{œdataTypeœ:œLoopComponentœ,œidœ:œLoopComponent-5vFOrœ,œnameœ:œitemœ,œoutput_typesœ:[œDataœ]}",
"target": "ParserComponent-Xspgr",
"targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-Xspgrœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}"
},
{
"animated": false,
"data": {
"sourceHandle": {
"dataType": "ParserComponent",
"id": "ParserComponent-Xspgr",
"name": "parsed_text",
"output_types": ["Message"]
},
"targetHandle": {
"fieldName": "extracted_text",
"id": "Prompt Template-dKwcS",
"inputTypes": ["Message"],
"type": "str"
}
},
"id": "xy-edge__ParserComponent-Xspgr{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-Xspgrœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt Template-dKwcS{œfieldNameœ:œextracted_textœ,œidœ:œPrompt Template-dKwcSœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}",
"selected": false,
"source": "ParserComponent-Xspgr",
"sourceHandle": "{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-Xspgrœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}",
"target": "Prompt Template-dKwcS",
"targetHandle": "{œfieldNameœ:œextracted_textœ,œidœ:œPrompt Template-dKwcSœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}"
},
{
"animated": false,
"data": {
"sourceHandle": {
"dataType": "OllamaModel",
"id": "OllamaModel-xJSnu",
"name": "text_output",
"output_types": ["Message"]
},
"targetHandle": {
"fieldName": "json_content",
"id": "CustomComponent-WriteJsonIdempotent",
"inputTypes": ["Data", "DataFrame", "Message"],
"type": "other"
}
},
"id": "xy-edge__OllamaModel-xJSnu{œdataTypeœ:œOllamaModelœ,œidœ:œOllamaModel-xJSnuœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-CustomComponent-WriteJsonIdempotent{œfieldNameœ:œjson_contentœ,œidœ:œCustomComponent-WriteJsonIdempotentœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
"selected": false,
"source": "OllamaModel-xJSnu",
"sourceHandle": "{œdataTypeœ:œOllamaModelœ,œidœ:œOllamaModel-xJSnuœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}",
"target": "CustomComponent-WriteJsonIdempotent",
"targetHandle": "{œfieldNameœ:œjson_contentœ,œidœ:œCustomComponent-WriteJsonIdempotentœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}"
},
{
"animated": false,
"data": {
"sourceHandle": {
"dataType": "CustomComponent",
"id": "CustomComponent-WriteJsonIdempotent",
"name": "result_path",
"output_types": ["Message"]
},
"targetHandle": {
"dataType": "LoopComponent",
"id": "LoopComponent-5vFOr",
"name": "item",
"output_types": ["Data", "Message"]
}
},
"id": "xy-edge__CustomComponent-WriteJsonIdempotent{œdataTypeœ:œCustomComponentœ,œidœ:œCustomComponent-WriteJsonIdempotentœ,œnameœ:œresult_pathœ,œoutput_typesœ:[œMessageœ]}-LoopComponent-5vFOr{œdataTypeœ:œLoopComponentœ,œidœ:œLoopComponent-5vFOrœ,œnameœ:œitemœ,œoutput_typesœ:[œDataœ,œMessageœ]}",
"selected": false,
"source": "CustomComponent-WriteJsonIdempotent",
"sourceHandle": "{œdataTypeœ:œCustomComponentœ,œidœ:œCustomComponent-WriteJsonIdempotentœ,œnameœ:œresult_pathœ,œoutput_typesœ:[œMessageœ]}",
"target": "LoopComponent-5vFOr",
"targetHandle": "{œdataTypeœ:œLoopComponentœ,œidœ:œLoopComponent-5vFOrœ,œnameœ:œitemœ,œoutput_typesœ:[œDataœ,œMessageœ]}"
},
{
"animated": false,
"data": {
"sourceHandle": {
"dataType": "LoopComponent",
"id": "LoopComponent-5vFOr",
"name": "item",
"output_types": ["Data"]
},
"targetHandle": {
"fieldName": "loop_item",
"id": "CustomComponent-WriteJsonIdempotent",
"inputTypes": ["Data"],
"type": "Data"
}
},
"id": "xy-edge__LoopComponent-5vFOr-item-CustomComponent-WriteJsonIdempotent-loop_item",
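"selected": false,
"source": "LoopComponent-5vFOr",
"sourceHandle": "{œdataTypeœ:œLoopComponentœ,œidœ:œLoopComponent-5vFOrœ,œnameœ:œitemœ,œoutput_typesœ:[œDataœ]}",
"target": "CustomComponent-WriteJsonIdempotent",
"targetHandle": "{œfieldNameœ:œloop_itemœ,œidœ:œCustomComponent-WriteJsonIdempotentœ,œinputTypesœ:[œDataœ],œtypeœ:œDataœ}"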
}
],
"nodes": [
{
"data": {
"id": "File-5V2fL",
"node": {
"base_classes": ["Message"],
"beta": false,
"conditional_paths": [],
"custom_fields": {},
"description": "Loads and returns the content from uploaded files.",
"display_name": "Read File",
"documentation": "https://docs.langflow.org/read-file",
"edited": false,
"field_order": [
"storage_location",
"path",
"file_path",
"separator",
"silent_errors",
"delete_server_file_after_processing",
"ignore_unsupported_extensions",
"ignore_unspecified_files",
"file_path_str",
"aws_access_key_id",
"aws_secret_access_key",
"bucket_name",
"aws_region",
"s3_file_key",
"service_account_key",
"file_id",
"advanced_mode",
"pipeline",
"ocr_engine",
"md_image_placeholder",
"md_page_break_placeholder",
"doc_key",
"use_multithreading",
"concurrency_multithreading",
"markdown"
],
"frozen": false,
"icon": "file-text",
"last_updated": "2026-03-13T07:48:58.791Z",
"legacy": false,
"lf_version": "1.8.0",
"metadata": {
"code_hash": "12a5841f1a03",
"dependencies": {
"dependencies": [
{
"name": "lfx",
"version": null
},
{
"name": "langchain_core",
"version": "0.3.83"
},
{
"name": "pydantic",
"version": "2.11.10"
},
{
"name": "googleapiclient",
"version": "2.154.0"
}
],
"total_dependencies": 4
},
"module": "lfx.components.files_and_knowledge.file.FileComponent"
},
"minimized": false,
"output_types": [],
"outputs": [
{
"allows_loop": false,
"cache": true,
"display_name": "Files",
"group_outputs": false,
"hidden": null,
"loop_types": null,
"method": "load_files",
"name": "dataframe",
"options": null,
"required_inputs": null,
"selected": "DataFrame",
"tool_mode": true,
"types": ["DataFrame"],
"value": "__UNDEFINED__"
}
],
"pinned": false,
"template": {
"_frontend_node_flow_id": {
"value": "4a538191-04b4-41cf-98d7-8e62aaccf3a8"
},
"_frontend_node_folder_id": {
"value": "60f723dc-b1f8-4e25-9c31-0a4ee07abd5c"
},
"_type": "Component",
"advanced_mode": {
"_input_type": "BoolInput",
"advanced": false,
"display_name": "Advanced Parser",
"dynamic": false,
"info": "Enable advanced document processing and export with Docling for PDFs, images, and office documents. Note that advanced document processing can consume significant resources.",
"list": false,
"list_add_label": "Add More",
"name": "advanced_mode",
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
},
"aws_access_key_id": {
"_input_type": "SecretStrInput",
"advanced": false,
"display_name": "AWS Access Key ID",
"dynamic": false,
"info": "AWS Access key ID.",
"input_types": [],
"load_from_db": false,
"name": "aws_access_key_id",
"override_skip": false,
"password": true,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"aws_region": {
"_input_type": "StrInput",
"advanced": false,
"display_name": "AWS Region",
"dynamic": false,
"info": "AWS region (e.g., us-east-1, eu-west-1).",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "aws_region",
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"aws_secret_access_key": {
"_input_type": "SecretStrInput",
"advanced": false,
"display_name": "AWS Secret Key",
"dynamic": false,
"info": "AWS Secret Key.",
"input_types": [],
"load_from_db": false,
"name": "aws_secret_access_key",
"override_skip": false,
"password": true,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"bucket_name": {
"_input_type": "StrInput",
"advanced": false,
"display_name": "S3 Bucket Name",
"dynamic": false,
"info": "Enter the name of the S3 bucket.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "bucket_name",
"override_skip": false,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"code": {
"advanced": true,
"dynamic": true,
"fileTypes": [],
"file_path": "",
"info": "",
"list": false,
"load_from_db": false,
"multiline": true,
"name": "code",
"password": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"type": "code",
"value": "\"\"\"Enhanced file component with Docling support and process isolation.\n\nNotes:\n-----\n- ALL Docling parsing/export runs in a separate OS process to prevent memory\n growth and native library state from impacting the main Langflow process.\n- Standard text/structured parsing continues to use existing BaseFileComponent\n utilities (and optional threading via `parallel_load_data`).\n\"\"\"\n\nfrom __future__ import annotations\n\nimport contextlib\nimport json\nimport subprocess\nimport sys\nimport textwrap\nfrom copy import deepcopy\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile\nfrom typing import Any\n\nfrom lfx.base.data.base_file import BaseFileComponent\nfrom lfx.base.data.storage_utils import parse_storage_path, read_file_bytes, validate_image_content_type\nfrom lfx.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data\nfrom lfx.inputs import SortableListInput\nfrom lfx.inputs.inputs import DropdownInput, MessageTextInput, StrInput\nfrom lfx.io import BoolInput, FileInput, IntInput, Output, SecretStrInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame # noqa: TC001\nfrom lfx.schema.message import Message\nfrom lfx.services.deps import get_settings_service, get_storage_service\nfrom lfx.utils.async_helpers import run_until_complete\nfrom lfx.utils.validate_cloud import is_astra_cloud_environment\n\n\ndef _get_storage_location_options():\n \"\"\"Get storage location options, filtering out Local if in Astra cloud environment.\"\"\"\n all_options = [{\"name\": \"AWS\", \"icon\": \"Amazon\"}, {\"name\": \"Google Drive\", \"icon\": \"google\"}]\n if is_astra_cloud_environment():\n return all_options\n return [{\"name\": \"Local\", \"icon\": \"hard-drive\"}, *all_options]\n\n\nclass FileComponent(BaseFileComponent):\n \"\"\"File component with optional Docling processing (isolated in a subprocess).\"\"\"\n\n display_name = \"Read File\"\n # description is now a dynamic 
property - see get_tool_description()\n _base_description = \"Loads content from one or more files.\"\n documentation: str = \"https://docs.langflow.org/read-file\"\n icon = \"file-text\"\n name = \"File\"\n add_tool_output = True # Enable tool mode toggle without requiring tool_mode inputs\n\n # Extensions that can be processed without Docling (using standard text parsing)\n TEXT_EXTENSIONS = TEXT_FILE_TYPES\n\n # Extensions that require Docling for processing (images, advanced office formats, etc.)\n DOCLING_ONLY_EXTENSIONS = [\n \"adoc\",\n \"asciidoc\",\n \"asc\",\n \"bmp\",\n \"dotx\",\n \"dotm\",\n \"docm\",\n \"jpg\",\n \"jpeg\",\n \"png\",\n \"potx\",\n \"ppsx\",\n \"pptm\",\n \"potm\",\n \"ppsm\",\n \"pptx\",\n \"tiff\",\n \"xls\",\n \"xlsx\",\n \"xhtml\",\n \"webp\",\n ]\n\n # Docling-supported/compatible extensions; TEXT_FILE_TYPES are supported by the base loader.\n VALID_EXTENSIONS = [\n *TEXT_EXTENSIONS,\n *DOCLING_ONLY_EXTENSIONS,\n ]\n\n # Fixed export settings used when markdown export is requested.\n EXPORT_FORMAT = \"Markdown\"\n IMAGE_MODE = \"placeholder\"\n\n _base_inputs = deepcopy(BaseFileComponent.get_base_inputs())\n\n for input_item in _base_inputs:\n if isinstance(input_item, FileInput) and input_item.name == \"path\":\n input_item.real_time_refresh = True\n input_item.tool_mode = False # Disable tool mode for file upload input\n input_item.required = False # Make it optional so it doesn't error in tool mode\n break\n\n inputs = [\n SortableListInput(\n name=\"storage_location\",\n display_name=\"Storage Location\",\n placeholder=\"Select Location\",\n info=\"Choose where to read the file from.\",\n options=_get_storage_location_options(),\n real_time_refresh=True,\n limit=1,\n value=[{\"name\": \"Local\", \"icon\": \"hard-drive\"}],\n advanced=True,\n ),\n *_base_inputs,\n StrInput(\n name=\"file_path_str\",\n display_name=\"File Path\",\n info=(\n \"Path to the file to read. Used when component is called as a tool. 
\"\n \"If not provided, will use the uploaded file from 'path' input.\"\n ),\n show=False,\n advanced=True,\n tool_mode=True, # Required for Toolset toggle, but _get_tools() ignores this parameter\n required=False,\n ),\n # AWS S3 specific inputs\n SecretStrInput(\n name=\"aws_access_key_id\",\n display_name=\"AWS Access Key ID\",\n info=\"AWS Access key ID.\",\n show=False,\n advanced=False,\n required=True,\n ),\n SecretStrInput(\n name=\"aws_secret_access_key\",\n display_name=\"AWS Secret Key\",\n info=\"AWS Secret Key.\",\n show=False,\n advanced=False,\n required=True,\n ),\n StrInput(\n name=\"bucket_name\",\n display_name=\"S3 Bucket Name\",\n info=\"Enter the name of the S3 bucket.\",\n show=False,\n advanced=False,\n required=True,\n ),\n StrInput(\n name=\"aws_region\",\n display_name=\"AWS Region\",\n info=\"AWS region (e.g., us-east-1, eu-west-1).\",\n show=False,\n advanced=False,\n ),\n StrInput(\n name=\"s3_file_key\",\n display_name=\"S3 File Key\",\n info=\"The key (path) of the file in S3 bucket.\",\n show=False,\n advanced=False,\n required=True,\n ),\n # Google Drive specific inputs\n SecretStrInput(\n name=\"service_account_key\",\n display_name=\"GCP Credentials Secret Key\",\n info=\"Your Google Cloud Platform service account JSON key as a secret string (complete JSON content).\",\n show=False,\n advanced=False,\n required=True,\n ),\n StrInput(\n name=\"file_id\",\n display_name=\"Google Drive File ID\",\n info=(\"The Google Drive file ID to read. The file must be shared with the service account email.\"),\n show=False,\n advanced=False,\n required=True,\n ),\n BoolInput(\n name=\"advanced_mode\",\n display_name=\"Advanced Parser\",\n value=False,\n real_time_refresh=True,\n info=(\n \"Enable advanced document processing and export with Docling for PDFs, images, and office documents. 
\"\n \"Note that advanced document processing can consume significant resources.\"\n ),\n # Disabled in cloud\n show=not is_astra_cloud_environment(),\n ),\n DropdownInput(\n name=\"pipeline\",\n display_name=\"Pipeline\",\n info=\"Docling pipeline to use\",\n options=[\"standard\", \"vlm\"],\n value=\"standard\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"ocr_engine\",\n display_name=\"OCR Engine\",\n info=\"OCR engine to use. Only available when pipeline is set to 'standard'.\",\n options=[\"None\", \"easyocr\"],\n value=\"easyocr\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"md_image_placeholder\",\n display_name=\"Image placeholder\",\n info=\"Specify the image placeholder for markdown exports.\",\n value=\"<!-- image -->\",\n advanced=True,\n show=False,\n ),\n StrInput(\n name=\"md_page_break_placeholder\",\n display_name=\"Page break placeholder\",\n info=\"Add this placeholder between pages in the markdown output.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n MessageTextInput(\n name=\"doc_key\",\n display_name=\"Doc Key\",\n info=\"The key to use for the DoclingDocument column.\",\n value=\"doc\",\n advanced=True,\n show=False,\n ),\n # Deprecated input retained for backward-compatibility.\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"[Deprecated] Use Multithreading\",\n advanced=True,\n value=True,\n info=\"Set 'Processing Concurrency' greater than 1 to enable multithreading.\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"When multiple files are being processed, the number of files to process concurrently.\",\n value=1,\n ),\n BoolInput(\n name=\"markdown\",\n display_name=\"Markdown Export\",\n info=\"Export processed documents to Markdown format. 
Only available when advanced mode is enabled.\",\n value=False,\n show=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\", tool_mode=True),\n ]\n\n # ------------------------------ Tool description with file names --------------\n\n def get_tool_description(self) -> str:\n \"\"\"Return a dynamic description that includes the names of uploaded files.\n\n This helps the Agent understand which files are available to read.\n \"\"\"\n base_description = \"Loads and returns the content from uploaded files.\"\n\n # Get the list of uploaded file paths\n file_paths = getattr(self, \"path\", None)\n if not file_paths:\n return base_description\n\n # Ensure it's a list\n if not isinstance(file_paths, list):\n file_paths = [file_paths]\n\n # Extract just the file names from the paths\n file_names = []\n for fp in file_paths:\n if fp:\n name = Path(fp).name\n file_names.append(name)\n\n if file_names:\n files_str = \", \".join(file_names)\n return f\"{base_description} Available files: {files_str}. 
Call this tool to read these files.\"\n\n return base_description\n\n @property\n def description(self) -> str:\n \"\"\"Dynamic description property that includes uploaded file names.\"\"\"\n return self.get_tool_description()\n\n async def _get_tools(self) -> list:\n \"\"\"Override to create a tool without parameters.\n\n The Read File component should use the files already uploaded via UI,\n not accept file paths from the Agent (which wouldn't know the internal paths).\n \"\"\"\n from langchain_core.tools import StructuredTool\n from pydantic import BaseModel\n\n # Empty schema - no parameters needed\n class EmptySchema(BaseModel):\n \"\"\"No parameters required - uses pre-uploaded files.\"\"\"\n\n async def read_files_tool() -> str:\n \"\"\"Read the content of uploaded files.\"\"\"\n try:\n result = self.load_files_message()\n if hasattr(result, \"get_text\"):\n return result.get_text()\n if hasattr(result, \"text\"):\n return result.text\n return str(result)\n except (FileNotFoundError, ValueError, OSError, RuntimeError) as e:\n return f\"Error reading files: {e}\"\n\n description = self.get_tool_description()\n\n tool = StructuredTool(\n name=\"load_files_message\",\n description=description,\n coroutine=read_files_tool,\n args_schema=EmptySchema,\n handle_tool_error=True,\n tags=[\"load_files_message\"],\n metadata={\n \"display_name\": \"Read File\",\n \"display_description\": description,\n },\n )\n\n return [tool]\n\n # ------------------------------ UI helpers --------------------------------------\n\n def _path_value(self, template: dict) -> list[str]:\n \"\"\"Return the list of currently selected file paths from the template.\"\"\"\n return template.get(\"path\", {}).get(\"file_path\", [])\n\n def _disable_docling_fields_in_cloud(self, build_config: dict[str, Any]) -> None:\n \"\"\"Disable all Docling-related fields in cloud environments.\"\"\"\n if \"advanced_mode\" in build_config:\n build_config[\"advanced_mode\"][\"show\"] = False\n 
build_config[\"advanced_mode\"][\"value\"] = False\n # Hide all Docling-related fields\n docling_fields = (\"pipeline\", \"ocr_engine\", \"doc_key\", \"md_image_placeholder\", \"md_page_break_placeholder\")\n for field in docling_fields:\n if field in build_config:\n build_config[field][\"show\"] = False\n # Also disable OCR engine specifically\n if \"ocr_engine\" in build_config:\n build_config[\"ocr_engine\"][\"value\"] = \"None\"\n\n def update_build_config(\n self,\n build_config: dict[str, Any],\n field_value: Any,\n field_name: str | None = None,\n ) -> dict[str, Any]:\n \"\"\"Show/hide Advanced Parser and related fields based on selection context.\"\"\"\n # Update storage location options dynamically based on cloud environment\n if \"storage_location\" in build_config:\n updated_options = _get_storage_location_options()\n build_config[\"storage_location\"][\"options\"] = updated_options\n\n # Handle storage location selection\n if field_name == \"storage_location\":\n # Extract selected storage location\n selected = [location[\"name\"] for location in field_value] if isinstance(field_value, list) else []\n\n # Hide all storage-specific fields first\n storage_fields = [\n \"aws_access_key_id\",\n \"aws_secret_access_key\",\n \"bucket_name\",\n \"aws_region\",\n \"s3_file_key\",\n \"service_account_key\",\n \"file_id\",\n ]\n\n for f_name in storage_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = False\n\n # Show fields based on selected storage location\n if len(selected) == 1:\n location = selected[0]\n\n if location == \"Local\":\n # Show file upload input for local storage\n if \"path\" in build_config:\n build_config[\"path\"][\"show\"] = True\n\n elif location == \"AWS\":\n # Hide file upload input, show AWS fields\n if \"path\" in build_config:\n build_config[\"path\"][\"show\"] = False\n\n aws_fields = [\n \"aws_access_key_id\",\n \"aws_secret_access_key\",\n \"bucket_name\",\n \"aws_region\",\n \"s3_file_key\",\n ]\n for f_name 
in aws_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = True\n build_config[f_name][\"advanced\"] = False\n\n elif location == \"Google Drive\":\n # Hide file upload input, show Google Drive fields\n if \"path\" in build_config:\n build_config[\"path\"][\"show\"] = False\n\n gdrive_fields = [\"service_account_key\", \"file_id\"]\n for f_name in gdrive_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = True\n build_config[f_name][\"advanced\"] = False\n # No storage location selected - show file upload by default\n elif \"path\" in build_config:\n build_config[\"path\"][\"show\"] = True\n\n return build_config\n\n if field_name == \"path\":\n paths = self._path_value(build_config)\n\n # Disable in cloud environments\n if is_astra_cloud_environment():\n self._disable_docling_fields_in_cloud(build_config)\n else:\n # If all files can be processed by docling, do so\n allow_advanced = all(not file_path.endswith((\".csv\", \".xlsx\", \".parquet\")) for file_path in paths)\n build_config[\"advanced_mode\"][\"show\"] = allow_advanced\n if not allow_advanced:\n build_config[\"advanced_mode\"][\"value\"] = False\n docling_fields = (\n \"pipeline\",\n \"ocr_engine\",\n \"doc_key\",\n \"md_image_placeholder\",\n \"md_page_break_placeholder\",\n )\n for field in docling_fields:\n if field in build_config:\n build_config[field][\"show\"] = False\n\n # Docling Processing\n elif field_name == \"advanced_mode\":\n # Disable in cloud environments - don't show Docling fields even if advanced_mode is toggled\n if is_astra_cloud_environment():\n self._disable_docling_fields_in_cloud(build_config)\n else:\n docling_fields = (\n \"pipeline\",\n \"ocr_engine\",\n \"doc_key\",\n \"md_image_placeholder\",\n \"md_page_break_placeholder\",\n )\n for field in docling_fields:\n if field in build_config:\n build_config[field][\"show\"] = bool(field_value)\n if field == \"pipeline\":\n build_config[field][\"advanced\"] = not bool(field_value)\n\n elif 
field_name == \"pipeline\":\n # Disable in cloud environments - don't show OCR engine even if pipeline is changed\n if is_astra_cloud_environment():\n self._disable_docling_fields_in_cloud(build_config)\n elif field_value == \"standard\":\n build_config[\"ocr_engine\"][\"show\"] = True\n build_config[\"ocr_engine\"][\"value\"] = \"easyocr\"\n else:\n build_config[\"ocr_engine\"][\"show\"] = False\n build_config[\"ocr_engine\"][\"value\"] = \"None\"\n\n return build_config\n\n def update_outputs(self, frontend_node: dict[str, Any], field_name: str, field_value: Any) -> dict[str, Any]: # noqa: ARG002\n \"\"\"Dynamically show outputs based on file count/type and advanced mode.\"\"\"\n if field_name not in [\"path\", \"advanced_mode\", \"pipeline\"]:\n return frontend_node\n\n template = frontend_node.get(\"template\", {})\n paths = self._path_value(template)\n if not paths:\n return frontend_node\n\n frontend_node[\"outputs\"] = []\n if len(paths) == 1:\n file_path = paths[0] if field_name == \"path\" else frontend_node[\"template\"][\"path\"][\"file_path\"][0]\n if file_path.endswith((\".csv\", \".xlsx\", \".parquet\")):\n frontend_node[\"outputs\"].append(\n Output(\n display_name=\"Structured Content\",\n name=\"dataframe\",\n method=\"load_files_structured\",\n tool_mode=True,\n ),\n )\n elif file_path.endswith(\".json\"):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"json\", method=\"load_files_json\", tool_mode=True),\n )\n\n advanced_mode = frontend_node.get(\"template\", {}).get(\"advanced_mode\", {}).get(\"value\", False)\n if advanced_mode:\n frontend_node[\"outputs\"].append(\n Output(\n display_name=\"Structured Output\",\n name=\"advanced_dataframe\",\n method=\"load_files_dataframe\",\n tool_mode=True,\n ),\n )\n frontend_node[\"outputs\"].append(\n Output(\n display_name=\"Markdown\", name=\"advanced_markdown\", method=\"load_files_markdown\", tool_mode=True\n ),\n )\n frontend_node[\"outputs\"].append(\n 
Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\", tool_mode=True),\n )\n else:\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\", tool_mode=True),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\", tool_mode=True),\n )\n else:\n # Multiple files => DataFrame output; advanced parser disabled\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Files\", name=\"dataframe\", method=\"load_files\", tool_mode=True)\n )\n\n return frontend_node\n\n # ------------------------------ Core processing ----------------------------------\n\n def _get_selected_storage_location(self) -> str:\n \"\"\"Get the selected storage location from the SortableListInput.\"\"\"\n if hasattr(self, \"storage_location\") and self.storage_location:\n if isinstance(self.storage_location, list) and len(self.storage_location) > 0:\n return self.storage_location[0].get(\"name\", \"\")\n if isinstance(self.storage_location, dict):\n return self.storage_location.get(\"name\", \"\")\n return \"Local\" # Default to Local if not specified\n\n def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Override to handle file_path_str input from tool mode and cloud storage.\n\n Priority:\n 1. Cloud storage (AWS/Google Drive) if selected\n 2. file_path_str (if provided by the tool call)\n 3. 
path (uploaded file from UI)\n \"\"\"\n storage_location = self._get_selected_storage_location()\n\n # Handle AWS S3\n if storage_location == \"AWS\":\n return self._read_from_aws_s3()\n\n # Handle Google Drive\n if storage_location == \"Google Drive\":\n return self._read_from_google_drive()\n\n # Handle Local storage\n # Check if file_path_str is provided (from tool mode)\n file_path_str = getattr(self, \"file_path_str\", None)\n if file_path_str:\n # Use the string path from tool mode\n from pathlib import Path\n\n from lfx.schema.data import Data\n\n # Use same resolution logic as BaseFileComponent (support storage paths)\n path_str = str(file_path_str)\n if parse_storage_path(path_str):\n try:\n resolved_path = Path(self.get_full_path(path_str))\n except (ValueError, AttributeError):\n resolved_path = Path(self.resolve_path(path_str))\n else:\n resolved_path = Path(self.resolve_path(path_str))\n\n if not resolved_path.exists():\n msg = f\"File or directory not found: {file_path_str}\"\n self.log(msg)\n if not self.silent_errors:\n raise ValueError(msg)\n return []\n\n data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: str(resolved_path)})\n return [BaseFileComponent.BaseFile(data_obj, resolved_path, delete_after_processing=False)]\n\n # Otherwise use the default implementation (uses path FileInput)\n return super()._validate_and_resolve_paths()\n\n def _read_from_aws_s3(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Read file from AWS S3.\"\"\"\n from lfx.base.data.cloud_storage_utils import create_s3_client, validate_aws_credentials\n\n # Validate AWS credentials\n validate_aws_credentials(self)\n if not getattr(self, \"s3_file_key\", None):\n msg = \"S3 File Key is required\"\n raise ValueError(msg)\n\n # Create S3 client\n s3_client = create_s3_client(self)\n\n # Download file to temp location\n import tempfile\n\n # Get file extension from S3 key\n file_extension = Path(self.s3_file_key).suffix or \"\"\n\n with 
tempfile.NamedTemporaryFile(mode=\"wb\", suffix=file_extension, delete=False) as temp_file:\n temp_file_path = temp_file.name\n try:\n s3_client.download_fileobj(self.bucket_name, self.s3_file_key, temp_file)\n except Exception as e:\n # Clean up temp file on failure\n with contextlib.suppress(OSError):\n Path(temp_file_path).unlink()\n msg = f\"Failed to download file from S3: {e}\"\n raise RuntimeError(msg) from e\n\n # Create BaseFile object\n from lfx.schema.data import Data\n\n temp_path = Path(temp_file_path)\n data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: str(temp_path)})\n return [BaseFileComponent.BaseFile(data_obj, temp_path, delete_after_processing=True)]\n\n def _read_from_google_drive(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Read file from Google Drive.\"\"\"\n import tempfile\n\n from googleapiclient.http import MediaIoBaseDownload\n\n from lfx.base.data.cloud_storage_utils import create_google_drive_service\n\n # Validate Google Drive credentials\n if not getattr(self, \"service_account_key\", None):\n msg = \"GCP Credentials Secret Key is required for Google Drive storage\"\n raise ValueError(msg)\n if not getattr(self, \"file_id\", None):\n msg = \"Google Drive File ID is required\"\n raise ValueError(msg)\n\n # Create Google Drive service with read-only scope\n drive_service = create_google_drive_service(\n self.service_account_key, scopes=[\"https://www.googleapis.com/auth/drive.readonly\"]\n )\n\n # Get file metadata to determine file name and extension\n try:\n file_metadata = drive_service.files().get(fileId=self.file_id, fields=\"name,mimeType\").execute()\n file_name = file_metadata.get(\"name\", \"download\")\n except Exception as e:\n msg = (\n f\"Unable to access file with ID '{self.file_id}'. \"\n f\"Error: {e!s}. 
\"\n \"Please ensure: 1) The file ID is correct, 2) The file exists, \"\n \"3) The service account has been granted access to this file.\"\n )\n raise ValueError(msg) from e\n\n # Download file to temp location\n file_extension = Path(file_name).suffix or \"\"\n with tempfile.NamedTemporaryFile(mode=\"wb\", suffix=file_extension, delete=False) as temp_file:\n temp_file_path = temp_file.name\n try:\n request = drive_service.files().get_media(fileId=self.file_id)\n downloader = MediaIoBaseDownload(temp_file, request)\n done = False\n while not done:\n _status, done = downloader.next_chunk()\n except Exception as e:\n # Clean up temp file on failure\n with contextlib.suppress(OSError):\n Path(temp_file_path).unlink()\n msg = f\"Failed to download file from Google Drive: {e}\"\n raise RuntimeError(msg) from e\n\n # Create BaseFile object\n from lfx.schema.data import Data\n\n temp_path = Path(temp_file_path)\n data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: str(temp_path)})\n return [BaseFileComponent.BaseFile(data_obj, temp_path, delete_after_processing=True)]\n\n def _is_docling_compatible(self, file_path: str) -> bool:\n \"\"\"Lightweight extension gate for Docling-compatible types.\"\"\"\n docling_exts = (\n \".adoc\",\n \".asciidoc\",\n \".asc\",\n \".bmp\",\n \".csv\",\n \".dotx\",\n \".dotm\",\n \".docm\",\n \".docx\",\n \".htm\",\n \".html\",\n \".jpg\",\n \".jpeg\",\n \".json\",\n \".md\",\n \".pdf\",\n \".png\",\n \".potx\",\n \".ppsx\",\n \".pptm\",\n \".potm\",\n \".ppsm\",\n \".pptx\",\n \".tiff\",\n \".txt\",\n \".xls\",\n \".xlsx\",\n \".xhtml\",\n \".xml\",\n \".webp\",\n )\n return file_path.lower().endswith(docling_exts)\n\n async def _get_local_file_for_docling(self, file_path: str) -> tuple[str, bool]:\n \"\"\"Get a local file path for Docling processing, downloading from S3 if needed.\n\n Args:\n file_path: Either a local path or S3 key (format \"flow_id/filename\")\n\n Returns:\n tuple[str, bool]: (local_path, should_delete) where 
should_delete indicates\n if this is a temporary file that should be cleaned up\n \"\"\"\n settings = get_settings_service().settings\n if settings.storage_type == \"local\":\n return file_path, False\n\n # S3 storage - download to temp file\n parsed = parse_storage_path(file_path)\n if not parsed:\n msg = f\"Invalid S3 path format: {file_path}. Expected 'flow_id/filename'\"\n raise ValueError(msg)\n\n storage_service = get_storage_service()\n flow_id, filename = parsed\n\n # Get file content from S3\n content = await storage_service.get_file(flow_id, filename)\n\n suffix = Path(filename).suffix\n with NamedTemporaryFile(mode=\"wb\", suffix=suffix, delete=False) as tmp_file:\n tmp_file.write(content)\n temp_path = tmp_file.name\n\n return temp_path, True\n\n def _process_docling_in_subprocess(self, file_path: str) -> Data | None:\n \"\"\"Run Docling in a separate OS process and map the result to a Data object.\n\n We avoid multiprocessing pickling by launching `python -c \"<script>\"` and\n passing JSON config via stdin. 
The child prints a JSON result to stdout.\n\n For S3 storage, the file is downloaded to a temp file first.\n \"\"\"\n if not file_path:\n return None\n\n settings = get_settings_service().settings\n if settings.storage_type == \"s3\":\n local_path, should_delete = run_until_complete(self._get_local_file_for_docling(file_path))\n else:\n local_path = file_path\n should_delete = False\n\n try:\n return self._process_docling_subprocess_impl(local_path, file_path)\n finally:\n # Clean up temp file if we created one\n if should_delete:\n with contextlib.suppress(Exception):\n Path(local_path).unlink() # Ignore cleanup errors\n\n def _process_docling_subprocess_impl(self, local_file_path: str, original_file_path: str) -> Data | None:\n \"\"\"Implementation of Docling subprocess processing.\n\n Args:\n local_file_path: Path to local file to process\n original_file_path: Original file path to include in metadata\n Returns:\n Data object with processed content\n \"\"\"\n args: dict[str, Any] = {\n \"file_path\": local_file_path,\n \"markdown\": bool(self.markdown),\n \"image_mode\": str(self.IMAGE_MODE),\n \"md_image_placeholder\": str(self.md_image_placeholder),\n \"md_page_break_placeholder\": str(self.md_page_break_placeholder),\n \"pipeline\": str(self.pipeline),\n \"ocr_engine\": (\n self.ocr_engine if self.ocr_engine and self.ocr_engine != \"None\" and self.pipeline != \"vlm\" else None\n ),\n }\n\n # Child script for isolating the docling processing\n child_script = textwrap.dedent(\n r\"\"\"\n import json, sys\n\n def try_imports():\n try:\n from docling.datamodel.base_models import ConversionStatus, InputFormat # type: ignore\n from docling.document_converter import DocumentConverter # type: ignore\n from docling_core.types.doc import ImageRefMode # type: ignore\n return ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, \"latest\"\n except Exception as e:\n raise e\n\n def create_converter(strategy, input_format, DocumentConverter, pipeline, 
ocr_engine):\n # --- Standard PDF/IMAGE pipeline (your existing behavior), with optional OCR ---\n if pipeline == \"standard\":\n try:\n from docling.datamodel.pipeline_options import PdfPipelineOptions # type: ignore\n from docling.document_converter import PdfFormatOption # type: ignore\n\n pipe = PdfPipelineOptions()\n pipe.do_ocr = False\n\n if ocr_engine:\n try:\n from docling.models.factories import get_ocr_factory # type: ignore\n pipe.do_ocr = True\n fac = get_ocr_factory(allow_external_plugins=False)\n pipe.ocr_options = fac.create_options(kind=ocr_engine)\n except Exception:\n # If OCR setup fails, disable it\n pipe.do_ocr = False\n\n fmt = {}\n if hasattr(input_format, \"PDF\"):\n fmt[getattr(input_format, \"PDF\")] = PdfFormatOption(pipeline_options=pipe)\n if hasattr(input_format, \"IMAGE\"):\n fmt[getattr(input_format, \"IMAGE\")] = PdfFormatOption(pipeline_options=pipe)\n\n return DocumentConverter(format_options=fmt)\n except Exception:\n return DocumentConverter()\n\n # --- Vision-Language Model (VLM) pipeline ---\n if pipeline == \"vlm\":\n try:\n from docling.datamodel.pipeline_options import VlmPipelineOptions\n from docling.datamodel.vlm_model_specs import GRANITEDOCLING_MLX, GRANITEDOCLING_TRANSFORMERS\n from docling.document_converter import PdfFormatOption\n from docling.pipeline.vlm_pipeline import VlmPipeline\n\n vl_pipe = VlmPipelineOptions(\n vlm_options=GRANITEDOCLING_TRANSFORMERS,\n )\n\n if sys.platform == \"darwin\":\n try:\n import mlx_vlm\n vl_pipe.vlm_options = GRANITEDOCLING_MLX\n except ImportError as e:\n raise e\n\n # VLM paths generally don't need OCR; keep OCR off by default here.\n fmt = {}\n if hasattr(input_format, \"PDF\"):\n fmt[getattr(input_format, \"PDF\")] = PdfFormatOption(\n pipeline_cls=VlmPipeline,\n pipeline_options=vl_pipe\n )\n if hasattr(input_format, \"IMAGE\"):\n fmt[getattr(input_format, \"IMAGE\")] = PdfFormatOption(\n pipeline_cls=VlmPipeline,\n pipeline_options=vl_pipe\n )\n\n return 
DocumentConverter(format_options=fmt)\n except Exception as e:\n raise e\n\n # --- Fallback: default converter with no special options ---\n return DocumentConverter()\n\n def export_markdown(document, ImageRefMode, image_mode, img_ph, pg_ph):\n try:\n mode = getattr(ImageRefMode, image_mode.upper(), image_mode)\n return document.export_to_markdown(\n image_mode=mode,\n image_placeholder=img_ph,\n page_break_placeholder=pg_ph,\n )\n except Exception:\n try:\n return document.export_to_text()\n except Exception:\n return str(document)\n\n def to_rows(doc_dict):\n rows = []\n for t in doc_dict.get(\"texts\", []):\n prov = t.get(\"prov\") or []\n page_no = None\n if prov and isinstance(prov, list) and isinstance(prov[0], dict):\n page_no = prov[0].get(\"page_no\")\n rows.append({\n \"page_no\": page_no,\n \"label\": t.get(\"label\"),\n \"text\": t.get(\"text\"),\n \"level\": t.get(\"level\"),\n })\n return rows\n\n def main():\n cfg = json.loads(sys.stdin.read())\n file_path = cfg[\"file_path\"]\n markdown = cfg[\"markdown\"]\n image_mode = cfg[\"image_mode\"]\n img_ph = cfg[\"md_image_placeholder\"]\n pg_ph = cfg[\"md_page_break_placeholder\"]\n pipeline = cfg[\"pipeline\"]\n ocr_engine = cfg.get(\"ocr_engine\")\n meta = {\"file_path\": file_path}\n\n try:\n ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, strategy = try_imports()\n converter = create_converter(strategy, InputFormat, DocumentConverter, pipeline, ocr_engine)\n try:\n res = converter.convert(file_path)\n except Exception as e:\n print(json.dumps({\"ok\": False, \"error\": f\"Docling conversion error: {e}\", \"meta\": meta}))\n return\n\n ok = False\n if hasattr(res, \"status\"):\n try:\n ok = (res.status == ConversionStatus.SUCCESS) or (str(res.status).lower() == \"success\")\n except Exception:\n ok = (str(res.status).lower() == \"success\")\n if not ok and hasattr(res, \"document\"):\n ok = getattr(res, \"document\", None) is not None\n if not ok:\n print(json.dumps({\"ok\": False, 
\"error\": \"Docling conversion failed\", \"meta\": meta}))\n return\n\n doc = getattr(res, \"document\", None)\n if doc is None:\n print(json.dumps({\"ok\": False, \"error\": \"Docling produced no document\", \"meta\": meta}))\n return\n\n if markdown:\n text = export_markdown(doc, ImageRefMode, image_mode, img_ph, pg_ph)\n print(json.dumps({\"ok\": True, \"mode\": \"markdown\", \"text\": text, \"meta\": meta}))\n return\n\n # structured\n try:\n doc_dict = doc.export_to_dict()\n except Exception as e:\n print(json.dumps({\"ok\": False, \"error\": f\"Docling export_to_dict failed: {e}\", \"meta\": meta}))\n return\n\n rows = to_rows(doc_dict)\n print(json.dumps({\"ok\": True, \"mode\": \"structured\", \"doc\": rows, \"meta\": meta}))\n except Exception as e:\n print(\n json.dumps({\n \"ok\": False,\n \"error\": f\"Docling processing error: {e}\",\n \"meta\": {\"file_path\": file_path},\n })\n )\n\n if __name__ == \"__main__\":\n main()\n \"\"\"\n )\n\n # Validate file_path to avoid command injection or unsafe input\n if not isinstance(args[\"file_path\"], str) or any(c in args[\"file_path\"] for c in [\";\", \"|\", \"&\", \"$\", \"`\"]):\n return Data(data={\"error\": \"Unsafe file path detected.\", \"file_path\": args[\"file_path\"]})\n\n proc = subprocess.run( # noqa: S603\n [sys.executable, \"-u\", \"-c\", child_script],\n input=json.dumps(args).encode(\"utf-8\"),\n capture_output=True,\n check=False,\n )\n\n if not proc.stdout:\n err_msg = proc.stderr.decode(\"utf-8\", errors=\"replace\") if proc.stderr else \"no output from child process\"\n return Data(data={\"error\": f\"Docling subprocess error: {err_msg}\", \"file_path\": original_file_path})\n\n try:\n result = json.loads(proc.stdout.decode(\"utf-8\"))\n except Exception as e: # noqa: BLE001\n err_msg = proc.stderr.decode(\"utf-8\", errors=\"replace\")\n return Data(\n data={\n \"error\": f\"Invalid JSON from Docling subprocess: {e}. 
stderr={err_msg}\",\n \"file_path\": original_file_path,\n },\n )\n\n if not result.get(\"ok\"):\n error_msg = result.get(\"error\", \"Unknown Docling error\")\n # Override meta file_path with original_file_path to ensure correct path matching\n meta = result.get(\"meta\", {})\n meta[\"file_path\"] = original_file_path\n return Data(data={\"error\": error_msg, **meta})\n\n meta = result.get(\"meta\", {})\n # Override meta file_path with original_file_path to ensure correct path matching\n # The subprocess returns the temp file path, but we need the original S3/local path for rollup_data\n meta[\"file_path\"] = original_file_path\n if result.get(\"mode\") == \"markdown\":\n exported_content = str(result.get(\"text\", \"\"))\n return Data(\n text=exported_content,\n data={\"exported_content\": exported_content, \"export_format\": self.EXPORT_FORMAT, **meta},\n )\n\n rows = list(result.get(\"doc\", []))\n return Data(data={\"doc\": rows, \"export_format\": self.EXPORT_FORMAT, **meta})\n\n def process_files(\n self,\n file_list: list[BaseFileComponent.BaseFile],\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process input files.\n\n - advanced_mode => Docling in a separate process.\n - Otherwise => standard parsing in current process (optionally threaded).\n \"\"\"\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n # Validate image files to detect content/extension mismatches\n # This prevents API errors like \"Image does not match the provided media type\"\n image_extensions = {\"jpeg\", \"jpg\", \"png\", \"gif\", \"webp\", \"bmp\", \"tiff\"}\n settings = get_settings_service().settings\n for file in file_list:\n extension = file.path.suffix[1:].lower()\n if extension in image_extensions:\n # Read bytes based on storage type\n try:\n if settings.storage_type == \"s3\":\n # For S3 storage, use storage service to read file bytes\n file_path_str = str(file.path)\n content = run_until_complete(read_file_bytes(file_path_str))\n else:\n # For 
local storage, read bytes directly from filesystem\n content = file.path.read_bytes()\n\n is_valid, error_msg = validate_image_content_type(\n str(file.path),\n content=content,\n )\n if not is_valid:\n self.log(error_msg)\n if not self.silent_errors:\n raise ValueError(error_msg)\n except (OSError, FileNotFoundError) as e:\n self.log(f\"Could not read file for validation: {e}\")\n # Continue - let it fail later with better error\n\n # Validate that files requiring Docling are only processed when advanced mode is enabled\n if not self.advanced_mode:\n for file in file_list:\n extension = file.path.suffix[1:].lower()\n if extension in self.DOCLING_ONLY_EXTENSIONS:\n if is_astra_cloud_environment():\n msg = (\n f\"File '{file.path.name}' has extension '.{extension}' which requires \"\n f\"Advanced Parser mode. Advanced Parser is not available in cloud environments.\"\n )\n else:\n msg = (\n f\"File '{file.path.name}' has extension '.{extension}' which requires \"\n f\"Advanced Parser mode. Please enable 'Advanced Parser' to process this file.\"\n )\n self.log(msg)\n raise ValueError(msg)\n\n def process_file_standard(file_path: str, *, silent_errors: bool = False) -> Data | None:\n try:\n return parse_text_file_to_data(file_path, silent_errors=silent_errors)\n except FileNotFoundError as e:\n self.log(f\"File not found: {file_path}. 
Error: {e}\")\n if not silent_errors:\n raise\n return None\n except Exception as e:\n self.log(f\"Unexpected error processing {file_path}: {e}\")\n if not silent_errors:\n raise\n return None\n\n docling_compatible = all(self._is_docling_compatible(str(f.path)) for f in file_list)\n\n # Advanced path: Check if ALL files are compatible with Docling\n if self.advanced_mode and docling_compatible:\n final_return: list[BaseFileComponent.BaseFile] = []\n for file in file_list:\n file_path = str(file.path)\n advanced_data: Data | None = self._process_docling_in_subprocess(file_path)\n\n # Handle None case - Docling processing failed or returned None\n if advanced_data is None:\n error_data = Data(\n data={\n \"file_path\": file_path,\n \"error\": \"Docling processing returned no result. Check logs for details.\",\n },\n )\n final_return.extend(self.rollup_data([file], [error_data]))\n continue\n\n # --- UNNEST: expand each element in `doc` to its own Data row\n payload = getattr(advanced_data, \"data\", {}) or {}\n\n # Check for errors first\n if \"error\" in payload:\n error_msg = payload.get(\"error\", \"Unknown error\")\n error_data = Data(\n data={\n \"file_path\": file_path,\n \"error\": error_msg,\n **{k: v for k, v in payload.items() if k not in (\"error\", \"file_path\")},\n },\n )\n final_return.extend(self.rollup_data([file], [error_data]))\n continue\n\n doc_rows = payload.get(\"doc\")\n if isinstance(doc_rows, list) and doc_rows:\n # Non-empty list of structured rows\n rows: list[Data | None] = [\n Data(\n data={\n \"file_path\": file_path,\n **(item if isinstance(item, dict) else {\"value\": item}),\n },\n )\n for item in doc_rows\n ]\n final_return.extend(self.rollup_data([file], rows))\n elif isinstance(doc_rows, list) and not doc_rows:\n # Empty list - file was processed but no text content found\n # Create a Data object indicating no content was extracted\n self.log(f\"No text extracted from '{file_path}', creating placeholder data\")\n empty_data = 
Data(\n data={\n \"file_path\": file_path,\n \"text\": \"(No text content extracted from image)\",\n \"info\": \"Image processed successfully but contained no extractable text\",\n **{k: v for k, v in payload.items() if k != \"doc\"},\n },\n )\n final_return.extend(self.rollup_data([file], [empty_data]))\n else:\n # If not structured, keep as-is (e.g., markdown export or error dict)\n # Ensure file_path is set for proper rollup matching\n if not payload.get(\"file_path\"):\n payload[\"file_path\"] = file_path\n # Create new Data with file_path\n advanced_data = Data(\n data=payload,\n text=getattr(advanced_data, \"text\", None),\n )\n final_return.extend(self.rollup_data([file], [advanced_data]))\n return final_return\n\n # Standard multi-file (or single non-advanced) path\n concurrency = 1 if not self.use_multithreading else max(1, self.concurrency_multithreading)\n\n file_paths = [str(f.path) for f in file_list]\n self.log(f\"Starting parallel processing of {len(file_paths)} files with concurrency: {concurrency}.\")\n my_data = parallel_load_data(\n file_paths,\n silent_errors=self.silent_errors,\n load_function=process_file_standard,\n max_concurrency=concurrency,\n )\n return self.rollup_data(file_list, my_data)\n\n # ------------------------------ Output helpers -----------------------------------\n\n def load_files_helper(self) -> DataFrame:\n result = self.load_files()\n\n # Result is a DataFrame - check if it has any rows\n if result.empty:\n msg = \"Could not extract content from the provided file(s).\"\n raise ValueError(msg)\n\n # Check for error column with error messages\n if \"error\" in result.columns:\n errors = result[\"error\"].dropna().tolist()\n if errors and not any(col in result.columns for col in [\"text\", \"doc\", \"exported_content\"]):\n raise ValueError(errors[0])\n\n return result\n\n def load_files_dataframe(self) -> DataFrame:\n \"\"\"Load files using advanced Docling processing and export to DataFrame format.\"\"\"\n self.markdown = 
False\n return self.load_files_helper()\n\n def load_files_markdown(self) -> Message:\n \"\"\"Load files using advanced Docling processing and export to Markdown format.\"\"\"\n self.markdown = True\n result = self.load_files_helper()\n\n # Result is a DataFrame - check for text or exported_content columns\n if \"text\" in result.columns and not result[\"text\"].isna().all():\n text_values = result[\"text\"].dropna().tolist()\n if text_values:\n return Message(text=str(text_values[0]))\n\n if \"exported_content\" in result.columns and not result[\"exported_content\"].isna().all():\n content_values = result[\"exported_content\"].dropna().tolist()\n if content_values:\n return Message(text=str(content_values[0]))\n\n # Return empty message with info that no text was found\n return Message(text=\"(No text content extracted from file)\")\n"
},
"concurrency_multithreading": {
"_input_type": "IntInput",
"advanced": true,
"display_name": "Processing Concurrency",
"dynamic": false,
"info": "When multiple files are being processed, the number of files to process concurrently.",
"list": false,
"list_add_label": "Add More",
"name": "concurrency_multithreading",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "int",
"value": 1
},
"delete_server_file_after_processing": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Delete Server File After Processing",
"dynamic": false,
"info": "If true, the Server File Path will be deleted after processing.",
"list": false,
"list_add_label": "Add More",
"name": "delete_server_file_after_processing",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": true
},
"doc_key": {
"_input_type": "MessageTextInput",
"advanced": true,
"display_name": "Doc Key",
"dynamic": false,
"info": "The key to use for the DoclingDocument column.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "doc_key",
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": "doc"
},
"file_id": {
"_input_type": "StrInput",
"advanced": false,
"display_name": "Google Drive File ID",
"dynamic": false,
"info": "The Google Drive file ID to read. The file must be shared with the service account email.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "file_id",
"override_skip": false,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"file_path": {
"_input_type": "HandleInput",
"advanced": true,
"display_name": "Server File Path",
"dynamic": false,
"info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supercedes 'Path' but supports same file types.",
"input_types": ["Data", "Message"],
"list": true,
"list_add_label": "Add More",
"name": "file_path",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "other",
"value": ""
},
"file_path_str": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "File Path",
"dynamic": false,
"info": "Path to the file to read. Used when component is called as a tool. If not provided, will use the uploaded file from 'path' input.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "file_path_str",
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"ignore_unspecified_files": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Ignore Unspecified Files",
"dynamic": false,
"info": "If true, Data with no 'file_path' property will be ignored.",
"list": false,
"list_add_label": "Add More",
"name": "ignore_unspecified_files",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
},
"ignore_unsupported_extensions": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Ignore Unsupported Extensions",
"dynamic": false,
"info": "If true, files with unsupported extensions will not be processed.",
"list": false,
"list_add_label": "Add More",
"name": "ignore_unsupported_extensions",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": true
},
"is_refresh": false,
"markdown": {
"_input_type": "BoolInput",
"advanced": false,
"display_name": "Markdown Export",
"dynamic": false,
"info": "Export processed documents to Markdown format. Only available when advanced mode is enabled.",
"list": false,
"list_add_label": "Add More",
"name": "markdown",
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
},
"md_image_placeholder": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "Image placeholder",
"dynamic": false,
"info": "Specify the image placeholder for markdown exports.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "md_image_placeholder",
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": "<!-- image -->"
},
"md_page_break_placeholder": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "Page break placeholder",
"dynamic": false,
"info": "Add this placeholder between pages in the markdown output.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "md_page_break_placeholder",
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"ocr_engine": {
"_input_type": "DropdownInput",
"advanced": true,
"combobox": false,
"dialog_inputs": {},
"display_name": "OCR Engine",
"dynamic": false,
"external_options": {},
"info": "OCR engine to use. Only available when pipeline is set to 'standard'.",
"name": "ocr_engine",
"options": ["None", "easyocr"],
"options_metadata": [],
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"toggle": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "str",
"value": "easyocr"
},
"path": {
"_input_type": "FileInput",
"advanced": false,
"display_name": "Files",
"dynamic": false,
"fileTypes": [
"csv",
"json",
"pdf",
"txt",
"md",
"mdx",
"yaml",
"yml",
"xml",
"html",
"htm",
"docx",
"py",
"sh",
"sql",
"js",
"ts",
"tsx",
"adoc",
"asciidoc",
"asc",
"bmp",
"dotx",
"dotm",
"docm",
"jpg",
"jpeg",
"png",
"potx",
"ppsx",
"pptm",
"potm",
"ppsm",
"pptx",
"tiff",
"xls",
"xlsx",
"xhtml",
"webp",
"zip",
"tar",
"tgz",
"bz2",
"gz"
],
"file_path": [
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0001-ผรม.2-คคง.-QC-0001.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0002-ผรม.2-คคง.-QC-0002.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0003-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0001.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0004-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0002.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0005-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0004.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0006-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0005.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0007-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0006.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0008-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0007.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0009-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0008.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0010-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0009.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0011-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0010.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0012-สคฉ.3-คคง.-0119.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0013-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0011.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0015-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0014.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0016-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0012.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0018-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0015.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0019-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0016.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0020-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0017.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0021-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0018.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0022-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0019.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0023-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0020.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0024-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0021.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0025-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0022.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0026-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0023.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0027-ผรม.2-คคง.-67091601.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0027-เอกสารยกเลิก.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0028-ผรม.2-คคง.-67091602.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0029-ผรม.2-คคง.-67091603.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0030-ผรม.2-คคง.-67091604.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0031-ผรม.2-คคง.-67091701.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0032-ผรม.2-คคง.-67091702.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0033-ผรม.2-คคง.-67091703.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0034-ผรม.2-คคง.-67091704.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0035-ผรม.2-คคง.-67091705.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0035-เอกสารยกเลิก.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0036-ผรม.2-คคง.-67091801.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0037-ผรม.2-คคง.-67091802.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0038-ผรม.2-คคง.-67091803.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0038-ผรม.2-คคง.-67091803ยกเลิก.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0038-เอกสารยกเลิก.pdf"
],
"info": "Supported file extensions: csv, json, pdf, txt, md, mdx, yaml, yml, xml, html, htm, docx, py, sh, sql, js, ts, tsx, adoc, asciidoc, asc, bmp, dotx, dotm, docm, jpg, jpeg, png, potx, ppsx, pptm, potm, ppsm, pptx, tiff, xls, xlsx, xhtml, webp; optionally bundled in file extensions: zip, tar, tgz, bz2, gz",
"list": true,
"list_add_label": "Add More",
"name": "path",
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"temp_file": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "file",
"value": ""
},
"pipeline": {
"_input_type": "DropdownInput",
"advanced": true,
"combobox": false,
"dialog_inputs": {},
"display_name": "Pipeline",
"dynamic": false,
"external_options": {},
"info": "Docling pipeline to use",
"name": "pipeline",
"options": ["standard", "vlm"],
"options_metadata": [],
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": false,
"title_case": false,
"toggle": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "str",
"value": "standard"
},
"s3_file_key": {
"_input_type": "StrInput",
"advanced": false,
"display_name": "S3 File Key",
"dynamic": false,
"info": "The key (path) of the file in S3 bucket.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "s3_file_key",
"override_skip": false,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"separator": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "Separator",
"dynamic": false,
"info": "Specify the separator to use between multiple outputs in Message format.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "separator",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": "\n\n"
},
"service_account_key": {
"_input_type": "SecretStrInput",
"advanced": false,
"display_name": "GCP Credentials Secret Key",
"dynamic": false,
"info": "Your Google Cloud Platform service account JSON key as a secret string (complete JSON content).",
"input_types": [],
"load_from_db": false,
"name": "service_account_key",
"override_skip": false,
"password": true,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"silent_errors": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Silent Errors",
"dynamic": false,
"info": "If true, errors will not raise an exception.",
"list": false,
"list_add_label": "Add More",
"name": "silent_errors",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
},
"storage_location": {
"_input_type": "SortableListInput",
"advanced": true,
"display_name": "Storage Location",
"dynamic": false,
"info": "Choose where to read the file from.",
"limit": 1,
"name": "storage_location",
"options": [
{
"icon": "hard-drive",
"name": "Local"
},
{
"icon": "Amazon",
"name": "AWS"
},
{
"icon": "google",
"name": "Google Drive"
}
],
"override_skip": false,
"placeholder": "Select Location",
"real_time_refresh": true,
"required": false,
"search_category": [],
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "sortableList",
"value": [
{
"chosen": false,
"icon": "hard-drive",
"name": "Local",
"selected": false
}
]
},
"use_multithreading": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "[Deprecated] Use Multithreading",
"dynamic": false,
"info": "Set 'Processing Concurrency' greater than 1 to enable multithreading.",
"list": false,
"list_add_label": "Add More",
"name": "use_multithreading",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": true
}
},
"tool_mode": false
},
"showNode": true,
"type": "File"
},
"dragging": false,
"id": "File-5V2fL",
"measured": {
"height": 513,
"width": 400
},
"position": {
"x": -869.2220503002395,
"y": -131.38909294380795
},
"selected": false,
"type": "genericNode"
},
{
"data": {
"id": "OllamaModel-xJSnu",
"node": {
"base_classes": ["Data", "DataFrame", "LanguageModel", "Message"],
"beta": false,
"conditional_paths": [],
"custom_fields": {},
"description": "Generate text using Ollama Local LLMs.",
"display_name": "Ollama",
"documentation": "",
"edited": false,
"field_order": [
"base_url",
"model_name",
"api_key",
"temperature",
"format",
"metadata",
"mirostat",
"mirostat_eta",
"mirostat_tau",
"num_ctx",
"num_gpu",
"num_thread",
"repeat_last_n",
"repeat_penalty",
"tfs_z",
"timeout",
"top_k",
"top_p",
"enable_verbose_output",
"tags",
"stop_tokens",
"system",
"tool_model_enabled",
"template",
"enable_structured_output",
"input_value",
"system_message",
"stream"
],
"frozen": false,
"icon": "Ollama",
"last_updated": "2026-03-13T07:57:38.225Z",
"legacy": false,
"metadata": {
"code_hash": "cd3dc38272a7",
"dependencies": {
"dependencies": [
{
"name": "httpx",
"version": "0.28.1"
},
{
"name": "langchain_ollama",
"version": "0.3.10"
},
{
"name": "lfx",
"version": null
}
],
"total_dependencies": 3
},
"keywords": ["model", "llm", "language model", "large language model"],
"module": "lfx.components.ollama.ollama.ChatOllamaComponent"
},
"minimized": false,
"output_types": [],
"outputs": [
{
"allows_loop": false,
"cache": true,
"display_name": "Text",
"group_outputs": false,
"loop_types": null,
"method": "text_response",
"name": "text_output",
"options": null,
"required_inputs": null,
"selected": "Message",
"tool_mode": true,
"types": ["Message"],
"value": "__UNDEFINED__"
},
{
"allows_loop": false,
"cache": true,
"display_name": "Language Model",
"group_outputs": false,
"loop_types": null,
"method": "build_model",
"name": "model_output",
"options": null,
"required_inputs": null,
"tool_mode": true,
"types": ["LanguageModel"],
"value": "__UNDEFINED__"
},
{
"allows_loop": false,
"cache": true,
"display_name": "Data",
"group_outputs": false,
"loop_types": null,
"method": "build_data_output",
"name": "data_output",
"options": null,
"required_inputs": null,
"tool_mode": true,
"types": ["Data"],
"value": "__UNDEFINED__"
},
{
"allows_loop": false,
"cache": true,
"display_name": "DataFrame",
"group_outputs": false,
"loop_types": null,
"method": "build_dataframe_output",
"name": "dataframe_output",
"options": null,
"required_inputs": null,
"tool_mode": true,
"types": ["DataFrame"],
"value": "__UNDEFINED__"
}
],
"pinned": false,
"template": {
"_frontend_node_flow_id": {
"value": "4a538191-04b4-41cf-98d7-8e62aaccf3a8"
},
"_frontend_node_folder_id": {
"value": "60f723dc-b1f8-4e25-9c31-0a4ee07abd5c"
},
"_type": "Component",
"api_key": {
"_input_type": "SecretStrInput",
"advanced": true,
"display_name": "Ollama API Key",
"dynamic": false,
"info": "Your Ollama API key.",
"input_types": [],
"load_from_db": false,
"name": "api_key",
"override_skip": false,
"password": true,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"base_url": {
"_input_type": "StrInput",
"advanced": false,
"display_name": "Ollama API URL",
"dynamic": false,
"info": "Endpoint of the Ollama API. Defaults to http://localhost:11434.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "base_url",
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": "http://localhost:11434"
},
"code": {
"advanced": true,
"dynamic": true,
"fileTypes": [],
"file_path": "",
"info": "",
"list": false,
"load_from_db": false,
"multiline": true,
"name": "code",
"password": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"type": "code",
"value": "import asyncio\nimport json\nfrom contextlib import suppress\nfrom typing import Any\nfrom urllib.parse import urljoin\n\nimport httpx\nfrom langchain_ollama import ChatOllama\n\nfrom lfx.base.models.model import LCModelComponent\nfrom lfx.field_typing import LanguageModel\nfrom lfx.field_typing.range_spec import RangeSpec\nfrom lfx.helpers.base_model import build_model_from_schema\nfrom lfx.io import (\n BoolInput,\n DictInput,\n DropdownInput,\n FloatInput,\n IntInput,\n MessageTextInput,\n Output,\n SecretStrInput,\n SliderInput,\n StrInput,\n TableInput,\n)\nfrom lfx.log.logger import logger\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.table import EditMode\nfrom lfx.utils.util import transform_localhost_url\n\nHTTP_STATUS_OK = 200\nTABLE_ROW_PLACEHOLDER = {\"name\": \"field\", \"description\": \"description of field\", \"type\": \"str\", \"multiple\": \"False\"}\n\n\nclass ChatOllamaComponent(LCModelComponent):\n display_name = \"Ollama\"\n description = \"Generate text using Ollama Local LLMs.\"\n icon = \"Ollama\"\n name = \"OllamaModel\"\n\n # Define constants for JSON keys\n JSON_MODELS_KEY = \"models\"\n JSON_NAME_KEY = \"name\"\n JSON_CAPABILITIES_KEY = \"capabilities\"\n DESIRED_CAPABILITY = \"completion\"\n TOOL_CALLING_CAPABILITY = \"tools\"\n\n # Define the table schema for the format input\n TABLE_SCHEMA = [\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data 
type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"edit_mode\": EditMode.INLINE,\n \"options\": [\"True\", \"False\"],\n \"default\": \"False\",\n },\n ]\n default_table_row = {row[\"name\"]: row.get(\"default\", None) for row in TABLE_SCHEMA}\n default_table_row_schema = build_model_from_schema([default_table_row]).model_json_schema()\n\n inputs = [\n StrInput(\n name=\"base_url\",\n display_name=\"Ollama API URL\",\n info=\"Endpoint of the Ollama API. Defaults to http://localhost:11434.\",\n value=\"http://localhost:11434\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=[],\n info=\"Refer to https://ollama.com/library for more models.\",\n refresh_button=True,\n real_time_refresh=True,\n required=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Ollama API Key\",\n info=\"Your Ollama API key.\",\n value=None,\n required=False,\n real_time_refresh=True,\n advanced=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n TableInput(\n name=\"format\",\n display_name=\"Format\",\n info=\"Specify the format of the output.\",\n table_schema=TABLE_SCHEMA,\n value=default_table_row,\n show=False,\n ),\n DictInput(name=\"metadata\", display_name=\"Metadata\", info=\"Metadata to add to the run trace.\", advanced=True),\n DropdownInput(\n name=\"mirostat\",\n display_name=\"Mirostat\",\n options=[\"Disabled\", \"Mirostat\", \"Mirostat 2.0\"],\n info=\"Enable/disable Mirostat sampling for controlling perplexity.\",\n value=\"Disabled\",\n advanced=True,\n real_time_refresh=True,\n ),\n 
FloatInput(\n name=\"mirostat_eta\",\n display_name=\"Mirostat Eta\",\n info=\"Learning rate for Mirostat algorithm. (Default: 0.1)\",\n advanced=True,\n ),\n FloatInput(\n name=\"mirostat_tau\",\n display_name=\"Mirostat Tau\",\n info=\"Controls the balance between coherence and diversity of the output. (Default: 5.0)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_ctx\",\n display_name=\"Context Window Size\",\n info=\"Size of the context window for generating tokens. (Default: 2048)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_gpu\",\n display_name=\"Number of GPUs\",\n info=\"Number of GPUs to use for computation. (Default: 1 on macOS, 0 to disable)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_thread\",\n display_name=\"Number of Threads\",\n info=\"Number of threads to use during computation. (Default: detected for optimal performance)\",\n advanced=True,\n ),\n IntInput(\n name=\"repeat_last_n\",\n display_name=\"Repeat Last N\",\n info=\"How far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)\",\n advanced=True,\n ),\n FloatInput(\n name=\"repeat_penalty\",\n display_name=\"Repeat Penalty\",\n info=\"Penalty for repetitions in generated text. (Default: 1.1)\",\n advanced=True,\n ),\n FloatInput(name=\"tfs_z\", display_name=\"TFS Z\", info=\"Tail free sampling value. (Default: 1)\", advanced=True),\n IntInput(name=\"timeout\", display_name=\"Timeout\", info=\"Timeout for the request stream.\", advanced=True),\n IntInput(\n name=\"top_k\", display_name=\"Top K\", info=\"Limits token selection to top K. (Default: 40)\", advanced=True\n ),\n FloatInput(name=\"top_p\", display_name=\"Top P\", info=\"Works together with top-k. 
(Default: 0.9)\", advanced=True),\n BoolInput(\n name=\"enable_verbose_output\",\n display_name=\"Ollama Verbose Output\",\n info=\"Whether to print out response text.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"tags\",\n display_name=\"Tags\",\n info=\"Comma-separated list of tags to add to the run trace.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"stop_tokens\",\n display_name=\"Stop Tokens\",\n info=\"Comma-separated list of tokens to signal the model to stop generating text.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"system\", display_name=\"System\", info=\"System to use for generating text.\", advanced=True\n ),\n BoolInput(\n name=\"tool_model_enabled\",\n display_name=\"Tool Model Enabled\",\n info=\"Whether to enable tool calling in the model.\",\n value=True,\n real_time_refresh=True,\n ),\n MessageTextInput(\n name=\"template\", display_name=\"Template\", info=\"Template to use for generating text.\", advanced=True\n ),\n BoolInput(\n name=\"enable_structured_output\",\n display_name=\"Enable Structured Output\",\n info=\"Whether to enable structured output in the model.\",\n value=False,\n advanced=False,\n real_time_refresh=True,\n ),\n *LCModelComponent.get_base_inputs(),\n ]\n\n outputs = [\n Output(display_name=\"Text\", name=\"text_output\", method=\"text_response\"),\n Output(display_name=\"Language Model\", name=\"model_output\", method=\"build_model\"),\n Output(display_name=\"Data\", name=\"data_output\", method=\"build_data_output\"),\n Output(display_name=\"DataFrame\", name=\"dataframe_output\", method=\"build_dataframe_output\"),\n ]\n\n def build_model(self) -> LanguageModel: # type: ignore[type-var]\n # Mapping mirostat settings to their corresponding values\n mirostat_options = {\"Mirostat\": 1, \"Mirostat 2.0\": 2}\n\n # Default to None for 'Disabled'\n mirostat_value = mirostat_options.get(self.mirostat, None)\n\n # Set mirostat_eta and mirostat_tau to None if mirostat is disabled\n if mirostat_value is 
None:\n mirostat_eta = None\n mirostat_tau = None\n else:\n mirostat_eta = self.mirostat_eta\n mirostat_tau = self.mirostat_tau\n\n transformed_base_url = transform_localhost_url(self.base_url)\n\n # Check if URL contains /v1 suffix (OpenAI-compatible mode)\n if transformed_base_url and transformed_base_url.rstrip(\"/\").endswith(\"/v1\"):\n # Strip /v1 suffix and log warning\n transformed_base_url = transformed_base_url.rstrip(\"/\").removesuffix(\"/v1\")\n logger.warning(\n \"Detected '/v1' suffix in base URL. The Ollama component uses the native Ollama API, \"\n \"not the OpenAI-compatible API. The '/v1' suffix has been automatically removed. \"\n \"If you want to use the OpenAI-compatible API, please use the OpenAI component instead. \"\n \"Learn more at https://docs.ollama.com/openai#openai-compatibility\"\n )\n\n try:\n output_format = self._parse_format_field(self.format) if self.enable_structured_output else None\n except Exception as e:\n msg = f\"Failed to parse the format field: {e}\"\n raise ValueError(msg) from e\n\n # Mapping system settings to their corresponding values\n llm_params = {\n \"base_url\": transformed_base_url,\n \"model\": self.model_name,\n \"mirostat\": mirostat_value,\n \"format\": output_format or None,\n \"metadata\": self.metadata,\n \"tags\": self.tags.split(\",\") if self.tags else None,\n \"mirostat_eta\": mirostat_eta,\n \"mirostat_tau\": mirostat_tau,\n \"num_ctx\": self.num_ctx or None,\n \"num_gpu\": self.num_gpu or None,\n \"num_thread\": self.num_thread or None,\n \"repeat_last_n\": self.repeat_last_n or None,\n \"repeat_penalty\": self.repeat_penalty or None,\n \"temperature\": self.temperature or None,\n \"stop\": self.stop_tokens.split(\",\") if self.stop_tokens else None,\n \"system\": self.system,\n \"tfs_z\": self.tfs_z or None,\n \"timeout\": self.timeout or None,\n \"top_k\": self.top_k or None,\n \"top_p\": self.top_p or None,\n \"verbose\": self.enable_verbose_output or False,\n \"template\": self.template,\n 
}\n headers = self.headers\n if headers is not None:\n llm_params[\"client_kwargs\"] = {\"headers\": headers}\n\n # Remove parameters with None values\n llm_params = {k: v for k, v in llm_params.items() if v is not None}\n\n try:\n output = ChatOllama(**llm_params)\n except Exception as e:\n msg = (\n \"Unable to connect to the Ollama API. \"\n \"Please verify the base URL, ensure the relevant Ollama model is pulled, and try again.\"\n )\n raise ValueError(msg) from e\n\n return output\n\n async def is_valid_ollama_url(self, url: str) -> bool:\n try:\n async with httpx.AsyncClient() as client:\n url = transform_localhost_url(url)\n if not url:\n return False\n # Strip /v1 suffix if present, as Ollama API endpoints are at root level\n url = url.rstrip(\"/\").removesuffix(\"/v1\")\n if not url.endswith(\"/\"):\n url = url + \"/\"\n return (\n await client.get(url=urljoin(url, \"api/tags\"), headers=self.headers)\n ).status_code == HTTP_STATUS_OK\n except httpx.RequestError:\n return False\n\n async def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None):\n if field_name == \"enable_structured_output\": # bind enable_structured_output boolean to format show value\n build_config[\"format\"][\"show\"] = field_value\n\n if field_name == \"mirostat\":\n if field_value == \"Disabled\":\n build_config[\"mirostat_eta\"][\"advanced\"] = True\n build_config[\"mirostat_tau\"][\"advanced\"] = True\n build_config[\"mirostat_eta\"][\"value\"] = None\n build_config[\"mirostat_tau\"][\"value\"] = None\n\n else:\n build_config[\"mirostat_eta\"][\"advanced\"] = False\n build_config[\"mirostat_tau\"][\"advanced\"] = False\n\n if field_value == \"Mirostat 2.0\":\n build_config[\"mirostat_eta\"][\"value\"] = 0.2\n build_config[\"mirostat_tau\"][\"value\"] = 10\n else:\n build_config[\"mirostat_eta\"][\"value\"] = 0.1\n build_config[\"mirostat_tau\"][\"value\"] = 5\n\n if field_name in {\"model_name\", \"base_url\", \"tool_model_enabled\"}:\n # 
Use field_value if base_url is being updated, otherwise use self.base_url\n base_url_to_check = field_value if field_name == \"base_url\" else self.base_url\n # Fallback to self.base_url if field_value is None or empty\n if not base_url_to_check and field_name == \"base_url\":\n base_url_to_check = self.base_url\n logger.warning(f\"Fetching Ollama models from updated URL: {base_url_to_check}\")\n\n if base_url_to_check and await self.is_valid_ollama_url(base_url_to_check):\n tool_model_enabled = build_config[\"tool_model_enabled\"].get(\"value\", False) or self.tool_model_enabled\n build_config[\"model_name\"][\"options\"] = await self.get_models(\n base_url_to_check, tool_model_enabled=tool_model_enabled\n )\n else:\n build_config[\"model_name\"][\"options\"] = []\n if field_name == \"keep_alive_flag\":\n if field_value == \"Keep\":\n build_config[\"keep_alive\"][\"value\"] = \"-1\"\n build_config[\"keep_alive\"][\"advanced\"] = True\n elif field_value == \"Immediately\":\n build_config[\"keep_alive\"][\"value\"] = \"0\"\n build_config[\"keep_alive\"][\"advanced\"] = True\n else:\n build_config[\"keep_alive\"][\"advanced\"] = False\n\n return build_config\n\n async def get_models(self, base_url_value: str, *, tool_model_enabled: bool | None = None) -> list[str]:\n \"\"\"Fetches a list of models from the Ollama API suitable for text generation.\n\n Args:\n base_url_value (str): The base URL of the Ollama API.\n tool_model_enabled (bool | None, optional): If True, filters the models further to include\n only those that support tool calling. Defaults to None.\n\n Returns:\n list[str]: A list of model names suitable for text generation. 
Models are included if:\n - They have the \"completion\" capability, OR\n - The capabilities field is not returned (backwards compatibility with older Ollama versions)\n If `tool_model_enabled` is True, only models with verified \"tools\" capability are included\n (models without capabilities info are excluded in this case).\n\n Raises:\n ValueError: If there is an issue with the API request or response, or if the model\n names cannot be retrieved.\n \"\"\"\n try:\n # Strip /v1 suffix if present, as Ollama API endpoints are at root level\n base_url = base_url_value.rstrip(\"/\").removesuffix(\"/v1\")\n if not base_url.endswith(\"/\"):\n base_url = base_url + \"/\"\n base_url = transform_localhost_url(base_url)\n\n # Ollama REST API to return models\n tags_url = urljoin(base_url, \"api/tags\")\n\n # Ollama REST API to return model capabilities\n show_url = urljoin(base_url, \"api/show\")\n\n async with httpx.AsyncClient() as client:\n headers = self.headers\n # Fetch available models\n tags_response = await client.get(url=tags_url, headers=headers)\n tags_response.raise_for_status()\n models = tags_response.json()\n if asyncio.iscoroutine(models):\n models = await models\n await logger.adebug(f\"Available models: {models}\")\n\n # Filter models that are NOT embedding models\n model_ids = []\n for model in models[self.JSON_MODELS_KEY]:\n model_name = model[self.JSON_NAME_KEY]\n await logger.adebug(f\"Checking model: {model_name}\")\n\n payload = {\"model\": model_name}\n show_response = await client.post(url=show_url, json=payload, headers=headers)\n show_response.raise_for_status()\n json_data = show_response.json()\n if asyncio.iscoroutine(json_data):\n json_data = await json_data\n\n capabilities = json_data.get(self.JSON_CAPABILITIES_KEY)\n await logger.adebug(f\"Model: {model_name}, Capabilities: {capabilities}\")\n\n # If capabilities not provided, assume it's a completion model (backwards compatibility\n # with older Ollama versions that don't return 
capabilities from /api/show)\n if capabilities is None:\n if not tool_model_enabled:\n model_ids.append(model_name)\n # If tool_model_enabled is True but no capabilities info, skip the model\n # since we can't verify tool support\n elif self.DESIRED_CAPABILITY in capabilities and (\n not tool_model_enabled or self.TOOL_CALLING_CAPABILITY in capabilities\n ):\n model_ids.append(model_name)\n\n except (httpx.RequestError, ValueError) as e:\n msg = \"Could not get model names from Ollama.\"\n raise ValueError(msg) from e\n\n return model_ids\n\n def _parse_format_field(self, format_value: Any) -> Any:\n \"\"\"Parse the format field to handle both string and dict inputs.\n\n The format field can be:\n - A simple string like \"json\" (backward compatibility)\n - A JSON string from NestedDictInput that needs parsing\n - A dict/JSON schema (already parsed)\n - None or empty\n\n Args:\n format_value: The raw format value from the input field\n\n Returns:\n Parsed format value as string, dict, or None\n \"\"\"\n if not format_value:\n return None\n\n schema = format_value\n if isinstance(format_value, list):\n schema = build_model_from_schema(format_value).model_json_schema()\n if schema == self.default_table_row_schema:\n return None # the rows are generic placeholder rows\n elif isinstance(format_value, str): # parse as json if string\n with suppress(json.JSONDecodeError): # e.g., literal \"json\" is valid for format field\n schema = json.loads(format_value)\n\n return schema or None\n\n async def _parse_json_response(self) -> Any:\n \"\"\"Parse the JSON response from the model.\n\n This method gets the text response and attempts to parse it as JSON.\n Works with models that have format='json' or a JSON schema set.\n\n Returns:\n Parsed JSON (dict, list, or primitive type)\n\n Raises:\n ValueError: If the response is not valid JSON\n \"\"\"\n message = await self.text_response()\n text = message.text if hasattr(message, \"text\") else str(message)\n\n if not text:\n msg = 
\"No response from model\"\n raise ValueError(msg)\n\n try:\n return json.loads(text)\n except json.JSONDecodeError as e:\n msg = f\"Invalid JSON response. Ensure model supports JSON output. Error: {e}\"\n raise ValueError(msg) from e\n\n async def build_data_output(self) -> Data:\n \"\"\"Build a Data output from the model's JSON response.\n\n Returns:\n Data: A Data object containing the parsed JSON response\n \"\"\"\n parsed = await self._parse_json_response()\n\n # If the response is already a dict, wrap it in Data\n if isinstance(parsed, dict):\n return Data(data=parsed)\n\n # If it's a list, wrap in a results container\n if isinstance(parsed, list):\n if len(parsed) == 1:\n return Data(data=parsed[0])\n return Data(data={\"results\": parsed})\n\n # For primitive types, wrap in a value container\n return Data(data={\"value\": parsed})\n\n async def build_dataframe_output(self) -> DataFrame:\n \"\"\"Build a DataFrame output from the model's JSON response.\n\n Returns:\n DataFrame: A DataFrame containing the parsed JSON response\n\n Raises:\n ValueError: If the response cannot be converted to a DataFrame\n \"\"\"\n parsed = await self._parse_json_response()\n\n # If it's a list of dicts, convert directly to DataFrame\n if isinstance(parsed, list):\n if not parsed:\n return DataFrame()\n # Ensure all items are dicts for proper DataFrame conversion\n if all(isinstance(item, dict) for item in parsed):\n return DataFrame(parsed)\n msg = \"List items must be dictionaries to convert to DataFrame\"\n raise ValueError(msg)\n\n # If it's a single dict, wrap in a list to create a single-row DataFrame\n if isinstance(parsed, dict):\n return DataFrame([parsed])\n\n # For primitive types, create a single-column DataFrame\n return DataFrame([{\"value\": parsed}])\n\n @property\n def headers(self) -> dict[str, str] | None:\n \"\"\"Get the headers for the Ollama API.\"\"\"\n if self.api_key and self.api_key.strip():\n return {\"Authorization\": f\"Bearer {self.api_key}\"}\n 
return None\n"
},
"enable_structured_output": {
"_input_type": "BoolInput",
"advanced": false,
"display_name": "Enable Structured Output",
"dynamic": false,
"info": "Whether to enable structured output in the model.",
"list": false,
"list_add_label": "Add More",
"name": "enable_structured_output",
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": true
},
"enable_verbose_output": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Ollama Verbose Output",
"dynamic": false,
"info": "Whether to print out response text.",
"list": false,
"list_add_label": "Add More",
"name": "enable_verbose_output",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
},
"format": {
"_input_type": "TableInput",
"advanced": false,
"display_name": "Format",
"dynamic": false,
"info": "Specify the format of the output.",
"is_list": true,
"list_add_label": "Add More",
"name": "format",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"table_icon": "Table",
"table_schema": [
{
"default": "field",
"description": "Specify the name of the output field.",
"display_name": "Name",
"edit_mode": "inline",
"formatter": "text",
"name": "name",
"type": "str"
},
{
"default": "description of field",
"description": "Describe the purpose of the output field.",
"display_name": "Description",
"edit_mode": "popover",
"formatter": "text",
"name": "description",
"type": "str"
},
{
"default": "str",
"description": "Indicate the data type of the output field (e.g., str, int, float, bool, dict).",
"display_name": "Type",
"edit_mode": "inline",
"formatter": "text",
"name": "type",
"options": ["str", "int", "float", "bool", "dict"],
"type": "str"
},
{
"default": "False",
"description": "Set to True if this output field should be a list of the specified type.",
"display_name": "As List",
"edit_mode": "inline",
"formatter": "text",
"name": "multiple",
"options": ["True", "False"],
"type": "boolean"
}
],
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"trigger_icon": "Table",
"trigger_text": "Open table",
"type": "table",
"value": [
{
"description": "description of field",
"multiple": "False",
"name": "field",
"type": "str"
}
]
},
"input_value": {
"_input_type": "MessageInput",
"advanced": false,
"display_name": "Input",
"dynamic": false,
"info": "",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "input_value",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"is_refresh": false,
"metadata": {
"_input_type": "DictInput",
"advanced": true,
"display_name": "Metadata",
"dynamic": false,
"info": "Metadata to add to the run trace.",
"list": false,
"list_add_label": "Add More",
"name": "metadata",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"track_in_telemetry": false,
"type": "dict",
"value": {}
},
"mirostat": {
"_input_type": "DropdownInput",
"advanced": true,
"combobox": false,
"dialog_inputs": {},
"display_name": "Mirostat",
"dynamic": false,
"external_options": {},
"info": "Enable/disable Mirostat sampling for controlling perplexity.",
"name": "mirostat",
"options": ["Disabled", "Mirostat", "Mirostat 2.0"],
"options_metadata": [],
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"toggle": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "str",
"value": "Disabled"
},
"mirostat_eta": {
"_input_type": "FloatInput",
"advanced": true,
"display_name": "Mirostat Eta",
"dynamic": false,
"info": "Learning rate for Mirostat algorithm. (Default: 0.1)",
"list": false,
"list_add_label": "Add More",
"name": "mirostat_eta",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "float",
"value": ""
},
"mirostat_tau": {
"_input_type": "FloatInput",
"advanced": true,
"display_name": "Mirostat Tau",
"dynamic": false,
"info": "Controls the balance between coherence and diversity of the output. (Default: 5.0)",
"list": false,
"list_add_label": "Add More",
"name": "mirostat_tau",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "float",
"value": ""
},
"model_name": {
"_input_type": "DropdownInput",
"advanced": false,
"combobox": false,
"dialog_inputs": {},
"display_name": "Model Name",
"dynamic": false,
"external_options": {},
"info": "Refer to https://ollama.com/library for more models.",
"name": "model_name",
"options": ["scb10x/typhoon2.1-gemma3-4b:latest", "qwen2.5:7b-instruct-q4_K_M"],
"options_metadata": [],
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"refresh_button": true,
"required": true,
"show": true,
"title_case": false,
"toggle": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "str",
"value": "scb10x/typhoon2.1-gemma3-4b:latest"
},
"num_ctx": {
"_input_type": "IntInput",
"advanced": true,
"display_name": "Context Window Size",
"dynamic": false,
"info": "Size of the context window for generating tokens. (Default: 2048)",
"list": false,
"list_add_label": "Add More",
"name": "num_ctx",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "int",
"value": 0
},
"num_gpu": {
"_input_type": "IntInput",
"advanced": true,
"display_name": "Number of GPUs",
"dynamic": false,
"info": "Number of GPUs to use for computation. (Default: 1 on macOS, 0 to disable)",
"list": false,
"list_add_label": "Add More",
"name": "num_gpu",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "int",
"value": 0
},
"num_thread": {
"_input_type": "IntInput",
"advanced": true,
"display_name": "Number of Threads",
"dynamic": false,
"info": "Number of threads to use during computation. (Default: detected for optimal performance)",
"list": false,
"list_add_label": "Add More",
"name": "num_thread",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "int",
"value": 0
},
"repeat_last_n": {
"_input_type": "IntInput",
"advanced": true,
"display_name": "Repeat Last N",
"dynamic": false,
"info": "How far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)",
"list": false,
"list_add_label": "Add More",
"name": "repeat_last_n",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "int",
"value": 0
},
"repeat_penalty": {
"_input_type": "FloatInput",
"advanced": true,
"display_name": "Repeat Penalty",
"dynamic": false,
"info": "Penalty for repetitions in generated text. (Default: 1.1)",
"list": false,
"list_add_label": "Add More",
"name": "repeat_penalty",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "float",
"value": ""
},
"stop_tokens": {
"_input_type": "MessageTextInput",
"advanced": true,
"display_name": "Stop Tokens",
"dynamic": false,
"info": "Comma-separated list of tokens to signal the model to stop generating text.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "stop_tokens",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"stream": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Stream",
"dynamic": false,
"info": "Stream the response from the model. Streaming works only in Chat.",
"list": false,
"list_add_label": "Add More",
"name": "stream",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
},
"system": {
"_input_type": "MessageTextInput",
"advanced": true,
"display_name": "System",
"dynamic": false,
"info": "System to use for generating text.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "system",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"system_message": {
"value": "คุณเป็นผู้ช่วย AI สำหรับระบบจัดการเอกสารก่อสร้าง Laem Chabang Port Phase 3 (LCBP3)\nหน้าที่ของคุณคือดึงข้อมูล Metadata จากเอกสาร แล้วตอบกลับเป็น JSON ที่ valid เท่านั้น\nห้ามเพิ่มข้อความอื่นนอกจาก JSON\nเอกสารอาจเป็นภาษาไทย อังกฤษ หรือผสมกัน\n\nYou are a document metadata extraction assistant for a construction document management system (LCBP3).\nExtract the following fields and return ONLY a valid JSON object.\n\n{\n \"source_file\": \"<string: extract from text if available, else empty string>\",\n \"is_valid\": <boolean: true if it looks like a document, false if empty or garbage>,\n \"confidence\": <float 0.0-1.0: how confident are you in this extraction>,\n \"metadata\": {\n \"correspondence_number\": \"<string or null>\",\n \"title\": \"<string, empty if not found>\",\n \"document_date\": \"<YYYY-MM-DD or null>\",\n \"sender_org\": \"<string or null>\",\n \"receiver_org\": \"<string or null>\",\n \"project_code\": \"LCBP3\"\n }\n}"
},
"tags": {
"_input_type": "MessageTextInput",
"advanced": true,
"display_name": "Tags",
"dynamic": false,
"info": "Comma-separated list of tags to add to the run trace.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "tags",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"temperature": {
"_input_type": "SliderInput",
"advanced": true,
"display_name": "Temperature",
"dynamic": false,
"info": "",
"max_label": "",
"max_label_icon": "",
"min_label": "",
"min_label_icon": "",
"name": "temperature",
"override_skip": false,
"placeholder": "",
"range_spec": {
"max": 1,
"min": 0,
"step": 0.01,
"step_type": "float"
},
"required": false,
"show": true,
"slider_buttons": false,
"slider_buttons_options": [],
"slider_input": false,
"title_case": false,
"tool_mode": false,
"track_in_telemetry": false,
"type": "slider",
"value": 0.1
},
"template": {
"_input_type": "MessageTextInput",
"advanced": true,
"display_name": "Template",
"dynamic": false,
"info": "Template to use for generating text.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "template",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"tfs_z": {
"_input_type": "FloatInput",
"advanced": true,
"display_name": "TFS Z",
"dynamic": false,
"info": "Tail free sampling value. (Default: 1)",
"list": false,
"list_add_label": "Add More",
"name": "tfs_z",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "float",
"value": ""
},
"timeout": {
"_input_type": "IntInput",
"advanced": true,
"display_name": "Timeout",
"dynamic": false,
"info": "Timeout for the request stream.",
"list": false,
"list_add_label": "Add More",
"name": "timeout",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "int",
"value": 0
},
"tool_model_enabled": {
"_input_type": "BoolInput",
"advanced": false,
"display_name": "Tool Model Enabled",
"dynamic": false,
"info": "Whether to enable tool calling in the model.",
"list": false,
"list_add_label": "Add More",
"name": "tool_model_enabled",
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": true
},
"top_k": {
"_input_type": "IntInput",
"advanced": true,
"display_name": "Top K",
"dynamic": false,
"info": "Limits token selection to top K. (Default: 40)",
"list": false,
"list_add_label": "Add More",
"name": "top_k",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "int",
"value": 0
},
"top_p": {
"_input_type": "FloatInput",
"advanced": true,
"display_name": "Top P",
"dynamic": false,
"info": "Works together with top-k. (Default: 0.9)",
"list": false,
"list_add_label": "Add More",
"name": "top_p",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "float",
"value": ""
}
},
"tool_mode": false
},
"selected_output": "text_output",
"showNode": true,
"type": "OllamaModel"
},
"dragging": false,
"id": "OllamaModel-xJSnu",
"measured": {
"height": 776,
"width": 400
},
"position": {
"x": 1073.8111821659295,
"y": -546.2113692797573
},
"selected": false,
"type": "genericNode"
},
{
"data": {
"id": "LoopComponent-5vFOr",
"node": {
"base_classes": ["Data", "DataFrame"],
"beta": false,
"conditional_paths": [],
"custom_fields": {},
"description": "Iterates through Data or Message objects, processing items individually and aggregating results from loop inputs.",
"display_name": "Loop",
"documentation": "https://docs.langflow.org/loop",
"edited": false,
"field_order": ["data"],
"frozen": false,
"icon": "infinity",
"legacy": false,
"metadata": {
"code_hash": "e516ea99611c",
"dependencies": {
"dependencies": [
{
"name": "lfx",
"version": null
}
],
"total_dependencies": 1
},
"module": "lfx.components.flow_controls.loop.LoopComponent"
},
"minimized": false,
"output_types": [],
"outputs": [
{
"allows_loop": true,
"cache": true,
"display_name": "Item",
"group_outputs": true,
"loop_types": ["Message"],
"method": "item_output",
"name": "item",
"selected": "Data",
"tool_mode": true,
"types": ["Data"],
"value": "__UNDEFINED__"
},
{
"allows_loop": false,
"cache": true,
"display_name": "Done",
"group_outputs": true,
"method": "done_output",
"name": "done",
"selected": "DataFrame",
"tool_mode": true,
"types": ["DataFrame"],
"value": "__UNDEFINED__"
}
],
"pinned": false,
"template": {
"_type": "Component",
"code": {
"advanced": true,
"dynamic": true,
"fileTypes": [],
"file_path": "",
"info": "",
"list": false,
"load_from_db": false,
"multiline": true,
"name": "code",
"password": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"type": "code",
"value": "from lfx.base.flow_controls.loop_utils import (\n execute_loop_body,\n extract_loop_output,\n get_loop_body_start_edge,\n get_loop_body_start_vertex,\n get_loop_body_vertices,\n validate_data_input,\n)\nfrom lfx.components.processing.converter import convert_to_data\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.inputs.inputs import HandleInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.template.field.base import Output\n\n\nclass LoopComponent(Component):\n display_name = \"Loop\"\n description = (\n \"Iterates through Data or Message objects, processing items individually \"\n \"and aggregating results from loop inputs.\"\n )\n documentation: str = \"https://docs.langflow.org/loop\"\n icon = \"infinity\"\n\n inputs = [\n HandleInput(\n name=\"data\",\n display_name=\"Inputs\",\n info=\"The initial DataFrame to iterate over.\",\n input_types=[\"DataFrame\"],\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Item\",\n name=\"item\",\n method=\"item_output\",\n allows_loop=True,\n loop_types=[\"Message\"],\n group_outputs=True,\n ),\n Output(display_name=\"Done\", name=\"done\", method=\"done_output\", group_outputs=True),\n ]\n\n def initialize_data(self) -> None:\n \"\"\"Initialize the data list, context index, and aggregated list.\"\"\"\n if self.ctx.get(f\"{self._id}_initialized\", False):\n return\n\n # Ensure data is a list of Data objects\n data_list = self._validate_data(self.data)\n\n # Store the initial data and context variables\n self.update_ctx(\n {\n f\"{self._id}_data\": data_list,\n f\"{self._id}_index\": 0,\n f\"{self._id}_aggregated\": [],\n f\"{self._id}_initialized\": True,\n }\n )\n\n def _convert_message_to_data(self, message: Message) -> Data:\n \"\"\"Convert a Message object to a Data object using Type Convert logic.\"\"\"\n return convert_to_data(message, auto_parse=False)\n\n def _validate_data(self, data):\n 
\"\"\"Validate and return a list of Data objects.\"\"\"\n return validate_data_input(data)\n\n def get_loop_body_vertices(self) -> set[str]:\n \"\"\"Identify vertices in this loop's body via graph traversal.\n\n Traverses from the loop's \"item\" output to the vertex that feeds back\n to the loop's \"item\" input, collecting all vertices in between.\n This naturally handles nested loops by stopping at this loop's feedback edge.\n\n Returns:\n Set of vertex IDs that form this loop's body\n \"\"\"\n # Check if we have a proper graph context\n if not hasattr(self, \"_vertex\") or self._vertex is None:\n return set()\n\n return get_loop_body_vertices(\n vertex=self._vertex,\n graph=self.graph,\n get_incoming_edge_by_target_param_fn=self.get_incoming_edge_by_target_param,\n )\n\n def _get_loop_body_start_vertex(self) -> str | None:\n \"\"\"Get the first vertex in the loop body (connected to loop's item output).\n\n Returns:\n The vertex ID of the first vertex in the loop body, or None if not found\n \"\"\"\n # Check if we have a proper graph context\n if not hasattr(self, \"_vertex\") or self._vertex is None:\n return None\n\n return get_loop_body_start_vertex(vertex=self._vertex)\n\n def _extract_loop_output(self, results: list) -> Data:\n \"\"\"Extract the output from subgraph execution results.\n\n Args:\n results: List of VertexBuildResult objects from subgraph execution\n\n Returns:\n Data object containing the loop iteration output\n \"\"\"\n # Get the vertex ID that feeds back to the item input (end of loop body)\n end_vertex_id = self.get_incoming_edge_by_target_param(\"item\")\n return extract_loop_output(results=results, end_vertex_id=end_vertex_id)\n\n async def execute_loop_body(self, data_list: list[Data], event_manager=None) -> list[Data]:\n \"\"\"Execute loop body for each data item.\n\n Creates an isolated subgraph for the loop body and executes it\n for each item in the data list, collecting results.\n\n Args:\n data_list: List of Data objects to 
iterate over\n event_manager: Optional event manager to pass to subgraph execution for UI events\n\n Returns:\n List of Data objects containing results from each iteration\n \"\"\"\n # Get the loop body configuration once\n loop_body_vertex_ids = self.get_loop_body_vertices()\n start_vertex_id = self._get_loop_body_start_vertex()\n start_edge = get_loop_body_start_edge(self._vertex)\n end_vertex_id = self.get_incoming_edge_by_target_param(\"item\")\n\n return await execute_loop_body(\n graph=self.graph,\n data_list=data_list,\n loop_body_vertex_ids=loop_body_vertex_ids,\n start_vertex_id=start_vertex_id,\n start_edge=start_edge,\n end_vertex_id=end_vertex_id,\n event_manager=event_manager,\n )\n\n def item_output(self) -> Data:\n \"\"\"Output is no longer used - loop executes internally now.\n\n This method is kept for backward compatibility but does nothing.\n The actual loop execution happens in done_output().\n \"\"\"\n self.stop(\"item\")\n return Data(text=\"\")\n\n async def done_output(self) -> DataFrame:\n \"\"\"Execute the loop body for all items and return aggregated results.\n\n This is now the main execution point for the loop. It:\n 1. Gets the data list to iterate over\n 2. Executes the loop body as an isolated subgraph for each item\n 3. Returns the aggregated results\n\n Args:\n event_manager: Optional event manager for UI event emission\n \"\"\"\n self.initialize_data()\n\n # Get data list\n data_list = self.ctx.get(f\"{self._id}_data\", [])\n\n if not data_list:\n return DataFrame([])\n\n # Execute loop body for all items\n try:\n aggregated_results = await self.execute_loop_body(data_list, event_manager=self._event_manager)\n return DataFrame(aggregated_results)\n except Exception as e:\n # Log error and return empty DataFrame\n from lfx.log.logger import logger\n\n await logger.aerror(f\"Error executing loop body: {e}\")\n raise\n"
},
"data": {
"_input_type": "HandleInput",
"advanced": false,
"display_name": "Inputs",
"dynamic": false,
"info": "The initial DataFrame to iterate over.",
"input_types": ["DataFrame"],
"list": false,
"list_add_label": "Add More",
"name": "data",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "other",
"value": ""
}
},
"tool_mode": false
},
"showNode": true,
"type": "LoopComponent"
},
"dragging": false,
"id": "LoopComponent-5vFOr",
"measured": {
"height": 305,
"width": 400
},
"position": {
"x": -381.0556267521341,
"y": -161.3531393697653
},
"selected": false,
"type": "genericNode"
},
{
"data": {
"id": "Prompt Template-dKwcS",
"node": {
"base_classes": ["Message"],
"beta": false,
"conditional_paths": [],
"custom_fields": {
"template": ["extracted_text"]
},
"description": "Create a prompt template with dynamic variables.",
"display_name": "Prompt Template",
"documentation": "https://docs.langflow.org/components-prompts",
"edited": false,
"error": null,
"field_order": ["template", "use_double_brackets", "tool_placeholder"],
"frozen": false,
"full_path": null,
"icon": "prompts",
"is_composition": null,
"is_input": null,
"is_output": null,
"legacy": false,
"metadata": {
"code_hash": "5b3e6730923e",
"dependencies": {
"dependencies": [
{
"name": "lfx",
"version": null
}
],
"total_dependencies": 1
},
"module": "lfx.components.models_and_agents.prompt.PromptComponent"
},
"minimized": false,
"name": "",
"output_types": [],
"outputs": [
{
"allows_loop": false,
"cache": true,
"display_name": "Prompt",
"group_outputs": false,
"hidden": null,
"loop_types": null,
"method": "build_prompt",
"name": "prompt",
"options": null,
"required_inputs": null,
"selected": "Message",
"tool_mode": true,
"types": ["Message"],
"value": "__UNDEFINED__"
}
],
"pinned": false,
"priority": null,
"replacement": null,
"template": {
"_type": "Component",
"code": {
"advanced": true,
"dynamic": true,
"fileTypes": [],
"file_path": "",
"info": "",
"list": false,
"load_from_db": false,
"multiline": true,
"name": "code",
"password": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"type": "code",
"value": "from typing import Any\n\nfrom lfx.base.prompts.api_utils import process_prompt_template\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.inputs.input_mixin import FieldTypes\nfrom lfx.inputs.inputs import DefaultPromptField\nfrom lfx.io import BoolInput, MessageTextInput, Output, PromptInput\nfrom lfx.log.logger import logger\nfrom lfx.schema.dotdict import dotdict\nfrom lfx.schema.message import Message\nfrom lfx.template.utils import update_template_values\nfrom lfx.utils.mustache_security import validate_mustache_template\n\n\nclass PromptComponent(Component):\n display_name: str = \"Prompt Template\"\n description: str = \"Create a prompt template with dynamic variables.\"\n documentation: str = \"https://docs.langflow.org/components-prompts\"\n icon = \"prompts\"\n trace_type = \"prompt\"\n name = \"Prompt Template\"\n\n inputs = [\n PromptInput(name=\"template\", display_name=\"Template\"),\n BoolInput(\n name=\"use_double_brackets\",\n display_name=\"Use Double Brackets\",\n value=False,\n advanced=True,\n info=\"Use {{variable}} syntax instead of {variable}.\",\n real_time_refresh=True,\n ),\n MessageTextInput(\n name=\"tool_placeholder\",\n display_name=\"Tool Placeholder\",\n tool_mode=True,\n advanced=True,\n info=\"A placeholder input for tool mode.\",\n ),\n ]\n\n outputs = [\n Output(display_name=\"Prompt\", name=\"prompt\", method=\"build_prompt\"),\n ]\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n \"\"\"Update the template field type based on the selected mode.\"\"\"\n if field_name == \"use_double_brackets\":\n # Change the template field type based on mode\n is_mustache = field_value is True\n if is_mustache:\n build_config[\"template\"][\"type\"] = FieldTypes.MUSTACHE_PROMPT.value\n else:\n build_config[\"template\"][\"type\"] = FieldTypes.PROMPT.value\n\n # Re-process the template to update variables when mode changes\n template_value = 
build_config.get(\"template\", {}).get(\"value\", \"\")\n if template_value:\n # Ensure custom_fields is properly initialized\n if \"custom_fields\" not in build_config:\n build_config[\"custom_fields\"] = {}\n\n # Clean up fields from the OLD mode before processing with NEW mode\n # This ensures we don't keep fields with wrong syntax even if validation fails\n old_custom_fields = build_config[\"custom_fields\"].get(\"template\", [])\n for old_field in list(old_custom_fields):\n # Remove the field from custom_fields and template\n if old_field in old_custom_fields:\n old_custom_fields.remove(old_field)\n build_config.pop(old_field, None)\n\n # Try to process template with new mode to add new variables\n # If validation fails, at least we cleaned up old fields\n try:\n # Validate mustache templates for security\n if is_mustache:\n validate_mustache_template(template_value)\n\n # Re-process template with new mode to add new variables\n _ = process_prompt_template(\n template=template_value,\n name=\"template\",\n custom_fields=build_config[\"custom_fields\"],\n frontend_node_template=build_config,\n is_mustache=is_mustache,\n )\n except ValueError as e:\n # If validation fails, we still updated the mode and cleaned old fields\n # User will see error when they try to save\n logger.debug(f\"Template validation failed during mode switch: {e}\")\n return build_config\n\n async def build_prompt(self) -> Message:\n use_double_brackets = self.use_double_brackets if hasattr(self, \"use_double_brackets\") else False\n template_format = \"mustache\" if use_double_brackets else \"f-string\"\n prompt = await Message.from_template_and_variables(template_format=template_format, **self._attributes)\n self.status = prompt.text\n return prompt\n\n def _update_template(self, frontend_node: dict):\n prompt_template = frontend_node[\"template\"][\"template\"][\"value\"]\n use_double_brackets = frontend_node[\"template\"].get(\"use_double_brackets\", {}).get(\"value\", False)\n 
is_mustache = use_double_brackets is True\n\n try:\n # Validate mustache templates for security\n if is_mustache:\n validate_mustache_template(prompt_template)\n\n custom_fields = frontend_node[\"custom_fields\"]\n frontend_node_template = frontend_node[\"template\"]\n _ = process_prompt_template(\n template=prompt_template,\n name=\"template\",\n custom_fields=custom_fields,\n frontend_node_template=frontend_node_template,\n is_mustache=is_mustache,\n )\n except ValueError as e:\n # If validation fails, don't add variables but allow component to be created\n logger.debug(f\"Template validation failed in _update_template: {e}\")\n return frontend_node\n\n async def update_frontend_node(self, new_frontend_node: dict, current_frontend_node: dict):\n \"\"\"This function is called after the code validation is done.\"\"\"\n frontend_node = await super().update_frontend_node(new_frontend_node, current_frontend_node)\n template = frontend_node[\"template\"][\"template\"][\"value\"]\n use_double_brackets = frontend_node[\"template\"].get(\"use_double_brackets\", {}).get(\"value\", False)\n is_mustache = use_double_brackets is True\n\n try:\n # Validate mustache templates for security\n if is_mustache:\n validate_mustache_template(template)\n\n # Kept it duplicated for backwards compatibility\n _ = process_prompt_template(\n template=template,\n name=\"template\",\n custom_fields=frontend_node[\"custom_fields\"],\n frontend_node_template=frontend_node[\"template\"],\n is_mustache=is_mustache,\n )\n except ValueError as e:\n # If validation fails, don't add variables but allow component to be updated\n logger.debug(f\"Template validation failed in update_frontend_node: {e}\")\n # Now that template is updated, we need to grab any values that were set in the current_frontend_node\n # and update the frontend_node with those values\n update_template_values(new_template=frontend_node, previous_template=current_frontend_node[\"template\"])\n return frontend_node\n\n def 
_get_fallback_input(self, **kwargs):\n return DefaultPromptField(**kwargs)\n"
},
"extracted_text": {
"advanced": false,
"display_name": "extracted_text",
"dynamic": false,
"field_type": "str",
"fileTypes": [],
"file_path": "",
"info": "",
"input_types": ["Message"],
"list": false,
"load_from_db": false,
"multiline": true,
"name": "extracted_text",
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"type": "str",
"value": ""
},
"template": {
"_input_type": "PromptInput",
"advanced": false,
"display_name": "Template",
"dynamic": false,
"info": "",
"list": false,
"list_add_label": "Add More",
"name": "template",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"track_in_telemetry": false,
"type": "prompt",
"value": "คุณเป็นผู้ช่วย AI สำหรับระบบจัดการเอกสารก่อสร้าง LCBP3\nดึง Metadata จากเอกสาร แล้วตอบกลับเป็น JSON เท่านั้น ห้ามมีข้อความอื่น\nเอกสารอาจเป็นภาษาไทย อังกฤษ หรือผสมกัน\n\nReturn ONLY this JSON structure:\n{{\n \"source_file\": \"<ชื่อไฟล์ที่รับมา>\",\n \"is_valid\": true,\n \"confidence\": 0.0,\n \"extracted_text\": \"<ข้อความเต็ม max 2000 chars>\",\n \"metadata\": {{\n \"correspondence_number\": \"<เลขที่เอกสาร หรือ null>\",\n \"title\": \"<หัวข้อเอกสาร>\",\n \"document_date\": \"<YYYY-MM-DD หรือ null>\",\n \"sender_org\": \"<ชื่อย่อองค์กรผู้ส่ง หรือ null>\",\n \"receiver_org\": \"<ชื่อย่อองค์กรผู้รับ หรือ null>\",\n \"project_code\": \"<รหัสโครงการ เช่น LCBP3 หรือ null>\",\n \"suggested_category\": \"<Correspondence | RFA | ContractDrawing | ShopDrawing>\",\n \"detected_issues\": []\n }},\n \"chunks\": [\n {{\"chunk_index\": 0, \"page\": 1, \"text\": \"<ข้อความส่วนแรก max 500 chars>\"}}\n ]\n}}\n\nDocument text to analyze:\n{extracted_text}\n"
},
"tool_placeholder": {
"_input_type": "MessageTextInput",
"advanced": true,
"display_name": "Tool Placeholder",
"dynamic": false,
"info": "A placeholder input for tool mode.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "tool_placeholder",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": true,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"use_double_brackets": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Use Double Brackets",
"dynamic": false,
"info": "Use {{variable}} syntax instead of {variable}.",
"list": false,
"list_add_label": "Add More",
"name": "use_double_brackets",
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
}
},
"tool_mode": false
},
"showNode": true,
"type": "Prompt Template"
},
"dragging": false,
"id": "Prompt Template-dKwcS",
"measured": {
"height": 429,
"width": 400
},
"position": {
"x": 559.0376004371929,
"y": -533.3166409357893
},
"selected": false,
"type": "genericNode"
},
{
"data": {
"id": "ParserComponent-Xspgr",
"node": {
"base_classes": ["Message"],
"beta": false,
"conditional_paths": [],
"custom_fields": {},
"description": "Extracts text using a template.",
"display_name": "Parser",
"documentation": "https://docs.langflow.org/parser",
"edited": false,
"field_order": ["input_data", "mode", "pattern", "sep"],
"frozen": false,
"icon": "braces",
"last_updated": "2026-03-13T08:19:27.565Z",
"legacy": false,
"metadata": {
"code_hash": "3cda25c3f7b5",
"dependencies": {
"dependencies": [
{
"name": "lfx",
"version": null
}
],
"total_dependencies": 1
},
"module": "lfx.components.processing.parser.ParserComponent"
},
"minimized": false,
"output_types": [],
"outputs": [
{
"allows_loop": false,
"cache": true,
"display_name": "Parsed Text",
"group_outputs": false,
"loop_types": null,
"method": "parse_combined_text",
"name": "parsed_text",
"options": null,
"required_inputs": null,
"selected": "Message",
"tool_mode": true,
"types": ["Message"],
"value": "__UNDEFINED__"
}
],
"pinned": false,
"template": {
"_frontend_node_flow_id": {
"value": "4a538191-04b4-41cf-98d7-8e62aaccf3a8"
},
"_frontend_node_folder_id": {
"value": "60f723dc-b1f8-4e25-9c31-0a4ee07abd5c"
},
"_type": "Component",
"clean_data": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Clean Data",
"dynamic": false,
"info": "Enable to clean the data by removing empty rows and lines in each cell of the DataFrame/Data object.",
"list": false,
"list_add_label": "Add More",
"name": "clean_data",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": true
},
"code": {
"advanced": true,
"dynamic": true,
"fileTypes": [],
"file_path": "",
"info": "",
"list": false,
"load_from_db": false,
"multiline": true,
"name": "code",
"password": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"type": "code",
"value": "from lfx.custom.custom_component.component import Component\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, HandleInput, MessageTextInput, MultilineInput, TabInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.template.field.base import Output\n\n\nclass ParserComponent(Component):\n display_name = \"Parser\"\n description = \"Extracts text using a template.\"\n documentation: str = \"https://docs.langflow.org/parser\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"input_data\",\n display_name=\"Data or DataFrame\",\n input_types=[\"DataFrame\", \"Data\"],\n info=\"Accepts either a DataFrame or a Data object.\",\n required=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Parser\", \"Stringify\"],\n value=\"Parser\",\n info=\"Convert into raw string instead of using a template.\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"pattern\",\n display_name=\"Template\",\n info=(\n \"Use variables within curly brackets to extract column values for DataFrames \"\n \"or key values for Data.\"\n \"For example: `Name: {Name}, Age: {Age}, Country: {Country}`\"\n ),\n value=\"Text: {text}\", # Example default\n dynamic=True,\n show=True,\n required=True,\n ),\n MessageTextInput(\n name=\"sep\",\n display_name=\"Separator\",\n advanced=True,\n value=\"\\n\",\n info=\"String used to separate rows/items.\",\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Parsed Text\",\n name=\"parsed_text\",\n info=\"Formatted text output.\",\n method=\"parse_combined_text\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Dynamically hide/show `template` and enforce requirement based on `stringify`.\"\"\"\n if field_name == \"mode\":\n build_config[\"pattern\"][\"show\"] = self.mode == \"Parser\"\n build_config[\"pattern\"][\"required\"] = self.mode == \"Parser\"\n 
if field_value:\n clean_data = BoolInput(\n name=\"clean_data\",\n display_name=\"Clean Data\",\n info=(\n \"Enable to clean the data by removing empty rows and lines \"\n \"in each cell of the DataFrame/ Data object.\"\n ),\n value=True,\n advanced=True,\n required=False,\n )\n build_config[\"clean_data\"] = clean_data.to_dict()\n else:\n build_config.pop(\"clean_data\", None)\n\n return build_config\n\n def _clean_args(self):\n \"\"\"Prepare arguments based on input type.\"\"\"\n input_data = self.input_data\n\n match input_data:\n case list() if all(isinstance(item, Data) for item in input_data):\n msg = \"List of Data objects is not supported.\"\n raise ValueError(msg)\n case DataFrame():\n return input_data, None\n case Data():\n return None, input_data\n case dict() if \"data\" in input_data:\n try:\n if \"columns\" in input_data: # Likely a DataFrame\n return DataFrame.from_dict(input_data), None\n # Likely a Data object\n return None, Data(**input_data)\n except (TypeError, ValueError, KeyError) as e:\n msg = f\"Invalid structured input provided: {e!s}\"\n raise ValueError(msg) from e\n case _:\n msg = f\"Unsupported input type: {type(input_data)}. 
Expected DataFrame or Data.\"\n raise ValueError(msg)\n\n def parse_combined_text(self) -> Message:\n \"\"\"Parse all rows/items into a single text or convert input to string if `stringify` is enabled.\"\"\"\n # Early return for stringify option\n if self.mode == \"Stringify\":\n return self.convert_to_string()\n\n df, data = self._clean_args()\n\n lines = []\n if df is not None:\n for _, row in df.iterrows():\n formatted_text = self.pattern.format(**row.to_dict())\n lines.append(formatted_text)\n elif data is not None:\n # Use format_map with a dict that returns default_value for missing keys\n class DefaultDict(dict):\n def __missing__(self, key):\n return data.default_value or \"\"\n\n formatted_text = self.pattern.format_map(DefaultDict(data.data))\n lines.append(formatted_text)\n\n combined_text = self.sep.join(lines)\n self.status = combined_text\n return Message(text=combined_text)\n\n def convert_to_string(self) -> Message:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n result = \"\"\n if isinstance(self.input_data, list):\n result = \"\\n\".join([safe_convert(item, clean_data=self.clean_data or False) for item in self.input_data])\n else:\n result = safe_convert(self.input_data or False)\n self.log(f\"Converted to string with length: {len(result)}\")\n\n message = Message(text=result)\n self.status = message\n return message\n"
},
"input_data": {
"_input_type": "HandleInput",
"advanced": false,
"display_name": "Data or DataFrame",
"dynamic": false,
"info": "Accepts either a DataFrame or a Data object.",
"input_types": ["DataFrame", "Data"],
"list": false,
"list_add_label": "Add More",
"name": "input_data",
"override_skip": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "other",
"value": ""
},
"is_refresh": false,
"mode": {
"_input_type": "TabInput",
"advanced": false,
"display_name": "Mode",
"dynamic": false,
"info": "Convert into raw string instead of using a template.",
"name": "mode",
"options": ["Parser", "Stringify"],
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "tab",
"value": "Stringify"
},
"pattern": {
"_input_type": "MultilineInput",
"advanced": false,
"ai_enabled": false,
"copy_field": false,
"display_name": "Template",
"dynamic": true,
"info": "Use variables within curly brackets to extract column values for DataFrames or key values for Data.For example: `Name: {Name}, Age: {Age}, Country: {Country}`",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"multiline": true,
"name": "pattern",
"override_skip": false,
"password": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": "Text: {text}"
},
"sep": {
"_input_type": "MessageTextInput",
"advanced": true,
"display_name": "Separator",
"dynamic": false,
"info": "String used to separate rows/items.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "sep",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": "\n"
}
},
"tool_mode": false
},
"showNode": true,
"type": "ParserComponent"
},
"dragging": false,
"id": "ParserComponent-Xspgr",
"measured": {
"height": 310,
"width": 400
},
"position": {
"x": 78.4571915835524,
"y": -393.7689699613337
},
"selected": false,
"type": "genericNode"
},
{
"data": {
"id": "CustomComponent-WriteJsonIdempotent",
"node": {
"base_classes": ["CustomComponent", "Component"],
"beta": false,
"conditional_paths": [],
"custom_fields": {},
"description": "Writes Ollama JSON output dynamically based on Loop Item filename",
"display_name": "Write JSON (Idempotent)",
"documentation": "https://docs.langflow.org/write-file",
"edited": false,
"field_order": [
"storage_location",
"input",
"file_name",
"append_mode",
"local_format",
"aws_format",
"gdrive_format",
"aws_access_key_id",
"aws_secret_access_key",
"bucket_name",
"aws_region",
"s3_prefix",
"service_account_key",
"folder_id"
],
"frozen": false,
"icon": "file-text",
"last_updated": "2026-03-13T08:33:30.601Z",
"legacy": false,
"metadata": {
"code_hash": "6d0e4842271e",
"dependencies": {
"dependencies": [
{
"name": "orjson",
"version": "3.10.15"
},
{
"name": "pandas",
"version": "2.2.3"
},
{
"name": "fastapi",
"version": "0.133.1"
},
{
"name": "lfx",
"version": null
},
{
"name": "langflow",
"version": null
},
{
"name": "boto3",
"version": "1.40.61"
},
{
"name": "googleapiclient",
"version": "2.154.0"
}
],
"total_dependencies": 7
},
"module": "lfx.components.files_and_knowledge.save_file.SaveToFileComponent"
},
"minimized": false,
"output_types": [],
"outputs": [
{
"allows_loop": false,
"cache": true,
"display_name": "File Path",
"group_outputs": false,
"hidden": null,
"loop_types": null,
"method": "save_to_file",
"name": "message",
"options": null,
"required_inputs": null,
"selected": "Message",
"tool_mode": true,
"types": ["Message"],
"value": "__UNDEFINED__"
}
],
"pinned": false,
"template": {
"code": {
"value": "from langflow.custom import Component\nfrom langflow.io import StrInput, DataInput, Output\nfrom langflow.schema import Data\nimport json\nimport os\nfrom pathlib import Path\n\nclass WriteJsonIdempotent(Component):\n display_name = \"Write JSON (Idempotent)\"\n description = \"Writes JSON to staging_ai dynamically based on loop item filename\"\n \n inputs = [\n StrInput(name=\"json_content\", display_name=\"JSON Content\"),\n DataInput(name=\"loop_item\", display_name=\"Loop Item (PDF)\"),\n ]\n \n outputs = [\n Output(display_name=\"Result Path\", name=\"result_path\", method=\"write_file\")\n ]\n \n def write_file(self) -> Data:\n # Extract filename from loop_item\n pdf_path = self.loop_item.data.get(\"file_path\", \"\")\n if not pdf_path:\n return Data(data={\"error\": \"No file_path in loop item\"})\n \n base_name = Path(pdf_path).stem\n out_dir = Path(\"/data/staging_ai/rag-output\")\n out_dir.mkdir(parents=True, exist_ok=True)\n \n json_path = out_dir / f\"{base_name}.json\"\n \n # Idempotency check\n if json_path.exists():\n return Data(data={\"status\": \"skipped\", \"path\": str(json_path), \"reason\": \"already exists\"})\n \n # Parse and write content to ensure it's valid JSON before saving\n try:\n parsed = json.loads(self.json_content)\n # Inject source file name if missing\n if not parsed.get(\"source_file\"):\n parsed[\"source_file\"] = f\"{base_name}.pdf\"\n \n tmp_path = out_dir / f\"{base_name}.tmp\"\n with open(tmp_path, \"w\", encoding=\"utf-8\") as f:\n json.dump(parsed, f, ensure_ascii=False, indent=2)\n \n # Atomic rename\n os.replace(tmp_path, json_path)\n \n return Data(data={\"status\": \"written\", \"path\": str(json_path)})\n except Exception as e:\n err_path = out_dir / f\"{base_name}.error\"\n with open(err_path, \"w\", encoding=\"utf-8\") as f:\n f.write(f\"Error parsing JSON from API: {str(e)}\\n\\nContent:\\n{self.json_content}\")\n return Data(data={\"status\": \"error\", \"path\": str(err_path), \"error\": str(e)})\n"
},
"json_content": {},
"loop_item": {}
},
"tool_mode": false
},
"showNode": true,
"type": "CustomComponent"
},
"dragging": false,
"id": "CustomComponent-WriteJsonIdempotent",
"measured": {
"height": 488,
"width": 400
},
"position": {
"x": 1055.8955765668504,
"y": 296.8332683000652
},
"selected": false,
"type": "genericNode"
}
],
"viewport": {
"x": -209.03647850835887,
"y": 116.68545635016744,
"zoom": 0.954841619659289
}
},
"description": "Language Models, Mapped and Mastered.",
"endpoint_name": null,
"id": "4a538191-04b4-41cf-98d7-8e62aaccf3a8",
"is_component": false,
"last_tested_version": "1.8.0",
"locked": false,
"name": "OpenRAG V0.1",
"tags": []
}