Files
admin 11984bfa29
CI Pipeline / build (push) Failing after 12m41s
Build and Deploy / deploy (push) Failing after 2m44s
260322:1648 Correct Coresspondence / Doing RFA / Correct CI
2026-03-22 16:48:12 +07:00

2735 lines
235 KiB
JSON

{
"data": {
"edges": [
{
"animated": false,
"className": "",
"data": {
"sourceHandle": {
"dataType": "File",
"id": "File-5V2fL",
"name": "dataframe",
"output_types": ["DataFrame"]
},
"targetHandle": {
"fieldName": "data",
"id": "LoopComponent-5vFOr",
"inputTypes": ["DataFrame"],
"type": "other"
}
},
"id": "xy-edge__File-5V2fL{œdataTypeœ:œFileœ,œidœ:œFile-5V2fLœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-LoopComponent-5vFOr{œfieldNameœ:œdataœ,œidœ:œLoopComponent-5vFOrœ,œinputTypesœ:[œDataFrameœ],œtypeœ:œotherœ}",
"selected": false,
"source": "File-5V2fL",
"sourceHandle": "{œdataTypeœ:œFileœ,œidœ:œFile-5V2fLœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}",
"target": "LoopComponent-5vFOr",
"targetHandle": "{œfieldNameœ:œdataœ,œidœ:œLoopComponent-5vFOrœ,œinputTypesœ:[œDataFrameœ],œtypeœ:œotherœ}"
},
{
"animated": false,
"className": "",
"data": {
"sourceHandle": {
"dataType": "Prompt Template",
"id": "Prompt Template-dKwcS",
"name": "prompt",
"output_types": ["Message"]
},
"targetHandle": {
"fieldName": "system_message",
"id": "OllamaModel-xJSnu",
"inputTypes": ["Message"],
"type": "str"
}
},
"id": "xy-edge__Prompt Template-dKwcS{œdataTypeœ:œPrompt Templateœ,œidœ:œPrompt Template-dKwcSœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-OllamaModel-xJSnu{œfieldNameœ:œsystem_messageœ,œidœ:œOllamaModel-xJSnuœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}",
"selected": false,
"source": "Prompt Template-dKwcS",
"sourceHandle": "{œdataTypeœ:œPrompt Templateœ,œidœ:œPrompt Template-dKwcSœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}",
"target": "OllamaModel-xJSnu",
"targetHandle": "{œfieldNameœ:œsystem_messageœ,œidœ:œOllamaModel-xJSnuœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}"
},
{
"animated": false,
"data": {
"sourceHandle": {
"dataType": "LoopComponent",
"id": "LoopComponent-5vFOr",
"name": "item",
"output_types": ["Data"]
},
"targetHandle": {
"fieldName": "input_data",
"id": "ParserComponent-Xspgr",
"inputTypes": ["DataFrame", "Data"],
"type": "other"
}
},
"id": "xy-edge__LoopComponent-5vFOr{œdataTypeœ:œLoopComponentœ,œidœ:œLoopComponent-5vFOrœ,œnameœ:œitemœ,œoutput_typesœ:[œDataœ]}-ParserComponent-Xspgr{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-Xspgrœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}",
"selected": false,
"source": "LoopComponent-5vFOr",
"sourceHandle": "{œdataTypeœ:œLoopComponentœ,œidœ:œLoopComponent-5vFOrœ,œnameœ:œitemœ,œoutput_typesœ:[œDataœ]}",
"target": "ParserComponent-Xspgr",
"targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-Xspgrœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}"
},
{
"animated": false,
"data": {
"sourceHandle": {
"dataType": "ParserComponent",
"id": "ParserComponent-Xspgr",
"name": "parsed_text",
"output_types": ["Message"]
},
"targetHandle": {
"fieldName": "extracted_text",
"id": "Prompt Template-dKwcS",
"inputTypes": ["Message"],
"type": "str"
}
},
"id": "xy-edge__ParserComponent-Xspgr{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-Xspgrœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt Template-dKwcS{œfieldNameœ:œextracted_textœ,œidœ:œPrompt Template-dKwcSœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}",
"selected": false,
"source": "ParserComponent-Xspgr",
"sourceHandle": "{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-Xspgrœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}",
"target": "Prompt Template-dKwcS",
"targetHandle": "{œfieldNameœ:œextracted_textœ,œidœ:œPrompt Template-dKwcSœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}"
},
{
"animated": false,
"data": {
"sourceHandle": {
"dataType": "OllamaModel",
"id": "OllamaModel-xJSnu",
"name": "text_output",
"output_types": ["Message"]
},
"targetHandle": {
"fieldName": "input",
"id": "SaveToFile-M0RUY",
"inputTypes": ["Data", "DataFrame", "Message"],
"type": "other"
}
},
"id": "xy-edge__OllamaModel-xJSnu{œdataTypeœ:œOllamaModelœ,œidœ:œOllamaModel-xJSnuœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-SaveToFile-M0RUY{œfieldNameœ:œinputœ,œidœ:œSaveToFile-M0RUYœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}",
"selected": false,
"source": "OllamaModel-xJSnu",
"sourceHandle": "{œdataTypeœ:œOllamaModelœ,œidœ:œOllamaModel-xJSnuœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}",
"target": "SaveToFile-M0RUY",
"targetHandle": "{œfieldNameœ:œinputœ,œidœ:œSaveToFile-M0RUYœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}"
},
{
"animated": false,
"data": {
"sourceHandle": {
"dataType": "SaveToFile",
"id": "SaveToFile-M0RUY",
"name": "message",
"output_types": ["Message"]
},
"targetHandle": {
"dataType": "LoopComponent",
"id": "LoopComponent-5vFOr",
"name": "item",
"output_types": ["Data", "Message"]
}
},
"id": "xy-edge__SaveToFile-M0RUY{œdataTypeœ:œSaveToFileœ,œidœ:œSaveToFile-M0RUYœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-LoopComponent-5vFOr{œdataTypeœ:œLoopComponentœ,œidœ:œLoopComponent-5vFOrœ,œnameœ:œitemœ,œoutput_typesœ:[œDataœ,œMessageœ]}",
"selected": false,
"source": "SaveToFile-M0RUY",
"sourceHandle": "{œdataTypeœ:œSaveToFileœ,œidœ:œSaveToFile-M0RUYœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}",
"target": "LoopComponent-5vFOr",
"targetHandle": "{œdataTypeœ:œLoopComponentœ,œidœ:œLoopComponent-5vFOrœ,œnameœ:œitemœ,œoutput_typesœ:[œDataœ,œMessageœ]}"
}
],
"nodes": [
{
"data": {
"id": "File-5V2fL",
"node": {
"base_classes": ["Message"],
"beta": false,
"conditional_paths": [],
"custom_fields": {},
"description": "Loads and returns the content from uploaded files.",
"display_name": "Read File",
"documentation": "https://docs.langflow.org/read-file",
"edited": false,
"field_order": [
"storage_location",
"path",
"file_path",
"separator",
"silent_errors",
"delete_server_file_after_processing",
"ignore_unsupported_extensions",
"ignore_unspecified_files",
"file_path_str",
"aws_access_key_id",
"aws_secret_access_key",
"bucket_name",
"aws_region",
"s3_file_key",
"service_account_key",
"file_id",
"advanced_mode",
"pipeline",
"ocr_engine",
"md_image_placeholder",
"md_page_break_placeholder",
"doc_key",
"use_multithreading",
"concurrency_multithreading",
"markdown"
],
"frozen": false,
"icon": "file-text",
"last_updated": "2026-03-13T07:48:58.791Z",
"legacy": false,
"lf_version": "1.8.0",
"metadata": {
"code_hash": "12a5841f1a03",
"dependencies": {
"dependencies": [
{
"name": "lfx",
"version": null
},
{
"name": "langchain_core",
"version": "0.3.83"
},
{
"name": "pydantic",
"version": "2.11.10"
},
{
"name": "googleapiclient",
"version": "2.154.0"
}
],
"total_dependencies": 4
},
"module": "lfx.components.files_and_knowledge.file.FileComponent"
},
"minimized": false,
"output_types": [],
"outputs": [
{
"allows_loop": false,
"cache": true,
"display_name": "Files",
"group_outputs": false,
"hidden": null,
"loop_types": null,
"method": "load_files",
"name": "dataframe",
"options": null,
"required_inputs": null,
"selected": "DataFrame",
"tool_mode": true,
"types": ["DataFrame"],
"value": "__UNDEFINED__"
}
],
"pinned": false,
"template": {
"_frontend_node_flow_id": {
"value": "4a538191-04b4-41cf-98d7-8e62aaccf3a8"
},
"_frontend_node_folder_id": {
"value": "60f723dc-b1f8-4e25-9c31-0a4ee07abd5c"
},
"_type": "Component",
"advanced_mode": {
"_input_type": "BoolInput",
"advanced": false,
"display_name": "Advanced Parser",
"dynamic": false,
"info": "Enable advanced document processing and export with Docling for PDFs, images, and office documents. Note that advanced document processing can consume significant resources.",
"list": false,
"list_add_label": "Add More",
"name": "advanced_mode",
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
},
"aws_access_key_id": {
"_input_type": "SecretStrInput",
"advanced": false,
"display_name": "AWS Access Key ID",
"dynamic": false,
"info": "AWS Access key ID.",
"input_types": [],
"load_from_db": false,
"name": "aws_access_key_id",
"override_skip": false,
"password": true,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"aws_region": {
"_input_type": "StrInput",
"advanced": false,
"display_name": "AWS Region",
"dynamic": false,
"info": "AWS region (e.g., us-east-1, eu-west-1).",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "aws_region",
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"aws_secret_access_key": {
"_input_type": "SecretStrInput",
"advanced": false,
"display_name": "AWS Secret Key",
"dynamic": false,
"info": "AWS Secret Key.",
"input_types": [],
"load_from_db": false,
"name": "aws_secret_access_key",
"override_skip": false,
"password": true,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"bucket_name": {
"_input_type": "StrInput",
"advanced": false,
"display_name": "S3 Bucket Name",
"dynamic": false,
"info": "Enter the name of the S3 bucket.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "bucket_name",
"override_skip": false,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"code": {
"advanced": true,
"dynamic": true,
"fileTypes": [],
"file_path": "",
"info": "",
"list": false,
"load_from_db": false,
"multiline": true,
"name": "code",
"password": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"type": "code",
"value": "\"\"\"Enhanced file component with Docling support and process isolation.\n\nNotes:\n-----\n- ALL Docling parsing/export runs in a separate OS process to prevent memory\n growth and native library state from impacting the main Langflow process.\n- Standard text/structured parsing continues to use existing BaseFileComponent\n utilities (and optional threading via `parallel_load_data`).\n\"\"\"\n\nfrom __future__ import annotations\n\nimport contextlib\nimport json\nimport subprocess\nimport sys\nimport textwrap\nfrom copy import deepcopy\nfrom pathlib import Path\nfrom tempfile import NamedTemporaryFile\nfrom typing import Any\n\nfrom lfx.base.data.base_file import BaseFileComponent\nfrom lfx.base.data.storage_utils import parse_storage_path, read_file_bytes, validate_image_content_type\nfrom lfx.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data\nfrom lfx.inputs import SortableListInput\nfrom lfx.inputs.inputs import DropdownInput, MessageTextInput, StrInput\nfrom lfx.io import BoolInput, FileInput, IntInput, Output, SecretStrInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame # noqa: TC001\nfrom lfx.schema.message import Message\nfrom lfx.services.deps import get_settings_service, get_storage_service\nfrom lfx.utils.async_helpers import run_until_complete\nfrom lfx.utils.validate_cloud import is_astra_cloud_environment\n\n\ndef _get_storage_location_options():\n \"\"\"Get storage location options, filtering out Local if in Astra cloud environment.\"\"\"\n all_options = [{\"name\": \"AWS\", \"icon\": \"Amazon\"}, {\"name\": \"Google Drive\", \"icon\": \"google\"}]\n if is_astra_cloud_environment():\n return all_options\n return [{\"name\": \"Local\", \"icon\": \"hard-drive\"}, *all_options]\n\n\nclass FileComponent(BaseFileComponent):\n \"\"\"File component with optional Docling processing (isolated in a subprocess).\"\"\"\n\n display_name = \"Read File\"\n # description is now a dynamic 
property - see get_tool_description()\n _base_description = \"Loads content from one or more files.\"\n documentation: str = \"https://docs.langflow.org/read-file\"\n icon = \"file-text\"\n name = \"File\"\n add_tool_output = True # Enable tool mode toggle without requiring tool_mode inputs\n\n # Extensions that can be processed without Docling (using standard text parsing)\n TEXT_EXTENSIONS = TEXT_FILE_TYPES\n\n # Extensions that require Docling for processing (images, advanced office formats, etc.)\n DOCLING_ONLY_EXTENSIONS = [\n \"adoc\",\n \"asciidoc\",\n \"asc\",\n \"bmp\",\n \"dotx\",\n \"dotm\",\n \"docm\",\n \"jpg\",\n \"jpeg\",\n \"png\",\n \"potx\",\n \"ppsx\",\n \"pptm\",\n \"potm\",\n \"ppsm\",\n \"pptx\",\n \"tiff\",\n \"xls\",\n \"xlsx\",\n \"xhtml\",\n \"webp\",\n ]\n\n # Docling-supported/compatible extensions; TEXT_FILE_TYPES are supported by the base loader.\n VALID_EXTENSIONS = [\n *TEXT_EXTENSIONS,\n *DOCLING_ONLY_EXTENSIONS,\n ]\n\n # Fixed export settings used when markdown export is requested.\n EXPORT_FORMAT = \"Markdown\"\n IMAGE_MODE = \"placeholder\"\n\n _base_inputs = deepcopy(BaseFileComponent.get_base_inputs())\n\n for input_item in _base_inputs:\n if isinstance(input_item, FileInput) and input_item.name == \"path\":\n input_item.real_time_refresh = True\n input_item.tool_mode = False # Disable tool mode for file upload input\n input_item.required = False # Make it optional so it doesn't error in tool mode\n break\n\n inputs = [\n SortableListInput(\n name=\"storage_location\",\n display_name=\"Storage Location\",\n placeholder=\"Select Location\",\n info=\"Choose where to read the file from.\",\n options=_get_storage_location_options(),\n real_time_refresh=True,\n limit=1,\n value=[{\"name\": \"Local\", \"icon\": \"hard-drive\"}],\n advanced=True,\n ),\n *_base_inputs,\n StrInput(\n name=\"file_path_str\",\n display_name=\"File Path\",\n info=(\n \"Path to the file to read. Used when component is called as a tool. 
\"\n \"If not provided, will use the uploaded file from 'path' input.\"\n ),\n show=False,\n advanced=True,\n tool_mode=True, # Required for Toolset toggle, but _get_tools() ignores this parameter\n required=False,\n ),\n # AWS S3 specific inputs\n SecretStrInput(\n name=\"aws_access_key_id\",\n display_name=\"AWS Access Key ID\",\n info=\"AWS Access key ID.\",\n show=False,\n advanced=False,\n required=True,\n ),\n SecretStrInput(\n name=\"aws_secret_access_key\",\n display_name=\"AWS Secret Key\",\n info=\"AWS Secret Key.\",\n show=False,\n advanced=False,\n required=True,\n ),\n StrInput(\n name=\"bucket_name\",\n display_name=\"S3 Bucket Name\",\n info=\"Enter the name of the S3 bucket.\",\n show=False,\n advanced=False,\n required=True,\n ),\n StrInput(\n name=\"aws_region\",\n display_name=\"AWS Region\",\n info=\"AWS region (e.g., us-east-1, eu-west-1).\",\n show=False,\n advanced=False,\n ),\n StrInput(\n name=\"s3_file_key\",\n display_name=\"S3 File Key\",\n info=\"The key (path) of the file in S3 bucket.\",\n show=False,\n advanced=False,\n required=True,\n ),\n # Google Drive specific inputs\n SecretStrInput(\n name=\"service_account_key\",\n display_name=\"GCP Credentials Secret Key\",\n info=\"Your Google Cloud Platform service account JSON key as a secret string (complete JSON content).\",\n show=False,\n advanced=False,\n required=True,\n ),\n StrInput(\n name=\"file_id\",\n display_name=\"Google Drive File ID\",\n info=(\"The Google Drive file ID to read. The file must be shared with the service account email.\"),\n show=False,\n advanced=False,\n required=True,\n ),\n BoolInput(\n name=\"advanced_mode\",\n display_name=\"Advanced Parser\",\n value=False,\n real_time_refresh=True,\n info=(\n \"Enable advanced document processing and export with Docling for PDFs, images, and office documents. 
\"\n \"Note that advanced document processing can consume significant resources.\"\n ),\n # Disabled in cloud\n show=not is_astra_cloud_environment(),\n ),\n DropdownInput(\n name=\"pipeline\",\n display_name=\"Pipeline\",\n info=\"Docling pipeline to use\",\n options=[\"standard\", \"vlm\"],\n value=\"standard\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"ocr_engine\",\n display_name=\"OCR Engine\",\n info=\"OCR engine to use. Only available when pipeline is set to 'standard'.\",\n options=[\"None\", \"easyocr\"],\n value=\"easyocr\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"md_image_placeholder\",\n display_name=\"Image placeholder\",\n info=\"Specify the image placeholder for markdown exports.\",\n value=\"<!-- image -->\",\n advanced=True,\n show=False,\n ),\n StrInput(\n name=\"md_page_break_placeholder\",\n display_name=\"Page break placeholder\",\n info=\"Add this placeholder between pages in the markdown output.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n MessageTextInput(\n name=\"doc_key\",\n display_name=\"Doc Key\",\n info=\"The key to use for the DoclingDocument column.\",\n value=\"doc\",\n advanced=True,\n show=False,\n ),\n # Deprecated input retained for backward-compatibility.\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"[Deprecated] Use Multithreading\",\n advanced=True,\n value=True,\n info=\"Set 'Processing Concurrency' greater than 1 to enable multithreading.\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"When multiple files are being processed, the number of files to process concurrently.\",\n value=1,\n ),\n BoolInput(\n name=\"markdown\",\n display_name=\"Markdown Export\",\n info=\"Export processed documents to Markdown format. 
Only available when advanced mode is enabled.\",\n value=False,\n show=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\", tool_mode=True),\n ]\n\n # ------------------------------ Tool description with file names --------------\n\n def get_tool_description(self) -> str:\n \"\"\"Return a dynamic description that includes the names of uploaded files.\n\n This helps the Agent understand which files are available to read.\n \"\"\"\n base_description = \"Loads and returns the content from uploaded files.\"\n\n # Get the list of uploaded file paths\n file_paths = getattr(self, \"path\", None)\n if not file_paths:\n return base_description\n\n # Ensure it's a list\n if not isinstance(file_paths, list):\n file_paths = [file_paths]\n\n # Extract just the file names from the paths\n file_names = []\n for fp in file_paths:\n if fp:\n name = Path(fp).name\n file_names.append(name)\n\n if file_names:\n files_str = \", \".join(file_names)\n return f\"{base_description} Available files: {files_str}. 
Call this tool to read these files.\"\n\n return base_description\n\n @property\n def description(self) -> str:\n \"\"\"Dynamic description property that includes uploaded file names.\"\"\"\n return self.get_tool_description()\n\n async def _get_tools(self) -> list:\n \"\"\"Override to create a tool without parameters.\n\n The Read File component should use the files already uploaded via UI,\n not accept file paths from the Agent (which wouldn't know the internal paths).\n \"\"\"\n from langchain_core.tools import StructuredTool\n from pydantic import BaseModel\n\n # Empty schema - no parameters needed\n class EmptySchema(BaseModel):\n \"\"\"No parameters required - uses pre-uploaded files.\"\"\"\n\n async def read_files_tool() -> str:\n \"\"\"Read the content of uploaded files.\"\"\"\n try:\n result = self.load_files_message()\n if hasattr(result, \"get_text\"):\n return result.get_text()\n if hasattr(result, \"text\"):\n return result.text\n return str(result)\n except (FileNotFoundError, ValueError, OSError, RuntimeError) as e:\n return f\"Error reading files: {e}\"\n\n description = self.get_tool_description()\n\n tool = StructuredTool(\n name=\"load_files_message\",\n description=description,\n coroutine=read_files_tool,\n args_schema=EmptySchema,\n handle_tool_error=True,\n tags=[\"load_files_message\"],\n metadata={\n \"display_name\": \"Read File\",\n \"display_description\": description,\n },\n )\n\n return [tool]\n\n # ------------------------------ UI helpers --------------------------------------\n\n def _path_value(self, template: dict) -> list[str]:\n \"\"\"Return the list of currently selected file paths from the template.\"\"\"\n return template.get(\"path\", {}).get(\"file_path\", [])\n\n def _disable_docling_fields_in_cloud(self, build_config: dict[str, Any]) -> None:\n \"\"\"Disable all Docling-related fields in cloud environments.\"\"\"\n if \"advanced_mode\" in build_config:\n build_config[\"advanced_mode\"][\"show\"] = False\n 
build_config[\"advanced_mode\"][\"value\"] = False\n # Hide all Docling-related fields\n docling_fields = (\"pipeline\", \"ocr_engine\", \"doc_key\", \"md_image_placeholder\", \"md_page_break_placeholder\")\n for field in docling_fields:\n if field in build_config:\n build_config[field][\"show\"] = False\n # Also disable OCR engine specifically\n if \"ocr_engine\" in build_config:\n build_config[\"ocr_engine\"][\"value\"] = \"None\"\n\n def update_build_config(\n self,\n build_config: dict[str, Any],\n field_value: Any,\n field_name: str | None = None,\n ) -> dict[str, Any]:\n \"\"\"Show/hide Advanced Parser and related fields based on selection context.\"\"\"\n # Update storage location options dynamically based on cloud environment\n if \"storage_location\" in build_config:\n updated_options = _get_storage_location_options()\n build_config[\"storage_location\"][\"options\"] = updated_options\n\n # Handle storage location selection\n if field_name == \"storage_location\":\n # Extract selected storage location\n selected = [location[\"name\"] for location in field_value] if isinstance(field_value, list) else []\n\n # Hide all storage-specific fields first\n storage_fields = [\n \"aws_access_key_id\",\n \"aws_secret_access_key\",\n \"bucket_name\",\n \"aws_region\",\n \"s3_file_key\",\n \"service_account_key\",\n \"file_id\",\n ]\n\n for f_name in storage_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = False\n\n # Show fields based on selected storage location\n if len(selected) == 1:\n location = selected[0]\n\n if location == \"Local\":\n # Show file upload input for local storage\n if \"path\" in build_config:\n build_config[\"path\"][\"show\"] = True\n\n elif location == \"AWS\":\n # Hide file upload input, show AWS fields\n if \"path\" in build_config:\n build_config[\"path\"][\"show\"] = False\n\n aws_fields = [\n \"aws_access_key_id\",\n \"aws_secret_access_key\",\n \"bucket_name\",\n \"aws_region\",\n \"s3_file_key\",\n ]\n for f_name 
in aws_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = True\n build_config[f_name][\"advanced\"] = False\n\n elif location == \"Google Drive\":\n # Hide file upload input, show Google Drive fields\n if \"path\" in build_config:\n build_config[\"path\"][\"show\"] = False\n\n gdrive_fields = [\"service_account_key\", \"file_id\"]\n for f_name in gdrive_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = True\n build_config[f_name][\"advanced\"] = False\n # No storage location selected - show file upload by default\n elif \"path\" in build_config:\n build_config[\"path\"][\"show\"] = True\n\n return build_config\n\n if field_name == \"path\":\n paths = self._path_value(build_config)\n\n # Disable in cloud environments\n if is_astra_cloud_environment():\n self._disable_docling_fields_in_cloud(build_config)\n else:\n # If all files can be processed by docling, do so\n allow_advanced = all(not file_path.endswith((\".csv\", \".xlsx\", \".parquet\")) for file_path in paths)\n build_config[\"advanced_mode\"][\"show\"] = allow_advanced\n if not allow_advanced:\n build_config[\"advanced_mode\"][\"value\"] = False\n docling_fields = (\n \"pipeline\",\n \"ocr_engine\",\n \"doc_key\",\n \"md_image_placeholder\",\n \"md_page_break_placeholder\",\n )\n for field in docling_fields:\n if field in build_config:\n build_config[field][\"show\"] = False\n\n # Docling Processing\n elif field_name == \"advanced_mode\":\n # Disable in cloud environments - don't show Docling fields even if advanced_mode is toggled\n if is_astra_cloud_environment():\n self._disable_docling_fields_in_cloud(build_config)\n else:\n docling_fields = (\n \"pipeline\",\n \"ocr_engine\",\n \"doc_key\",\n \"md_image_placeholder\",\n \"md_page_break_placeholder\",\n )\n for field in docling_fields:\n if field in build_config:\n build_config[field][\"show\"] = bool(field_value)\n if field == \"pipeline\":\n build_config[field][\"advanced\"] = not bool(field_value)\n\n elif 
field_name == \"pipeline\":\n # Disable in cloud environments - don't show OCR engine even if pipeline is changed\n if is_astra_cloud_environment():\n self._disable_docling_fields_in_cloud(build_config)\n elif field_value == \"standard\":\n build_config[\"ocr_engine\"][\"show\"] = True\n build_config[\"ocr_engine\"][\"value\"] = \"easyocr\"\n else:\n build_config[\"ocr_engine\"][\"show\"] = False\n build_config[\"ocr_engine\"][\"value\"] = \"None\"\n\n return build_config\n\n def update_outputs(self, frontend_node: dict[str, Any], field_name: str, field_value: Any) -> dict[str, Any]: # noqa: ARG002\n \"\"\"Dynamically show outputs based on file count/type and advanced mode.\"\"\"\n if field_name not in [\"path\", \"advanced_mode\", \"pipeline\"]:\n return frontend_node\n\n template = frontend_node.get(\"template\", {})\n paths = self._path_value(template)\n if not paths:\n return frontend_node\n\n frontend_node[\"outputs\"] = []\n if len(paths) == 1:\n file_path = paths[0] if field_name == \"path\" else frontend_node[\"template\"][\"path\"][\"file_path\"][0]\n if file_path.endswith((\".csv\", \".xlsx\", \".parquet\")):\n frontend_node[\"outputs\"].append(\n Output(\n display_name=\"Structured Content\",\n name=\"dataframe\",\n method=\"load_files_structured\",\n tool_mode=True,\n ),\n )\n elif file_path.endswith(\".json\"):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"json\", method=\"load_files_json\", tool_mode=True),\n )\n\n advanced_mode = frontend_node.get(\"template\", {}).get(\"advanced_mode\", {}).get(\"value\", False)\n if advanced_mode:\n frontend_node[\"outputs\"].append(\n Output(\n display_name=\"Structured Output\",\n name=\"advanced_dataframe\",\n method=\"load_files_dataframe\",\n tool_mode=True,\n ),\n )\n frontend_node[\"outputs\"].append(\n Output(\n display_name=\"Markdown\", name=\"advanced_markdown\", method=\"load_files_markdown\", tool_mode=True\n ),\n )\n frontend_node[\"outputs\"].append(\n 
Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\", tool_mode=True),\n )\n else:\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\", tool_mode=True),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\", tool_mode=True),\n )\n else:\n # Multiple files => DataFrame output; advanced parser disabled\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Files\", name=\"dataframe\", method=\"load_files\", tool_mode=True)\n )\n\n return frontend_node\n\n # ------------------------------ Core processing ----------------------------------\n\n def _get_selected_storage_location(self) -> str:\n \"\"\"Get the selected storage location from the SortableListInput.\"\"\"\n if hasattr(self, \"storage_location\") and self.storage_location:\n if isinstance(self.storage_location, list) and len(self.storage_location) > 0:\n return self.storage_location[0].get(\"name\", \"\")\n if isinstance(self.storage_location, dict):\n return self.storage_location.get(\"name\", \"\")\n return \"Local\" # Default to Local if not specified\n\n def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Override to handle file_path_str input from tool mode and cloud storage.\n\n Priority:\n 1. Cloud storage (AWS/Google Drive) if selected\n 2. file_path_str (if provided by the tool call)\n 3. 
path (uploaded file from UI)\n \"\"\"\n storage_location = self._get_selected_storage_location()\n\n # Handle AWS S3\n if storage_location == \"AWS\":\n return self._read_from_aws_s3()\n\n # Handle Google Drive\n if storage_location == \"Google Drive\":\n return self._read_from_google_drive()\n\n # Handle Local storage\n # Check if file_path_str is provided (from tool mode)\n file_path_str = getattr(self, \"file_path_str\", None)\n if file_path_str:\n # Use the string path from tool mode\n from pathlib import Path\n\n from lfx.schema.data import Data\n\n # Use same resolution logic as BaseFileComponent (support storage paths)\n path_str = str(file_path_str)\n if parse_storage_path(path_str):\n try:\n resolved_path = Path(self.get_full_path(path_str))\n except (ValueError, AttributeError):\n resolved_path = Path(self.resolve_path(path_str))\n else:\n resolved_path = Path(self.resolve_path(path_str))\n\n if not resolved_path.exists():\n msg = f\"File or directory not found: {file_path_str}\"\n self.log(msg)\n if not self.silent_errors:\n raise ValueError(msg)\n return []\n\n data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: str(resolved_path)})\n return [BaseFileComponent.BaseFile(data_obj, resolved_path, delete_after_processing=False)]\n\n # Otherwise use the default implementation (uses path FileInput)\n return super()._validate_and_resolve_paths()\n\n def _read_from_aws_s3(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Read file from AWS S3.\"\"\"\n from lfx.base.data.cloud_storage_utils import create_s3_client, validate_aws_credentials\n\n # Validate AWS credentials\n validate_aws_credentials(self)\n if not getattr(self, \"s3_file_key\", None):\n msg = \"S3 File Key is required\"\n raise ValueError(msg)\n\n # Create S3 client\n s3_client = create_s3_client(self)\n\n # Download file to temp location\n import tempfile\n\n # Get file extension from S3 key\n file_extension = Path(self.s3_file_key).suffix or \"\"\n\n with 
tempfile.NamedTemporaryFile(mode=\"wb\", suffix=file_extension, delete=False) as temp_file:\n temp_file_path = temp_file.name\n try:\n s3_client.download_fileobj(self.bucket_name, self.s3_file_key, temp_file)\n except Exception as e:\n # Clean up temp file on failure\n with contextlib.suppress(OSError):\n Path(temp_file_path).unlink()\n msg = f\"Failed to download file from S3: {e}\"\n raise RuntimeError(msg) from e\n\n # Create BaseFile object\n from lfx.schema.data import Data\n\n temp_path = Path(temp_file_path)\n data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: str(temp_path)})\n return [BaseFileComponent.BaseFile(data_obj, temp_path, delete_after_processing=True)]\n\n def _read_from_google_drive(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Read file from Google Drive.\"\"\"\n import tempfile\n\n from googleapiclient.http import MediaIoBaseDownload\n\n from lfx.base.data.cloud_storage_utils import create_google_drive_service\n\n # Validate Google Drive credentials\n if not getattr(self, \"service_account_key\", None):\n msg = \"GCP Credentials Secret Key is required for Google Drive storage\"\n raise ValueError(msg)\n if not getattr(self, \"file_id\", None):\n msg = \"Google Drive File ID is required\"\n raise ValueError(msg)\n\n # Create Google Drive service with read-only scope\n drive_service = create_google_drive_service(\n self.service_account_key, scopes=[\"https://www.googleapis.com/auth/drive.readonly\"]\n )\n\n # Get file metadata to determine file name and extension\n try:\n file_metadata = drive_service.files().get(fileId=self.file_id, fields=\"name,mimeType\").execute()\n file_name = file_metadata.get(\"name\", \"download\")\n except Exception as e:\n msg = (\n f\"Unable to access file with ID '{self.file_id}'. \"\n f\"Error: {e!s}. 
\"\n \"Please ensure: 1) The file ID is correct, 2) The file exists, \"\n \"3) The service account has been granted access to this file.\"\n )\n raise ValueError(msg) from e\n\n # Download file to temp location\n file_extension = Path(file_name).suffix or \"\"\n with tempfile.NamedTemporaryFile(mode=\"wb\", suffix=file_extension, delete=False) as temp_file:\n temp_file_path = temp_file.name\n try:\n request = drive_service.files().get_media(fileId=self.file_id)\n downloader = MediaIoBaseDownload(temp_file, request)\n done = False\n while not done:\n _status, done = downloader.next_chunk()\n except Exception as e:\n # Clean up temp file on failure\n with contextlib.suppress(OSError):\n Path(temp_file_path).unlink()\n msg = f\"Failed to download file from Google Drive: {e}\"\n raise RuntimeError(msg) from e\n\n # Create BaseFile object\n from lfx.schema.data import Data\n\n temp_path = Path(temp_file_path)\n data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: str(temp_path)})\n return [BaseFileComponent.BaseFile(data_obj, temp_path, delete_after_processing=True)]\n\n def _is_docling_compatible(self, file_path: str) -> bool:\n \"\"\"Lightweight extension gate for Docling-compatible types.\"\"\"\n docling_exts = (\n \".adoc\",\n \".asciidoc\",\n \".asc\",\n \".bmp\",\n \".csv\",\n \".dotx\",\n \".dotm\",\n \".docm\",\n \".docx\",\n \".htm\",\n \".html\",\n \".jpg\",\n \".jpeg\",\n \".json\",\n \".md\",\n \".pdf\",\n \".png\",\n \".potx\",\n \".ppsx\",\n \".pptm\",\n \".potm\",\n \".ppsm\",\n \".pptx\",\n \".tiff\",\n \".txt\",\n \".xls\",\n \".xlsx\",\n \".xhtml\",\n \".xml\",\n \".webp\",\n )\n return file_path.lower().endswith(docling_exts)\n\n async def _get_local_file_for_docling(self, file_path: str) -> tuple[str, bool]:\n \"\"\"Get a local file path for Docling processing, downloading from S3 if needed.\n\n Args:\n file_path: Either a local path or S3 key (format \"flow_id/filename\")\n\n Returns:\n tuple[str, bool]: (local_path, should_delete) where 
should_delete indicates\n if this is a temporary file that should be cleaned up\n \"\"\"\n settings = get_settings_service().settings\n if settings.storage_type == \"local\":\n return file_path, False\n\n # S3 storage - download to temp file\n parsed = parse_storage_path(file_path)\n if not parsed:\n msg = f\"Invalid S3 path format: {file_path}. Expected 'flow_id/filename'\"\n raise ValueError(msg)\n\n storage_service = get_storage_service()\n flow_id, filename = parsed\n\n # Get file content from S3\n content = await storage_service.get_file(flow_id, filename)\n\n suffix = Path(filename).suffix\n with NamedTemporaryFile(mode=\"wb\", suffix=suffix, delete=False) as tmp_file:\n tmp_file.write(content)\n temp_path = tmp_file.name\n\n return temp_path, True\n\n def _process_docling_in_subprocess(self, file_path: str) -> Data | None:\n \"\"\"Run Docling in a separate OS process and map the result to a Data object.\n\n We avoid multiprocessing pickling by launching `python -c \"<script>\"` and\n passing JSON config via stdin. 
The child prints a JSON result to stdout.\n\n For S3 storage, the file is downloaded to a temp file first.\n \"\"\"\n if not file_path:\n return None\n\n settings = get_settings_service().settings\n if settings.storage_type == \"s3\":\n local_path, should_delete = run_until_complete(self._get_local_file_for_docling(file_path))\n else:\n local_path = file_path\n should_delete = False\n\n try:\n return self._process_docling_subprocess_impl(local_path, file_path)\n finally:\n # Clean up temp file if we created one\n if should_delete:\n with contextlib.suppress(Exception):\n Path(local_path).unlink() # Ignore cleanup errors\n\n def _process_docling_subprocess_impl(self, local_file_path: str, original_file_path: str) -> Data | None:\n \"\"\"Implementation of Docling subprocess processing.\n\n Args:\n local_file_path: Path to local file to process\n original_file_path: Original file path to include in metadata\n Returns:\n Data object with processed content\n \"\"\"\n args: dict[str, Any] = {\n \"file_path\": local_file_path,\n \"markdown\": bool(self.markdown),\n \"image_mode\": str(self.IMAGE_MODE),\n \"md_image_placeholder\": str(self.md_image_placeholder),\n \"md_page_break_placeholder\": str(self.md_page_break_placeholder),\n \"pipeline\": str(self.pipeline),\n \"ocr_engine\": (\n self.ocr_engine if self.ocr_engine and self.ocr_engine != \"None\" and self.pipeline != \"vlm\" else None\n ),\n }\n\n # Child script for isolating the docling processing\n child_script = textwrap.dedent(\n r\"\"\"\n import json, sys\n\n def try_imports():\n try:\n from docling.datamodel.base_models import ConversionStatus, InputFormat # type: ignore\n from docling.document_converter import DocumentConverter # type: ignore\n from docling_core.types.doc import ImageRefMode # type: ignore\n return ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, \"latest\"\n except Exception as e:\n raise e\n\n def create_converter(strategy, input_format, DocumentConverter, pipeline, 
ocr_engine):\n # --- Standard PDF/IMAGE pipeline (your existing behavior), with optional OCR ---\n if pipeline == \"standard\":\n try:\n from docling.datamodel.pipeline_options import PdfPipelineOptions # type: ignore\n from docling.document_converter import PdfFormatOption # type: ignore\n\n pipe = PdfPipelineOptions()\n pipe.do_ocr = False\n\n if ocr_engine:\n try:\n from docling.models.factories import get_ocr_factory # type: ignore\n pipe.do_ocr = True\n fac = get_ocr_factory(allow_external_plugins=False)\n pipe.ocr_options = fac.create_options(kind=ocr_engine)\n except Exception:\n # If OCR setup fails, disable it\n pipe.do_ocr = False\n\n fmt = {}\n if hasattr(input_format, \"PDF\"):\n fmt[getattr(input_format, \"PDF\")] = PdfFormatOption(pipeline_options=pipe)\n if hasattr(input_format, \"IMAGE\"):\n fmt[getattr(input_format, \"IMAGE\")] = PdfFormatOption(pipeline_options=pipe)\n\n return DocumentConverter(format_options=fmt)\n except Exception:\n return DocumentConverter()\n\n # --- Vision-Language Model (VLM) pipeline ---\n if pipeline == \"vlm\":\n try:\n from docling.datamodel.pipeline_options import VlmPipelineOptions\n from docling.datamodel.vlm_model_specs import GRANITEDOCLING_MLX, GRANITEDOCLING_TRANSFORMERS\n from docling.document_converter import PdfFormatOption\n from docling.pipeline.vlm_pipeline import VlmPipeline\n\n vl_pipe = VlmPipelineOptions(\n vlm_options=GRANITEDOCLING_TRANSFORMERS,\n )\n\n if sys.platform == \"darwin\":\n try:\n import mlx_vlm\n vl_pipe.vlm_options = GRANITEDOCLING_MLX\n except ImportError as e:\n raise e\n\n # VLM paths generally don't need OCR; keep OCR off by default here.\n fmt = {}\n if hasattr(input_format, \"PDF\"):\n fmt[getattr(input_format, \"PDF\")] = PdfFormatOption(\n pipeline_cls=VlmPipeline,\n pipeline_options=vl_pipe\n )\n if hasattr(input_format, \"IMAGE\"):\n fmt[getattr(input_format, \"IMAGE\")] = PdfFormatOption(\n pipeline_cls=VlmPipeline,\n pipeline_options=vl_pipe\n )\n\n return 
DocumentConverter(format_options=fmt)\n except Exception as e:\n raise e\n\n # --- Fallback: default converter with no special options ---\n return DocumentConverter()\n\n def export_markdown(document, ImageRefMode, image_mode, img_ph, pg_ph):\n try:\n mode = getattr(ImageRefMode, image_mode.upper(), image_mode)\n return document.export_to_markdown(\n image_mode=mode,\n image_placeholder=img_ph,\n page_break_placeholder=pg_ph,\n )\n except Exception:\n try:\n return document.export_to_text()\n except Exception:\n return str(document)\n\n def to_rows(doc_dict):\n rows = []\n for t in doc_dict.get(\"texts\", []):\n prov = t.get(\"prov\") or []\n page_no = None\n if prov and isinstance(prov, list) and isinstance(prov[0], dict):\n page_no = prov[0].get(\"page_no\")\n rows.append({\n \"page_no\": page_no,\n \"label\": t.get(\"label\"),\n \"text\": t.get(\"text\"),\n \"level\": t.get(\"level\"),\n })\n return rows\n\n def main():\n cfg = json.loads(sys.stdin.read())\n file_path = cfg[\"file_path\"]\n markdown = cfg[\"markdown\"]\n image_mode = cfg[\"image_mode\"]\n img_ph = cfg[\"md_image_placeholder\"]\n pg_ph = cfg[\"md_page_break_placeholder\"]\n pipeline = cfg[\"pipeline\"]\n ocr_engine = cfg.get(\"ocr_engine\")\n meta = {\"file_path\": file_path}\n\n try:\n ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, strategy = try_imports()\n converter = create_converter(strategy, InputFormat, DocumentConverter, pipeline, ocr_engine)\n try:\n res = converter.convert(file_path)\n except Exception as e:\n print(json.dumps({\"ok\": False, \"error\": f\"Docling conversion error: {e}\", \"meta\": meta}))\n return\n\n ok = False\n if hasattr(res, \"status\"):\n try:\n ok = (res.status == ConversionStatus.SUCCESS) or (str(res.status).lower() == \"success\")\n except Exception:\n ok = (str(res.status).lower() == \"success\")\n if not ok and hasattr(res, \"document\"):\n ok = getattr(res, \"document\", None) is not None\n if not ok:\n print(json.dumps({\"ok\": False, 
\"error\": \"Docling conversion failed\", \"meta\": meta}))\n return\n\n doc = getattr(res, \"document\", None)\n if doc is None:\n print(json.dumps({\"ok\": False, \"error\": \"Docling produced no document\", \"meta\": meta}))\n return\n\n if markdown:\n text = export_markdown(doc, ImageRefMode, image_mode, img_ph, pg_ph)\n print(json.dumps({\"ok\": True, \"mode\": \"markdown\", \"text\": text, \"meta\": meta}))\n return\n\n # structured\n try:\n doc_dict = doc.export_to_dict()\n except Exception as e:\n print(json.dumps({\"ok\": False, \"error\": f\"Docling export_to_dict failed: {e}\", \"meta\": meta}))\n return\n\n rows = to_rows(doc_dict)\n print(json.dumps({\"ok\": True, \"mode\": \"structured\", \"doc\": rows, \"meta\": meta}))\n except Exception as e:\n print(\n json.dumps({\n \"ok\": False,\n \"error\": f\"Docling processing error: {e}\",\n \"meta\": {\"file_path\": file_path},\n })\n )\n\n if __name__ == \"__main__\":\n main()\n \"\"\"\n )\n\n # Validate file_path to avoid command injection or unsafe input\n if not isinstance(args[\"file_path\"], str) or any(c in args[\"file_path\"] for c in [\";\", \"|\", \"&\", \"$\", \"`\"]):\n return Data(data={\"error\": \"Unsafe file path detected.\", \"file_path\": args[\"file_path\"]})\n\n proc = subprocess.run( # noqa: S603\n [sys.executable, \"-u\", \"-c\", child_script],\n input=json.dumps(args).encode(\"utf-8\"),\n capture_output=True,\n check=False,\n )\n\n if not proc.stdout:\n err_msg = proc.stderr.decode(\"utf-8\", errors=\"replace\") if proc.stderr else \"no output from child process\"\n return Data(data={\"error\": f\"Docling subprocess error: {err_msg}\", \"file_path\": original_file_path})\n\n try:\n result = json.loads(proc.stdout.decode(\"utf-8\"))\n except Exception as e: # noqa: BLE001\n err_msg = proc.stderr.decode(\"utf-8\", errors=\"replace\")\n return Data(\n data={\n \"error\": f\"Invalid JSON from Docling subprocess: {e}. 
stderr={err_msg}\",\n \"file_path\": original_file_path,\n },\n )\n\n if not result.get(\"ok\"):\n error_msg = result.get(\"error\", \"Unknown Docling error\")\n # Override meta file_path with original_file_path to ensure correct path matching\n meta = result.get(\"meta\", {})\n meta[\"file_path\"] = original_file_path\n return Data(data={\"error\": error_msg, **meta})\n\n meta = result.get(\"meta\", {})\n # Override meta file_path with original_file_path to ensure correct path matching\n # The subprocess returns the temp file path, but we need the original S3/local path for rollup_data\n meta[\"file_path\"] = original_file_path\n if result.get(\"mode\") == \"markdown\":\n exported_content = str(result.get(\"text\", \"\"))\n return Data(\n text=exported_content,\n data={\"exported_content\": exported_content, \"export_format\": self.EXPORT_FORMAT, **meta},\n )\n\n rows = list(result.get(\"doc\", []))\n return Data(data={\"doc\": rows, \"export_format\": self.EXPORT_FORMAT, **meta})\n\n def process_files(\n self,\n file_list: list[BaseFileComponent.BaseFile],\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process input files.\n\n - advanced_mode => Docling in a separate process.\n - Otherwise => standard parsing in current process (optionally threaded).\n \"\"\"\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n # Validate image files to detect content/extension mismatches\n # This prevents API errors like \"Image does not match the provided media type\"\n image_extensions = {\"jpeg\", \"jpg\", \"png\", \"gif\", \"webp\", \"bmp\", \"tiff\"}\n settings = get_settings_service().settings\n for file in file_list:\n extension = file.path.suffix[1:].lower()\n if extension in image_extensions:\n # Read bytes based on storage type\n try:\n if settings.storage_type == \"s3\":\n # For S3 storage, use storage service to read file bytes\n file_path_str = str(file.path)\n content = run_until_complete(read_file_bytes(file_path_str))\n else:\n # For 
local storage, read bytes directly from filesystem\n content = file.path.read_bytes()\n\n is_valid, error_msg = validate_image_content_type(\n str(file.path),\n content=content,\n )\n if not is_valid:\n self.log(error_msg)\n if not self.silent_errors:\n raise ValueError(error_msg)\n except (OSError, FileNotFoundError) as e:\n self.log(f\"Could not read file for validation: {e}\")\n # Continue - let it fail later with better error\n\n # Validate that files requiring Docling are only processed when advanced mode is enabled\n if not self.advanced_mode:\n for file in file_list:\n extension = file.path.suffix[1:].lower()\n if extension in self.DOCLING_ONLY_EXTENSIONS:\n if is_astra_cloud_environment():\n msg = (\n f\"File '{file.path.name}' has extension '.{extension}' which requires \"\n f\"Advanced Parser mode. Advanced Parser is not available in cloud environments.\"\n )\n else:\n msg = (\n f\"File '{file.path.name}' has extension '.{extension}' which requires \"\n f\"Advanced Parser mode. Please enable 'Advanced Parser' to process this file.\"\n )\n self.log(msg)\n raise ValueError(msg)\n\n def process_file_standard(file_path: str, *, silent_errors: bool = False) -> Data | None:\n try:\n return parse_text_file_to_data(file_path, silent_errors=silent_errors)\n except FileNotFoundError as e:\n self.log(f\"File not found: {file_path}. 
Error: {e}\")\n if not silent_errors:\n raise\n return None\n except Exception as e:\n self.log(f\"Unexpected error processing {file_path}: {e}\")\n if not silent_errors:\n raise\n return None\n\n docling_compatible = all(self._is_docling_compatible(str(f.path)) for f in file_list)\n\n # Advanced path: Check if ALL files are compatible with Docling\n if self.advanced_mode and docling_compatible:\n final_return: list[BaseFileComponent.BaseFile] = []\n for file in file_list:\n file_path = str(file.path)\n advanced_data: Data | None = self._process_docling_in_subprocess(file_path)\n\n # Handle None case - Docling processing failed or returned None\n if advanced_data is None:\n error_data = Data(\n data={\n \"file_path\": file_path,\n \"error\": \"Docling processing returned no result. Check logs for details.\",\n },\n )\n final_return.extend(self.rollup_data([file], [error_data]))\n continue\n\n # --- UNNEST: expand each element in `doc` to its own Data row\n payload = getattr(advanced_data, \"data\", {}) or {}\n\n # Check for errors first\n if \"error\" in payload:\n error_msg = payload.get(\"error\", \"Unknown error\")\n error_data = Data(\n data={\n \"file_path\": file_path,\n \"error\": error_msg,\n **{k: v for k, v in payload.items() if k not in (\"error\", \"file_path\")},\n },\n )\n final_return.extend(self.rollup_data([file], [error_data]))\n continue\n\n doc_rows = payload.get(\"doc\")\n if isinstance(doc_rows, list) and doc_rows:\n # Non-empty list of structured rows\n rows: list[Data | None] = [\n Data(\n data={\n \"file_path\": file_path,\n **(item if isinstance(item, dict) else {\"value\": item}),\n },\n )\n for item in doc_rows\n ]\n final_return.extend(self.rollup_data([file], rows))\n elif isinstance(doc_rows, list) and not doc_rows:\n # Empty list - file was processed but no text content found\n # Create a Data object indicating no content was extracted\n self.log(f\"No text extracted from '{file_path}', creating placeholder data\")\n empty_data = 
Data(\n data={\n \"file_path\": file_path,\n \"text\": \"(No text content extracted from image)\",\n \"info\": \"Image processed successfully but contained no extractable text\",\n **{k: v for k, v in payload.items() if k != \"doc\"},\n },\n )\n final_return.extend(self.rollup_data([file], [empty_data]))\n else:\n # If not structured, keep as-is (e.g., markdown export or error dict)\n # Ensure file_path is set for proper rollup matching\n if not payload.get(\"file_path\"):\n payload[\"file_path\"] = file_path\n # Create new Data with file_path\n advanced_data = Data(\n data=payload,\n text=getattr(advanced_data, \"text\", None),\n )\n final_return.extend(self.rollup_data([file], [advanced_data]))\n return final_return\n\n # Standard multi-file (or single non-advanced) path\n concurrency = 1 if not self.use_multithreading else max(1, self.concurrency_multithreading)\n\n file_paths = [str(f.path) for f in file_list]\n self.log(f\"Starting parallel processing of {len(file_paths)} files with concurrency: {concurrency}.\")\n my_data = parallel_load_data(\n file_paths,\n silent_errors=self.silent_errors,\n load_function=process_file_standard,\n max_concurrency=concurrency,\n )\n return self.rollup_data(file_list, my_data)\n\n # ------------------------------ Output helpers -----------------------------------\n\n def load_files_helper(self) -> DataFrame:\n result = self.load_files()\n\n # Result is a DataFrame - check if it has any rows\n if result.empty:\n msg = \"Could not extract content from the provided file(s).\"\n raise ValueError(msg)\n\n # Check for error column with error messages\n if \"error\" in result.columns:\n errors = result[\"error\"].dropna().tolist()\n if errors and not any(col in result.columns for col in [\"text\", \"doc\", \"exported_content\"]):\n raise ValueError(errors[0])\n\n return result\n\n def load_files_dataframe(self) -> DataFrame:\n \"\"\"Load files using advanced Docling processing and export to DataFrame format.\"\"\"\n self.markdown = 
False\n return self.load_files_helper()\n\n def load_files_markdown(self) -> Message:\n \"\"\"Load files using advanced Docling processing and export to Markdown format.\"\"\"\n self.markdown = True\n result = self.load_files_helper()\n\n # Result is a DataFrame - check for text or exported_content columns\n if \"text\" in result.columns and not result[\"text\"].isna().all():\n text_values = result[\"text\"].dropna().tolist()\n if text_values:\n return Message(text=str(text_values[0]))\n\n if \"exported_content\" in result.columns and not result[\"exported_content\"].isna().all():\n content_values = result[\"exported_content\"].dropna().tolist()\n if content_values:\n return Message(text=str(content_values[0]))\n\n # Return empty message with info that no text was found\n return Message(text=\"(No text content extracted from file)\")\n"
},
"concurrency_multithreading": {
"_input_type": "IntInput",
"advanced": true,
"display_name": "Processing Concurrency",
"dynamic": false,
"info": "When multiple files are being processed, the number of files to process concurrently.",
"list": false,
"list_add_label": "Add More",
"name": "concurrency_multithreading",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "int",
"value": 1
},
"delete_server_file_after_processing": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Delete Server File After Processing",
"dynamic": false,
"info": "If true, the Server File Path will be deleted after processing.",
"list": false,
"list_add_label": "Add More",
"name": "delete_server_file_after_processing",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": true
},
"doc_key": {
"_input_type": "MessageTextInput",
"advanced": true,
"display_name": "Doc Key",
"dynamic": false,
"info": "The key to use for the DoclingDocument column.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "doc_key",
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": "doc"
},
"file_id": {
"_input_type": "StrInput",
"advanced": false,
"display_name": "Google Drive File ID",
"dynamic": false,
"info": "The Google Drive file ID to read. The file must be shared with the service account email.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "file_id",
"override_skip": false,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"file_path": {
"_input_type": "HandleInput",
"advanced": true,
"display_name": "Server File Path",
"dynamic": false,
"info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supersedes 'Path' but supports same file types.",
"input_types": ["Data", "Message"],
"list": true,
"list_add_label": "Add More",
"name": "file_path",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "other",
"value": ""
},
"file_path_str": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "File Path",
"dynamic": false,
"info": "Path to the file to read. Used when component is called as a tool. If not provided, will use the uploaded file from 'path' input.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "file_path_str",
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"ignore_unspecified_files": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Ignore Unspecified Files",
"dynamic": false,
"info": "If true, Data with no 'file_path' property will be ignored.",
"list": false,
"list_add_label": "Add More",
"name": "ignore_unspecified_files",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
},
"ignore_unsupported_extensions": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Ignore Unsupported Extensions",
"dynamic": false,
"info": "If true, files with unsupported extensions will not be processed.",
"list": false,
"list_add_label": "Add More",
"name": "ignore_unsupported_extensions",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": true
},
"is_refresh": false,
"markdown": {
"_input_type": "BoolInput",
"advanced": false,
"display_name": "Markdown Export",
"dynamic": false,
"info": "Export processed documents to Markdown format. Only available when advanced mode is enabled.",
"list": false,
"list_add_label": "Add More",
"name": "markdown",
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
},
"md_image_placeholder": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "Image placeholder",
"dynamic": false,
"info": "Specify the image placeholder for markdown exports.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "md_image_placeholder",
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": "<!-- image -->"
},
"md_page_break_placeholder": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "Page break placeholder",
"dynamic": false,
"info": "Add this placeholder between pages in the markdown output.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "md_page_break_placeholder",
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"ocr_engine": {
"_input_type": "DropdownInput",
"advanced": true,
"combobox": false,
"dialog_inputs": {},
"display_name": "OCR Engine",
"dynamic": false,
"external_options": {},
"info": "OCR engine to use. Only available when pipeline is set to 'standard'.",
"name": "ocr_engine",
"options": ["None", "easyocr"],
"options_metadata": [],
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"toggle": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "str",
"value": "easyocr"
},
"path": {
"_input_type": "FileInput",
"advanced": false,
"display_name": "Files",
"dynamic": false,
"fileTypes": [
"csv",
"json",
"pdf",
"txt",
"md",
"mdx",
"yaml",
"yml",
"xml",
"html",
"htm",
"docx",
"py",
"sh",
"sql",
"js",
"ts",
"tsx",
"adoc",
"asciidoc",
"asc",
"bmp",
"dotx",
"dotm",
"docm",
"jpg",
"jpeg",
"png",
"potx",
"ppsx",
"pptm",
"potm",
"ppsm",
"pptx",
"tiff",
"xls",
"xlsx",
"xhtml",
"webp",
"zip",
"tar",
"tgz",
"bz2",
"gz"
],
"file_path": [
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0001-ผรม.2-คคง.-QC-0001.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0002-ผรม.2-คคง.-QC-0002.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0003-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0001.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0004-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0002.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0005-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0004.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0006-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0005.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0007-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0006.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0008-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0007.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0009-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0008.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0010-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0009.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0011-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0010.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0012-สคฉ.3-คคง.-0119.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0013-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0011.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0015-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0014.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0016-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0012.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0018-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0015.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0019-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0016.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0020-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0017.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0021-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0018.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0022-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0019.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0023-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0020.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0024-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0021.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0025-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0022.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0026-ผรม.2-คคง.-CHEC-LCP-C2-O-24-0023.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0027-ผรม.2-คคง.-67091601.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0027-เอกสารยกเลิก.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0028-ผรม.2-คคง.-67091602.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0029-ผรม.2-คคง.-67091603.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0030-ผรม.2-คคง.-67091604.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0031-ผรม.2-คคง.-67091701.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0032-ผรม.2-คคง.-67091702.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0033-ผรม.2-คคง.-67091703.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0034-ผรม.2-คคง.-67091704.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0035-ผรม.2-คคง.-67091705.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0035-เอกสารยกเลิก.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0036-ผรม.2-คคง.-67091801.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0037-ผรม.2-คคง.-67091802.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0038-ผรม.2-คคง.-67091803.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0038-ผรม.2-คคง.-67091803ยกเลิก.pdf",
"415f2a06-1b99-4a28-8f2a-159becc509ff/I672-0038-เอกสารยกเลิก.pdf"
],
"info": "Supported file extensions: csv, json, pdf, txt, md, mdx, yaml, yml, xml, html, htm, docx, py, sh, sql, js, ts, tsx, adoc, asciidoc, asc, bmp, dotx, dotm, docm, jpg, jpeg, png, potx, ppsx, pptm, potm, ppsm, pptx, tiff, xls, xlsx, xhtml, webp; optionally bundled in file extensions: zip, tar, tgz, bz2, gz",
"list": true,
"list_add_label": "Add More",
"name": "path",
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"temp_file": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "file",
"value": ""
},
"pipeline": {
"_input_type": "DropdownInput",
"advanced": true,
"combobox": false,
"dialog_inputs": {},
"display_name": "Pipeline",
"dynamic": false,
"external_options": {},
"info": "Docling pipeline to use",
"name": "pipeline",
"options": ["standard", "vlm"],
"options_metadata": [],
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": false,
"title_case": false,
"toggle": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "str",
"value": "standard"
},
"s3_file_key": {
"_input_type": "StrInput",
"advanced": false,
"display_name": "S3 File Key",
"dynamic": false,
"info": "The key (path) of the file in S3 bucket.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "s3_file_key",
"override_skip": false,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"separator": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "Separator",
"dynamic": false,
"info": "Specify the separator to use between multiple outputs in Message format.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "separator",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": "\n\n"
},
"service_account_key": {
"_input_type": "SecretStrInput",
"advanced": false,
"display_name": "GCP Credentials Secret Key",
"dynamic": false,
"info": "Your Google Cloud Platform service account JSON key as a secret string (complete JSON content).",
"input_types": [],
"load_from_db": false,
"name": "service_account_key",
"override_skip": false,
"password": true,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"silent_errors": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Silent Errors",
"dynamic": false,
"info": "If true, errors will not raise an exception.",
"list": false,
"list_add_label": "Add More",
"name": "silent_errors",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
},
"storage_location": {
"_input_type": "SortableListInput",
"advanced": true,
"display_name": "Storage Location",
"dynamic": false,
"info": "Choose where to read the file from.",
"limit": 1,
"name": "storage_location",
"options": [
{
"icon": "hard-drive",
"name": "Local"
},
{
"icon": "Amazon",
"name": "AWS"
},
{
"icon": "google",
"name": "Google Drive"
}
],
"override_skip": false,
"placeholder": "Select Location",
"real_time_refresh": true,
"required": false,
"search_category": [],
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "sortableList",
"value": [
{
"chosen": false,
"icon": "hard-drive",
"name": "Local",
"selected": false
}
]
},
"use_multithreading": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "[Deprecated] Use Multithreading",
"dynamic": false,
"info": "Set 'Processing Concurrency' greater than 1 to enable multithreading.",
"list": false,
"list_add_label": "Add More",
"name": "use_multithreading",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": true
}
},
"tool_mode": false
},
"showNode": true,
"type": "File"
},
"dragging": false,
"id": "File-5V2fL",
"measured": {
"height": 513,
"width": 400
},
"position": {
"x": -869.2220503002395,
"y": -131.38909294380795
},
"selected": false,
"type": "genericNode"
},
{
"data": {
"id": "OllamaModel-xJSnu",
"node": {
"base_classes": ["Data", "DataFrame", "LanguageModel", "Message"],
"beta": false,
"conditional_paths": [],
"custom_fields": {},
"description": "Generate text using Ollama Local LLMs.",
"display_name": "Ollama",
"documentation": "",
"edited": false,
"field_order": [
"base_url",
"model_name",
"api_key",
"temperature",
"format",
"metadata",
"mirostat",
"mirostat_eta",
"mirostat_tau",
"num_ctx",
"num_gpu",
"num_thread",
"repeat_last_n",
"repeat_penalty",
"tfs_z",
"timeout",
"top_k",
"top_p",
"enable_verbose_output",
"tags",
"stop_tokens",
"system",
"tool_model_enabled",
"template",
"enable_structured_output",
"input_value",
"system_message",
"stream"
],
"frozen": false,
"icon": "Ollama",
"last_updated": "2026-03-13T07:57:38.225Z",
"legacy": false,
"metadata": {
"code_hash": "cd3dc38272a7",
"dependencies": {
"dependencies": [
{
"name": "httpx",
"version": "0.28.1"
},
{
"name": "langchain_ollama",
"version": "0.3.10"
},
{
"name": "lfx",
"version": null
}
],
"total_dependencies": 3
},
"keywords": ["model", "llm", "language model", "large language model"],
"module": "lfx.components.ollama.ollama.ChatOllamaComponent"
},
"minimized": false,
"output_types": [],
"outputs": [
{
"allows_loop": false,
"cache": true,
"display_name": "Text",
"group_outputs": false,
"loop_types": null,
"method": "text_response",
"name": "text_output",
"options": null,
"required_inputs": null,
"selected": "Message",
"tool_mode": true,
"types": ["Message"],
"value": "__UNDEFINED__"
},
{
"allows_loop": false,
"cache": true,
"display_name": "Language Model",
"group_outputs": false,
"loop_types": null,
"method": "build_model",
"name": "model_output",
"options": null,
"required_inputs": null,
"tool_mode": true,
"types": ["LanguageModel"],
"value": "__UNDEFINED__"
},
{
"allows_loop": false,
"cache": true,
"display_name": "Data",
"group_outputs": false,
"loop_types": null,
"method": "build_data_output",
"name": "data_output",
"options": null,
"required_inputs": null,
"tool_mode": true,
"types": ["Data"],
"value": "__UNDEFINED__"
},
{
"allows_loop": false,
"cache": true,
"display_name": "DataFrame",
"group_outputs": false,
"loop_types": null,
"method": "build_dataframe_output",
"name": "dataframe_output",
"options": null,
"required_inputs": null,
"tool_mode": true,
"types": ["DataFrame"],
"value": "__UNDEFINED__"
}
],
"pinned": false,
"template": {
"_frontend_node_flow_id": {
"value": "4a538191-04b4-41cf-98d7-8e62aaccf3a8"
},
"_frontend_node_folder_id": {
"value": "60f723dc-b1f8-4e25-9c31-0a4ee07abd5c"
},
"_type": "Component",
"api_key": {
"_input_type": "SecretStrInput",
"advanced": true,
"display_name": "Ollama API Key",
"dynamic": false,
"info": "Your Ollama API key.",
"input_types": [],
"load_from_db": false,
"name": "api_key",
"override_skip": false,
"password": true,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"base_url": {
"_input_type": "StrInput",
"advanced": false,
"display_name": "Ollama API URL",
"dynamic": false,
"info": "Endpoint of the Ollama API. Defaults to http://localhost:11434.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "base_url",
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": "http://localhost:11434"
},
"code": {
"advanced": true,
"dynamic": true,
"fileTypes": [],
"file_path": "",
"info": "",
"list": false,
"load_from_db": false,
"multiline": true,
"name": "code",
"password": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"type": "code",
"value": "import asyncio\nimport json\nfrom contextlib import suppress\nfrom typing import Any\nfrom urllib.parse import urljoin\n\nimport httpx\nfrom langchain_ollama import ChatOllama\n\nfrom lfx.base.models.model import LCModelComponent\nfrom lfx.field_typing import LanguageModel\nfrom lfx.field_typing.range_spec import RangeSpec\nfrom lfx.helpers.base_model import build_model_from_schema\nfrom lfx.io import (\n BoolInput,\n DictInput,\n DropdownInput,\n FloatInput,\n IntInput,\n MessageTextInput,\n Output,\n SecretStrInput,\n SliderInput,\n StrInput,\n TableInput,\n)\nfrom lfx.log.logger import logger\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.table import EditMode\nfrom lfx.utils.util import transform_localhost_url\n\nHTTP_STATUS_OK = 200\nTABLE_ROW_PLACEHOLDER = {\"name\": \"field\", \"description\": \"description of field\", \"type\": \"str\", \"multiple\": \"False\"}\n\n\nclass ChatOllamaComponent(LCModelComponent):\n display_name = \"Ollama\"\n description = \"Generate text using Ollama Local LLMs.\"\n icon = \"Ollama\"\n name = \"OllamaModel\"\n\n # Define constants for JSON keys\n JSON_MODELS_KEY = \"models\"\n JSON_NAME_KEY = \"name\"\n JSON_CAPABILITIES_KEY = \"capabilities\"\n DESIRED_CAPABILITY = \"completion\"\n TOOL_CALLING_CAPABILITY = \"tools\"\n\n # Define the table schema for the format input\n TABLE_SCHEMA = [\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data 
type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"edit_mode\": EditMode.INLINE,\n \"options\": [\"True\", \"False\"],\n \"default\": \"False\",\n },\n ]\n default_table_row = {row[\"name\"]: row.get(\"default\", None) for row in TABLE_SCHEMA}\n default_table_row_schema = build_model_from_schema([default_table_row]).model_json_schema()\n\n inputs = [\n StrInput(\n name=\"base_url\",\n display_name=\"Ollama API URL\",\n info=\"Endpoint of the Ollama API. Defaults to http://localhost:11434.\",\n value=\"http://localhost:11434\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=[],\n info=\"Refer to https://ollama.com/library for more models.\",\n refresh_button=True,\n real_time_refresh=True,\n required=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Ollama API Key\",\n info=\"Your Ollama API key.\",\n value=None,\n required=False,\n real_time_refresh=True,\n advanced=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n TableInput(\n name=\"format\",\n display_name=\"Format\",\n info=\"Specify the format of the output.\",\n table_schema=TABLE_SCHEMA,\n value=default_table_row,\n show=False,\n ),\n DictInput(name=\"metadata\", display_name=\"Metadata\", info=\"Metadata to add to the run trace.\", advanced=True),\n DropdownInput(\n name=\"mirostat\",\n display_name=\"Mirostat\",\n options=[\"Disabled\", \"Mirostat\", \"Mirostat 2.0\"],\n info=\"Enable/disable Mirostat sampling for controlling perplexity.\",\n value=\"Disabled\",\n advanced=True,\n real_time_refresh=True,\n ),\n 
FloatInput(\n name=\"mirostat_eta\",\n display_name=\"Mirostat Eta\",\n info=\"Learning rate for Mirostat algorithm. (Default: 0.1)\",\n advanced=True,\n ),\n FloatInput(\n name=\"mirostat_tau\",\n display_name=\"Mirostat Tau\",\n info=\"Controls the balance between coherence and diversity of the output. (Default: 5.0)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_ctx\",\n display_name=\"Context Window Size\",\n info=\"Size of the context window for generating tokens. (Default: 2048)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_gpu\",\n display_name=\"Number of GPUs\",\n info=\"Number of GPUs to use for computation. (Default: 1 on macOS, 0 to disable)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_thread\",\n display_name=\"Number of Threads\",\n info=\"Number of threads to use during computation. (Default: detected for optimal performance)\",\n advanced=True,\n ),\n IntInput(\n name=\"repeat_last_n\",\n display_name=\"Repeat Last N\",\n info=\"How far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)\",\n advanced=True,\n ),\n FloatInput(\n name=\"repeat_penalty\",\n display_name=\"Repeat Penalty\",\n info=\"Penalty for repetitions in generated text. (Default: 1.1)\",\n advanced=True,\n ),\n FloatInput(name=\"tfs_z\", display_name=\"TFS Z\", info=\"Tail free sampling value. (Default: 1)\", advanced=True),\n IntInput(name=\"timeout\", display_name=\"Timeout\", info=\"Timeout for the request stream.\", advanced=True),\n IntInput(\n name=\"top_k\", display_name=\"Top K\", info=\"Limits token selection to top K. (Default: 40)\", advanced=True\n ),\n FloatInput(name=\"top_p\", display_name=\"Top P\", info=\"Works together with top-k. 
(Default: 0.9)\", advanced=True),\n BoolInput(\n name=\"enable_verbose_output\",\n display_name=\"Ollama Verbose Output\",\n info=\"Whether to print out response text.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"tags\",\n display_name=\"Tags\",\n info=\"Comma-separated list of tags to add to the run trace.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"stop_tokens\",\n display_name=\"Stop Tokens\",\n info=\"Comma-separated list of tokens to signal the model to stop generating text.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"system\", display_name=\"System\", info=\"System to use for generating text.\", advanced=True\n ),\n BoolInput(\n name=\"tool_model_enabled\",\n display_name=\"Tool Model Enabled\",\n info=\"Whether to enable tool calling in the model.\",\n value=True,\n real_time_refresh=True,\n ),\n MessageTextInput(\n name=\"template\", display_name=\"Template\", info=\"Template to use for generating text.\", advanced=True\n ),\n BoolInput(\n name=\"enable_structured_output\",\n display_name=\"Enable Structured Output\",\n info=\"Whether to enable structured output in the model.\",\n value=False,\n advanced=False,\n real_time_refresh=True,\n ),\n *LCModelComponent.get_base_inputs(),\n ]\n\n outputs = [\n Output(display_name=\"Text\", name=\"text_output\", method=\"text_response\"),\n Output(display_name=\"Language Model\", name=\"model_output\", method=\"build_model\"),\n Output(display_name=\"Data\", name=\"data_output\", method=\"build_data_output\"),\n Output(display_name=\"DataFrame\", name=\"dataframe_output\", method=\"build_dataframe_output\"),\n ]\n\n def build_model(self) -> LanguageModel: # type: ignore[type-var]\n # Mapping mirostat settings to their corresponding values\n mirostat_options = {\"Mirostat\": 1, \"Mirostat 2.0\": 2}\n\n # Default to None for 'Disabled'\n mirostat_value = mirostat_options.get(self.mirostat, None)\n\n # Set mirostat_eta and mirostat_tau to None if mirostat is disabled\n if mirostat_value is 
None:\n mirostat_eta = None\n mirostat_tau = None\n else:\n mirostat_eta = self.mirostat_eta\n mirostat_tau = self.mirostat_tau\n\n transformed_base_url = transform_localhost_url(self.base_url)\n\n # Check if URL contains /v1 suffix (OpenAI-compatible mode)\n if transformed_base_url and transformed_base_url.rstrip(\"/\").endswith(\"/v1\"):\n # Strip /v1 suffix and log warning\n transformed_base_url = transformed_base_url.rstrip(\"/\").removesuffix(\"/v1\")\n logger.warning(\n \"Detected '/v1' suffix in base URL. The Ollama component uses the native Ollama API, \"\n \"not the OpenAI-compatible API. The '/v1' suffix has been automatically removed. \"\n \"If you want to use the OpenAI-compatible API, please use the OpenAI component instead. \"\n \"Learn more at https://docs.ollama.com/openai#openai-compatibility\"\n )\n\n try:\n output_format = self._parse_format_field(self.format) if self.enable_structured_output else None\n except Exception as e:\n msg = f\"Failed to parse the format field: {e}\"\n raise ValueError(msg) from e\n\n # Mapping system settings to their corresponding values\n llm_params = {\n \"base_url\": transformed_base_url,\n \"model\": self.model_name,\n \"mirostat\": mirostat_value,\n \"format\": output_format or None,\n \"metadata\": self.metadata,\n \"tags\": self.tags.split(\",\") if self.tags else None,\n \"mirostat_eta\": mirostat_eta,\n \"mirostat_tau\": mirostat_tau,\n \"num_ctx\": self.num_ctx or None,\n \"num_gpu\": self.num_gpu or None,\n \"num_thread\": self.num_thread or None,\n \"repeat_last_n\": self.repeat_last_n or None,\n \"repeat_penalty\": self.repeat_penalty or None,\n \"temperature\": self.temperature or None,\n \"stop\": self.stop_tokens.split(\",\") if self.stop_tokens else None,\n \"system\": self.system,\n \"tfs_z\": self.tfs_z or None,\n \"timeout\": self.timeout or None,\n \"top_k\": self.top_k or None,\n \"top_p\": self.top_p or None,\n \"verbose\": self.enable_verbose_output or False,\n \"template\": self.template,\n 
}\n headers = self.headers\n if headers is not None:\n llm_params[\"client_kwargs\"] = {\"headers\": headers}\n\n # Remove parameters with None values\n llm_params = {k: v for k, v in llm_params.items() if v is not None}\n\n try:\n output = ChatOllama(**llm_params)\n except Exception as e:\n msg = (\n \"Unable to connect to the Ollama API. \"\n \"Please verify the base URL, ensure the relevant Ollama model is pulled, and try again.\"\n )\n raise ValueError(msg) from e\n\n return output\n\n async def is_valid_ollama_url(self, url: str) -> bool:\n try:\n async with httpx.AsyncClient() as client:\n url = transform_localhost_url(url)\n if not url:\n return False\n # Strip /v1 suffix if present, as Ollama API endpoints are at root level\n url = url.rstrip(\"/\").removesuffix(\"/v1\")\n if not url.endswith(\"/\"):\n url = url + \"/\"\n return (\n await client.get(url=urljoin(url, \"api/tags\"), headers=self.headers)\n ).status_code == HTTP_STATUS_OK\n except httpx.RequestError:\n return False\n\n async def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None):\n if field_name == \"enable_structured_output\": # bind enable_structured_output boolean to format show value\n build_config[\"format\"][\"show\"] = field_value\n\n if field_name == \"mirostat\":\n if field_value == \"Disabled\":\n build_config[\"mirostat_eta\"][\"advanced\"] = True\n build_config[\"mirostat_tau\"][\"advanced\"] = True\n build_config[\"mirostat_eta\"][\"value\"] = None\n build_config[\"mirostat_tau\"][\"value\"] = None\n\n else:\n build_config[\"mirostat_eta\"][\"advanced\"] = False\n build_config[\"mirostat_tau\"][\"advanced\"] = False\n\n if field_value == \"Mirostat 2.0\":\n build_config[\"mirostat_eta\"][\"value\"] = 0.2\n build_config[\"mirostat_tau\"][\"value\"] = 10\n else:\n build_config[\"mirostat_eta\"][\"value\"] = 0.1\n build_config[\"mirostat_tau\"][\"value\"] = 5\n\n if field_name in {\"model_name\", \"base_url\", \"tool_model_enabled\"}:\n # 
Use field_value if base_url is being updated, otherwise use self.base_url\n base_url_to_check = field_value if field_name == \"base_url\" else self.base_url\n # Fallback to self.base_url if field_value is None or empty\n if not base_url_to_check and field_name == \"base_url\":\n base_url_to_check = self.base_url\n logger.warning(f\"Fetching Ollama models from updated URL: {base_url_to_check}\")\n\n if base_url_to_check and await self.is_valid_ollama_url(base_url_to_check):\n tool_model_enabled = build_config[\"tool_model_enabled\"].get(\"value\", False) or self.tool_model_enabled\n build_config[\"model_name\"][\"options\"] = await self.get_models(\n base_url_to_check, tool_model_enabled=tool_model_enabled\n )\n else:\n build_config[\"model_name\"][\"options\"] = []\n if field_name == \"keep_alive_flag\":\n if field_value == \"Keep\":\n build_config[\"keep_alive\"][\"value\"] = \"-1\"\n build_config[\"keep_alive\"][\"advanced\"] = True\n elif field_value == \"Immediately\":\n build_config[\"keep_alive\"][\"value\"] = \"0\"\n build_config[\"keep_alive\"][\"advanced\"] = True\n else:\n build_config[\"keep_alive\"][\"advanced\"] = False\n\n return build_config\n\n async def get_models(self, base_url_value: str, *, tool_model_enabled: bool | None = None) -> list[str]:\n \"\"\"Fetches a list of models from the Ollama API suitable for text generation.\n\n Args:\n base_url_value (str): The base URL of the Ollama API.\n tool_model_enabled (bool | None, optional): If True, filters the models further to include\n only those that support tool calling. Defaults to None.\n\n Returns:\n list[str]: A list of model names suitable for text generation. 
Models are included if:\n - They have the \"completion\" capability, OR\n - The capabilities field is not returned (backwards compatibility with older Ollama versions)\n If `tool_model_enabled` is True, only models with verified \"tools\" capability are included\n (models without capabilities info are excluded in this case).\n\n Raises:\n ValueError: If there is an issue with the API request or response, or if the model\n names cannot be retrieved.\n \"\"\"\n try:\n # Strip /v1 suffix if present, as Ollama API endpoints are at root level\n base_url = base_url_value.rstrip(\"/\").removesuffix(\"/v1\")\n if not base_url.endswith(\"/\"):\n base_url = base_url + \"/\"\n base_url = transform_localhost_url(base_url)\n\n # Ollama REST API to return models\n tags_url = urljoin(base_url, \"api/tags\")\n\n # Ollama REST API to return model capabilities\n show_url = urljoin(base_url, \"api/show\")\n\n async with httpx.AsyncClient() as client:\n headers = self.headers\n # Fetch available models\n tags_response = await client.get(url=tags_url, headers=headers)\n tags_response.raise_for_status()\n models = tags_response.json()\n if asyncio.iscoroutine(models):\n models = await models\n await logger.adebug(f\"Available models: {models}\")\n\n # Filter models that are NOT embedding models\n model_ids = []\n for model in models[self.JSON_MODELS_KEY]:\n model_name = model[self.JSON_NAME_KEY]\n await logger.adebug(f\"Checking model: {model_name}\")\n\n payload = {\"model\": model_name}\n show_response = await client.post(url=show_url, json=payload, headers=headers)\n show_response.raise_for_status()\n json_data = show_response.json()\n if asyncio.iscoroutine(json_data):\n json_data = await json_data\n\n capabilities = json_data.get(self.JSON_CAPABILITIES_KEY)\n await logger.adebug(f\"Model: {model_name}, Capabilities: {capabilities}\")\n\n # If capabilities not provided, assume it's a completion model (backwards compatibility\n # with older Ollama versions that don't return 
capabilities from /api/show)\n if capabilities is None:\n if not tool_model_enabled:\n model_ids.append(model_name)\n # If tool_model_enabled is True but no capabilities info, skip the model\n # since we can't verify tool support\n elif self.DESIRED_CAPABILITY in capabilities and (\n not tool_model_enabled or self.TOOL_CALLING_CAPABILITY in capabilities\n ):\n model_ids.append(model_name)\n\n except (httpx.RequestError, ValueError) as e:\n msg = \"Could not get model names from Ollama.\"\n raise ValueError(msg) from e\n\n return model_ids\n\n def _parse_format_field(self, format_value: Any) -> Any:\n \"\"\"Parse the format field to handle both string and dict inputs.\n\n The format field can be:\n - A simple string like \"json\" (backward compatibility)\n - A JSON string from NestedDictInput that needs parsing\n - A dict/JSON schema (already parsed)\n - None or empty\n\n Args:\n format_value: The raw format value from the input field\n\n Returns:\n Parsed format value as string, dict, or None\n \"\"\"\n if not format_value:\n return None\n\n schema = format_value\n if isinstance(format_value, list):\n schema = build_model_from_schema(format_value).model_json_schema()\n if schema == self.default_table_row_schema:\n return None # the rows are generic placeholder rows\n elif isinstance(format_value, str): # parse as json if string\n with suppress(json.JSONDecodeError): # e.g., literal \"json\" is valid for format field\n schema = json.loads(format_value)\n\n return schema or None\n\n async def _parse_json_response(self) -> Any:\n \"\"\"Parse the JSON response from the model.\n\n This method gets the text response and attempts to parse it as JSON.\n Works with models that have format='json' or a JSON schema set.\n\n Returns:\n Parsed JSON (dict, list, or primitive type)\n\n Raises:\n ValueError: If the response is not valid JSON\n \"\"\"\n message = await self.text_response()\n text = message.text if hasattr(message, \"text\") else str(message)\n\n if not text:\n msg = 
\"No response from model\"\n raise ValueError(msg)\n\n try:\n return json.loads(text)\n except json.JSONDecodeError as e:\n msg = f\"Invalid JSON response. Ensure model supports JSON output. Error: {e}\"\n raise ValueError(msg) from e\n\n async def build_data_output(self) -> Data:\n \"\"\"Build a Data output from the model's JSON response.\n\n Returns:\n Data: A Data object containing the parsed JSON response\n \"\"\"\n parsed = await self._parse_json_response()\n\n # If the response is already a dict, wrap it in Data\n if isinstance(parsed, dict):\n return Data(data=parsed)\n\n # If it's a list, wrap in a results container\n if isinstance(parsed, list):\n if len(parsed) == 1:\n return Data(data=parsed[0])\n return Data(data={\"results\": parsed})\n\n # For primitive types, wrap in a value container\n return Data(data={\"value\": parsed})\n\n async def build_dataframe_output(self) -> DataFrame:\n \"\"\"Build a DataFrame output from the model's JSON response.\n\n Returns:\n DataFrame: A DataFrame containing the parsed JSON response\n\n Raises:\n ValueError: If the response cannot be converted to a DataFrame\n \"\"\"\n parsed = await self._parse_json_response()\n\n # If it's a list of dicts, convert directly to DataFrame\n if isinstance(parsed, list):\n if not parsed:\n return DataFrame()\n # Ensure all items are dicts for proper DataFrame conversion\n if all(isinstance(item, dict) for item in parsed):\n return DataFrame(parsed)\n msg = \"List items must be dictionaries to convert to DataFrame\"\n raise ValueError(msg)\n\n # If it's a single dict, wrap in a list to create a single-row DataFrame\n if isinstance(parsed, dict):\n return DataFrame([parsed])\n\n # For primitive types, create a single-column DataFrame\n return DataFrame([{\"value\": parsed}])\n\n @property\n def headers(self) -> dict[str, str] | None:\n \"\"\"Get the headers for the Ollama API.\"\"\"\n if self.api_key and self.api_key.strip():\n return {\"Authorization\": f\"Bearer {self.api_key}\"}\n 
return None\n"
},
"enable_structured_output": {
"_input_type": "BoolInput",
"advanced": false,
"display_name": "Enable Structured Output",
"dynamic": false,
"info": "Whether to enable structured output in the model.",
"list": false,
"list_add_label": "Add More",
"name": "enable_structured_output",
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": true
},
"enable_verbose_output": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Ollama Verbose Output",
"dynamic": false,
"info": "Whether to print out response text.",
"list": false,
"list_add_label": "Add More",
"name": "enable_verbose_output",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
},
"format": {
"_input_type": "TableInput",
"advanced": false,
"display_name": "Format",
"dynamic": false,
"info": "Specify the format of the output.",
"is_list": true,
"list_add_label": "Add More",
"name": "format",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"table_icon": "Table",
"table_schema": [
{
"default": "field",
"description": "Specify the name of the output field.",
"display_name": "Name",
"edit_mode": "inline",
"formatter": "text",
"name": "name",
"type": "str"
},
{
"default": "description of field",
"description": "Describe the purpose of the output field.",
"display_name": "Description",
"edit_mode": "popover",
"formatter": "text",
"name": "description",
"type": "str"
},
{
"default": "str",
"description": "Indicate the data type of the output field (e.g., str, int, float, bool, dict).",
"display_name": "Type",
"edit_mode": "inline",
"formatter": "text",
"name": "type",
"options": ["str", "int", "float", "bool", "dict"],
"type": "str"
},
{
"default": "False",
"description": "Set to True if this output field should be a list of the specified type.",
"display_name": "As List",
"edit_mode": "inline",
"formatter": "text",
"name": "multiple",
"options": ["True", "False"],
"type": "boolean"
}
],
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"trigger_icon": "Table",
"trigger_text": "Open table",
"type": "table",
"value": [
{
"description": "description of field",
"multiple": "False",
"name": "field",
"type": "str"
}
]
},
"input_value": {
"_input_type": "MessageInput",
"advanced": false,
"display_name": "Input",
"dynamic": false,
"info": "",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "input_value",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"is_refresh": false,
"metadata": {
"_input_type": "DictInput",
"advanced": true,
"display_name": "Metadata",
"dynamic": false,
"info": "Metadata to add to the run trace.",
"list": false,
"list_add_label": "Add More",
"name": "metadata",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"track_in_telemetry": false,
"type": "dict",
"value": {}
},
"mirostat": {
"_input_type": "DropdownInput",
"advanced": true,
"combobox": false,
"dialog_inputs": {},
"display_name": "Mirostat",
"dynamic": false,
"external_options": {},
"info": "Enable/disable Mirostat sampling for controlling perplexity.",
"name": "mirostat",
"options": ["Disabled", "Mirostat", "Mirostat 2.0"],
"options_metadata": [],
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"toggle": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "str",
"value": "Disabled"
},
"mirostat_eta": {
"_input_type": "FloatInput",
"advanced": true,
"display_name": "Mirostat Eta",
"dynamic": false,
"info": "Learning rate for Mirostat algorithm. (Default: 0.1)",
"list": false,
"list_add_label": "Add More",
"name": "mirostat_eta",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "float",
"value": ""
},
"mirostat_tau": {
"_input_type": "FloatInput",
"advanced": true,
"display_name": "Mirostat Tau",
"dynamic": false,
"info": "Controls the balance between coherence and diversity of the output. (Default: 5.0)",
"list": false,
"list_add_label": "Add More",
"name": "mirostat_tau",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "float",
"value": ""
},
"model_name": {
"_input_type": "DropdownInput",
"advanced": false,
"combobox": false,
"dialog_inputs": {},
"display_name": "Model Name",
"dynamic": false,
"external_options": {},
"info": "Refer to https://ollama.com/library for more models.",
"name": "model_name",
"options": ["scb10x/typhoon2.1-gemma3-4b:latest", "qwen2.5:7b-instruct-q4_K_M"],
"options_metadata": [],
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"refresh_button": true,
"required": true,
"show": true,
"title_case": false,
"toggle": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "str",
"value": "scb10x/typhoon2.1-gemma3-4b:latest"
},
"num_ctx": {
"_input_type": "IntInput",
"advanced": true,
"display_name": "Context Window Size",
"dynamic": false,
"info": "Size of the context window for generating tokens. (Default: 2048)",
"list": false,
"list_add_label": "Add More",
"name": "num_ctx",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "int",
"value": 0
},
"num_gpu": {
"_input_type": "IntInput",
"advanced": true,
"display_name": "Number of GPUs",
"dynamic": false,
"info": "Number of GPUs to use for computation. (Default: 1 on macOS, 0 to disable)",
"list": false,
"list_add_label": "Add More",
"name": "num_gpu",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "int",
"value": 0
},
"num_thread": {
"_input_type": "IntInput",
"advanced": true,
"display_name": "Number of Threads",
"dynamic": false,
"info": "Number of threads to use during computation. (Default: detected for optimal performance)",
"list": false,
"list_add_label": "Add More",
"name": "num_thread",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "int",
"value": 0
},
"repeat_last_n": {
"_input_type": "IntInput",
"advanced": true,
"display_name": "Repeat Last N",
"dynamic": false,
"info": "How far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)",
"list": false,
"list_add_label": "Add More",
"name": "repeat_last_n",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "int",
"value": 0
},
"repeat_penalty": {
"_input_type": "FloatInput",
"advanced": true,
"display_name": "Repeat Penalty",
"dynamic": false,
"info": "Penalty for repetitions in generated text. (Default: 1.1)",
"list": false,
"list_add_label": "Add More",
"name": "repeat_penalty",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "float",
"value": ""
},
"stop_tokens": {
"_input_type": "MessageTextInput",
"advanced": true,
"display_name": "Stop Tokens",
"dynamic": false,
"info": "Comma-separated list of tokens to signal the model to stop generating text.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "stop_tokens",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"stream": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Stream",
"dynamic": false,
"info": "Stream the response from the model. Streaming works only in Chat.",
"list": false,
"list_add_label": "Add More",
"name": "stream",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
},
"system": {
"_input_type": "MessageTextInput",
"advanced": true,
"display_name": "System",
"dynamic": false,
"info": "System to use for generating text.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "system",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"system_message": {
"_input_type": "MultilineInput",
"advanced": false,
"ai_enabled": false,
"copy_field": false,
"display_name": "System Message",
"dynamic": false,
"info": "System message to pass to the model.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"multiline": true,
"name": "system_message",
"override_skip": false,
"password": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"tags": {
"_input_type": "MessageTextInput",
"advanced": true,
"display_name": "Tags",
"dynamic": false,
"info": "Comma-separated list of tags to add to the run trace.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "tags",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"temperature": {
"_input_type": "SliderInput",
"advanced": true,
"display_name": "Temperature",
"dynamic": false,
"info": "",
"max_label": "",
"max_label_icon": "",
"min_label": "",
"min_label_icon": "",
"name": "temperature",
"override_skip": false,
"placeholder": "",
"range_spec": {
"max": 1,
"min": 0,
"step": 0.01,
"step_type": "float"
},
"required": false,
"show": true,
"slider_buttons": false,
"slider_buttons_options": [],
"slider_input": false,
"title_case": false,
"tool_mode": false,
"track_in_telemetry": false,
"type": "slider",
"value": 0.1
},
"template": {
"_input_type": "MessageTextInput",
"advanced": true,
"display_name": "Template",
"dynamic": false,
"info": "Template to use for generating text.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "template",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"tfs_z": {
"_input_type": "FloatInput",
"advanced": true,
"display_name": "TFS Z",
"dynamic": false,
"info": "Tail free sampling value. (Default: 1)",
"list": false,
"list_add_label": "Add More",
"name": "tfs_z",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "float",
"value": ""
},
"timeout": {
"_input_type": "IntInput",
"advanced": true,
"display_name": "Timeout",
"dynamic": false,
"info": "Timeout for the request stream.",
"list": false,
"list_add_label": "Add More",
"name": "timeout",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "int",
"value": 0
},
"tool_model_enabled": {
"_input_type": "BoolInput",
"advanced": false,
"display_name": "Tool Model Enabled",
"dynamic": false,
"info": "Whether to enable tool calling in the model.",
"list": false,
"list_add_label": "Add More",
"name": "tool_model_enabled",
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": true
},
"top_k": {
"_input_type": "IntInput",
"advanced": true,
"display_name": "Top K",
"dynamic": false,
"info": "Limits token selection to top K. (Default: 40)",
"list": false,
"list_add_label": "Add More",
"name": "top_k",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "int",
"value": 0
},
"top_p": {
"_input_type": "FloatInput",
"advanced": true,
"display_name": "Top P",
"dynamic": false,
"info": "Works together with top-k. (Default: 0.9)",
"list": false,
"list_add_label": "Add More",
"name": "top_p",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "float",
"value": ""
}
},
"tool_mode": false
},
"selected_output": "text_output",
"showNode": true,
"type": "OllamaModel"
},
"dragging": false,
"id": "OllamaModel-xJSnu",
"measured": {
"height": 776,
"width": 400
},
"position": {
"x": 1073.8111821659295,
"y": -546.2113692797573
},
"selected": false,
"type": "genericNode"
},
{
"data": {
"id": "LoopComponent-5vFOr",
"node": {
"base_classes": ["Data", "DataFrame"],
"beta": false,
"conditional_paths": [],
"custom_fields": {},
"description": "Iterates through Data or Message objects, processing items individually and aggregating results from loop inputs.",
"display_name": "Loop",
"documentation": "https://docs.langflow.org/loop",
"edited": false,
"field_order": ["data"],
"frozen": false,
"icon": "infinity",
"legacy": false,
"metadata": {
"code_hash": "e516ea99611c",
"dependencies": {
"dependencies": [
{
"name": "lfx",
"version": null
}
],
"total_dependencies": 1
},
"module": "lfx.components.flow_controls.loop.LoopComponent"
},
"minimized": false,
"output_types": [],
"outputs": [
{
"allows_loop": true,
"cache": true,
"display_name": "Item",
"group_outputs": true,
"loop_types": ["Message"],
"method": "item_output",
"name": "item",
"selected": "Data",
"tool_mode": true,
"types": ["Data"],
"value": "__UNDEFINED__"
},
{
"allows_loop": false,
"cache": true,
"display_name": "Done",
"group_outputs": true,
"method": "done_output",
"name": "done",
"selected": "DataFrame",
"tool_mode": true,
"types": ["DataFrame"],
"value": "__UNDEFINED__"
}
],
"pinned": false,
"template": {
"_type": "Component",
"code": {
"advanced": true,
"dynamic": true,
"fileTypes": [],
"file_path": "",
"info": "",
"list": false,
"load_from_db": false,
"multiline": true,
"name": "code",
"password": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"type": "code",
"value": "from lfx.base.flow_controls.loop_utils import (\n execute_loop_body,\n extract_loop_output,\n get_loop_body_start_edge,\n get_loop_body_start_vertex,\n get_loop_body_vertices,\n validate_data_input,\n)\nfrom lfx.components.processing.converter import convert_to_data\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.inputs.inputs import HandleInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.template.field.base import Output\n\n\nclass LoopComponent(Component):\n display_name = \"Loop\"\n description = (\n \"Iterates through Data or Message objects, processing items individually \"\n \"and aggregating results from loop inputs.\"\n )\n documentation: str = \"https://docs.langflow.org/loop\"\n icon = \"infinity\"\n\n inputs = [\n HandleInput(\n name=\"data\",\n display_name=\"Inputs\",\n info=\"The initial DataFrame to iterate over.\",\n input_types=[\"DataFrame\"],\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Item\",\n name=\"item\",\n method=\"item_output\",\n allows_loop=True,\n loop_types=[\"Message\"],\n group_outputs=True,\n ),\n Output(display_name=\"Done\", name=\"done\", method=\"done_output\", group_outputs=True),\n ]\n\n def initialize_data(self) -> None:\n \"\"\"Initialize the data list, context index, and aggregated list.\"\"\"\n if self.ctx.get(f\"{self._id}_initialized\", False):\n return\n\n # Ensure data is a list of Data objects\n data_list = self._validate_data(self.data)\n\n # Store the initial data and context variables\n self.update_ctx(\n {\n f\"{self._id}_data\": data_list,\n f\"{self._id}_index\": 0,\n f\"{self._id}_aggregated\": [],\n f\"{self._id}_initialized\": True,\n }\n )\n\n def _convert_message_to_data(self, message: Message) -> Data:\n \"\"\"Convert a Message object to a Data object using Type Convert logic.\"\"\"\n return convert_to_data(message, auto_parse=False)\n\n def _validate_data(self, data):\n 
\"\"\"Validate and return a list of Data objects.\"\"\"\n return validate_data_input(data)\n\n def get_loop_body_vertices(self) -> set[str]:\n \"\"\"Identify vertices in this loop's body via graph traversal.\n\n Traverses from the loop's \"item\" output to the vertex that feeds back\n to the loop's \"item\" input, collecting all vertices in between.\n This naturally handles nested loops by stopping at this loop's feedback edge.\n\n Returns:\n Set of vertex IDs that form this loop's body\n \"\"\"\n # Check if we have a proper graph context\n if not hasattr(self, \"_vertex\") or self._vertex is None:\n return set()\n\n return get_loop_body_vertices(\n vertex=self._vertex,\n graph=self.graph,\n get_incoming_edge_by_target_param_fn=self.get_incoming_edge_by_target_param,\n )\n\n def _get_loop_body_start_vertex(self) -> str | None:\n \"\"\"Get the first vertex in the loop body (connected to loop's item output).\n\n Returns:\n The vertex ID of the first vertex in the loop body, or None if not found\n \"\"\"\n # Check if we have a proper graph context\n if not hasattr(self, \"_vertex\") or self._vertex is None:\n return None\n\n return get_loop_body_start_vertex(vertex=self._vertex)\n\n def _extract_loop_output(self, results: list) -> Data:\n \"\"\"Extract the output from subgraph execution results.\n\n Args:\n results: List of VertexBuildResult objects from subgraph execution\n\n Returns:\n Data object containing the loop iteration output\n \"\"\"\n # Get the vertex ID that feeds back to the item input (end of loop body)\n end_vertex_id = self.get_incoming_edge_by_target_param(\"item\")\n return extract_loop_output(results=results, end_vertex_id=end_vertex_id)\n\n async def execute_loop_body(self, data_list: list[Data], event_manager=None) -> list[Data]:\n \"\"\"Execute loop body for each data item.\n\n Creates an isolated subgraph for the loop body and executes it\n for each item in the data list, collecting results.\n\n Args:\n data_list: List of Data objects to 
iterate over\n event_manager: Optional event manager to pass to subgraph execution for UI events\n\n Returns:\n List of Data objects containing results from each iteration\n \"\"\"\n # Get the loop body configuration once\n loop_body_vertex_ids = self.get_loop_body_vertices()\n start_vertex_id = self._get_loop_body_start_vertex()\n start_edge = get_loop_body_start_edge(self._vertex)\n end_vertex_id = self.get_incoming_edge_by_target_param(\"item\")\n\n return await execute_loop_body(\n graph=self.graph,\n data_list=data_list,\n loop_body_vertex_ids=loop_body_vertex_ids,\n start_vertex_id=start_vertex_id,\n start_edge=start_edge,\n end_vertex_id=end_vertex_id,\n event_manager=event_manager,\n )\n\n def item_output(self) -> Data:\n \"\"\"Output is no longer used - loop executes internally now.\n\n This method is kept for backward compatibility but does nothing.\n The actual loop execution happens in done_output().\n \"\"\"\n self.stop(\"item\")\n return Data(text=\"\")\n\n async def done_output(self) -> DataFrame:\n \"\"\"Execute the loop body for all items and return aggregated results.\n\n This is now the main execution point for the loop. It:\n 1. Gets the data list to iterate over\n 2. Executes the loop body as an isolated subgraph for each item\n 3. Returns the aggregated results\n\n Args:\n event_manager: Optional event manager for UI event emission\n \"\"\"\n self.initialize_data()\n\n # Get data list\n data_list = self.ctx.get(f\"{self._id}_data\", [])\n\n if not data_list:\n return DataFrame([])\n\n # Execute loop body for all items\n try:\n aggregated_results = await self.execute_loop_body(data_list, event_manager=self._event_manager)\n return DataFrame(aggregated_results)\n except Exception as e:\n # Log error and return empty DataFrame\n from lfx.log.logger import logger\n\n await logger.aerror(f\"Error executing loop body: {e}\")\n raise\n"
},
"data": {
"_input_type": "HandleInput",
"advanced": false,
"display_name": "Inputs",
"dynamic": false,
"info": "The initial DataFrame to iterate over.",
"input_types": ["DataFrame"],
"list": false,
"list_add_label": "Add More",
"name": "data",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "other",
"value": ""
}
},
"tool_mode": false
},
"showNode": true,
"type": "LoopComponent"
},
"dragging": false,
"id": "LoopComponent-5vFOr",
"measured": {
"height": 305,
"width": 400
},
"position": {
"x": -381.0556267521341,
"y": -161.3531393697653
},
"selected": false,
"type": "genericNode"
},
{
"data": {
"id": "Prompt Template-dKwcS",
"node": {
"base_classes": ["Message"],
"beta": false,
"conditional_paths": [],
"custom_fields": {
"template": ["extracted_text"]
},
"description": "Create a prompt template with dynamic variables.",
"display_name": "Prompt Template",
"documentation": "https://docs.langflow.org/components-prompts",
"edited": false,
"error": null,
"field_order": ["template", "use_double_brackets", "tool_placeholder"],
"frozen": false,
"full_path": null,
"icon": "prompts",
"is_composition": null,
"is_input": null,
"is_output": null,
"legacy": false,
"metadata": {
"code_hash": "5b3e6730923e",
"dependencies": {
"dependencies": [
{
"name": "lfx",
"version": null
}
],
"total_dependencies": 1
},
"module": "lfx.components.models_and_agents.prompt.PromptComponent"
},
"minimized": false,
"name": "",
"output_types": [],
"outputs": [
{
"allows_loop": false,
"cache": true,
"display_name": "Prompt",
"group_outputs": false,
"hidden": null,
"loop_types": null,
"method": "build_prompt",
"name": "prompt",
"options": null,
"required_inputs": null,
"selected": "Message",
"tool_mode": true,
"types": ["Message"],
"value": "__UNDEFINED__"
}
],
"pinned": false,
"priority": null,
"replacement": null,
"template": {
"_type": "Component",
"code": {
"advanced": true,
"dynamic": true,
"fileTypes": [],
"file_path": "",
"info": "",
"list": false,
"load_from_db": false,
"multiline": true,
"name": "code",
"password": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"type": "code",
"value": "from typing import Any\n\nfrom lfx.base.prompts.api_utils import process_prompt_template\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.inputs.input_mixin import FieldTypes\nfrom lfx.inputs.inputs import DefaultPromptField\nfrom lfx.io import BoolInput, MessageTextInput, Output, PromptInput\nfrom lfx.log.logger import logger\nfrom lfx.schema.dotdict import dotdict\nfrom lfx.schema.message import Message\nfrom lfx.template.utils import update_template_values\nfrom lfx.utils.mustache_security import validate_mustache_template\n\n\nclass PromptComponent(Component):\n display_name: str = \"Prompt Template\"\n description: str = \"Create a prompt template with dynamic variables.\"\n documentation: str = \"https://docs.langflow.org/components-prompts\"\n icon = \"prompts\"\n trace_type = \"prompt\"\n name = \"Prompt Template\"\n\n inputs = [\n PromptInput(name=\"template\", display_name=\"Template\"),\n BoolInput(\n name=\"use_double_brackets\",\n display_name=\"Use Double Brackets\",\n value=False,\n advanced=True,\n info=\"Use {{variable}} syntax instead of {variable}.\",\n real_time_refresh=True,\n ),\n MessageTextInput(\n name=\"tool_placeholder\",\n display_name=\"Tool Placeholder\",\n tool_mode=True,\n advanced=True,\n info=\"A placeholder input for tool mode.\",\n ),\n ]\n\n outputs = [\n Output(display_name=\"Prompt\", name=\"prompt\", method=\"build_prompt\"),\n ]\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n \"\"\"Update the template field type based on the selected mode.\"\"\"\n if field_name == \"use_double_brackets\":\n # Change the template field type based on mode\n is_mustache = field_value is True\n if is_mustache:\n build_config[\"template\"][\"type\"] = FieldTypes.MUSTACHE_PROMPT.value\n else:\n build_config[\"template\"][\"type\"] = FieldTypes.PROMPT.value\n\n # Re-process the template to update variables when mode changes\n template_value = 
build_config.get(\"template\", {}).get(\"value\", \"\")\n if template_value:\n # Ensure custom_fields is properly initialized\n if \"custom_fields\" not in build_config:\n build_config[\"custom_fields\"] = {}\n\n # Clean up fields from the OLD mode before processing with NEW mode\n # This ensures we don't keep fields with wrong syntax even if validation fails\n old_custom_fields = build_config[\"custom_fields\"].get(\"template\", [])\n for old_field in list(old_custom_fields):\n # Remove the field from custom_fields and template\n if old_field in old_custom_fields:\n old_custom_fields.remove(old_field)\n build_config.pop(old_field, None)\n\n # Try to process template with new mode to add new variables\n # If validation fails, at least we cleaned up old fields\n try:\n # Validate mustache templates for security\n if is_mustache:\n validate_mustache_template(template_value)\n\n # Re-process template with new mode to add new variables\n _ = process_prompt_template(\n template=template_value,\n name=\"template\",\n custom_fields=build_config[\"custom_fields\"],\n frontend_node_template=build_config,\n is_mustache=is_mustache,\n )\n except ValueError as e:\n # If validation fails, we still updated the mode and cleaned old fields\n # User will see error when they try to save\n logger.debug(f\"Template validation failed during mode switch: {e}\")\n return build_config\n\n async def build_prompt(self) -> Message:\n use_double_brackets = self.use_double_brackets if hasattr(self, \"use_double_brackets\") else False\n template_format = \"mustache\" if use_double_brackets else \"f-string\"\n prompt = await Message.from_template_and_variables(template_format=template_format, **self._attributes)\n self.status = prompt.text\n return prompt\n\n def _update_template(self, frontend_node: dict):\n prompt_template = frontend_node[\"template\"][\"template\"][\"value\"]\n use_double_brackets = frontend_node[\"template\"].get(\"use_double_brackets\", {}).get(\"value\", False)\n 
is_mustache = use_double_brackets is True\n\n try:\n # Validate mustache templates for security\n if is_mustache:\n validate_mustache_template(prompt_template)\n\n custom_fields = frontend_node[\"custom_fields\"]\n frontend_node_template = frontend_node[\"template\"]\n _ = process_prompt_template(\n template=prompt_template,\n name=\"template\",\n custom_fields=custom_fields,\n frontend_node_template=frontend_node_template,\n is_mustache=is_mustache,\n )\n except ValueError as e:\n # If validation fails, don't add variables but allow component to be created\n logger.debug(f\"Template validation failed in _update_template: {e}\")\n return frontend_node\n\n async def update_frontend_node(self, new_frontend_node: dict, current_frontend_node: dict):\n \"\"\"This function is called after the code validation is done.\"\"\"\n frontend_node = await super().update_frontend_node(new_frontend_node, current_frontend_node)\n template = frontend_node[\"template\"][\"template\"][\"value\"]\n use_double_brackets = frontend_node[\"template\"].get(\"use_double_brackets\", {}).get(\"value\", False)\n is_mustache = use_double_brackets is True\n\n try:\n # Validate mustache templates for security\n if is_mustache:\n validate_mustache_template(template)\n\n # Kept it duplicated for backwards compatibility\n _ = process_prompt_template(\n template=template,\n name=\"template\",\n custom_fields=frontend_node[\"custom_fields\"],\n frontend_node_template=frontend_node[\"template\"],\n is_mustache=is_mustache,\n )\n except ValueError as e:\n # If validation fails, don't add variables but allow component to be updated\n logger.debug(f\"Template validation failed in update_frontend_node: {e}\")\n # Now that template is updated, we need to grab any values that were set in the current_frontend_node\n # and update the frontend_node with those values\n update_template_values(new_template=frontend_node, previous_template=current_frontend_node[\"template\"])\n return frontend_node\n\n def 
_get_fallback_input(self, **kwargs):\n return DefaultPromptField(**kwargs)\n"
},
"extracted_text": {
"advanced": false,
"display_name": "extracted_text",
"dynamic": false,
"field_type": "str",
"fileTypes": [],
"file_path": "",
"info": "",
"input_types": ["Message"],
"list": false,
"load_from_db": false,
"multiline": true,
"name": "extracted_text",
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"type": "str",
"value": ""
},
"template": {
"_input_type": "PromptInput",
"advanced": false,
"display_name": "Template",
"dynamic": false,
"info": "",
"list": false,
"list_add_label": "Add More",
"name": "template",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"track_in_telemetry": false,
"type": "prompt",
"value": "คุณเป็นผู้ช่วย AI สำหรับระบบจัดการเอกสารก่อสร้าง LCBP3\nดึง Metadata จากเอกสาร แล้วตอบกลับเป็น JSON เท่านั้น ห้ามมีข้อความอื่น\nเอกสารอาจเป็นภาษาไทย อังกฤษ หรือผสมกัน\n\nReturn ONLY this JSON structure:\n{{\n \"source_file\": \"<ชื่อไฟล์ที่รับมา>\",\n \"is_valid\": true,\n \"confidence\": 0.0,\n \"extracted_text\": \"<ข้อความเต็ม max 2000 chars>\",\n \"metadata\": {{\n \"correspondence_number\": \"<เลขที่เอกสาร หรือ null>\",\n \"title\": \"<หัวข้อเอกสาร>\",\n \"document_date\": \"<YYYY-MM-DD หรือ null>\",\n \"sender_org\": \"<ชื่อย่อองค์กรผู้ส่ง หรือ null>\",\n \"receiver_org\": \"<ชื่อย่อองค์กรผู้รับ หรือ null>\",\n \"project_code\": \"<รหัสโครงการ เช่น LCBP3 หรือ null>\",\n \"suggested_category\": \"<Correspondence | RFA | ContractDrawing | ShopDrawing>\",\n \"detected_issues\": []\n }},\n \"chunks\": [\n {{\"chunk_index\": 0, \"page\": 1, \"text\": \"<ข้อความส่วนแรก max 500 chars>\"}}\n ]\n}}\n\nDocument text to analyze:\n{extracted_text}\n"
},
"tool_placeholder": {
"_input_type": "MessageTextInput",
"advanced": true,
"display_name": "Tool Placeholder",
"dynamic": false,
"info": "A placeholder input for tool mode.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "tool_placeholder",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": true,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"use_double_brackets": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Use Double Brackets",
"dynamic": false,
"info": "Use {{variable}} syntax instead of {variable}.",
"list": false,
"list_add_label": "Add More",
"name": "use_double_brackets",
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
}
},
"tool_mode": false
},
"showNode": true,
"type": "Prompt Template"
},
"dragging": false,
"id": "Prompt Template-dKwcS",
"measured": {
"height": 429,
"width": 400
},
"position": {
"x": 559.0376004371929,
"y": -533.3166409357893
},
"selected": false,
"type": "genericNode"
},
{
"data": {
"id": "ParserComponent-Xspgr",
"node": {
"base_classes": ["Message"],
"beta": false,
"conditional_paths": [],
"custom_fields": {},
"description": "Extracts text using a template.",
"display_name": "Parser",
"documentation": "https://docs.langflow.org/parser",
"edited": false,
"field_order": ["input_data", "mode", "pattern", "sep"],
"frozen": false,
"icon": "braces",
"last_updated": "2026-03-13T08:19:27.565Z",
"legacy": false,
"metadata": {
"code_hash": "3cda25c3f7b5",
"dependencies": {
"dependencies": [
{
"name": "lfx",
"version": null
}
],
"total_dependencies": 1
},
"module": "lfx.components.processing.parser.ParserComponent"
},
"minimized": false,
"output_types": [],
"outputs": [
{
"allows_loop": false,
"cache": true,
"display_name": "Parsed Text",
"group_outputs": false,
"loop_types": null,
"method": "parse_combined_text",
"name": "parsed_text",
"options": null,
"required_inputs": null,
"selected": "Message",
"tool_mode": true,
"types": ["Message"],
"value": "__UNDEFINED__"
}
],
"pinned": false,
"template": {
"_frontend_node_flow_id": {
"value": "4a538191-04b4-41cf-98d7-8e62aaccf3a8"
},
"_frontend_node_folder_id": {
"value": "60f723dc-b1f8-4e25-9c31-0a4ee07abd5c"
},
"_type": "Component",
"clean_data": {
"_input_type": "BoolInput",
"advanced": true,
"display_name": "Clean Data",
"dynamic": false,
"info": "Enable to clean the data by removing empty rows and lines in each cell of the DataFrame/ Data object.",
"list": false,
"list_add_label": "Add More",
"name": "clean_data",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": true
},
"code": {
"advanced": true,
"dynamic": true,
"fileTypes": [],
"file_path": "",
"info": "",
"list": false,
"load_from_db": false,
"multiline": true,
"name": "code",
"password": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"type": "code",
"value": "from lfx.custom.custom_component.component import Component\nfrom lfx.helpers.data import safe_convert\nfrom lfx.inputs.inputs import BoolInput, HandleInput, MessageTextInput, MultilineInput, TabInput\nfrom lfx.schema.data import Data\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.schema.message import Message\nfrom lfx.template.field.base import Output\n\n\nclass ParserComponent(Component):\n display_name = \"Parser\"\n description = \"Extracts text using a template.\"\n documentation: str = \"https://docs.langflow.org/parser\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"input_data\",\n display_name=\"Data or DataFrame\",\n input_types=[\"DataFrame\", \"Data\"],\n info=\"Accepts either a DataFrame or a Data object.\",\n required=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Parser\", \"Stringify\"],\n value=\"Parser\",\n info=\"Convert into raw string instead of using a template.\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"pattern\",\n display_name=\"Template\",\n info=(\n \"Use variables within curly brackets to extract column values for DataFrames \"\n \"or key values for Data.\"\n \"For example: `Name: {Name}, Age: {Age}, Country: {Country}`\"\n ),\n value=\"Text: {text}\", # Example default\n dynamic=True,\n show=True,\n required=True,\n ),\n MessageTextInput(\n name=\"sep\",\n display_name=\"Separator\",\n advanced=True,\n value=\"\\n\",\n info=\"String used to separate rows/items.\",\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Parsed Text\",\n name=\"parsed_text\",\n info=\"Formatted text output.\",\n method=\"parse_combined_text\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Dynamically hide/show `template` and enforce requirement based on `stringify`.\"\"\"\n if field_name == \"mode\":\n build_config[\"pattern\"][\"show\"] = self.mode == \"Parser\"\n build_config[\"pattern\"][\"required\"] = self.mode == \"Parser\"\n 
if field_value:\n clean_data = BoolInput(\n name=\"clean_data\",\n display_name=\"Clean Data\",\n info=(\n \"Enable to clean the data by removing empty rows and lines \"\n \"in each cell of the DataFrame/ Data object.\"\n ),\n value=True,\n advanced=True,\n required=False,\n )\n build_config[\"clean_data\"] = clean_data.to_dict()\n else:\n build_config.pop(\"clean_data\", None)\n\n return build_config\n\n def _clean_args(self):\n \"\"\"Prepare arguments based on input type.\"\"\"\n input_data = self.input_data\n\n match input_data:\n case list() if all(isinstance(item, Data) for item in input_data):\n msg = \"List of Data objects is not supported.\"\n raise ValueError(msg)\n case DataFrame():\n return input_data, None\n case Data():\n return None, input_data\n case dict() if \"data\" in input_data:\n try:\n if \"columns\" in input_data: # Likely a DataFrame\n return DataFrame.from_dict(input_data), None\n # Likely a Data object\n return None, Data(**input_data)\n except (TypeError, ValueError, KeyError) as e:\n msg = f\"Invalid structured input provided: {e!s}\"\n raise ValueError(msg) from e\n case _:\n msg = f\"Unsupported input type: {type(input_data)}. 
Expected DataFrame or Data.\"\n raise ValueError(msg)\n\n def parse_combined_text(self) -> Message:\n \"\"\"Parse all rows/items into a single text or convert input to string if `stringify` is enabled.\"\"\"\n # Early return for stringify option\n if self.mode == \"Stringify\":\n return self.convert_to_string()\n\n df, data = self._clean_args()\n\n lines = []\n if df is not None:\n for _, row in df.iterrows():\n formatted_text = self.pattern.format(**row.to_dict())\n lines.append(formatted_text)\n elif data is not None:\n # Use format_map with a dict that returns default_value for missing keys\n class DefaultDict(dict):\n def __missing__(self, key):\n return data.default_value or \"\"\n\n formatted_text = self.pattern.format_map(DefaultDict(data.data))\n lines.append(formatted_text)\n\n combined_text = self.sep.join(lines)\n self.status = combined_text\n return Message(text=combined_text)\n\n def convert_to_string(self) -> Message:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n result = \"\"\n if isinstance(self.input_data, list):\n result = \"\\n\".join([safe_convert(item, clean_data=self.clean_data or False) for item in self.input_data])\n else:\n result = safe_convert(self.input_data or False)\n self.log(f\"Converted to string with length: {len(result)}\")\n\n message = Message(text=result)\n self.status = message\n return message\n"
},
"input_data": {
"_input_type": "HandleInput",
"advanced": false,
"display_name": "Data or DataFrame",
"dynamic": false,
"info": "Accepts either a DataFrame or a Data object.",
"input_types": ["DataFrame", "Data"],
"list": false,
"list_add_label": "Add More",
"name": "input_data",
"override_skip": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "other",
"value": ""
},
"is_refresh": false,
"mode": {
"_input_type": "TabInput",
"advanced": false,
"display_name": "Mode",
"dynamic": false,
"info": "Convert into raw string instead of using a template.",
"name": "mode",
"options": ["Parser", "Stringify"],
"override_skip": false,
"placeholder": "",
"real_time_refresh": true,
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "tab",
"value": "Stringify"
},
"pattern": {
"_input_type": "MultilineInput",
"advanced": false,
"ai_enabled": false,
"copy_field": false,
"display_name": "Template",
"dynamic": true,
"info": "Use variables within curly brackets to extract column values for DataFrames or key values for Data.For example: `Name: {Name}, Age: {Age}, Country: {Country}`",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"multiline": true,
"name": "pattern",
"override_skip": false,
"password": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": "Text: {text}"
},
"sep": {
"_input_type": "MessageTextInput",
"advanced": true,
"display_name": "Separator",
"dynamic": false,
"info": "String used to separate rows/items.",
"input_types": ["Message"],
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "sep",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_input": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": "\n"
}
},
"tool_mode": false
},
"showNode": true,
"type": "ParserComponent"
},
"dragging": false,
"id": "ParserComponent-Xspgr",
"measured": {
"height": 310,
"width": 400
},
"position": {
"x": 78.4571915835524,
"y": -393.7689699613337
},
"selected": false,
"type": "genericNode"
},
{
"data": {
"id": "SaveToFile-M0RUY",
"node": {
"base_classes": ["Message"],
"beta": false,
"conditional_paths": [],
"custom_fields": {},
"description": "Save data to local file, AWS S3, or Google Drive in the selected format.",
"display_name": "Write File",
"documentation": "https://docs.langflow.org/write-file",
"edited": false,
"field_order": [
"storage_location",
"input",
"file_name",
"append_mode",
"local_format",
"aws_format",
"gdrive_format",
"aws_access_key_id",
"aws_secret_access_key",
"bucket_name",
"aws_region",
"s3_prefix",
"service_account_key",
"folder_id"
],
"frozen": false,
"icon": "file-text",
"last_updated": "2026-03-13T08:33:30.601Z",
"legacy": false,
"metadata": {
"code_hash": "6d0e4842271e",
"dependencies": {
"dependencies": [
{
"name": "orjson",
"version": "3.10.15"
},
{
"name": "pandas",
"version": "2.2.3"
},
{
"name": "fastapi",
"version": "0.133.1"
},
{
"name": "lfx",
"version": null
},
{
"name": "langflow",
"version": null
},
{
"name": "boto3",
"version": "1.40.61"
},
{
"name": "googleapiclient",
"version": "2.154.0"
}
],
"total_dependencies": 7
},
"module": "lfx.components.files_and_knowledge.save_file.SaveToFileComponent"
},
"minimized": false,
"output_types": [],
"outputs": [
{
"allows_loop": false,
"cache": true,
"display_name": "File Path",
"group_outputs": false,
"hidden": null,
"loop_types": null,
"method": "save_to_file",
"name": "message",
"options": null,
"required_inputs": null,
"selected": "Message",
"tool_mode": true,
"types": ["Message"],
"value": "__UNDEFINED__"
}
],
"pinned": false,
"template": {
"_frontend_node_flow_id": {
"value": "4a538191-04b4-41cf-98d7-8e62aaccf3a8"
},
"_frontend_node_folder_id": {
"value": "60f723dc-b1f8-4e25-9c31-0a4ee07abd5c"
},
"_type": "Component",
"append_mode": {
"_input_type": "BoolInput",
"advanced": false,
"display_name": "Append",
"dynamic": false,
"info": "Append to file if it exists (only for Local storage with plain text formats). Not supported for cloud storage (AWS/Google Drive).",
"list": false,
"list_add_label": "Add More",
"name": "append_mode",
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "bool",
"value": false
},
"aws_access_key_id": {
"_input_type": "SecretStrInput",
"advanced": true,
"display_name": "AWS Access Key ID",
"dynamic": false,
"info": "AWS Access key ID.",
"input_types": [],
"load_from_db": false,
"name": "aws_access_key_id",
"override_skip": false,
"password": true,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"aws_format": {
"_input_type": "DropdownInput",
"advanced": false,
"combobox": false,
"dialog_inputs": {},
"display_name": "File Format",
"dynamic": false,
"external_options": {},
"info": "Select the file format for AWS S3 storage.",
"name": "aws_format",
"options": [
"txt",
"json",
"csv",
"xml",
"html",
"md",
"yaml",
"log",
"tsv",
"jsonl",
"parquet",
"xlsx",
"zip"
],
"options_metadata": [],
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"toggle": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "str",
"value": "txt"
},
"aws_region": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "AWS Region",
"dynamic": false,
"info": "AWS region (e.g., us-east-1, eu-west-1).",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "aws_region",
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"aws_secret_access_key": {
"_input_type": "SecretStrInput",
"advanced": true,
"display_name": "AWS Secret Key",
"dynamic": false,
"info": "AWS Secret Key.",
"input_types": [],
"load_from_db": false,
"name": "aws_secret_access_key",
"override_skip": false,
"password": true,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"bucket_name": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "S3 Bucket Name",
"dynamic": false,
"info": "Enter the name of the S3 bucket.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "bucket_name",
"override_skip": false,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"code": {
"advanced": true,
"dynamic": true,
"fileTypes": [],
"file_path": "",
"info": "",
"list": false,
"load_from_db": false,
"multiline": true,
"name": "code",
"password": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"type": "code",
"value": "import json\nfrom collections.abc import AsyncIterator, Iterator\nfrom pathlib import Path\nfrom typing import Any\n\nimport orjson\nimport pandas as pd\nfrom fastapi import UploadFile\nfrom fastapi.encoders import jsonable_encoder\n\nfrom lfx.custom import Component\nfrom lfx.inputs import SortableListInput\nfrom lfx.io import BoolInput, DropdownInput, HandleInput, SecretStrInput, StrInput\nfrom lfx.schema import Data, DataFrame, Message\nfrom lfx.services.deps import get_settings_service, get_storage_service, session_scope\nfrom lfx.template.field.base import Output\nfrom lfx.utils.validate_cloud import is_astra_cloud_environment\n\n\ndef _get_storage_location_options():\n \"\"\"Get storage location options, filtering out Local if in Astra cloud environment.\"\"\"\n all_options = [{\"name\": \"AWS\", \"icon\": \"Amazon\"}, {\"name\": \"Google Drive\", \"icon\": \"google\"}]\n if is_astra_cloud_environment():\n return all_options\n return [{\"name\": \"Local\", \"icon\": \"hard-drive\"}, *all_options]\n\n\nclass SaveToFileComponent(Component):\n display_name = \"Write File\"\n description = \"Save data to local file, AWS S3, or Google Drive in the selected format.\"\n documentation: str = \"https://docs.langflow.org/write-file\"\n icon = \"file-text\"\n name = \"SaveToFile\"\n\n # File format options for different storage types\n LOCAL_DATA_FORMAT_CHOICES = [\"csv\", \"excel\", \"json\", \"markdown\"]\n LOCAL_MESSAGE_FORMAT_CHOICES = [\"txt\", \"json\", \"markdown\"]\n AWS_FORMAT_CHOICES = [\n \"txt\",\n \"json\",\n \"csv\",\n \"xml\",\n \"html\",\n \"md\",\n \"yaml\",\n \"log\",\n \"tsv\",\n \"jsonl\",\n \"parquet\",\n \"xlsx\",\n \"zip\",\n ]\n GDRIVE_FORMAT_CHOICES = [\"txt\", \"json\", \"csv\", \"xlsx\", \"slides\", \"docs\", \"jpg\", \"mp3\"]\n\n inputs = [\n SortableListInput(\n name=\"storage_location\",\n display_name=\"Storage Location\",\n placeholder=\"Select Location\",\n info=\"Choose where to save the file.\",\n 
options=_get_storage_location_options(),\n real_time_refresh=True,\n limit=1,\n value=[{\"name\": \"Local\", \"icon\": \"hard-drive\"}],\n advanced=True,\n ),\n # Common inputs\n HandleInput(\n name=\"input\",\n display_name=\"File Content\",\n info=\"The input to save.\",\n dynamic=True,\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n StrInput(\n name=\"file_name\",\n display_name=\"File Name\",\n info=\"Name file will be saved as (without extension).\",\n required=True,\n show=False,\n tool_mode=True,\n ),\n BoolInput(\n name=\"append_mode\",\n display_name=\"Append\",\n info=(\n \"Append to file if it exists (only for Local storage with plain text formats). \"\n \"Not supported for cloud storage (AWS/Google Drive).\"\n ),\n value=False,\n show=False,\n ),\n # Format inputs (dynamic based on storage location)\n DropdownInput(\n name=\"local_format\",\n display_name=\"File Format\",\n options=list(dict.fromkeys(LOCAL_DATA_FORMAT_CHOICES + LOCAL_MESSAGE_FORMAT_CHOICES)),\n info=\"Select the file format for local storage.\",\n value=\"json\",\n show=False,\n ),\n DropdownInput(\n name=\"aws_format\",\n display_name=\"File Format\",\n options=AWS_FORMAT_CHOICES,\n info=\"Select the file format for AWS S3 storage.\",\n value=\"txt\",\n show=False,\n ),\n DropdownInput(\n name=\"gdrive_format\",\n display_name=\"File Format\",\n options=GDRIVE_FORMAT_CHOICES,\n info=\"Select the file format for Google Drive storage.\",\n value=\"txt\",\n show=False,\n ),\n # AWS S3 specific inputs\n SecretStrInput(\n name=\"aws_access_key_id\",\n display_name=\"AWS Access Key ID\",\n info=\"AWS Access key ID.\",\n show=False,\n advanced=True,\n required=True,\n ),\n SecretStrInput(\n name=\"aws_secret_access_key\",\n display_name=\"AWS Secret Key\",\n info=\"AWS Secret Key.\",\n show=False,\n advanced=True,\n required=True,\n ),\n StrInput(\n name=\"bucket_name\",\n display_name=\"S3 Bucket Name\",\n info=\"Enter the name of the S3 bucket.\",\n 
show=False,\n advanced=True,\n required=True,\n ),\n StrInput(\n name=\"aws_region\",\n display_name=\"AWS Region\",\n info=\"AWS region (e.g., us-east-1, eu-west-1).\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"s3_prefix\",\n display_name=\"S3 Prefix\",\n info=\"Prefix for all files in S3.\",\n show=False,\n advanced=True,\n ),\n # Google Drive specific inputs\n SecretStrInput(\n name=\"service_account_key\",\n display_name=\"GCP Credentials Secret Key\",\n info=\"Your Google Cloud Platform service account JSON key as a secret string (complete JSON content).\",\n show=False,\n advanced=True,\n required=True,\n ),\n StrInput(\n name=\"folder_id\",\n display_name=\"Google Drive Folder ID\",\n info=(\n \"The Google Drive folder ID where the file will be uploaded. \"\n \"The folder must be shared with the service account email.\"\n ),\n required=True,\n show=False,\n advanced=True,\n ),\n ]\n\n outputs = [Output(display_name=\"File Path\", name=\"message\", method=\"save_to_file\")]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Update build configuration to show/hide fields based on storage location selection.\"\"\"\n # Update options dynamically based on cloud environment\n # This ensures options are refreshed when build_config is updated\n if \"storage_location\" in build_config:\n updated_options = _get_storage_location_options()\n build_config[\"storage_location\"][\"options\"] = updated_options\n\n if field_name != \"storage_location\":\n return build_config\n\n # Extract selected storage location\n selected = [location[\"name\"] for location in field_value] if isinstance(field_value, list) else []\n\n # Hide all dynamic fields first\n dynamic_fields = [\n \"file_name\", # Common fields (input is always visible)\n \"append_mode\",\n \"local_format\",\n \"aws_format\",\n \"gdrive_format\",\n \"aws_access_key_id\",\n \"aws_secret_access_key\",\n \"bucket_name\",\n \"aws_region\",\n \"s3_prefix\",\n 
\"service_account_key\",\n \"folder_id\",\n ]\n\n for f_name in dynamic_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = False\n\n # Show fields based on selected storage location\n if len(selected) == 1:\n location = selected[0]\n\n # Show file_name when any storage location is selected\n if \"file_name\" in build_config:\n build_config[\"file_name\"][\"show\"] = True\n\n # Show append_mode only for Local storage (not supported for cloud storage)\n if \"append_mode\" in build_config:\n build_config[\"append_mode\"][\"show\"] = location == \"Local\"\n\n if location == \"Local\":\n if \"local_format\" in build_config:\n build_config[\"local_format\"][\"show\"] = True\n\n elif location == \"AWS\":\n aws_fields = [\n \"aws_format\",\n \"aws_access_key_id\",\n \"aws_secret_access_key\",\n \"bucket_name\",\n \"aws_region\",\n \"s3_prefix\",\n ]\n for f_name in aws_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = True\n build_config[f_name][\"advanced\"] = False\n\n elif location == \"Google Drive\":\n gdrive_fields = [\"gdrive_format\", \"service_account_key\", \"folder_id\"]\n for f_name in gdrive_fields:\n if f_name in build_config:\n build_config[f_name][\"show\"] = True\n build_config[f_name][\"advanced\"] = False\n\n return build_config\n\n async def save_to_file(self) -> Message:\n \"\"\"Save the input to a file and upload it, returning a confirmation message.\"\"\"\n # Validate inputs\n if not self.file_name:\n msg = \"File name must be provided.\"\n raise ValueError(msg)\n if not self._get_input_type():\n msg = \"Input type is not set.\"\n raise ValueError(msg)\n\n # Get selected storage location\n storage_location = self._get_selected_storage_location()\n if not storage_location:\n msg = \"Storage location must be selected.\"\n raise ValueError(msg)\n\n # Check if Local storage is disabled in cloud environment\n if storage_location == \"Local\" and is_astra_cloud_environment():\n msg = \"Local storage is not 
available in cloud environment. Please use AWS or Google Drive.\"\n raise ValueError(msg)\n\n # Route to appropriate save method based on storage location\n if storage_location == \"Local\":\n return await self._save_to_local()\n if storage_location == \"AWS\":\n return await self._save_to_aws()\n if storage_location == \"Google Drive\":\n return await self._save_to_google_drive()\n msg = f\"Unsupported storage location: {storage_location}\"\n raise ValueError(msg)\n\n def _get_input_type(self) -> str:\n \"\"\"Determine the input type based on the provided input.\"\"\"\n # Use exact type checking (type() is) instead of isinstance() to avoid inheritance issues.\n # Since Message inherits from Data, isinstance(message, Data) would return True for Message objects,\n # causing Message inputs to be incorrectly identified as Data type.\n if type(self.input) is DataFrame:\n return \"DataFrame\"\n if type(self.input) is Message:\n return \"Message\"\n if type(self.input) is Data:\n return \"Data\"\n msg = f\"Unsupported input type: {type(self.input)}\"\n raise ValueError(msg)\n\n def _get_default_format(self) -> str:\n \"\"\"Return the default file format based on input type.\"\"\"\n if self._get_input_type() == \"DataFrame\":\n return \"csv\"\n if self._get_input_type() == \"Data\":\n return \"json\"\n if self._get_input_type() == \"Message\":\n return \"json\"\n return \"json\" # Fallback\n\n def _adjust_file_path_with_format(self, path: Path, fmt: str) -> Path:\n \"\"\"Adjust the file path to include the correct extension.\"\"\"\n file_extension = path.suffix.lower().lstrip(\".\")\n if fmt == \"excel\":\n return Path(f\"{path}.xlsx\").expanduser() if file_extension not in [\"xlsx\", \"xls\"] else path\n return Path(f\"{path}.{fmt}\").expanduser() if file_extension != fmt else path\n\n def _is_plain_text_format(self, fmt: str) -> bool:\n \"\"\"Check if a file format is plain text (supports appending).\"\"\"\n plain_text_formats = [\"txt\", \"json\", \"markdown\", \"md\", 
\"csv\", \"xml\", \"html\", \"yaml\", \"log\", \"tsv\", \"jsonl\"]\n return fmt.lower() in plain_text_formats\n\n async def _upload_file(self, file_path: Path) -> None:\n \"\"\"Upload the saved file using the upload_user_file service.\"\"\"\n from langflow.api.v2.files import upload_user_file\n from langflow.services.database.models.user.crud import get_user_by_id\n\n # Ensure the file exists\n if not file_path.exists():\n msg = f\"File not found: {file_path}\"\n raise FileNotFoundError(msg)\n\n # Upload the file - always use append=False because the local file already contains\n # the correct content (either new or appended locally)\n with file_path.open(\"rb\") as f:\n async with session_scope() as db:\n if not self.user_id:\n msg = \"User ID is required for file saving.\"\n raise ValueError(msg)\n current_user = await get_user_by_id(db, self.user_id)\n\n await upload_user_file(\n file=UploadFile(filename=file_path.name, file=f, size=file_path.stat().st_size),\n session=db,\n current_user=current_user,\n storage_service=get_storage_service(),\n settings_service=get_settings_service(),\n append=False,\n )\n\n def _save_dataframe(self, dataframe: DataFrame, path: Path, fmt: str) -> str:\n \"\"\"Save a DataFrame to the specified file format.\"\"\"\n append_mode = getattr(self, \"append_mode\", False)\n should_append = append_mode and path.exists() and self._is_plain_text_format(fmt)\n\n if fmt == \"csv\":\n dataframe.to_csv(path, index=False, mode=\"a\" if should_append else \"w\", header=not should_append)\n elif fmt == \"excel\":\n dataframe.to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n if should_append:\n # Read and parse existing JSON\n existing_data = []\n try:\n existing_content = path.read_text(encoding=\"utf-8\").strip()\n if existing_content:\n parsed = json.loads(existing_content)\n # Handle case where existing content is a single object\n if isinstance(parsed, dict):\n existing_data = [parsed]\n elif isinstance(parsed, 
list):\n existing_data = parsed\n except (json.JSONDecodeError, FileNotFoundError):\n # Treat parse errors or missing file as empty array\n existing_data = []\n\n # Append new data\n new_records = json.loads(dataframe.to_json(orient=\"records\"))\n existing_data.extend(new_records)\n\n # Write back as a single JSON array\n path.write_text(json.dumps(existing_data, indent=2), encoding=\"utf-8\")\n else:\n dataframe.to_json(path, orient=\"records\", indent=2)\n elif fmt == \"markdown\":\n content = dataframe.to_markdown(index=False)\n if should_append:\n path.write_text(path.read_text(encoding=\"utf-8\") + \"\\n\\n\" + content, encoding=\"utf-8\")\n else:\n path.write_text(content, encoding=\"utf-8\")\n else:\n msg = f\"Unsupported DataFrame format: {fmt}\"\n raise ValueError(msg)\n action = \"appended to\" if should_append else \"saved successfully as\"\n return f\"DataFrame {action} '{path}'\"\n\n def _save_data(self, data: Data, path: Path, fmt: str) -> str:\n \"\"\"Save a Data object to the specified file format.\"\"\"\n append_mode = getattr(self, \"append_mode\", False)\n should_append = append_mode and path.exists() and self._is_plain_text_format(fmt)\n\n if fmt == \"csv\":\n pd.DataFrame(data.data).to_csv(\n path,\n index=False,\n mode=\"a\" if should_append else \"w\",\n header=not should_append,\n )\n elif fmt == \"excel\":\n pd.DataFrame(data.data).to_excel(path, index=False, engine=\"openpyxl\")\n elif fmt == \"json\":\n new_data = jsonable_encoder(data.data)\n if should_append:\n # Read and parse existing JSON\n existing_data = []\n try:\n existing_content = path.read_text(encoding=\"utf-8\").strip()\n if existing_content:\n parsed = json.loads(existing_content)\n # Handle case where existing content is a single object\n if isinstance(parsed, dict):\n existing_data = [parsed]\n elif isinstance(parsed, list):\n existing_data = parsed\n except (json.JSONDecodeError, FileNotFoundError):\n # Treat parse errors or missing file as empty array\n existing_data = 
[]\n\n # Append new data\n if isinstance(new_data, list):\n existing_data.extend(new_data)\n else:\n existing_data.append(new_data)\n\n # Write back as a single JSON array\n path.write_text(json.dumps(existing_data, indent=2), encoding=\"utf-8\")\n else:\n content = orjson.dumps(new_data, option=orjson.OPT_INDENT_2).decode(\"utf-8\")\n path.write_text(content, encoding=\"utf-8\")\n elif fmt == \"markdown\":\n content = pd.DataFrame(data.data).to_markdown(index=False)\n if should_append:\n path.write_text(path.read_text(encoding=\"utf-8\") + \"\\n\\n\" + content, encoding=\"utf-8\")\n else:\n path.write_text(content, encoding=\"utf-8\")\n else:\n msg = f\"Unsupported Data format: {fmt}\"\n raise ValueError(msg)\n action = \"appended to\" if should_append else \"saved successfully as\"\n return f\"Data {action} '{path}'\"\n\n async def _save_message(self, message: Message, path: Path, fmt: str) -> str:\n \"\"\"Save a Message to the specified file format, handling async iterators.\"\"\"\n content = \"\"\n if message.text is None:\n content = \"\"\n elif isinstance(message.text, AsyncIterator):\n async for item in message.text:\n content += str(item) + \" \"\n content = content.strip()\n elif isinstance(message.text, Iterator):\n content = \" \".join(str(item) for item in message.text)\n else:\n content = str(message.text)\n\n append_mode = getattr(self, \"append_mode\", False)\n should_append = append_mode and path.exists() and self._is_plain_text_format(fmt)\n\n if fmt == \"txt\":\n if should_append:\n path.write_text(path.read_text(encoding=\"utf-8\") + \"\\n\" + content, encoding=\"utf-8\")\n else:\n path.write_text(content, encoding=\"utf-8\")\n elif fmt == \"json\":\n new_message = {\"message\": content}\n if should_append:\n # Read and parse existing JSON\n existing_data = []\n try:\n existing_content = path.read_text(encoding=\"utf-8\").strip()\n if existing_content:\n parsed = json.loads(existing_content)\n # Handle case where existing content is a single 
object\n if isinstance(parsed, dict):\n existing_data = [parsed]\n elif isinstance(parsed, list):\n existing_data = parsed\n except (json.JSONDecodeError, FileNotFoundError):\n # Treat parse errors or missing file as empty array\n existing_data = []\n\n # Append new message\n existing_data.append(new_message)\n\n # Write back as a single JSON array\n path.write_text(json.dumps(existing_data, indent=2), encoding=\"utf-8\")\n else:\n path.write_text(json.dumps(new_message, indent=2), encoding=\"utf-8\")\n elif fmt == \"markdown\":\n md_content = f\"**Message:**\\n\\n{content}\"\n if should_append:\n path.write_text(path.read_text(encoding=\"utf-8\") + \"\\n\\n\" + md_content, encoding=\"utf-8\")\n else:\n path.write_text(md_content, encoding=\"utf-8\")\n else:\n msg = f\"Unsupported Message format: {fmt}\"\n raise ValueError(msg)\n action = \"appended to\" if should_append else \"saved successfully as\"\n return f\"Message {action} '{path}'\"\n\n def _get_selected_storage_location(self) -> str:\n \"\"\"Get the selected storage location from the SortableListInput.\"\"\"\n if hasattr(self, \"storage_location\") and self.storage_location:\n if isinstance(self.storage_location, list) and len(self.storage_location) > 0:\n return self.storage_location[0].get(\"name\", \"\")\n if isinstance(self.storage_location, dict):\n return self.storage_location.get(\"name\", \"\")\n return \"\"\n\n def _get_file_format_for_location(self, location: str) -> str:\n \"\"\"Get the appropriate file format based on storage location.\"\"\"\n if location == \"Local\":\n return getattr(self, \"local_format\", None) or self._get_default_format()\n if location == \"AWS\":\n return getattr(self, \"aws_format\", \"txt\")\n if location == \"Google Drive\":\n return getattr(self, \"gdrive_format\", \"txt\")\n return self._get_default_format()\n\n async def _save_to_local(self) -> Message:\n \"\"\"Save file to local storage (original functionality).\"\"\"\n file_format = 
self._get_file_format_for_location(\"Local\")\n\n # Validate file format based on input type\n allowed_formats = (\n self.LOCAL_MESSAGE_FORMAT_CHOICES if self._get_input_type() == \"Message\" else self.LOCAL_DATA_FORMAT_CHOICES\n )\n if file_format not in allowed_formats:\n msg = f\"Invalid file format '{file_format}' for {self._get_input_type()}. Allowed: {allowed_formats}\"\n raise ValueError(msg)\n\n # Prepare file path\n file_path = Path(self.file_name).expanduser()\n if not file_path.parent.exists():\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path = self._adjust_file_path_with_format(file_path, file_format)\n\n # Save the input to file based on type\n if self._get_input_type() == \"DataFrame\":\n confirmation = self._save_dataframe(self.input, file_path, file_format)\n elif self._get_input_type() == \"Data\":\n confirmation = self._save_data(self.input, file_path, file_format)\n elif self._get_input_type() == \"Message\":\n confirmation = await self._save_message(self.input, file_path, file_format)\n else:\n msg = f\"Unsupported input type: {self._get_input_type()}\"\n raise ValueError(msg)\n\n # Upload the saved file\n await self._upload_file(file_path)\n\n # Return the final file path and confirmation message\n final_path = Path.cwd() / file_path if not file_path.is_absolute() else file_path\n return Message(text=f\"{confirmation} at {final_path}\")\n\n async def _save_to_aws(self) -> Message:\n \"\"\"Save file to AWS S3 using S3 functionality.\"\"\"\n import os\n\n import boto3\n\n from lfx.base.data.cloud_storage_utils import create_s3_client, validate_aws_credentials\n\n # Get AWS credentials from component inputs or fall back to environment variables\n aws_access_key_id = getattr(self, \"aws_access_key_id\", None)\n if aws_access_key_id and hasattr(aws_access_key_id, \"get_secret_value\"):\n aws_access_key_id = aws_access_key_id.get_secret_value()\n if not aws_access_key_id:\n aws_access_key_id = os.getenv(\"AWS_ACCESS_KEY_ID\")\n\n 
aws_secret_access_key = getattr(self, \"aws_secret_access_key\", None)\n if aws_secret_access_key and hasattr(aws_secret_access_key, \"get_secret_value\"):\n aws_secret_access_key = aws_secret_access_key.get_secret_value()\n if not aws_secret_access_key:\n aws_secret_access_key = os.getenv(\"AWS_SECRET_ACCESS_KEY\")\n\n bucket_name = getattr(self, \"bucket_name\", None)\n if not bucket_name:\n # Try to get from storage service settings\n settings = get_settings_service().settings\n bucket_name = settings.object_storage_bucket_name\n\n # Validate AWS credentials\n if not aws_access_key_id:\n msg = (\n \"AWS Access Key ID is required for S3 storage. Provide it as a component input \"\n \"or set AWS_ACCESS_KEY_ID environment variable.\"\n )\n raise ValueError(msg)\n if not aws_secret_access_key:\n msg = (\n \"AWS Secret Key is required for S3 storage. Provide it as a component input \"\n \"or set AWS_SECRET_ACCESS_KEY environment variable.\"\n )\n raise ValueError(msg)\n if not bucket_name:\n msg = (\n \"S3 Bucket Name is required for S3 storage. 
Provide it as a component input \"\n \"or set LANGFLOW_OBJECT_STORAGE_BUCKET_NAME environment variable.\"\n )\n raise ValueError(msg)\n\n # Validate AWS credentials\n validate_aws_credentials(self)\n\n # Create S3 client\n s3_client = create_s3_client(self)\n client_config: dict[str, Any] = {\n \"aws_access_key_id\": str(aws_access_key_id),\n \"aws_secret_access_key\": str(aws_secret_access_key),\n }\n\n # Get region from component input, environment variable, or settings\n aws_region = getattr(self, \"aws_region\", None)\n if not aws_region:\n aws_region = os.getenv(\"AWS_DEFAULT_REGION\") or os.getenv(\"AWS_REGION\")\n if aws_region:\n client_config[\"region_name\"] = str(aws_region)\n\n s3_client = boto3.client(\"s3\", **client_config)\n\n # Extract content\n content = self._extract_content_for_upload()\n file_format = self._get_file_format_for_location(\"AWS\")\n\n # Generate file path\n file_path = f\"{self.file_name}.{file_format}\"\n if hasattr(self, \"s3_prefix\") and self.s3_prefix:\n file_path = f\"{self.s3_prefix.rstrip('/')}/{file_path}\"\n\n # Create temporary file\n import tempfile\n\n with tempfile.NamedTemporaryFile(\n mode=\"w\", encoding=\"utf-8\", suffix=f\".{file_format}\", delete=False\n ) as temp_file:\n temp_file.write(content)\n temp_file_path = temp_file.name\n\n try:\n # Upload to S3\n s3_client.upload_file(temp_file_path, bucket_name, file_path)\n s3_url = f\"s3://{bucket_name}/{file_path}\"\n return Message(text=f\"File successfully uploaded to {s3_url}\")\n finally:\n # Clean up temp file\n if Path(temp_file_path).exists():\n Path(temp_file_path).unlink()\n\n async def _save_to_google_drive(self) -> Message:\n \"\"\"Save file to Google Drive using Google Drive functionality.\"\"\"\n import tempfile\n\n from googleapiclient.http import MediaFileUpload\n\n from lfx.base.data.cloud_storage_utils import create_google_drive_service\n\n # Validate Google Drive credentials\n if not getattr(self, \"service_account_key\", None):\n msg = \"GCP 
Credentials Secret Key is required for Google Drive storage\"\n raise ValueError(msg)\n if not getattr(self, \"folder_id\", None):\n msg = \"Google Drive Folder ID is required for Google Drive storage\"\n raise ValueError(msg)\n\n # Create Google Drive service with full drive scope (needed for folder operations)\n drive_service, credentials = create_google_drive_service(\n self.service_account_key, scopes=[\"https://www.googleapis.com/auth/drive\"], return_credentials=True\n )\n\n # Extract content and format\n content = self._extract_content_for_upload()\n file_format = self._get_file_format_for_location(\"Google Drive\")\n\n # Handle special Google Drive formats\n if file_format in [\"slides\", \"docs\"]:\n return await self._save_to_google_apps(drive_service, credentials, content, file_format)\n\n # Create temporary file\n file_path = f\"{self.file_name}.{file_format}\"\n with tempfile.NamedTemporaryFile(\n mode=\"w\",\n encoding=\"utf-8\",\n suffix=f\".{file_format}\",\n delete=False,\n ) as temp_file:\n temp_file.write(content)\n temp_file_path = temp_file.name\n\n try:\n # Upload to Google Drive\n # Note: We skip explicit folder verification since it requires broader permissions.\n # If the folder doesn't exist or isn't accessible, the create() call will fail with a clear error.\n file_metadata = {\"name\": file_path, \"parents\": [self.folder_id]}\n media = MediaFileUpload(temp_file_path, resumable=True)\n\n try:\n uploaded_file = (\n drive_service.files().create(body=file_metadata, media_body=media, fields=\"id\").execute()\n )\n except Exception as e:\n msg = (\n f\"Unable to upload file to Google Drive folder '{self.folder_id}'. \"\n f\"Error: {e!s}. 
\"\n \"Please ensure: 1) The folder ID is correct, 2) The folder exists, \"\n \"3) The service account has been granted access to this folder.\"\n )\n raise ValueError(msg) from e\n\n file_id = uploaded_file.get(\"id\")\n file_url = f\"https://drive.google.com/file/d/{file_id}/view\"\n return Message(text=f\"File successfully uploaded to Google Drive: {file_url}\")\n finally:\n # Clean up temp file\n if Path(temp_file_path).exists():\n Path(temp_file_path).unlink()\n\n async def _save_to_google_apps(self, drive_service, credentials, content: str, app_type: str) -> Message:\n \"\"\"Save content to Google Apps (Slides or Docs).\"\"\"\n import time\n\n if app_type == \"slides\":\n from googleapiclient.discovery import build\n\n slides_service = build(\"slides\", \"v1\", credentials=credentials)\n\n file_metadata = {\n \"name\": self.file_name,\n \"mimeType\": \"application/vnd.google-apps.presentation\",\n \"parents\": [self.folder_id],\n }\n\n created_file = drive_service.files().create(body=file_metadata, fields=\"id\").execute()\n presentation_id = created_file[\"id\"]\n\n time.sleep(2) # Wait for file to be available # noqa: ASYNC251\n\n presentation = slides_service.presentations().get(presentationId=presentation_id).execute()\n slide_id = presentation[\"slides\"][0][\"objectId\"]\n\n # Add content to slide\n requests = [\n {\n \"createShape\": {\n \"objectId\": \"TextBox_01\",\n \"shapeType\": \"TEXT_BOX\",\n \"elementProperties\": {\n \"pageObjectId\": slide_id,\n \"size\": {\n \"height\": {\"magnitude\": 3000000, \"unit\": \"EMU\"},\n \"width\": {\"magnitude\": 6000000, \"unit\": \"EMU\"},\n },\n \"transform\": {\n \"scaleX\": 1,\n \"scaleY\": 1,\n \"translateX\": 1000000,\n \"translateY\": 1000000,\n \"unit\": \"EMU\",\n },\n },\n }\n },\n {\"insertText\": {\"objectId\": \"TextBox_01\", \"insertionIndex\": 0, \"text\": content}},\n ]\n\n slides_service.presentations().batchUpdate(\n presentationId=presentation_id, body={\"requests\": requests}\n ).execute()\n 
file_url = f\"https://docs.google.com/presentation/d/{presentation_id}/edit\"\n\n elif app_type == \"docs\":\n from googleapiclient.discovery import build\n\n docs_service = build(\"docs\", \"v1\", credentials=credentials)\n\n file_metadata = {\n \"name\": self.file_name,\n \"mimeType\": \"application/vnd.google-apps.document\",\n \"parents\": [self.folder_id],\n }\n\n created_file = drive_service.files().create(body=file_metadata, fields=\"id\").execute()\n document_id = created_file[\"id\"]\n\n time.sleep(2) # Wait for file to be available # noqa: ASYNC251\n\n # Add content to document\n requests = [{\"insertText\": {\"location\": {\"index\": 1}, \"text\": content}}]\n docs_service.documents().batchUpdate(documentId=document_id, body={\"requests\": requests}).execute()\n file_url = f\"https://docs.google.com/document/d/{document_id}/edit\"\n\n return Message(text=f\"File successfully created in Google {app_type.title()}: {file_url}\")\n\n def _extract_content_for_upload(self) -> str:\n \"\"\"Extract content from input for upload to cloud services.\"\"\"\n if self._get_input_type() == \"DataFrame\":\n return self.input.to_csv(index=False)\n if self._get_input_type() == \"Data\":\n if hasattr(self.input, \"data\") and self.input.data:\n if isinstance(self.input.data, dict):\n import json\n\n return json.dumps(self.input.data, indent=2, ensure_ascii=False)\n return str(self.input.data)\n return str(self.input)\n if self._get_input_type() == \"Message\":\n return str(self.input.text) if self.input.text else str(self.input)\n return str(self.input)\n"
},
"file_name": {
"_input_type": "StrInput",
"advanced": false,
"display_name": "File Name",
"dynamic": false,
"info": "Name file will be saved as (without extension).",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "file_name",
"override_skip": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"tool_mode": true,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": "/data/staging_ai/rag-output/<ชื่อไฟล์>.json"
},
"folder_id": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "Google Drive Folder ID",
"dynamic": false,
"info": "The Google Drive folder ID where the file will be uploaded. The folder must be shared with the service account email.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "folder_id",
"override_skip": false,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"gdrive_format": {
"_input_type": "DropdownInput",
"advanced": false,
"combobox": false,
"dialog_inputs": {},
"display_name": "File Format",
"dynamic": false,
"external_options": {},
"info": "Select the file format for Google Drive storage.",
"name": "gdrive_format",
"options": ["txt", "json", "csv", "xlsx", "slides", "docs", "jpg", "mp3"],
"options_metadata": [],
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"toggle": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "str",
"value": "txt"
},
"input": {
"_input_type": "HandleInput",
"advanced": false,
"display_name": "File Content",
"dynamic": true,
"info": "The input to save.",
"input_types": ["Data", "DataFrame", "Message"],
"list": false,
"list_add_label": "Add More",
"name": "input",
"override_skip": false,
"placeholder": "",
"required": true,
"show": true,
"title_case": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "other",
"value": ""
},
"is_refresh": false,
"local_format": {
"_input_type": "DropdownInput",
"advanced": false,
"combobox": false,
"dialog_inputs": {},
"display_name": "File Format",
"dynamic": false,
"external_options": {},
"info": "Select the file format for local storage.",
"name": "local_format",
"options": ["csv", "excel", "json", "markdown", "txt"],
"options_metadata": [],
"override_skip": false,
"placeholder": "",
"required": false,
"show": true,
"title_case": false,
"toggle": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": true,
"type": "str",
"value": "json"
},
"s3_prefix": {
"_input_type": "StrInput",
"advanced": true,
"display_name": "S3 Prefix",
"dynamic": false,
"info": "Prefix for all files in S3.",
"list": false,
"list_add_label": "Add More",
"load_from_db": false,
"name": "s3_prefix",
"override_skip": false,
"placeholder": "",
"required": false,
"show": false,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"service_account_key": {
"_input_type": "SecretStrInput",
"advanced": true,
"display_name": "GCP Credentials Secret Key",
"dynamic": false,
"info": "Your Google Cloud Platform service account JSON key as a secret string (complete JSON content).",
"input_types": [],
"load_from_db": false,
"name": "service_account_key",
"override_skip": false,
"password": true,
"placeholder": "",
"required": true,
"show": false,
"title_case": false,
"track_in_telemetry": false,
"type": "str",
"value": ""
},
"storage_location": {
"_input_type": "SortableListInput",
"advanced": true,
"display_name": "Storage Location",
"dynamic": false,
"info": "Choose where to save the file.",
"limit": 1,
"name": "storage_location",
"options": [
{
"icon": "hard-drive",
"name": "Local"
},
{
"icon": "Amazon",
"name": "AWS"
},
{
"icon": "google",
"name": "Google Drive"
}
],
"override_skip": false,
"placeholder": "Select Location",
"real_time_refresh": true,
"required": false,
"search_category": [],
"show": true,
"title_case": false,
"tool_mode": false,
"trace_as_metadata": true,
"track_in_telemetry": false,
"type": "sortableList",
"value": [
{
"chosen": false,
"icon": "hard-drive",
"name": "Local",
"selected": false
}
]
}
},
"tool_mode": false
},
"showNode": true,
"type": "SaveToFile"
},
"dragging": false,
"id": "SaveToFile-M0RUY",
"measured": {
"height": 488,
"width": 400
},
"position": {
"x": 1055.8955765668504,
"y": 296.8332683000652
},
"selected": false,
"type": "genericNode"
}
],
"viewport": {
"x": -209.03647850835887,
"y": 116.68545635016744,
"zoom": 0.954841619659289
}
},
"description": "Language Models, Mapped and Mastered.",
"endpoint_name": null,
"id": "4a538191-04b4-41cf-98d7-8e62aaccf3a8",
"is_component": false,
"last_tested_version": "1.8.0",
"locked": false,
"name": "OpenRAG V0.1",
"tags": []
}