{
  "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.",
  "type" : "scalar",
  "name" : "chunk",
  "description" : "Use `CHUNK` to split a text field into smaller chunks.",
  "signatures" : [
    {
      "params" : [
        {
          "name" : "field",
          "type" : "keyword",
          "optional" : false,
          "description" : "The input to chunk."
        }
      ],
      "variadic" : false,
      "returnType" : "keyword"
    },
    {
      "params" : [
        {
          "name" : "field",
          "type" : "keyword",
          "optional" : false,
          "description" : "The input to chunk."
        },
        {
          "name" : "chunking_settings",
          "type" : "function_named_parameters",
          "mapParams" : "{name='separator_group', values=[markdown, plaintext], description='Sets a predefined lists of separators based on the selected text type. Values may be `markdown` or `plaintext`.\nOnly applicable to the `recursive` chunking strategy. When using the `recursive` chunking strategy one of\n`separators` or `separator_group` must be specified.\n', type=[keyword]}, {name='overlap', values=[0], description='The number of overlapping words for chunks. It is applicable only to a `word` chunking strategy.\nThis value cannot be higher than half the `max_chunk_size` value.\n', type=[integer]}, {name='sentence_overlap', values=[1, 0], description='The number of overlapping sentences for chunks. It is applicable only for a `sentence` chunking strategy.\nIt can be either `1` or `0`.\n', type=[integer]}, {name='strategy', values=[sentence, word, none, recursive], description='The chunking strategy to use. Default value is `sentence`.', type=[keyword]}, {name='max_chunk_size', values=[300], description='The maximum size of a chunk in words. This value cannot be lower than `20` (for `sentence` strategy)\nor `10` (for `word` or `recursive` strategies). This model should not exceed the window size for any\nassociated models using the output of this function.\n', type=[integer]}, {name='separators', values=[(?<!\\n)\\n\\n(?!\\n), (?<!\\n)\\n(?!\\n)], description='A list of strings used as possible split points when chunking text. Each string can be a plain string or a\nregular expression (regex) pattern. The system tries each separator in order to split the text, starting from\nthe first item in the list. After splitting, it attempts to recombine smaller pieces into larger chunks that stay\nwithin the `max_chunk_size` limit, to reduce the total number of chunks generated. Only applicable to the\n`recursive` chunking strategy. When using the `recursive` chunking strategy one of `separators` or `separator_group`\nmust be specified.\n', type=[keyword]}",
          "optional" : true,
          "description" : "Options to customize chunking behavior. Defaults to {\"strategy\":\"sentence\",\"max_chunk_size\":300,\"sentence_overlap\":0}."
        }
      ],
      "variadic" : false,
      "returnType" : "keyword"
    },
    {
      "params" : [
        {
          "name" : "field",
          "type" : "text",
          "optional" : false,
          "description" : "The input to chunk."
        }
      ],
      "variadic" : false,
      "returnType" : "keyword"
    },
    {
      "params" : [
        {
          "name" : "field",
          "type" : "text",
          "optional" : false,
          "description" : "The input to chunk."
        },
        {
          "name" : "chunking_settings",
          "type" : "function_named_parameters",
          "mapParams" : "{name='separator_group', values=[markdown, plaintext], description='Sets a predefined lists of separators based on the selected text type. Values may be `markdown` or `plaintext`.\nOnly applicable to the `recursive` chunking strategy. When using the `recursive` chunking strategy one of\n`separators` or `separator_group` must be specified.\n', type=[keyword]}, {name='overlap', values=[0], description='The number of overlapping words for chunks. It is applicable only to a `word` chunking strategy.\nThis value cannot be higher than half the `max_chunk_size` value.\n', type=[integer]}, {name='sentence_overlap', values=[1, 0], description='The number of overlapping sentences for chunks. It is applicable only for a `sentence` chunking strategy.\nIt can be either `1` or `0`.\n', type=[integer]}, {name='strategy', values=[sentence, word, none, recursive], description='The chunking strategy to use. Default value is `sentence`.', type=[keyword]}, {name='max_chunk_size', values=[300], description='The maximum size of a chunk in words. This value cannot be lower than `20` (for `sentence` strategy)\nor `10` (for `word` or `recursive` strategies). This model should not exceed the window size for any\nassociated models using the output of this function.\n', type=[integer]}, {name='separators', values=[(?<!\\n)\\n\\n(?!\\n), (?<!\\n)\\n(?!\\n)], description='A list of strings used as possible split points when chunking text. Each string can be a plain string or a\nregular expression (regex) pattern. The system tries each separator in order to split the text, starting from\nthe first item in the list. After splitting, it attempts to recombine smaller pieces into larger chunks that stay\nwithin the `max_chunk_size` limit, to reduce the total number of chunks generated. Only applicable to the\n`recursive` chunking strategy. When using the `recursive` chunking strategy one of `separators` or `separator_group`\nmust be specified.\n', type=[keyword]}",
          "optional" : true,
          "description" : "Options to customize chunking behavior. Defaults to {\"strategy\":\"sentence\",\"max_chunk_size\":300,\"sentence_overlap\":0}."
        }
      ],
      "variadic" : false,
      "returnType" : "keyword"
    }
  ],
  "examples" : [
    "ROW result = CHUNK(\"It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief.\", {\"strategy\": \"word\", \"max_chunk_size\": 10, \"overlap\": 1})\n| MV_EXPAND result"
  ],
  "preview" : true,
  "snapshot_only" : false
}
