Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

{
  "servers": [
    {
      "host": "localhost",
      "port": 8125
    }
  ],
  "connectTimeout": 1000,
  "readTimeout": 1000,
  "sourceField": [
    "tika",
    "other"
  ],
  "multiSourceFieldSeparator": " ",
  "output": "bertService",
  "chunkExpansion": {
    "append": "fieldA",
    "prepend": "fieldB",
    "separator": " - "
  },
  "chunkerType": "SIMPLE",
  "single": true,
  "maxChunks": 10,
  "minChunkSize": 25,
  "maxChunkSize": 100,
  "removePunctuation": true,
  "breakOnBlankLine": true,
  "lineLengthThreshold": 100,
  "htmlTags": [
    "<p>p",
"</p>"   ],
  "name": "BERT Service Processor",
  "active": true,
  "id": "b25f9a02-a8ca-471c-858e-51853c9e76a6",
  "type": "tika-processor"
}

...

(Optional, String) separator used to join multiple source fields. Default is a single space (" ").

output

(Optional, String) field where extracted content should be placed. Default is "bertService".

...