Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

This is configured on the seed and allows queries, filters, and aggregations on the target repository. To use it, you MUST set the "scanAction" on the seed to "query" and also provide a query in the /seed/scan configuration.

Important: The query scan action doesn't support projections or aggregations just yet. See PDP-425 and PDP-427 for details.

Example seed that pulls documents with no "Content-Type" header:

...

(Optional, JSON) A JSON object with the scroll properties.

Examples

Simple example that extracts all content from a bucket:

{
  "seed": {
    "servers": [
      {
        "host": "localhost",
        "port": 8081
      }
    ],
    "scan": {
      "bucket": "staging_source"
    }
  },
  "type": "staging-connector",
  "name": "Staging connector",
  "active": true,
  "processAction": "process",
  "scanAction": "scan",
  "batchSize": 25,
  "pipelineId": "1dc3b770-db77-4009-a20d-40d1aa5c3c85"
}

Filter

Use a filter in the query to extract a record with a specific payload value:

Filter object:

"query": {
    "filter": {
        "equals": {
            "fieldName": "content.payload",
            "value": "2vnBU7jguQYM0lfTDf1BHb0aRJYF8sKcriFFx29rqn4sbnrCqF50mYBX6h7C2WwrOx1b7bHJEwPzEPMxUNWIlqA6waWasuFjhnPyHxT6EihhQFyyebiE8dfj0CXD8vtH"
        }
    }
}

Filter object in seed configuration:

{
      "type": "staging-connector",
      "id": "5e50d861-ec81-48f6-bd4a-2dd498a43c61",
      "creationTimestamp": 1685999446303,
      "lastUpdatedTimestamp": 1685999446303,
      "name": "Staging connector Test",
      "description": null,
      "labels": {},
      "active": true,
      "config": {
        "seed": {
          "servers": [
            {
              "host": "localhost",
              "port": 8081
            }
          ],
          "scan": {
            "bucket": "target-bucket",
            "scroll": {
              "size": 10
            },
            "query": {
              "filter": {
                "equals": {
                  "fieldName": "content.payload",
                  "value": "2vnBU7jguQYM0lfTDf1BHb0aRJYF8sKcriFFx29rqn4sbnrCqF50mYBX6h7C2WwrOx1b7bHJEwPzEPMxUNWIlqA6waWasuFjhnPyHxT6EihhQFyyebiE8dfj0CXD8vtH"
                }
              }
            }
          }
        }
      },
      "pipelineId": "019d2492-6712-4444-8703-7fcc70998b34",
      "credentialId": null,
      "scanAction": "query",
      "processAction": "process",
      "batchSize": 25,
      "properties": null,
      "idPrefix": null,
      "hashRecordIds": false,
      "jobRetries": 2,
      "erroredRecordThreshold": 0.95,
      "jobTimeout": "PT24H"
    }

Projection

Use a projection to exclude a field:

Projection object:

"projection": {
    "fields": ["content.counter"],
    "type": "EXCLUDE"
}

Query with projection object:

{
      "type": "staging-connector",
      "id": "5e50d861-ec81-48f6-bd4a-2dd498a43c61",
      "creationTimestamp": 1685999446303,
      "lastUpdatedTimestamp": 1686001568951,
      "name": "Staging connector Test",
      "description": null,
      "labels": {},
      "active": true,
      "config": {
        "seed": {
          "servers": [
            {
              "host": "localhost",
              "port": 8081
            }
          ],
          "scan": {
            "bucket": "target-bucket",
            "scroll": {
              "size": 10
            },
            "query": {
              "projection": {
                "fields": ["content.counter"],
                "type": "EXCLUDE"
              },
              "filter": {
                "equals": {
                  "fieldName": "content.payload",
                  "value": "2vnBU7jguQYM0lfTDf1BHb0aRJYF8sKcriFFx29rqn4sbnrCqF50mYBX6h7C2WwrOx1b7bHJEwPzEPMxUNWIlqA6waWasuFjhnPyHxT6EihhQFyyebiE8dfj0CXD8vtH"
                }
              }
            }
          }
        }
      },
      "pipelineId": "019d2492-6712-4444-8703-7fcc70998b34",
      "credentialId": null,
      "scanAction": "query",
      "processAction": "process",
      "batchSize": 25,
      "properties": null,
      "idPrefix": null,
      "hashRecordIds": false,
      "jobRetries": 2,
      "erroredRecordThreshold": 0.95,
      "jobTimeout": "PT24H"
    }

Aggregate

Use an aggregation to group records by date (here, the "content.generatedAt" field):

{
    "group": {
        "fieldName": "content.generatedAt"
    }
}

Query with group object:

{
  "type": "staging-connector",
  "id": "5e50d861-ec81-48f6-bd4a-2dd498a43c61",
  "creationTimestamp": 1685999446303,
  "lastUpdatedTimestamp": 1686066679572,
  "name": "Staging connector Test",
  "description": null,
  "labels": {},
  "active": true,
  "config": {
    "seed": {
      "servers": [
        {
          "host": "localhost",
          "port": 8081
        }
      ],
      "scan": {
        "bucket": "target-bucket",
        "scroll": {
          "size": 10
        },
        "query": {
          "filter": {
            "exists": {
              "fieldName": "content.generatedAt",
              "present": true
            }
          },
          "aggregate": [
            {
              "group": {
                "fieldName": "content.generatedAt"
              }
            }
          ]
        }
      }
    }
  },
  "pipelineId": "019d2492-6712-4444-8703-7fcc70998b34",
  "credentialId": null,
  "scanAction": "query",
  "processAction": "process",
  "batchSize": 25,
  "properties": null,
  "idPrefix": null,
  "hashRecordIds": false,
  "jobRetries": 2,
  "erroredRecordThreshold": 0.95,
  "jobTimeout": "PT24H"
}

Known limitations

Lookup action

...