n8n-workflows/workflows/Vector Database as a Big Data Analysis Tool for AI Agents [2_2 KNN] (1).json

{
  "id": "itzURpN5wbUNOXOw",
  "meta": {
    "instanceId": "205b3bc06c96f2dc835b4f00e1cbf9a937a74eeb3b47c99d0c30b0586dbf85aa"
  },
  "name": "[2/2] KNN classifier (lands dataset)",
  "tags": [
    {
      "id": "QN7etptCmdcGIpkS",
      "name": "classifier",
      "createdAt": "2024-12-08T22:08:15.968Z",
      "updatedAt": "2024-12-09T19:25:04.113Z"
    }
  ],
  "nodes": [
    {
      "id": "33373ccb-164e-431c-8a9a-d68668fc70be",
      "name": "Embed image",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -140,
        -240
      ],
      "parameters": {
        "url": "https://api.voyageai.com/v1/multimodalembeddings",
        "method": "POST",
        "options": {},
        "jsonBody": "={{\n{\n \"inputs\": [\n {\n \"content\": [\n {\n \"type\": \"image_url\",\n \"image_url\": $json.imageURL\n }\n ]\n }\n ],\n \"model\": \"voyage-multimodal-3\",\n \"input_type\": \"document\"\n}\n}}",
        "sendBody": true,
        "specifyBody": "json",
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth"
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "Vb0RNVDnIHmgnZOP",
          "name": "Voyage API"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "58adecfa-45c7-4928-b850-053ea6f3b1c5",
      "name": "Query Qdrant",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        440,
        -240
      ],
      "parameters": {
        "url": "={{ $json.qdrantCloudURL }}/collections/{{ $json.collectionName }}/points/query",
        "method": "POST",
        "options": {},
        "jsonBody": "={{\n{\n \"query\": $json.ImageEmbedding,\n \"using\": \"voyage\",\n \"limit\": $json.limitKNN,\n \"with_payload\": true\n}\n}}",
        "sendBody": true,
        "specifyBody": "json",
        "authentication": "predefinedCredentialType",
        "nodeCredentialType": "qdrantApi"
      },
      "credentials": {
        "qdrantApi": {
          "id": "it3j3hP9FICqhgX6",
          "name": "QdrantApi account"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "258026b7-2dda-4165-bfe1-c4163b9caf78",
      "name": "Majority Vote",
      "type": "n8n-nodes-base.code",
      "position": [
        840,
        -240
      ],
      "parameters": {
        "language": "python",
        "pythonCode": "from collections import Counter\n\ninput_json = _input.all()[0]\npoints = input_json['json']['result']['points']\nmajority_vote_two_most_common = Counter([point[\"payload\"][\"landscape_name\"] for point in points]).most_common(2)\n\nreturn [{\n \"json\": {\n \"result\": majority_vote_two_most_common \n }\n}]\n"
      },
      "typeVersion": 2
    },
    {
      "id": "e83e7a0c-cb36-46d0-8908-86ee1bddf638",
      "name": "Increase limitKNN",
      "type": "n8n-nodes-base.set",
      "position": [
        1240,
        -240
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "0b5d257b-1b27-48bc-bec2-78649bc844cc",
              "name": "limitKNN",
              "type": "number",
              "value": "={{ $('Propagate loop variables').item.json.limitKNN + 5}}"
            },
            {
              "id": "afee4bb3-f78b-4355-945d-3776e33337a4",
              "name": "ImageEmbedding",
              "type": "array",
              "value": "={{ $('Qdrant variables + embedding + KNN neigbours').first().json.ImageEmbedding }}"
            },
            {
              "id": "701ed7ba-d112-4699-a611-c0c134757a6c",
              "name": "qdrantCloudURL",
              "type": "string",
              "value": "={{ $('Qdrant variables + embedding + KNN neigbours').first().json.qdrantCloudURL }}"
            },
            {
              "id": "f5612f78-e7d8-4124-9c3a-27bd5870c9bf",
              "name": "collectionName",
              "type": "string",
              "value": "={{ $('Qdrant variables + embedding + KNN neigbours').first().json.collectionName }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "8edbff53-cba6-4491-9d5e-bac7ad6db418",
      "name": "Propagate loop variables",
      "type": "n8n-nodes-base.set",
      "position": [
        640,
        -240
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "880838bf-2be2-4f5f-9417-974b3cbee163",
              "name": "=limitKNN",
              "type": "number",
              "value": "={{ $json.result.points.length}}"
            },
            {
              "id": "5fff2bea-f644-4fd9-ad04-afbecd19a5bc",
              "name": "result",
              "type": "object",
              "value": "={{ $json.result }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "6fad4cc0-f02c-429d-aa4e-0d69ebab9d65",
      "name": "Image Test URL",
      "type": "n8n-nodes-base.set",
      "position": [
        -320,
        -240
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "46ceba40-fb25-450c-8550-d43d8b8aa94c",
              "name": "imageURL",
              "type": "string",
              "value": "={{ $json.query.imageURL }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "f02e79e2-32c8-4af0-8bf9-281119b23cc0",
      "name": "Return class",
      "type": "n8n-nodes-base.set",
      "position": [
        1240,
        0
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "bd8ca541-8758-4551-b667-1de373231364",
              "name": "class",
              "type": "string",
              "value": "={{ $json.result[0][0] }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "83ca90fb-d5d5-45f4-8957-4363a4baf8ed",
      "name": "Check tie",
      "type": "n8n-nodes-base.if",
      "position": [
        1040,
        -240
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "980663f6-9d7d-4e88-87b9-02030882472c",
              "operator": {
                "type": "number",
                "operation": "gt"
              },
              "leftValue": "={{ $json.result.length }}",
              "rightValue": 1
            },
            {
              "id": "9f46fdeb-0f89-4010-99af-624c1c429d6a",
              "operator": {
                "type": "number",
                "operation": "equals"
              },
              "leftValue": "={{ $json.result[0][1] }}",
              "rightValue": "={{ $json.result[1][1] }}"
            },
            {
              "id": "c59bc4fe-6821-4639-8595-fdaf4194c1e1",
              "operator": {
                "type": "number",
                "operation": "lte"
              },
              "leftValue": "={{ $('Propagate loop variables').item.json.limitKNN }}",
              "rightValue": 100
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "847ced21-4cfd-45d8-98fa-b578adc054d6",
      "name": "Qdrant variables + embedding + KNN neigbours",
      "type": "n8n-nodes-base.set",
      "position": [
        120,
        -240
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "de66070d-5e74-414e-8af7-d094cbc26f62",
              "name": "ImageEmbedding",
              "type": "array",
              "value": "={{ $json.data[0].embedding }}"
            },
            {
              "id": "58b7384d-fd0c-44aa-9f8e-0306a99be431",
              "name": "qdrantCloudURL",
              "type": "string",
              "value": "=https://152bc6e2-832a-415c-a1aa-fb529f8baf8d.eu-central-1-0.aws.cloud.qdrant.io"
            },
            {
              "id": "e34c4d88-b102-43cc-a09e-e0553f2da23a",
              "name": "collectionName",
              "type": "string",
              "value": "=land-use"
            },
            {
              "id": "db37e18d-340b-4624-84f6-df993af866d6",
              "name": "limitKNN",
              "type": "number",
              "value": "=10"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "d1bc4edc-37d2-43ac-8d8b-560453e68d1f",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -940,
        -120
      ],
      "parameters": {
        "color": 6,
        "width": 320,
        "height": 540,
        "content": "Here we're classifying existing types of satellite imagery of land types:\n- 'agricultural',\n- 'airplane',\n- 'baseballdiamond',\n- 'beach',\n- 'buildings',\n- 'chaparral',\n- 'denseresidential',\n- 'forest',\n- 'freeway',\n- 'golfcourse',\n- 'harbor',\n- 'intersection',\n- 'mediumresidential',\n- 'mobilehomepark',\n- 'overpass',\n- 'parkinglot',\n- 'river',\n- 'runway',\n- 'sparseresidential',\n- 'storagetanks',\n- 'tenniscourt'\n"
      },
      "typeVersion": 1
    },
    {
      "id": "13560a31-3c72-43b8-9635-3f9ca11f23c9",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -520,
        -460
      ],
      "parameters": {
        "color": 6,
        "content": "I tested this KNN classifier on a whole `test` set of a dataset (it's not a part of the collection, only `validation` + `train` parts). Accuracy of classification on `test` is **93.24%**, no fine-tuning, no metric learning."
      },
      "typeVersion": 1
    },
    {
      "id": "8c9dcbcb-a1ad-430f-b7dd-e19b5645b0f6",
      "name": "Execute Workflow Trigger",
      "type": "n8n-nodes-base.executeWorkflowTrigger",
      "position": [
        -520,
        -240
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "b36fb270-2101-45e9-bb5c-06c4e07b769c",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1080,
        -520
      ],
      "parameters": {
        "width": 460,
        "height": 380,
        "content": "## KNN classification workflow-tool\n### This n8n template takes an image URL (as anomaly detection tool does), and as output, it returns a class of the object on the image (out of land types list)\n\n* An image URL is received via the Execute Workflow Trigger, which is then sent to the Voyage.ai Multimodal Embeddings API to fetch its embedding.\n* The image's embedding vector is then used to query Qdrant, returning a set of X similar images with pre-labeled classes.\n* Majority voting is done for classes of neighbouring images.\n* A loop is used to resolve scenarios where there is a tie in Majority Voting (for example, we have 5 \"forest\" and 5 \"beach\"), and we increase the number of neighbours to retrieve.\n* When the loop finally resolves, the identified class is returned to the calling workflow."
      },
      "typeVersion": 1
    },
    {
      "id": "51ece7fc-fd85-4d20-ae26-4df2d3893251",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        120,
        -40
      ],
      "parameters": {
        "height": 200,
        "content": "Variables define another Qdrant's collection with landscapes (uploaded similarly as the crops collection, don't forget to switch it with your data) + amount of neighbours **limitKNN** in the database we'll use for an input image classification."
      },
      "typeVersion": 1
    },
    {
      "id": "7aad5904-eb0b-4389-9d47-cc91780737ba",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -180,
        -60
      ],
      "parameters": {
        "height": 80,
        "content": "Similarly to anomaly detection tool, we're embedding input image with the Voyage model"
      },
      "typeVersion": 1
    },
    {
      "id": "d3702707-ee4a-481f-82ca-d9386f5b7c8a",
      "name": "Sticky Note5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        440,
        -500
      ],
      "parameters": {
        "width": 740,
        "height": 200,
        "content": "## Tie loop\nHere we're [querying](https://api.qdrant.tech/api-reference/search/query-points) Qdrant, getting **limitKNN** nearest neighbours to our image <*Query Qdrant node*>, parsing their classes from payloads (images were pre-labeled & uploaded with their labels to Qdrant) & calculating the most frequent class name <*Majority Vote node*>. If there is a tie <*check tie node*> in 2 most common classes, for example, we have 5 \"forest\" and 5 \"harbor\", we repeat the procedure with the number of neighbours increased by 5 <*propagate loop variables node* and *increase limitKNN node*>.\nIf there is no tie, or we have already checked 100 neighbours, we exit the loop <*check tie node*> and return the class-answer."
      },
      "typeVersion": 1
    },
    {
      "id": "d26911bb-0442-4adc-8511-7cec2d232393",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1240,
        160
      ],
      "parameters": {
        "height": 80,
        "content": "Here, we extract the name of the input image class decided by the Majority Vote\n"
      },
      "typeVersion": 1
    },
    {
      "id": "84ffc859-1d5c-4063-9051-3587f30a0017",
      "name": "Sticky Note10",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -520,
        80
      ],
      "parameters": {
        "color": 4,
        "width": 540,
        "height": 260,
        "content": "### KNN (k nearest neighbours) classification\n1. The first pipeline is uploading (lands) dataset to Qdrant's collection.\n2. **This is the KNN classifier tool, which takes any image as input and classifies it based on queries to the Qdrant (lands) collection.**\n\n### To recreate it\nYou'll have to upload [lands](https://www.kaggle.com/datasets/apollo2506/landuse-scene-classification) dataset from Kaggle to your own Google Storage bucket, and re-create APIs/connections to [Qdrant Cloud](https://qdrant.tech/documentation/quickstart-cloud/) (you can use **Free Tier** cluster), Voyage AI API & Google Cloud Storage\n\n**In general, pipelines are adaptable to any dataset of images**\n"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {
    "Execute Workflow Trigger": [
      {
        "json": {
          "query": {
            "imageURL": "https://storage.googleapis.com/n8n-qdrant-demo/land-use/images_train_test_val/test/buildings/buildings_000323.png"
          }
        }
      }
    ]
  },
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "c8cfe732-fd78-4985-9540-ed8cb2de7ef3",
  "connections": {
    "Check tie": {
      "main": [
        [
          {
            "node": "Increase limitKNN",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Return class",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Embed image": {
      "main": [
        [
          {
            "node": "Qdrant variables + embedding + KNN neigbours",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Query Qdrant": {
      "main": [
        [
          {
            "node": "Propagate loop variables",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Majority Vote": {
      "main": [
        [
          {
            "node": "Check tie",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Image Test URL": {
      "main": [
        [
          {
            "node": "Embed image",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Increase limitKNN": {
      "main": [
        [
          {
            "node": "Query Qdrant",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Execute Workflow Trigger": {
      "main": [
        [
          {
            "node": "Image Test URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Propagate loop variables": {
      "main": [
        [
          {
            "node": "Majority Vote",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Qdrant variables + embedding + KNN neigbours": {
      "main": [
        [
          {
            "node": "Query Qdrant",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}