1023 lines
39 KiB
JSON
1023 lines
39 KiB
JSON
{
|
||
"nodes": [
|
||
{
|
||
"id": "6cdc45e5-1fa4-47fe-b80a-0e1560996936",
|
||
"name": "Text",
|
||
"type": "@n8n/n8n-nodes-langchain.toolWorkflow",
|
||
"position": [
|
||
1460,
|
||
980
|
||
],
|
||
"parameters": {
|
||
"name": "text_retrieval_tool",
|
||
"source": "parameter",
|
||
"description": "Call this tool to return all text from the given website. Query should be full website URL.",
|
||
"workflowJson": "{\n \"nodes\": [\n {\n \"parameters\": {},\n \"id\": \"05107436-c9cb-419b-ae8a-b74d309a130d\",\n \"name\": \"Execute workflow\",\n \"type\": \"n8n-nodes-base.manualTrigger\",\n \"typeVersion\": 1,\n \"position\": [\n 2220,\n 620\n ]\n },\n {\n \"parameters\": {\n \"assignments\": {\n \"assignments\": [\n {\n \"id\": \"253c2b17-c749-4f0a-93e8-5ff74f1ce49b\",\n \"name\": \"domain\",\n \"value\": \"={{ $json.query }}\",\n \"type\": \"string\"\n }\n ]\n },\n \"options\": {}\n },\n \"id\": \"bb8be616-3227-4705-8520-1827069faacd\",\n \"name\": \"Set domain\",\n \"type\": \"n8n-nodes-base.set\",\n \"typeVersion\": 3.3,\n \"position\": [\n 2440,\n 620\n ]\n },\n {\n \"parameters\": {\n \"assignments\": {\n \"assignments\": [\n {\n \"id\": \"ed0f1505-82b6-4393-a0d8-088055137ec9\",\n \"name\": \"domain\",\n \"value\": \"={{ $json.domain.startsWith(\\\"http\\\") ? $json.domain : \\\"http://\\\" + $json.domain }}\",\n \"type\": \"string\"\n }\n ]\n },\n \"options\": {}\n },\n \"id\": \"bdf29340-f135-489f-848e-1c7fa43a01df\",\n \"name\": \"Add protocool to domain\",\n \"type\": \"n8n-nodes-base.set\",\n \"typeVersion\": 3.3,\n \"position\": [\n 2640,\n 620\n ]\n },\n {\n \"parameters\": {\n \"assignments\": {\n \"assignments\": [\n {\n \"id\": \"2b1c7ff8-06a7-448b-99b7-5ede4b2e0bf0\",\n \"name\": \"response\",\n \"value\": \"={{ $json.data }}\",\n \"type\": \"string\"\n }\n ]\n },\n \"options\": {}\n },\n \"id\": \"9f0aa264-08c1-459a-bb99-e28599fe8f76\",\n \"name\": \"Set response\",\n \"type\": \"n8n-nodes-base.set\",\n \"typeVersion\": 3.3,\n \"position\": [\n 3300,\n 620\n ]\n },\n {\n \"parameters\": {\n \"url\": \"={{ $json.domain }}\",\n \"options\": {}\n },\n \"id\": \"cec7c8e8-bf5e-43d5-aa41-876293dbec78\",\n \"name\": \"Get website\",\n \"type\": \"n8n-nodes-base.httpRequest\",\n \"typeVersion\": 4.2,\n \"position\": [\n 2860,\n 620\n ]\n },\n {\n \"parameters\": {\n \"html\": \"={{ $json.data }}\",\n \"options\": {\n \"ignore\": \"a,img\"\n }\n },\n \"id\": \"1af94fcb-bca3-45c4-9277-18878c75d417\",\n \"name\": \"Convert HTML to Markdown\",\n \"type\": \"n8n-nodes-base.markdown\",\n \"typeVersion\": 1,\n \"position\": [\n 3080,\n 620\n ]\n }\n ],\n \"connections\": {\n \"Execute workflow\": {\n \"main\": [\n [\n {\n \"node\": \"Set domain\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n },\n \"Set domain\": {\n \"main\": [\n [\n {\n \"node\": \"Add protocool to domain\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n },\n \"Add protocool to domain\": {\n \"main\": [\n [\n {\n \"node\": \"Get website\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n },\n \"Get website\": {\n \"main\": [\n [\n {\n \"node\": \"Convert HTML to Markdown\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n },\n \"Convert HTML to Markdown\": {\n \"main\": [\n [\n {\n \"node\": \"Set response\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n }\n },\n \"pinData\": {}\n}",
|
||
"requestOptions": {}
|
||
},
|
||
"typeVersion": 1.1
|
||
},
|
||
{
|
||
"id": "af8efccb-ba3c-44de-85f7-b932d7a2e3ca",
|
||
"name": "URLs",
|
||
"type": "@n8n/n8n-nodes-langchain.toolWorkflow",
|
||
"position": [
|
||
1640,
|
||
980
|
||
],
|
||
"parameters": {
|
||
"name": "url_retrieval_tool",
|
||
"source": "parameter",
|
||
"description": "Call this tool to return all URLs from the given website. Query should be full website URL.",
|
||
"workflowJson": "{\n \"nodes\": [\n {\n \"parameters\": {},\n \"id\": \"05107436-c9cb-419b-ae8a-b74d309a130d\",\n \"name\": \"Execute workflow\",\n \"type\": \"n8n-nodes-base.manualTrigger\",\n \"typeVersion\": 1,\n \"position\": [\n 2200,\n 740\n ]\n },\n {\n \"parameters\": {\n \"operation\": \"extractHtmlContent\",\n \"extractionValues\": {\n \"values\": [\n {\n \"key\": \"output\",\n \"cssSelector\": \"a\",\n \"returnValue\": \"attribute\",\n \"returnArray\": true\n }\n ]\n },\n \"options\": {}\n },\n \"id\": \"1972e13e-d923-45e8-9752-e4bf45faaccf\",\n \"name\": \"Retrieve URLs\",\n \"type\": \"n8n-nodes-base.html\",\n \"typeVersion\": 1.2,\n \"position\": [\n 3060,\n 740\n ]\n },\n {\n \"parameters\": {\n \"fieldToSplitOut\": \"output\",\n \"options\": {}\n },\n \"id\": \"19703fbc-05ff-4d80-ab53-85ba6d39fc3f\",\n \"name\": \"Split out URLs\",\n \"type\": \"n8n-nodes-base.splitOut\",\n \"typeVersion\": 1,\n \"position\": [\n 3280,\n 740\n ]\n },\n {\n \"parameters\": {\n \"compare\": \"selectedFields\",\n \"fieldsToCompare\": \"href\",\n \"options\": {}\n },\n \"id\": \"5cc988e7-de9b-4177-b5e7-edb3842202c8\",\n \"name\": \"Remove duplicated\",\n \"type\": \"n8n-nodes-base.removeDuplicates\",\n \"typeVersion\": 1,\n \"position\": [\n 3720,\n 740\n ]\n },\n {\n \"parameters\": {\n \"assignments\": {\n \"assignments\": [\n {\n \"id\": \"04ced063-09f0-496c-9b28-b8095f9e2297\",\n \"name\": \"href\",\n \"value\": \"={{ $json.href.startsWith(\\\"/\\\") ? $('Add protocool to domain (URL)').item.json[\\\"domain\\\"] + $json.href : $json.href }}\",\n \"type\": \"string\"\n }\n ]\n },\n \"includeOtherFields\": true,\n \"include\": \"selected\",\n \"includeFields\": \"title\",\n \"options\": {}\n },\n \"id\": \"4715a25d-93a7-4056-8768-e3f886a1a0c9\",\n \"name\": \"Set domain to path\",\n \"type\": \"n8n-nodes-base.set\",\n \"typeVersion\": 3.3,\n \"position\": [\n 3940,\n 740\n ]\n },\n {\n \"parameters\": {\n \"conditions\": {\n \"options\": {\n \"caseSensitive\": true,\n \"leftValue\": \"\",\n \"typeValidation\": \"strict\"\n },\n \"conditions\": [\n {\n \"id\": \"d01ea6a8-7e75-40d4-98f2-25d42b245f36\",\n \"leftValue\": \"={{ $json.href.isUrl() }}\",\n \"rightValue\": \"\",\n \"operator\": {\n \"type\": \"boolean\",\n \"operation\": \"true\",\n \"singleValue\": true\n }\n }\n ],\n \"combinator\": \"and\"\n },\n \"options\": {}\n },\n \"id\": \"353deefb-ae69-440c-95b6-fdadacf4bf91\",\n \"name\": \"Filter out invalid URLs\",\n \"type\": \"n8n-nodes-base.filter\",\n \"typeVersion\": 2,\n \"position\": [\n 4160,\n 740\n ]\n },\n {\n \"parameters\": {\n \"aggregate\": \"aggregateAllItemData\",\n \"include\": \"specifiedFields\",\n \"fieldsToInclude\": \"title,href\",\n \"options\": {}\n },\n \"id\": \"9f87be8c-72d7-4ab1-b297-dc7069b2dd11\",\n \"name\": \"Aggregate URLs\",\n \"type\": \"n8n-nodes-base.aggregate\",\n \"typeVersion\": 1,\n \"position\": [\n 4380,\n 740\n ]\n },\n {\n \"parameters\": {\n \"conditions\": {\n \"options\": {\n \"caseSensitive\": true,\n \"leftValue\": \"\",\n \"typeValidation\": \"strict\"\n },\n \"conditions\": [\n {\n \"id\": \"5b9b7353-bd04-4af2-9480-8de135ff4223\",\n \"leftValue\": \"={{ $json.href }}\",\n \"rightValue\": \"\",\n \"operator\": {\n \"type\": \"string\",\n \"operation\": \"exists\",\n \"singleValue\": true\n }\n }\n ],\n \"combinator\": \"and\"\n },\n \"options\": {}\n },\n \"id\": \"35c8323a-5350-403a-9c2d-114b0527e395\",\n \"name\": \"Filter out empty hrefs\",\n \"type\": \"n8n-nodes-base.filter\",\n \"typeVersion\": 2,\n \"position\": [\n 3500,\n 740\n ]\n },\n {\n \"parameters\": {\n \"assignments\": {\n \"assignments\": [\n {\n \"id\": \"253c2b17-c749-4f0a-93e8-5ff74f1ce49b\",\n \"name\": \"domain\",\n \"value\": \"={{ $json.query }}\",\n \"type\": \"string\"\n }\n ]\n },\n \"options\": {}\n },\n \"id\": \"d9f6a148-6c8c-4a58-89f5-4e9cfcd8d910\",\n \"name\": \"Set domain (URL)\",\n \"type\": \"n8n-nodes-base.set\",\n \"typeVersion\": 3.3,\n \"position\": [\n 2400,\n 740\n ]\n },\n {\n \"parameters\": {\n \"assignments\": {\n \"assignments\": [\n {\n \"id\": \"ed0f1505-82b6-4393-a0d8-088055137ec9\",\n \"name\": \"domain\",\n \"value\": \"={{ $json.domain.startsWith(\\\"http\\\") ? $json.domain : \\\"http://\\\" + $json.domain }}\",\n \"type\": \"string\"\n }\n ]\n },\n \"options\": {}\n },\n \"id\": \"1f974444-da58-4a47-a9c3-ba3091fc1e96\",\n \"name\": \"Add protocool to domain (URL)\",\n \"type\": \"n8n-nodes-base.set\",\n \"typeVersion\": 3.3,\n \"position\": [\n 2620,\n 740\n ]\n },\n {\n \"parameters\": {\n \"url\": \"={{ $json.domain }}\",\n \"options\": {}\n },\n \"id\": \"31d7c7d4-8f61-402b-858d-63dd68ac69ee\",\n \"name\": \"Get website (URL)\",\n \"type\": \"n8n-nodes-base.httpRequest\",\n \"typeVersion\": 4.2,\n \"position\": [\n 2840,\n 740\n ]\n },\n {\n \"parameters\": {\n \"assignments\": {\n \"assignments\": [\n {\n \"id\": \"53c1c016-7983-4eba-a91d-da2a0523d805\",\n \"name\": \"response\",\n \"value\": \"={{ JSON.stringify($json.data) }}\",\n \"type\": \"string\"\n }\n ]\n },\n \"options\": {}\n },\n \"id\": \"f4b6df77-96be-4b12-9a8b-ae9b7009f13d\",\n \"name\": \"Set response (URL)\",\n \"type\": \"n8n-nodes-base.set\",\n \"typeVersion\": 3.3,\n \"position\": [\n 4600,\n 740\n ]\n }\n ],\n \"connections\": {\n \"Execute workflow\": {\n \"main\": [\n [\n {\n \"node\": \"Set domain (URL)\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n },\n \"Retrieve URLs\": {\n \"main\": [\n [\n {\n \"node\": \"Split out URLs\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n },\n \"Split out URLs\": {\n \"main\": [\n [\n {\n \"node\": \"Filter out empty hrefs\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n },\n \"Remove duplicated\": {\n \"main\": [\n [\n {\n \"node\": \"Set domain to path\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n },\n \"Set domain to path\": {\n \"main\": [\n [\n {\n \"node\": \"Filter out invalid URLs\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n },\n \"Filter out invalid URLs\": {\n \"main\": [\n [\n {\n \"node\": \"Aggregate URLs\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n },\n \"Aggregate URLs\": {\n \"main\": [\n [\n {\n \"node\": \"Set response (URL)\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n },\n \"Filter out empty hrefs\": {\n \"main\": [\n [\n {\n \"node\": \"Remove duplicated\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n },\n \"Set domain (URL)\": {\n \"main\": [\n [\n {\n \"node\": \"Add protocool to domain (URL)\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n },\n \"Add protocool to domain (URL)\": {\n \"main\": [\n [\n {\n \"node\": \"Get website (URL)\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n },\n \"Get website (URL)\": {\n \"main\": [\n [\n {\n \"node\": \"Retrieve URLs\",\n \"type\": \"main\",\n \"index\": 0\n }\n ]\n ]\n }\n },\n \"pinData\": {}\n}",
|
||
"requestOptions": {}
|
||
},
|
||
"typeVersion": 1.1
|
||
},
|
||
{
|
||
"id": "725dc9d9-dc10-4895-aedb-93ecd7494d76",
|
||
"name": "OpenAI Chat Model",
|
||
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
|
||
"position": [
|
||
1300,
|
||
980
|
||
],
|
||
"parameters": {
|
||
"model": "gpt-4o",
|
||
"options": {
|
||
"temperature": 0,
|
||
"responseFormat": "json_object"
|
||
},
|
||
"requestOptions": {}
|
||
},
|
||
"credentials": {
|
||
"openAiApi": {
|
||
"id": "Qp9mop4DylpfqiTH",
|
||
"name": "OpenAI (avirago@avirago.pl)"
|
||
}
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "2b9aa18b-e72e-486a-b307-db50e408842b",
|
||
"name": "JSON Parser",
|
||
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
|
||
"position": [
|
||
1800,
|
||
980
|
||
],
|
||
"parameters": {
|
||
"schemaType": "manual",
|
||
"inputSchema": "{\n \"type\": \"object\",\n \"properties\": {\n \"social_media\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"platform\": {\n \"type\": \"string\",\n \"description\": \"The name of the social media platform (e.g., LinkedIn, Instagram)\"\n },\n \"urls\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"string\",\n \"format\": \"uri\",\n \"description\": \"A URL for the social media platform\"\n }\n }\n },\n \"required\": [\"platform\", \"urls\"],\n \"additionalProperties\": false\n }\n }\n },\n \"required\": [\"platforms\"],\n \"additionalProperties\": false\n}\n",
|
||
"requestOptions": {}
|
||
},
|
||
"typeVersion": 1.2
|
||
},
|
||
{
|
||
"id": "87dcfe83-01f3-439c-8175-7da3d96391b4",
|
||
"name": "Map company name and website",
|
||
"type": "n8n-nodes-base.set",
|
||
"position": [
|
||
1400,
|
||
300
|
||
],
|
||
"parameters": {
|
||
"options": {},
|
||
"assignments": {
|
||
"assignments": [
|
||
{
|
||
"id": "ae484e44-36bc-4d88-9772-545e579a261c",
|
||
"name": "company_name",
|
||
"type": "string",
|
||
"value": "={{ $json.name }}"
|
||
},
|
||
{
|
||
"id": "c426ab19-649c-4443-aabb-eb0826680452",
|
||
"name": "company_website",
|
||
"type": "string",
|
||
"value": "={{ $json.website }}"
|
||
}
|
||
]
|
||
}
|
||
},
|
||
"typeVersion": 3.3
|
||
},
|
||
{
|
||
"id": "a904bd16-b470-4c98-ac05-50bbc09bf24b",
|
||
"name": "Execute workflow",
|
||
"type": "n8n-nodes-base.manualTrigger",
|
||
"position": [
|
||
540,
|
||
620
|
||
],
|
||
"parameters": {},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "a9801b62-a691-457c-a52f-ac0d68c8e8b3",
|
||
"name": "Get companies",
|
||
"type": "n8n-nodes-base.supabase",
|
||
"position": [
|
||
780,
|
||
620
|
||
],
|
||
"parameters": {
|
||
"tableId": "companies_input",
|
||
"operation": "getAll"
|
||
},
|
||
"credentials": {
|
||
"supabaseApi": {
|
||
"id": "TZeFGe5qO3z7X5Zk",
|
||
"name": "Supabase (workfloows@gmail.com)"
|
||
}
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "40d8fe8a-2975-4ea5-b6ac-46e19d158eea",
|
||
"name": "Select company name and website",
|
||
"type": "n8n-nodes-base.set",
|
||
"position": [
|
||
1040,
|
||
620
|
||
],
|
||
"parameters": {
|
||
"include": "selected",
|
||
"options": {},
|
||
"assignments": {
|
||
"assignments": []
|
||
},
|
||
"includeFields": "name,website",
|
||
"includeOtherFields": true
|
||
},
|
||
"typeVersion": 3.3
|
||
},
|
||
{
|
||
"id": "20aa3aea-f1f6-435c-a511-d4e8db047c6d",
|
||
"name": "Set social media array",
|
||
"type": "n8n-nodes-base.set",
|
||
"position": [
|
||
1800,
|
||
720
|
||
],
|
||
"parameters": {
|
||
"options": {},
|
||
"assignments": {
|
||
"assignments": [
|
||
{
|
||
"id": "a6e109b7-9333-44e8-aa13-590aeb91a56b",
|
||
"name": "social_media",
|
||
"type": "array",
|
||
"value": "={{ $json.output.social_media }}"
|
||
}
|
||
]
|
||
}
|
||
},
|
||
"typeVersion": 3.3
|
||
},
|
||
{
|
||
"id": "53f64ebf-8d9f-4718-9a33-aaae06e9cf9a",
|
||
"name": "Merge all data",
|
||
"type": "n8n-nodes-base.merge",
|
||
"position": [
|
||
2040,
|
||
620
|
||
],
|
||
"parameters": {
|
||
"mode": "combine",
|
||
"options": {},
|
||
"combinationMode": "mergeByPosition"
|
||
},
|
||
"typeVersion": 2.1
|
||
},
|
||
{
|
||
"id": "e38e590e-cc1c-485f-b6c4-e7631f1c8381",
|
||
"name": "Insert new row",
|
||
"type": "n8n-nodes-base.supabase",
|
||
"position": [
|
||
2260,
|
||
620
|
||
],
|
||
"parameters": {
|
||
"tableId": "companies_output",
|
||
"dataToSend": "autoMapInputData"
|
||
},
|
||
"credentials": {
|
||
"supabaseApi": {
|
||
"id": "TZeFGe5qO3z7X5Zk",
|
||
"name": "Supabase (workfloows@gmail.com)"
|
||
}
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "aac08494-b324-4307-a5c5-5d5345cc9070",
|
||
"name": "Convert HTML to Markdown",
|
||
"type": "n8n-nodes-base.markdown",
|
||
"position": [
|
||
2100,
|
||
1314
|
||
],
|
||
"parameters": {
|
||
"html": "={{ $json.data }}",
|
||
"options": {
|
||
"ignore": "a,img"
|
||
}
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "ca6733cb-973f-4e7b-9d52-48f1af2e08e3",
|
||
"name": "Sticky Note",
|
||
"type": "n8n-nodes-base.stickyNote",
|
||
"position": [
|
||
1420,
|
||
940
|
||
],
|
||
"parameters": {
|
||
"color": 5,
|
||
"width": 157.8125,
|
||
"height": 166.55000000000004,
|
||
"content": ""
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "4acd71c9-9e31-43fc-bda6-66d6a057306b",
|
||
"name": "Sticky Note1",
|
||
"type": "n8n-nodes-base.stickyNote",
|
||
"position": [
|
||
1600,
|
||
940
|
||
],
|
||
"parameters": {
|
||
"color": 4,
|
||
"width": 157.8125,
|
||
"height": 166.55000000000004,
|
||
"content": ""
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "359adcd6-6bb9-4d64-8dde-6a45b0439fd6",
|
||
"name": "Sticky Note2",
|
||
"type": "n8n-nodes-base.stickyNote",
|
||
"position": [
|
||
1420,
|
||
1180
|
||
],
|
||
"parameters": {
|
||
"color": 5,
|
||
"width": 1117.5005339977713,
|
||
"height": 329.45390772033636,
|
||
"content": "### Text scraper tool\nThis tool is designed to return all text from the given webpage.\n\n💡 **Consider adding proxy for better crawling accuracy.**\n"
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "84133903-dcec-4c0c-8684-fdeb49f5702d",
|
||
"name": "Retrieve URLs",
|
||
"type": "n8n-nodes-base.html",
|
||
"position": [
|
||
2120,
|
||
1700
|
||
],
|
||
"parameters": {
|
||
"options": {},
|
||
"operation": "extractHtmlContent",
|
||
"extractionValues": {
|
||
"values": [
|
||
{
|
||
"key": "output",
|
||
"cssSelector": "a",
|
||
"returnArray": true,
|
||
"returnValue": "attribute"
|
||
}
|
||
]
|
||
}
|
||
},
|
||
"typeVersion": 1.2
|
||
},
|
||
{
|
||
"id": "2ebffed6-5517-47ff-9fcd-5ce503aa3b63",
|
||
"name": "Split out URLs",
|
||
"type": "n8n-nodes-base.splitOut",
|
||
"position": [
|
||
2340,
|
||
1700
|
||
],
|
||
"parameters": {
|
||
"options": {},
|
||
"fieldToSplitOut": "output"
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "215da9b2-0c0d-4d0e-b5f9-9887be75b0c4",
|
||
"name": "Remove duplicated",
|
||
"type": "n8n-nodes-base.removeDuplicates",
|
||
"position": [
|
||
2780,
|
||
1700
|
||
],
|
||
"parameters": {
|
||
"compare": "selectedFields",
|
||
"options": {},
|
||
"fieldsToCompare": "href"
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "55825a1c-9351-413c-858a-c44cd3078f11",
|
||
"name": "Set domain to path",
|
||
"type": "n8n-nodes-base.set",
|
||
"position": [
|
||
3000,
|
||
1700
|
||
],
|
||
"parameters": {
|
||
"include": "selected",
|
||
"options": {},
|
||
"assignments": {
|
||
"assignments": [
|
||
{
|
||
"id": "04ced063-09f0-496c-9b28-b8095f9e2297",
|
||
"name": "href",
|
||
"type": "string",
|
||
"value": "={{ $json.href.startsWith(\"/\") ? $('Add protocool to domain (URL)').item.json[\"domain\"] + $json.href : $json.href }}"
|
||
}
|
||
]
|
||
},
|
||
"includeFields": "title",
|
||
"includeOtherFields": true
|
||
},
|
||
"typeVersion": 3.3
|
||
},
|
||
{
|
||
"id": "57858d59-2727-4291-9dc6-238101de25ea",
|
||
"name": "Filter out invalid URLs",
|
||
"type": "n8n-nodes-base.filter",
|
||
"position": [
|
||
3220,
|
||
1700
|
||
],
|
||
"parameters": {
|
||
"options": {},
|
||
"conditions": {
|
||
"options": {
|
||
"leftValue": "",
|
||
"caseSensitive": true,
|
||
"typeValidation": "strict"
|
||
},
|
||
"combinator": "and",
|
||
"conditions": [
|
||
{
|
||
"id": "d01ea6a8-7e75-40d4-98f2-25d42b245f36",
|
||
"operator": {
|
||
"type": "boolean",
|
||
"operation": "true",
|
||
"singleValue": true
|
||
},
|
||
"leftValue": "={{ $json.href.isUrl() }}",
|
||
"rightValue": ""
|
||
}
|
||
]
|
||
}
|
||
},
|
||
"typeVersion": 2
|
||
},
|
||
{
|
||
"id": "0e487a35-8a6c-48f7-9048-fe66a5a346e8",
|
||
"name": "Aggregate URLs",
|
||
"type": "n8n-nodes-base.aggregate",
|
||
"position": [
|
||
3440,
|
||
1700
|
||
],
|
||
"parameters": {
|
||
"include": "specifiedFields",
|
||
"options": {},
|
||
"aggregate": "aggregateAllItemData",
|
||
"fieldsToInclude": "title,href"
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "0062af28-8727-4ed4-b283-e250146c2085",
|
||
"name": "Filter out empty hrefs",
|
||
"type": "n8n-nodes-base.filter",
|
||
"position": [
|
||
2560,
|
||
1700
|
||
],
|
||
"parameters": {
|
||
"options": {},
|
||
"conditions": {
|
||
"options": {
|
||
"leftValue": "",
|
||
"caseSensitive": true,
|
||
"typeValidation": "strict"
|
||
},
|
||
"combinator": "and",
|
||
"conditions": [
|
||
{
|
||
"id": "5b9b7353-bd04-4af2-9480-8de135ff4223",
|
||
"operator": {
|
||
"type": "string",
|
||
"operation": "exists",
|
||
"singleValue": true
|
||
},
|
||
"leftValue": "={{ $json.href }}",
|
||
"rightValue": ""
|
||
}
|
||
]
|
||
}
|
||
},
|
||
"typeVersion": 2
|
||
},
|
||
{
|
||
"id": "995e04f2-f5e3-48b8-879e-913f3a9fb657",
|
||
"name": "Set domain (text)",
|
||
"type": "n8n-nodes-base.set",
|
||
"position": [
|
||
1460,
|
||
1314
|
||
],
|
||
"parameters": {
|
||
"options": {},
|
||
"assignments": {
|
||
"assignments": [
|
||
{
|
||
"id": "253c2b17-c749-4f0a-93e8-5ff74f1ce49b",
|
||
"name": "domain",
|
||
"type": "string",
|
||
"value": "={{ $json.query }}"
|
||
}
|
||
]
|
||
}
|
||
},
|
||
"typeVersion": 3.3
|
||
},
|
||
{
|
||
"id": "c88f1008-00f8-4285-b595-a936e1f925a5",
|
||
"name": "Add protocool to domain (text)",
|
||
"type": "n8n-nodes-base.set",
|
||
"position": [
|
||
1660,
|
||
1314
|
||
],
|
||
"parameters": {
|
||
"options": {},
|
||
"assignments": {
|
||
"assignments": [
|
||
{
|
||
"id": "ed0f1505-82b6-4393-a0d8-088055137ec9",
|
||
"name": "domain",
|
||
"type": "string",
|
||
"value": "={{ $json.domain.startsWith(\"http\") ? $json.domain : \"http://\" + $json.domain }}"
|
||
}
|
||
]
|
||
}
|
||
},
|
||
"typeVersion": 3.3
|
||
},
|
||
{
|
||
"id": "3bc68a89-8bab-423a-b4bf-4739739aeb07",
|
||
"name": "Get website (text)",
|
||
"type": "n8n-nodes-base.httpRequest",
|
||
"position": [
|
||
1880,
|
||
1314
|
||
],
|
||
"parameters": {
|
||
"url": "={{ $json.domain }}",
|
||
"options": {}
|
||
},
|
||
"typeVersion": 4.2
|
||
},
|
||
{
|
||
"id": "9d4782c3-872b-4e3c-9f8c-02cfea7a8ff2",
|
||
"name": "Set response (text)",
|
||
"type": "n8n-nodes-base.set",
|
||
"position": [
|
||
2320,
|
||
1314
|
||
],
|
||
"parameters": {
|
||
"options": {},
|
||
"assignments": {
|
||
"assignments": [
|
||
{
|
||
"id": "2b1c7ff8-06a7-448b-99b7-5ede4b2e0bf0",
|
||
"name": "response",
|
||
"type": "string",
|
||
"value": "={{ $json.data }}"
|
||
}
|
||
]
|
||
}
|
||
},
|
||
"typeVersion": 3.3
|
||
},
|
||
{
|
||
"id": "2b6ffbd9-892d-4246-b47c-86ad51362ac9",
|
||
"name": "Set domain (URL)",
|
||
"type": "n8n-nodes-base.set",
|
||
"position": [
|
||
1460,
|
||
1700
|
||
],
|
||
"parameters": {
|
||
"options": {},
|
||
"assignments": {
|
||
"assignments": [
|
||
{
|
||
"id": "253c2b17-c749-4f0a-93e8-5ff74f1ce49b",
|
||
"name": "domain",
|
||
"type": "string",
|
||
"value": "={{ $json.query }}"
|
||
}
|
||
]
|
||
}
|
||
},
|
||
"typeVersion": 3.3
|
||
},
|
||
{
|
||
"id": "2477677e-262e-45a3-99c3-06607b5ae270",
|
||
"name": "Get website (URL)",
|
||
"type": "n8n-nodes-base.httpRequest",
|
||
"position": [
|
||
1900,
|
||
1700
|
||
],
|
||
"parameters": {
|
||
"url": "={{ $json.domain }}",
|
||
"options": {}
|
||
},
|
||
"typeVersion": 4.2
|
||
},
|
||
{
|
||
"id": "4f84eb31-7ad4-4b10-8043-b474fc7f367a",
|
||
"name": "Set response (URL)",
|
||
"type": "n8n-nodes-base.set",
|
||
"position": [
|
||
3660,
|
||
1700
|
||
],
|
||
"parameters": {
|
||
"options": {},
|
||
"assignments": {
|
||
"assignments": [
|
||
{
|
||
"id": "53c1c016-7983-4eba-a91d-da2a0523d805",
|
||
"name": "response",
|
||
"type": "string",
|
||
"value": "={{ JSON.stringify($json.data) }}"
|
||
}
|
||
]
|
||
}
|
||
},
|
||
"typeVersion": 3.3
|
||
},
|
||
{
|
||
"id": "2d2288dd-2ab5-41a1-984c-ff7c5bbab8d1",
|
||
"name": "Sticky Note3",
|
||
"type": "n8n-nodes-base.stickyNote",
|
||
"position": [
|
||
1420,
|
||
1560
|
||
],
|
||
"parameters": {
|
||
"color": 4,
|
||
"width": 2467.2678721043376,
|
||
"height": 328.79842054012374,
|
||
"content": "### URL scraper tool\nThis tool is designed to return all links (URLs) from the given webpage.\n\n💡 **Consider adding proxy for better crawling accuracy.**"
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "61c1b30f-38e5-44a5-a8be-edd4df1b13e5",
|
||
"name": "Sticky Note4",
|
||
"type": "n8n-nodes-base.stickyNote",
|
||
"position": [
|
||
720,
|
||
400
|
||
],
|
||
"parameters": {
|
||
"width": 221.7729148148145,
|
||
"height": 400.16865185185225,
|
||
"content": "### Get companies from database\nRetrieve names and websites of companies from Supabase table to process crawling.\n\n💡 **You can replace Supabase with other database of your choice.**"
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "b6c6643a-4450-4576-b9c3-e28bc9ebed5d",
|
||
"name": "Sticky Note5",
|
||
"type": "n8n-nodes-base.stickyNote",
|
||
"position": [
|
||
980,
|
||
429.32034814814835
|
||
],
|
||
"parameters": {
|
||
"width": 221.7729148148145,
|
||
"height": 370.14757037037066,
|
||
"content": "### Set parameters for execution\nPass only `name` and `website` values from database. \n\n⚠️ **If you use other field namings, update this node.**"
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "52196e71-c2c2-4ec9-91ab-f7ebc9874d6c",
|
||
"name": "Sticky Note6",
|
||
"type": "n8n-nodes-base.stickyNote",
|
||
"position": [
|
||
1360,
|
||
536.6201859111013
|
||
],
|
||
"parameters": {
|
||
"width": 339.7128777777775,
|
||
"height": 328.4957622370491,
|
||
"content": "### Crawling agent (retrieve social media profile links)\nCrawl website to extract social media profile links and return them in unified JSON format.\n\n💡 **You can change type of retrieved data by editing prompt and parser schema.**"
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "ea11931b-c1c7-43c4-a728-f10479863e38",
|
||
"name": "Sticky Note7",
|
||
"type": "n8n-nodes-base.stickyNote",
|
||
"position": [
|
||
2200,
|
||
435.3819888888892
|
||
],
|
||
"parameters": {
|
||
"width": 221.7729148148145,
|
||
"height": 364.786662962963,
|
||
"content": "### Insert data to database\nAdd new rows in database table with extracted data.\n\n💡 **You can replace Supabase with other database of your choice.**"
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "bc3d3337-a5b9-45ec-bb73-810cea9c0e73",
|
||
"name": "Add protocool to domain (URL)",
|
||
"type": "n8n-nodes-base.set",
|
||
"position": [
|
||
1680,
|
||
1700
|
||
],
|
||
"parameters": {
|
||
"options": {},
|
||
"assignments": {
|
||
"assignments": [
|
||
{
|
||
"id": "ed0f1505-82b6-4393-a0d8-088055137ec9",
|
||
"name": "domain",
|
||
"type": "string",
|
||
"value": "={{ $json.domain.startsWith(\"http\") ? $json.domain : \"http://\" + $json.domain }}"
|
||
}
|
||
]
|
||
}
|
||
},
|
||
"typeVersion": 3.3
|
||
},
|
||
{
|
||
"id": "db91703c-0133-4030-a9b5-fc3ab4331784",
|
||
"name": "Sticky Note8",
|
||
"type": "n8n-nodes-base.stickyNote",
|
||
"position": [
|
||
0,
|
||
660
|
||
],
|
||
"parameters": {
|
||
"color": 3,
|
||
"width": 369.60264559047334,
|
||
"height": 256.26672065702303,
|
||
"content": "## ⚠️ Note\n\n1. Complete video guide for this workflow is available [on my YouTube](https://youtu.be/2W09puFZwtY). \n2. Remember to add your credentials and configure nodes.\n3. If you like this workflow, please subscribe to [my YouTube channel](https://www.youtube.com/@workfloows) and/or [my newsletter](https://workfloows.com/).\n\n**Thank you for your support!**"
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "54530733-f8dc-44c7-a645-6f279e9a2c21",
|
||
"name": "Sticky Note9",
|
||
"type": "n8n-nodes-base.stickyNote",
|
||
"position": [
|
||
0,
|
||
420
|
||
],
|
||
"parameters": {
|
||
"color": 7,
|
||
"width": 369.93062670813185,
|
||
"height": 212.09880341753203,
|
||
"content": "## Autonomous AI crawler\nThis workflow autonomously navigates through given websites and retrieves social media profile links. \n\n💡 **You can modify this workflow to retrieve other type of data (e.g. contact details or company profile summary).**"
|
||
},
|
||
"typeVersion": 1
|
||
},
|
||
{
|
||
"id": "b43aee3c-47b5-47fd-89c4-7d213b26b4ca",
|
||
"name": "Crawl website",
|
||
"type": "@n8n/n8n-nodes-langchain.agent",
|
||
"position": [
|
||
1400,
|
||
720
|
||
],
|
||
"parameters": {
|
||
"text": "=Retrieve social media profile URLs from this website: {{ $json.website }}",
|
||
"options": {
|
||
"systemMessage": "You are an automated web crawler tasked with extracting social media URLs from a webpage provided by the user. You have access to a text retrieval tool to gather all text content from the page and a URL retrieval tool to identify and navigate through links on the page. Utilize the URLs retrieved to crawl additional pages. Your objective is to provide a unified JSON output containing the extracted data (links to all possible social media profiles from the website)."
|
||
},
|
||
"promptType": "define",
|
||
"hasOutputParser": true
|
||
},
|
||
"retryOnFail": true,
|
||
"typeVersion": 1.6
|
||
}
|
||
],
|
||
"pinData": {
|
||
"Get companies": [
|
||
{
|
||
"id": 1,
|
||
"name": "n8n",
|
||
"website": "https://n8n.io"
|
||
}
|
||
]
|
||
},
|
||
"connections": {
|
||
"Text": {
|
||
"ai_tool": [
|
||
[
|
||
{
|
||
"node": "Crawl website",
|
||
"type": "ai_tool",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"URLs": {
|
||
"ai_tool": [
|
||
[
|
||
{
|
||
"node": "Crawl website",
|
||
"type": "ai_tool",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"JSON Parser": {
|
||
"ai_outputParser": [
|
||
[
|
||
{
|
||
"node": "Crawl website",
|
||
"type": "ai_outputParser",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Crawl website": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Set social media array",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Get companies": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Select company name and website",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Retrieve URLs": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Split out URLs",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Aggregate URLs": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Set response (URL)",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Merge all data": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Insert new row",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Split out URLs": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Filter out empty hrefs",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Execute workflow": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Get companies",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Set domain (URL)": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Add protocool to domain (URL)",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Get website (URL)": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Retrieve URLs",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"OpenAI Chat Model": {
|
||
"ai_languageModel": [
|
||
[
|
||
{
|
||
"node": "Crawl website",
|
||
"type": "ai_languageModel",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Remove duplicated": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Set domain to path",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Set domain (text)": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Add protocool to domain (text)",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Get website (text)": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Convert HTML to Markdown",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Set domain to path": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Filter out invalid URLs",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Filter out empty hrefs": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Remove duplicated",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Set social media array": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Merge all data",
|
||
"type": "main",
|
||
"index": 1
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Filter out invalid URLs": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Aggregate URLs",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Convert HTML to Markdown": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Set response (text)",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Map company name and website": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Merge all data",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Add protocool to domain (URL)": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Get website (URL)",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Add protocool to domain (text)": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Get website (text)",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
},
|
||
"Select company name and website": {
|
||
"main": [
|
||
[
|
||
{
|
||
"node": "Crawl website",
|
||
"type": "main",
|
||
"index": 0
|
||
},
|
||
{
|
||
"node": "Map company name and website",
|
||
"type": "main",
|
||
"index": 0
|
||
}
|
||
]
|
||
]
|
||
}
|
||
}
|
||
} |