n8n-workflows/workflows/14_extract_swifts.json
2025-05-14 11:58:29 +03:00

654 lines
14 KiB
JSON

{
"id": "14",
"name": "extract_swifts",
"nodes": [
{
"name": "On clicking 'execute'",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-140,
820
],
"parameters": {},
"typeVersion": 1
},
{
"name": "HTTP Request",
"type": "n8n-nodes-base.httpRequest",
"position": [
320,
820
],
"parameters": {
"url": "https://www.theswiftcodes.com/browse-by-country/",
"options": {},
"responseFormat": "string"
},
"typeVersion": 1
},
{
"name": "HTML Extract",
"type": "n8n-nodes-base.htmlExtract",
"position": [
510,
820
],
"parameters": {
"options": {},
"extractionValues": {
"values": [
{
"key": "countries",
"attribute": "href",
"cssSelector": "ol > li > a",
"returnArray": true,
"returnValue": "attribute"
}
]
}
},
"typeVersion": 1
},
{
"name": "SplitInBatches",
"type": "n8n-nodes-base.splitInBatches",
"position": [
910,
820
],
"parameters": {
"options": {
"reset": false
},
"batchSize": 1
},
"typeVersion": 1
},
{
"name": "HTTP Request1",
"type": "n8n-nodes-base.httpRequest",
"position": [
2250,
740
],
"parameters": {
"url": "={{$node[\"Set\"].json[\"url\"]}}",
"options": {},
"responseFormat": "file"
},
"typeVersion": 1
},
{
"name": "HTML Extract1",
"type": "n8n-nodes-base.htmlExtract",
"position": [
2750,
590
],
"parameters": {
"options": {},
"sourceData": "binary",
"extractionValues": {
"values": [
{
"key": "next_button",
"attribute": "href",
"cssSelector": "span.next > a",
"returnValue": "attribute"
},
{
"key": "names",
"cssSelector": "td.table-name",
"returnArray": true
},
{
"key": "swifts",
"cssSelector": "td.table-swift",
"returnArray": true
},
{
"key": "cities",
"cssSelector": "td.table-city",
"returnArray": true
},
{
"key": "branches",
"cssSelector": "td.table-branch",
"returnArray": true
}
]
}
},
"typeVersion": 1
},
{
"name": "MongoDB1",
"type": "n8n-nodes-base.mongoDb",
"position": [
3280,
590
],
"parameters": {
"fields": "iso_code,country,page,name,branch,city,swift_code,createdAt,updatedAt",
"options": {
"dateFields": "createdAt,updatedAt"
},
"operation": "insert",
"collection": "swifts.meetup"
},
"credentials": {
"mongoDb": "db-mongo"
},
"typeVersion": 1
},
{
"name": "uProc",
"type": "n8n-nodes-base.uproc",
"position": [
1100,
820
],
"parameters": {
"tool": "getCountryNormalized",
"group": "geographic",
"country": "={{$node[\"SplitInBatches\"].json[\"country\"].replace(/[\\/0-9]/g, \"\")}}",
"additionalOptions": {}
},
"credentials": {
"uprocApi": "uproc-miquel"
},
"typeVersion": 1
},
{
"name": "Prepare Documents",
"type": "n8n-nodes-base.function",
"position": [
2930,
590
],
"parameters": {
"functionCode": "var newItems = [];\n\nfor (i = 0; i < items[0].json.swifts.length; i++) {\n var item = {\n iso_code: $node['uProc'].json.message.code,\n country: $node['SplitInBatches'].json.country.replace(/[-\\/0-9]/g, \"\"),\n page: $node['Set Page to Scrape'].json.page,\n name: items[0].json.names[i],\n city: items[0].json.cities[i],\n branch: items[0].json.branches[i],\n swift_code: items[0].json.swifts[i],\n createdAt: new Date(),\n updatedAt: new Date()\n }\n newItems.push({json: item});\n}\n\nreturn newItems;\n\n"
},
"typeVersion": 1
},
{
"name": "More Countries",
"type": "n8n-nodes-base.if",
"position": [
2810,
1100
],
"parameters": {
"conditions": {
"string": [
{
"value1": "={{$node[\"SplitInBatches\"].context[\"noItemsLeft\"] + \"\"}}",
"value2": "true"
}
]
}
},
"typeVersion": 1
},
{
"name": "Set Page to Scrape",
"type": "n8n-nodes-base.functionItem",
"position": [
1290,
680
],
"parameters": {
"functionCode": "const staticData = getWorkflowStaticData('global');\n\nitem.page = \"\";\nif (staticData.page && staticData.page.length) {\n item.page = staticData.page;\n} else {\n item.page = $node['SplitInBatches'].json.country;\n}\nreturn item;\n"
},
"typeVersion": 1
},
{
"name": "More Pages",
"type": "n8n-nodes-base.if",
"position": [
3070,
1020
],
"parameters": {
"conditions": {
"string": [
{
"value1": "={{$json[\"more_pages\"] + \"\"}}",
"value2": "true"
}
]
}
},
"typeVersion": 1
},
{
"name": "Set More Pages",
"type": "n8n-nodes-base.function",
"position": [
3470,
590
],
"parameters": {
"functionCode": "var next_page = $node['HTML Extract1'].json.next_button && $node['HTML Extract1'].json.next_button.length ? $node['HTML Extract1'].json.next_button : \"\";\nvar more_pages = next_page.length > 0;\nconst staticData = getWorkflowStaticData('global');\n\n//all current items are after date: needs pagination\nif (more_pages) {\n staticData.page = next_page;\n} else {\n //don't check more items in previous pages;\n delete staticData.page;\n}\n\nreturn [\n {\n json: {\n more_pages: more_pages\n }\n }\n];\n"
},
"typeVersion": 1
},
{
"name": "Set",
"type": "n8n-nodes-base.set",
"position": [
1440,
680
],
"parameters": {
"values": {
"string": [
{
"name": "url",
"value": "=https://www.theswiftcodes.com{{$node[\"Set Page to Scrape\"].json[\"page\"]}}"
}
]
},
"options": {}
},
"typeVersion": 1
},
{
"name": "Generate filename",
"type": "n8n-nodes-base.functionItem",
"position": [
1600,
610
],
"parameters": {
"functionCode": "var generateNameFromUrl = function(url){\n return url.replace(/[^a-z0-9]/gi, \"_\");\n}\n\nitem.file = generateNameFromUrl(item.url) + \".html\"\nreturn item;"
},
"typeVersion": 1
},
{
"name": "Read Binary File",
"type": "n8n-nodes-base.readBinaryFile",
"position": [
1770,
610
],
"parameters": {
"filePath": "=/home/node/.cache/scrapper/{{$json[\"file\"]}}"
},
"typeVersion": 1,
"continueOnFail": true,
"alwaysOutputData": true
},
{
"name": "File exists?",
"type": "n8n-nodes-base.if",
"position": [
1950,
610
],
"parameters": {
"conditions": {
"string": [
{
"value1": "={{$node[\"Read Binary File\"].binary.data.mimeType}}",
"value2": "text/html"
}
]
}
},
"typeVersion": 1
},
{
"name": "Write Binary File",
"type": "n8n-nodes-base.writeBinaryFile",
"position": [
2400,
740
],
"parameters": {
"fileName": "=/home/node/.cache/scrapper/{{$node[\"Generate filename\"].json[\"file\"]}}",
"dataPropertyName": "=data"
},
"typeVersion": 1
},
{
"name": "Read Binary File1",
"type": "n8n-nodes-base.readBinaryFile",
"position": [
2570,
590
],
"parameters": {
"filePath": "=/home/node/.cache/scrapper/{{$json[\"file\"]}}"
},
"typeVersion": 1,
"continueOnFail": true,
"alwaysOutputData": true
},
{
"name": "Wait",
"type": "n8n-nodes-base.function",
"position": [
2090,
740
],
"parameters": {
"functionCode": "const waitTimeSeconds = 1;\n\nreturn new Promise((resolve) => {\n setTimeout(() => {\n resolve([]);\n }, waitTimeSeconds * 1000);\n});\n"
},
"typeVersion": 1,
"continueOnFail": true,
"alwaysOutputData": true
},
{
"name": "Prepare countries",
"type": "n8n-nodes-base.function",
"position": [
700,
820
],
"parameters": {
"functionCode": "return items[0].json.countries.map(function(country) {\n return {\n json: {country: country}\n }\n});"
},
"typeVersion": 1
},
{
"name": "Create Directory",
"type": "n8n-nodes-base.executeCommand",
"position": [
70,
820
],
"parameters": {
"command": "mkdir -p /home/node/.cache/scrapper/"
},
"typeVersion": 1,
"continueOnFail": true
},
{
"name": "MongoDB",
"type": "n8n-nodes-base.mongoDb",
"disabled": true,
"position": [
3100,
520
],
"parameters": {
"query": "={\"swift_code\": \"{{$json[\"swift_code\"]}}\"}",
"options": {},
"collection": "swifts.meetup"
},
"credentials": {
"mongoDb": "db-mongo"
},
"executeOnce": false,
"typeVersion": 1,
"alwaysOutputData": true
}
],
"active": false,
"settings": {},
"connections": {
"Set": {
"main": [
[
{
"node": "Generate filename",
"type": "main",
"index": 0
}
]
]
},
"Wait": {
"main": [
[
{
"node": "HTTP Request1",
"type": "main",
"index": 0
}
]
]
},
"uProc": {
"main": [
[
{
"node": "Set Page to Scrape",
"type": "main",
"index": 0
}
]
]
},
"MongoDB": {
"main": [
[]
]
},
"MongoDB1": {
"main": [
[
{
"node": "Set More Pages",
"type": "main",
"index": 0
}
]
]
},
"More Pages": {
"main": [
[
{
"node": "Set Page to Scrape",
"type": "main",
"index": 0
}
],
[
{
"node": "More Countries",
"type": "main",
"index": 0
}
]
]
},
"File exists?": {
"main": [
[
{
"node": "Read Binary File1",
"type": "main",
"index": 0
}
],
[
{
"node": "Wait",
"type": "main",
"index": 0
}
]
]
},
"HTML Extract": {
"main": [
[
{
"node": "Prepare countries",
"type": "main",
"index": 0
}
]
]
},
"HTTP Request": {
"main": [
[
{
"node": "HTML Extract",
"type": "main",
"index": 0
}
]
]
},
"HTML Extract1": {
"main": [
[
{
"node": "Prepare Documents",
"type": "main",
"index": 0
}
]
]
},
"HTTP Request1": {
"main": [
[
{
"node": "Write Binary File",
"type": "main",
"index": 0
}
]
]
},
"More Countries": {
"main": [
[],
[
{
"node": "SplitInBatches",
"type": "main",
"index": 0
}
]
]
},
"Set More Pages": {
"main": [
[
{
"node": "More Pages",
"type": "main",
"index": 0
}
]
]
},
"SplitInBatches": {
"main": [
[
{
"node": "uProc",
"type": "main",
"index": 0
}
]
]
},
"Create Directory": {
"main": [
[
{
"node": "HTTP Request",
"type": "main",
"index": 0
}
]
]
},
"Read Binary File": {
"main": [
[
{
"node": "File exists?",
"type": "main",
"index": 0
}
]
]
},
"Generate filename": {
"main": [
[
{
"node": "Read Binary File",
"type": "main",
"index": 0
}
]
]
},
"Prepare Documents": {
"main": [
[
{
"node": "MongoDB1",
"type": "main",
"index": 0
}
]
]
},
"Prepare countries": {
"main": [
[
{
"node": "SplitInBatches",
"type": "main",
"index": 0
}
]
]
},
"Read Binary File1": {
"main": [
[
{
"node": "HTML Extract1",
"type": "main",
"index": 0
}
]
]
},
"Write Binary File": {
"main": [
[
{
"node": "Read Binary File1",
"type": "main",
"index": 0
}
]
]
},
"Set Page to Scrape": {
"main": [
[
{
"node": "Set",
"type": "main",
"index": 0
}
]
]
},
"On clicking 'execute'": {
"main": [
[
{
"node": "Create Directory",
"type": "main",
"index": 0
}
]
]
}
}
}