{
  "id": "14",
  "name": "extract_swifts",
  "nodes": [
    {
      "name": "On clicking 'execute'",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [-140, 820],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "name": "HTTP Request",
      "type": "n8n-nodes-base.httpRequest",
      "position": [320, 820],
      "parameters": {
        "url": "https://www.theswiftcodes.com/browse-by-country/",
        "options": {},
        "responseFormat": "string"
      },
      "typeVersion": 1
    },
    {
      "name": "HTML Extract",
      "type": "n8n-nodes-base.htmlExtract",
      "position": [510, 820],
      "parameters": {
        "options": {},
        "extractionValues": {
          "values": [
            {
              "key": "countries",
              "attribute": "href",
              "cssSelector": "ol > li > a",
              "returnArray": true,
              "returnValue": "attribute"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "name": "SplitInBatches",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [910, 820],
      "parameters": {
        "options": {
          "reset": false
        },
        "batchSize": 1
      },
      "typeVersion": 1
    },
    {
      "name": "HTTP Request1",
      "type": "n8n-nodes-base.httpRequest",
      "position": [2250, 740],
      "parameters": {
        "url": "={{$node[\"Set\"].json[\"url\"]}}",
        "options": {},
        "responseFormat": "file"
      },
      "typeVersion": 1
    },
    {
      "name": "HTML Extract1",
      "type": "n8n-nodes-base.htmlExtract",
      "position": [2750, 590],
      "parameters": {
        "options": {},
        "sourceData": "binary",
        "extractionValues": {
          "values": [
            {
              "key": "next_button",
              "attribute": "href",
              "cssSelector": "span.next > a",
              "returnValue": "attribute"
            },
            {
              "key": "names",
              "cssSelector": "td.table-name",
              "returnArray": true
            },
            {
              "key": "swifts",
              "cssSelector": "td.table-swift",
              "returnArray": true
            },
            {
              "key": "cities",
              "cssSelector": "td.table-city",
              "returnArray": true
            },
            {
              "key": "branches",
              "cssSelector": "td.table-branch",
              "returnArray": true
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "name": "MongoDB1",
      "type": "n8n-nodes-base.mongoDb",
      "position": [3280, 590],
      "parameters": {
        "fields": "iso_code,country,page,name,branch,city,swift_code,createdAt,updatedAt",
        "options": {
          "dateFields": "createdAt,updatedAt"
        },
        "operation": "insert",
        "collection": "swifts.meetup"
      },
      "credentials": {
        "mongoDb": "db-mongo"
      },
      "typeVersion": 1
    },
    {
      "name": "uProc",
      "type": "n8n-nodes-base.uproc",
      "position": [1100, 820],
      "parameters": {
        "tool": "getCountryNormalized",
        "group": "geographic",
        "country": "={{$node[\"SplitInBatches\"].json[\"country\"].replace(/[\\/0-9]/g, \"\")}}",
        "additionalOptions": {}
      },
      "credentials": {
        "uprocApi": "uproc-miquel"
      },
      "typeVersion": 1
    },
    {
      "name": "Prepare Documents",
      "type": "n8n-nodes-base.function",
      "position": [2930, 590],
      "parameters": {
        "functionCode": "// Build one MongoDB document per table row scraped from the current page.\n// HTML Extract1 returns parallel arrays (names, swifts, cities, branches).\nvar newItems = [];\n\n// The counter is declared explicitly so it does not leak as an implicit global.\nfor (var i = 0; i < items[0].json.swifts.length; i++) {\n  var item = {\n    iso_code: $node['uProc'].json.message.code,\n    country: $node['SplitInBatches'].json.country.replace(/[-\\/0-9]/g, \"\"),\n    page: $node['Set Page to Scrape'].json.page,\n    name: items[0].json.names[i],\n    city: items[0].json.cities[i],\n    branch: items[0].json.branches[i],\n    swift_code: items[0].json.swifts[i],\n    createdAt: new Date(),\n    updatedAt: new Date()\n  };\n  newItems.push({json: item});\n}\n\nreturn newItems;\n"
      },
      "typeVersion": 1
    },
    {
      "name": "More Countries",
      "type": "n8n-nodes-base.if",
      "position": [2810, 1100],
      "parameters": {
        "conditions": {
          "string": [
            {
              "value1": "={{$node[\"SplitInBatches\"].context[\"noItemsLeft\"] + \"\"}}",
              "value2": "true"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "name": "Set Page to Scrape",
      "type": "n8n-nodes-base.functionItem",
      "position": [1290, 680],
      "parameters": {
        "functionCode": "const staticData = getWorkflowStaticData('global');\n\nitem.page = \"\";\nif (staticData.page && staticData.page.length) {\n item.page = staticData.page;\n} else {\n item.page = $node['SplitInBatches'].json.country;\n}\nreturn item;\n"
      },
      "typeVersion": 1
    },
    {
      "name": "More Pages",
      "type": "n8n-nodes-base.if",
      "position": [3070, 1020],
      "parameters": {
        "conditions": {
          "string": [
            {
              "value1": "={{$json[\"more_pages\"] + \"\"}}",
              "value2": "true"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "name": "Set More Pages",
      "type": "n8n-nodes-base.function",
      "position": [3470, 590],
      "parameters": {
        "functionCode": "// Link to the next result page of the current country (empty string when there is none).\nvar next_page = $node['HTML Extract1'].json.next_button && $node['HTML Extract1'].json.next_button.length ? $node['HTML Extract1'].json.next_button : \"\";\nvar more_pages = next_page.length > 0;\nconst staticData = getWorkflowStaticData('global');\n\nif (more_pages) {\n  // Remember the next page so 'Set Page to Scrape' loads it on the following pass.\n  staticData.page = next_page;\n} else {\n  // Last page reached: clear the pointer so the next country starts from its own URL.\n  delete staticData.page;\n}\n\nreturn [\n  {\n    json: {\n      more_pages: more_pages\n    }\n  }\n];\n"
      },
      "typeVersion": 1
    },
    {
      "name": "Set",
      "type": "n8n-nodes-base.set",
      "position": [1440, 680],
      "parameters": {
        "values": {
          "string": [
            {
              "name": "url",
              "value": "=https://www.theswiftcodes.com{{$node[\"Set Page to Scrape\"].json[\"page\"]}}"
            }
          ]
        },
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "name": "Generate filename",
      "type": "n8n-nodes-base.functionItem",
      "position": [1600, 610],
      "parameters": {
        "functionCode": "var generateNameFromUrl = function(url){\n return url.replace(/[^a-z0-9]/gi, \"_\");\n}\n\nitem.file = generateNameFromUrl(item.url) + \".html\"\nreturn item;"
      },
      "typeVersion": 1
    },
    {
      "name": "Read Binary File",
      "type": "n8n-nodes-base.readBinaryFile",
      "position": [1770, 610],
      "parameters": {
        "filePath": "=/home/node/.cache/scrapper/{{$json[\"file\"]}}"
      },
      "typeVersion": 1,
      "continueOnFail": true,
      "alwaysOutputData": true
    },
    {
      "name": "File exists?",
      "type": "n8n-nodes-base.if",
      "position": [1950, 610],
      "parameters": {
        "conditions": {
          "string": [
            {
              "value1": "={{$node[\"Read Binary File\"].binary.data.mimeType}}",
              "value2": "text/html"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "name": "Write Binary File",
      "type": "n8n-nodes-base.writeBinaryFile",
      "position": [2400, 740],
      "parameters": {
        "fileName": "=/home/node/.cache/scrapper/{{$node[\"Generate filename\"].json[\"file\"]}}",
        "dataPropertyName": "=data"
      },
      "typeVersion": 1
    },
    {
      "name": "Read Binary File1",
      "type": "n8n-nodes-base.readBinaryFile",
      "position": [2570, 590],
      "parameters": {
        "filePath": "=/home/node/.cache/scrapper/{{$node[\"Generate filename\"].json[\"file\"]}}"
      },
      "typeVersion": 1,
      "continueOnFail": true,
      "alwaysOutputData": true
    },
    {
      "name": "Wait",
      "type": "n8n-nodes-base.function",
      "position": [2090, 740],
      "parameters": {
        "functionCode": "const waitTimeSeconds = 1;\n\nreturn new Promise((resolve) => {\n setTimeout(() => {\n resolve([]);\n }, waitTimeSeconds * 1000);\n});\n"
      },
      "typeVersion": 1,
      "continueOnFail": true,
      "alwaysOutputData": true
    },
    {
      "name": "Prepare countries",
      "type": "n8n-nodes-base.function",
      "position": [700, 820],
      "parameters": {
        "functionCode": "return items[0].json.countries.map(function(country) {\n return {\n json: {country: country}\n }\n});"
      },
      "typeVersion": 1
    },
    {
      "name": "Create Directory",
      "type": "n8n-nodes-base.executeCommand",
      "position": [70, 820],
      "parameters": {
        "command": "mkdir -p /home/node/.cache/scrapper/"
      },
      "typeVersion": 1,
      "continueOnFail": true
    },
    {
      "name": "MongoDB",
      "type": "n8n-nodes-base.mongoDb",
      "disabled": true,
      "position": [3100, 520],
      "parameters": {
        "query": "={\"swift_code\": \"{{$json[\"swift_code\"]}}\"}",
        "options": {},
        "collection": "swifts.meetup"
      },
      "credentials": {
        "mongoDb": "db-mongo"
      },
      "executeOnce": false,
      "typeVersion": 1,
      "alwaysOutputData": true
    }
  ],
  "active": false,
  "settings": {},
  "connections": {
    "Set": {
      "main": [
        [
          { "node": "Generate filename", "type": "main", "index": 0 }
        ]
      ]
    },
    "Wait": {
      "main": [
        [
          { "node": "HTTP Request1", "type": "main", "index": 0 }
        ]
      ]
    },
    "uProc": {
      "main": [
        [
          { "node": "Set Page to Scrape", "type": "main", "index": 0 }
        ]
      ]
    },
    "MongoDB": {
      "main": [
        []
      ]
    },
    "MongoDB1": {
      "main": [
        [
          { "node": "Set More Pages", "type": "main", "index": 0 }
        ]
      ]
    },
    "More Pages": {
      "main": [
        [
          { "node": "Set Page to Scrape", "type": "main", "index": 0 }
        ],
        [
          { "node": "More Countries", "type": "main", "index": 0 }
        ]
      ]
    },
    "File exists?": {
      "main": [
        [
          { "node": "Read Binary File1", "type": "main", "index": 0 }
        ],
        [
          { "node": "Wait", "type": "main", "index": 0 }
        ]
      ]
    },
    "HTML Extract": {
      "main": [
        [
          { "node": "Prepare countries", "type": "main", "index": 0 }
        ]
      ]
    },
    "HTTP Request": {
      "main": [
        [
          { "node": "HTML Extract", "type": "main", "index": 0 }
        ]
      ]
    },
    "HTML Extract1": {
      "main": [
        [
          { "node": "Prepare Documents", "type": "main", "index": 0 }
        ]
      ]
    },
    "HTTP Request1": {
      "main": [
        [
          { "node": "Write Binary File", "type": "main", "index": 0 }
        ]
      ]
    },
    "More Countries": {
      "main": [
        [],
        [
          { "node": "SplitInBatches", "type": "main", "index": 0 }
        ]
      ]
    },
    "Set More Pages": {
      "main": [
        [
          { "node": "More Pages", "type": "main", "index": 0 }
        ]
      ]
    },
    "SplitInBatches": {
      "main": [
        [
          { "node": "uProc", "type": "main", "index": 0 }
        ]
      ]
    },
    "Create Directory": {
      "main": [
        [
          { "node": "HTTP Request", "type": "main", "index": 0 }
        ]
      ]
    },
    "Read Binary File": {
      "main": [
        [
          { "node": "File exists?", "type": "main", "index": 0 }
        ]
      ]
    },
    "Generate filename": {
      "main": [
        [
          { "node": "Read Binary File", "type": "main", "index": 0 }
        ]
      ]
    },
    "Prepare Documents": {
      "main": [
        [
          { "node": "MongoDB1", "type": "main", "index": 0 }
        ]
      ]
    },
    "Prepare countries": {
      "main": [
        [
          { "node": "SplitInBatches", "type": "main", "index": 0 }
        ]
      ]
    },
    "Read Binary File1": {
      "main": [
        [
          { "node": "HTML Extract1", "type": "main", "index": 0 }
        ]
      ]
    },
    "Write Binary File": {
      "main": [
        [
          { "node": "Read Binary File1", "type": "main", "index": 0 }
        ]
      ]
    },
    "Set Page to Scrape": {
      "main": [
        [
          { "node": "Set", "type": "main", "index": 0 }
        ]
      ]
    },
    "On clicking 'execute'": {
      "main": [
        [
          { "node": "Create Directory", "type": "main", "index": 0 }
        ]
      ]
    }
  }
}