{
  "id": "xEij0kj2I1DHbL3I",
  "meta": {
    "instanceId": "31e69f7f4a77bf465b805824e303232f0227212ae922d12133a0f96ffeab4fef",
    "templateCredsSetupCompleted": true
  },
  "name": "💡🌐 Essential Multipage Website Scraper with Jina.ai",
  "tags": [],
  "nodes": [
    {
      "id": "3a503859-ef0a-492d-81c6-37e4f0c4c25e",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [-840, 0],
      "parameters": {
        "color": 3,
        "width": 340,
        "height": 320,
        "content": "## Jina.ai Web Scraper\n### No API Key Required\n"
      },
      "typeVersion": 1
    },
    {
      "id": "c5217a1a-f074-409b-8340-72afdc5fc8b5",
      "name": "When clicking ‘Test workflow’",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [-1500, -300],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "72af3b00-2632-4877-a0b6-7477e2f468f7",
      "name": "Loop Over Items",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [-1080, 20],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "11f0fa02-51f8-41cc-b789-5c452b6899aa",
      "name": "Wait",
      "type": "n8n-nodes-base.wait",
      "position": [80, 220],
      "webhookId": "081ce124-0cbf-4a21-a1e7-2c465f460448",
      "parameters": {},
      "typeVersion": 1.1
    },
    {
      "id": "cf3b5887-8ff2-46e0-ab33-384ab0987cbb",
      "name": "Limit",
      "type": "n8n-nodes-base.limit",
      "position": [80, -300],
      "parameters": {
        "maxItems": 20
      },
      "typeVersion": 1
    },
    {
      "id": "c4f04d82-aa33-46cf-a8e2-0b4e717e754a",
      "name": "Get List of Website URLs",
      "type": "n8n-nodes-base.httpRequest",
      "position": [-780, -300],
      "parameters": {
        "url": "={{ $json.sitemap_url }}",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "7f507c38-1e9e-4c46-8dea-bd6daf65dc55",
      "name": "Convert to JSON",
      "type": "n8n-nodes-base.xml",
      "position": [-560, -300],
      "parameters": {
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "id": "e21b55c2-8b0d-4c7c-ba91-a2d563a4c966",
      "name": "Create List of Website URLs",
      "type": "n8n-nodes-base.splitOut",
      "position": [-340, -300],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "urlset.url"
      },
      "typeVersion": 1
    },
    {
      "id": "61555239-8a16-424e-8a60-700f6ebaa270",
      "name": "Filter By Topics or Pages",
      "type": "n8n-nodes-base.filter",
      "position": [-120, -300],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "or",
          "conditions": [
            {
              "id": "d66c304d-879a-4dc4-908f-ab0665093672",
              "operator": {
                "name": "filter.operator.equals",
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.loc }}",
              "rightValue": "=https://ai.pydantic.dev/"
            },
            {
              "id": "3c930950-bee4-442b-82e6-4437fd39a933",
              "operator": {
                "type": "string",
                "operation": "contains"
              },
              "leftValue": "={{ $json.loc.toLowerCase() }}",
              "rightValue": "agent"
            },
            {
              "id": "aaeaf34e-ad5a-4673-b3bd-8bddf3500988",
              "operator": {
                "type": "string",
                "operation": "contains"
              },
              "leftValue": "={{ $json.loc.toLowerCase() }}",
              "rightValue": "tool"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "dd25fb57-64a3-4c47-be04-6eb66d16520a",
      "name": "Set Website URL",
      "type": "n8n-nodes-base.set",
      "position": [-1080, -300],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "1601dc3e-8024-4e19-b592-93a4e4f77641",
              "name": "sitemap_url",
              "type": "string",
              "value": "https://ai.pydantic.dev/sitemap.xml"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "14ac1c87-29fe-44c8-9c1e-f247a292dde5",
      "name": "Jina.ai Web Scraper",
      "type": "n8n-nodes-base.httpRequest",
      "position": [-720, 120],
      "parameters": {
        "url": "=https://r.jina.ai/{{ $json.loc }}",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "be253ec2-f088-4895-8ef2-61a3720cf68b",
      "name": "Save Webpage Contents to Google Drive",
      "type": "n8n-nodes-base.googleDrive",
      "position": [-120, 120],
      "parameters": {
        "name": "={{ $('Loop Over Items').item.json.loc }} - {{ $json.title }}",
        "content": "={{ $json.markdown }}",
        "driveId": {
          "__rl": true,
          "mode": "list",
          "value": "My Drive"
        },
        "options": {},
        "folderId": {
          "__rl": true,
          "mode": "list",
          "value": "root",
          "cachedResultName": "/ (Root folder)"
        },
        "operation": "createFromText"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "UhdXGYLTAJbsa0xX",
          "name": "Google Drive account"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "95d808c7-a3ca-4f59-a385-cc77bdff322e",
      "name": "Extract Title & Markdown Content",
      "type": "n8n-nodes-base.code",
      "position": [-380, 120],
      "parameters": {
        "jsCode": "// Get the text output from the previous node\nconst data = $input.first().json.data;\n\n// Regular expression to capture the title line\nconst titleRegex = /^Title:\\s*(.+)$/m;\n// Regular expression to capture everything after \"Markdown Content:\"\nconst markdownRegex = /Markdown Content:\\n([\\s\\S]+)/;\n\n// Extract the title using the first capture group\nconst titleMatch = data.match(titleRegex);\nconst title = titleMatch ? titleMatch[1].trim() : '';\n\n// Extract the markdown content using the first capture group\nconst markdownMatch = data.match(markdownRegex);\nconst markdown = markdownMatch ? markdownMatch[1].trim() : '';\n\n// Return a single object with title and markdown as unique values\nreturn { title, markdown };"
      },
      "typeVersion": 2
    },
    {
      "id": "2fb86c81-c144-4450-908c-559855deadef",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [-1240, -580],
      "parameters": {
        "color": 7,
        "width": 1540,
        "height": 1080,
        "content": "# 💡🌐 Essential Multipage Website Scraper with Jina.ai\n## Scrape entire websites with this workflow\n**Use responsibly and follow local rules and regulations**"
      },
      "typeVersion": 1
    },
    {
      "id": "b470b294-95d0-4e51-a9cc-2fe17316a771",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [-1580, -400],
      "parameters": {
        "color": 4,
        "width": 280,
        "height": 300,
        "content": "## 👍Try Me!"
      },
      "typeVersion": 1
    },
    {
      "id": "fafd0623-a423-4e73-9609-cee8e81f5c13",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [-1180, -400],
      "parameters": {
        "width": 300,
        "height": 300,
        "content": "## 👇Add Website Sitemap URL"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "2e815787-d83b-4ab7-a959-2f33006a37a5",
  "connections": {
    "Wait": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Limit": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Convert to JSON": {
      "main": [
        [
          {
            "node": "Create List of Website URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items": {
      "main": [
        [],
        [
          {
            "node": "Jina.ai Web Scraper",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set Website URL": {
      "main": [
        [
          {
            "node": "Get List of Website URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Jina.ai Web Scraper": {
      "main": [
        [
          {
            "node": "Extract Title & Markdown Content",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get List of Website URLs": {
      "main": [
        [
          {
            "node": "Convert to JSON",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Filter By Topics or Pages": {
      "main": [
        [
          {
            "node": "Limit",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create List of Website URLs": {
      "main": [
        [
          {
            "node": "Filter By Topics or Pages",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract Title & Markdown Content": {
      "main": [
        [
          {
            "node": "Save Webpage Contents to Google Drive",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking ‘Test workflow’": {
      "main": [
        [
          {
            "node": "Set Website URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Save Webpage Contents to Google Drive": {
      "main": [
        [
          {
            "node": "Wait",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}