{ "id": "0JsHmmyeHw5Ffz5m", "meta": { "instanceId": "d4d7965840e96e50a3e02959a8487c692901dfa8d5cc294134442c67ce1622d3", "templateCredsSetupCompleted": true }, "name": "HN Who is Hiring Scrape", "tags": [], "nodes": [ { "id": "f7cdb3ee-9bb0-4006-829a-d4ce797191d5", "name": "When clicking \u2018Test workflow\u2019", "type": "n8n-nodes-base.manualTrigger", "position": [ -20, -220 ], "parameters": {}, "typeVersion": 1 }, { "id": "0475e25d-9bf4-450d-abd3-a04608a438a4", "name": "Sticky Note", "type": "n8n-nodes-base.stickyNote", "position": [ 60, -620 ], "parameters": { "width": 460, "height": 340, "content": "## Go to https://hn.algolia.com\n- filter by \"Ask HN: Who is hiring?\" (important with quotes for full match)\n- sort by date\n- Chrome Network Tab > find API call > click \"Copy as cURL\"\n- n8n HTTP node -> import cURL and paste \n- I've set the API key as Header Auth so you will have to do the above yourself to make this work" }, "typeVersion": 1 }, { "id": "a686852b-ff84-430b-92bb-ce02a6808e19", "name": "Split Out", "type": "n8n-nodes-base.splitOut", "position": [ 400, -220 ], "parameters": { "options": {}, "fieldToSplitOut": "hits" }, "typeVersion": 1 }, { "id": "cdaaa738-d561-4fa0-b2c7-8ea9e6778eb1", "name": "Sticky Note1", "type": "n8n-nodes-base.stickyNote", "position": [ 1260, -620 ], "parameters": { "width": 500, "height": 340, "content": "## Go to HN API \nhttps://github.com/HackerNews/API\n\nWe'll need following endpoints: \n- For example, a story: https://hacker-news.firebaseio.com/v0/item/8863.json?print=pretty\n- comment: https://hacker-news.firebaseio.com/v0/item/2921983.json?print=pretty\n\n" }, "typeVersion": 1 }, { "id": "4f353598-9e32-4be4-9e7b-c89cc05305fd", "name": "OpenAI Chat Model", "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi", "position": [ 2680, -20 ], "parameters": { "model": { "__rl": true, "mode": "list", "value": "gpt-4o-mini" }, "options": {} }, "credentials": { "openAiApi": { "id": "Fbb2ueT0XP5xMRme", "name": "OpenAi account 2" 
} }, "typeVersion": 1.2 }, { "id": "5bd0d7cc-497a-497c-aa4c-589d9ceeca14", "name": "Structured Output Parser", "type": "@n8n/n8n-nodes-langchain.outputParserStructured", "position": [ 2840, -20 ], "parameters": { "schemaType": "manual", "inputSchema": "{\n \"type\": \"object\",\n \"properties\": {\n \"company\": {\n \"type\": [\n \"string\",\n \"null\"\n ],\n \"description\": \"Name of the hiring company\"\n },\n \"title\": {\n \"type\": [\n \"string\",\n \"null\"\n ],\n \"description\": \"Job title/role being advertised\"\n },\n \"location\": {\n \"type\": [\n \"string\",\n \"null\"\n ],\n \"description\": \"Work location including remote/hybrid status\"\n },\n \"type\": {\n \"type\": [\n \"string\",\n \"null\"\n ],\n \"enum\": [\n \"FULL_TIME\",\n \"PART_TIME\",\n \"CONTRACT\",\n \"INTERNSHIP\",\n \"FREELANCE\",\n null\n ],\n \"description\": \"Employment type (Full-time, Contract, etc)\"\n },\n \"work_location\": {\n \"type\": [\n \"string\",\n \"null\"\n ],\n \"enum\": [\n \"REMOTE\",\n \"HYBRID\",\n \"ON_SITE\",\n null\n ],\n \"description\": \"Work arrangement type\"\n },\n \"salary\": {\n \"type\": [\n \"string\",\n \"null\"\n ],\n \"description\": \"Compensation details if provided\"\n },\n \"description\": {\n \"type\": [\n \"string\",\n \"null\"\n ],\n \"description\": \"Main job description text including requirements and team info\"\n },\n \"apply_url\": {\n \"type\": [\n \"string\",\n \"null\"\n ],\n \"description\": \"Direct application/job posting URL\"\n },\n \"company_url\": {\n \"type\": [\n \"string\",\n \"null\"\n ],\n \"description\": \"Company website or careers page\"\n }\n }\n}\n" }, "typeVersion": 1.2 }, { "id": "b84ca004-6f3b-4577-8910-61b8584b161d", "name": "Search for Who is hiring posts", "type": "n8n-nodes-base.httpRequest", "position": [ 200, -220 ], "parameters": { "url": "https://uj5wyc0l7x-dsn.algolia.net/1/indexes/Item_dev_sort_date/query", "method": "POST", "options": {}, "jsonBody": "{\n \"query\": \"\\\"Ask HN: Who is hiring\\\"\",\n \"analyticsTags\": [\n 
\"web\"\n ],\n \"page\": 0,\n \"hitsPerPage\": 30,\n \"minWordSizefor1Typo\": 4,\n \"minWordSizefor2Typos\": 8,\n \"advancedSyntax\": true,\n \"ignorePlurals\": false,\n \"clickAnalytics\": true,\n \"minProximity\": 7,\n \"numericFilters\": [],\n \"tagFilters\": [\n [\n \"story\"\n ],\n []\n ],\n \"typoTolerance\": \"min\",\n \"queryType\": \"prefixNone\",\n \"restrictSearchableAttributes\": [\n \"title\",\n \"comment_text\",\n \"url\",\n \"story_text\",\n \"author\"\n ],\n \"getRankingInfo\": true\n}", "sendBody": true, "sendQuery": true, "sendHeaders": true, "specifyBody": "json", "authentication": "genericCredentialType", "genericAuthType": "httpHeaderAuth", "queryParameters": { "parameters": [ { "name": "x-algolia-agent", "value": "Algolia for JavaScript (4.13.1); Browser (lite)" }, { "name": "x-algolia-application-id", "value": "UJ5WYC0L7X" } ] }, "headerParameters": { "parameters": [ { "name": "Accept", "value": "*/*" }, { "name": "Accept-Language", "value": "en-GB,en-US;q=0.9,en;q=0.8" }, { "name": "Connection", "value": "keep-alive" }, { "name": "DNT", "value": "1" }, { "name": "Origin", "value": "https://hn.algolia.com" }, { "name": "Referer", "value": "https://hn.algolia.com/" }, { "name": "Sec-Fetch-Dest", "value": "empty" }, { "name": "Sec-Fetch-Mode", "value": "cors" }, { "name": "Sec-Fetch-Site", "value": "cross-site" }, { "name": "User-Agent", "value": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36" }, { "name": "sec-ch-ua", "value": "\"Chromium\";v=\"133\", \"Not(A:Brand\";v=\"99\"" }, { "name": "sec-ch-ua-mobile", "value": "?0" }, { "name": "sec-ch-ua-platform", "value": "\"macOS\"" } ] } }, "credentials": { "httpHeaderAuth": { "id": "oVEXp2ZbYCXypMVz", "name": "Algolia Auth" } }, "typeVersion": 4.2 }, { "id": "205e66f6-cd6b-4cfd-a6ec-2226c35ddaac", "name": "Get relevant data", "type": "n8n-nodes-base.set", "position": [ 700, -220 ], "parameters": { "options": {}, 
"assignments": { "assignments": [ { "id": "73dd2325-faa7-4650-bd78-5fc97cc202de", "name": "title", "type": "string", "value": "={{ $json.title }}" }, { "id": "44918eac-4510-440e-9ac0-bf14d2b2f3af", "name": "createdAt", "type": "string", "value": "={{ $json.created_at }}" }, { "id": "00eb6f09-2c22-411c-949c-886b2d95b6eb", "name": "updatedAt", "type": "string", "value": "={{ $json.updated_at }}" }, { "id": "2b4f9da6-f60e-46e0-ba9d-3242fa955a55", "name": "storyId", "type": "string", "value": "={{ $json.story_id }}" } ] } }, "typeVersion": 3.4 }, { "id": "16bc5628-8a29-4eac-8be9-b4e9da802e1e", "name": "Get latest post", "type": "n8n-nodes-base.filter", "position": [ 900, -220 ], "parameters": { "options": {}, "conditions": { "options": { "version": 2, "leftValue": "", "caseSensitive": true, "typeValidation": "strict" }, "combinator": "and", "conditions": [ { "id": "d7dd7175-2a50-45aa-bd3e-4c248c9193c4", "operator": { "type": "dateTime", "operation": "after" }, "leftValue": "={{ $json.createdAt }}", "rightValue": "={{$now.minus({days: 30})}} " } ] } }, "typeVersion": 2.2 }, { "id": "92e1ef74-5ae1-4195-840b-115184db464f", "name": "Split out children (jobs)", "type": "n8n-nodes-base.splitOut", "position": [ 1460, -220 ], "parameters": { "options": {}, "fieldToSplitOut": "kids" }, "typeVersion": 1 }, { "id": "d0836aae-b98a-497f-a6f7-0ad563c262a0", "name": "Trun into structured data", "type": "@n8n/n8n-nodes-langchain.chainLlm", "position": [ 2600, -220 ], "parameters": { "text": "={{ $json.cleaned_text }}", "messages": { "messageValues": [ { "message": "Extract the JSON data" } ] }, "promptType": "define", "hasOutputParser": true }, "typeVersion": 1.5 }, { "id": "fd818a93-627c-435d-91ba-5d759d5a9004", "name": "Sticky Note2", "type": "n8n-nodes-base.stickyNote", "position": [ 2600, -620 ], "parameters": { "width": 840, "height": 340, "content": "## Data Structure\n\nWe use OpenAI GPT-4o-mini to transform the raw data into a unified data structure. 
Feel free to change this.\n\n```json\n{\n \"company\": \"Name of the hiring company\",\n \"title\": \"Job title/role being advertised\",\n \"location\": \"Work location including remote/hybrid status\",\n \"type\": \"Employment type (Full-time, Contract, etc)\",\n \"salary\": \"Compensation details if provided\",\n \"description\": \"Main job description text including requirements and team info\",\n \"apply_url\": \"Direct application/job posting URL\",\n \"company_url\": \"Company website or careers page\"\n}\n```" }, "typeVersion": 1 }, { "id": "b70c5578-5b81-467a-8ac2-65374e4e52f3", "name": "Extract text", "type": "n8n-nodes-base.set", "position": [ 1860, -220 ], "parameters": { "options": {}, "assignments": { "assignments": [ { "id": "6affa370-56ce-4ad8-8534-8f753fdf07fc", "name": "text", "type": "string", "value": "={{ $json.text }}" } ] } }, "typeVersion": 3.4 }, { "id": "acb68d88-9417-42e9-9bcc-7c2fa95c4afd", "name": "Clean text", "type": "n8n-nodes-base.code", "position": [ 2060, -220 ], "parameters": { "jsCode": "// n8n Code node: strip HTML tags and entities from each incoming item's text field\nconst inputData = $input.all();\n\nfunction cleanAllPosts(data) {\n return data.map(item => {\n try {\n // Check if item exists and has the expected structure\n if (!item || typeof item !== 'object') {\n return { cleaned_text: '', error: 'Invalid item structure' };\n }\n\n // Get the text, with multiple fallbacks\n let text = '';\n if (typeof item === 'string') {\n text = item;\n } else if (item.json && item.json.text) {\n text = item.json.text;\n } else if (typeof item.json === 'string') {\n text = item.json;\n } else {\n text = JSON.stringify(item);\n }\n\n // Make sure text is a string\n text = String(text);\n \n // Perform the cleaning operations\n try {\n // Decode the numeric HTML entities for slash and apostrophe (hex or decimal form)\n text = text.replace(/&#(?:x2[Ff]|47);/g, '/');\n text = text.replace(/&#(?:x27|39);/g, \"'\");\n // Any remaining named entity becomes a single space\n text = text.replace(/&\\w+;/g, ' ');\n text = text.replace(/<[^>]*>/g, '');\n text = text.replace(/\\|\\s*/g, '| ');\n text = text.replace(/\\s+/g, ' ');\n // Put every URL on its own line\n text =
text.replace(/\\s*(https?:\\/\\/[^\\s]+)\\s*/g, '\\n$1\\n');\n text = text.replace(/\\n{3,}/g, '\\n\\n');\n text = text.trim();\n } catch (cleaningError) {\n console.log('Error during text cleaning:', cleaningError);\n // Keep whatever cleaning succeeded before the failure\n return { cleaned_text: text, warning: 'Partial cleaning applied' };\n }\n\n return { cleaned_text: text };\n \n } catch (error) {\n console.log('Error processing item:', error);\n return { \n cleaned_text: '', \n error: `Processing error: ${error.message}`,\n original: item\n };\n }\n }).filter(result => result.cleaned_text || result.error); \n}\n\ntry {\n return cleanAllPosts(inputData);\n} catch (error) {\n console.log('Fatal error:', error);\n return [{ \n cleaned_text: '', \n error: `Fatal error: ${error.message}`,\n input: inputData \n }];\n}\n" }, "typeVersion": 2 }, { "id": "a0727b55-565d-47c0-9ab5-0f001f4b9941", "name": "Limit for testing (optional)", "type": "n8n-nodes-base.limit", "position": [ 2280, -220 ], "parameters": { "maxItems": 5 }, "typeVersion": 1 }, { "id": "650baf5e-c2ac-443d-8a2b-6df89717186f", "name": "Sticky Note3", "type": "n8n-nodes-base.stickyNote", "position": [ 580, -620 ], "parameters": { "width": 540, "height": 340, "content": "## Clean the result \n\n```json\n{\n\"title\": \"Ask HN: Who is hiring? (February 2025)\",\n\"createdAt\": \"2025-02-03T16:00:43Z\",\n\"updatedAt\": \"2025-02-17T08:35:44Z\",\n\"storyId\": \"42919502\"\n},\n{\n\"title\": \"Ask HN: Who is hiring? (January 2025)\",\n\"createdAt\": \"2025-01-02T16:00:09Z\",\n\"updatedAt\": \"2025-02-13T00:03:24Z\",\n\"storyId\": \"42575537\"\n},\n```" }, "typeVersion": 1 }, { "id": "1ca5c39f-f21d-455a-b63a-702e7e3ba02b", "name": "Write results to airtable", "type": "n8n-nodes-base.airtable", "position": [ 3040, -220 ], "parameters": { "base": { "__rl": true, "mode": "list", "value": "appM2JWvA5AstsGdn", "cachedResultUrl": "https://airtable.com/appM2JWvA5AstsGdn", "cachedResultName": "HN Who is hiring?"
}, "table": { "__rl": true, "mode": "list", "value": "tblGvcOjqbliwM7AS", "cachedResultUrl": "https://airtable.com/appM2JWvA5AstsGdn/tblGvcOjqbliwM7AS", "cachedResultName": "Table 1" }, "columns": { "value": { "type": "={{ $json.output.type }}", "title": "={{ $json.output.title }}", "salary": "={{ $json.output.salary }}", "company": "={{ $json.output.company }}", "location": "={{ $json.output.location }}", "apply_url": "={{ $json.output.apply_url }}", "company_url": "={{ $json.output.company_url }}", "description": "={{ $json.output.description }}" }, "schema": [ { "id": "title", "type": "string", "display": true, "removed": false, "readOnly": false, "required": false, "displayName": "title", "defaultMatch": false, "canBeUsedToMatch": true }, { "id": "company", "type": "string", "display": true, "removed": false, "readOnly": false, "required": false, "displayName": "company", "defaultMatch": false, "canBeUsedToMatch": true }, { "id": "location", "type": "string", "display": true, "removed": false, "readOnly": false, "required": false, "displayName": "location", "defaultMatch": false, "canBeUsedToMatch": true }, { "id": "type", "type": "string", "display": true, "removed": false, "readOnly": false, "required": false, "displayName": "type", "defaultMatch": false, "canBeUsedToMatch": true }, { "id": "salary", "type": "string", "display": true, "removed": false, "readOnly": false, "required": false, "displayName": "salary", "defaultMatch": false, "canBeUsedToMatch": true }, { "id": "description", "type": "string", "display": true, "removed": false, "readOnly": false, "required": false, "displayName": "description", "defaultMatch": false, "canBeUsedToMatch": true }, { "id": "apply_url", "type": "string", "display": true, "removed": false, "readOnly": false, "required": false, "displayName": "apply_url", "defaultMatch": false, "canBeUsedToMatch": true }, { "id": "company_url", "type": "string", "display": true, "removed": false, "readOnly": false, "required": false, 
"displayName": "company_url", "defaultMatch": false, "canBeUsedToMatch": true }, { "id": "posted_date", "type": "string", "display": true, "removed": true, "readOnly": false, "required": false, "displayName": "posted_date", "defaultMatch": false, "canBeUsedToMatch": true } ], "mappingMode": "defineBelow", "matchingColumns": [], "attemptToConvertTypes": false, "convertFieldsToString": false }, "options": {}, "operation": "create" }, "credentials": { "airtableTokenApi": { "id": "IudXLNj7CDuc5M5a", "name": "Airtable Personal Access Token account" } }, "typeVersion": 2.1 }, { "id": "d71fa024-86a0-4f74-b033-1f755574080c", "name": "Sticky Note4", "type": "n8n-nodes-base.stickyNote", "position": [ -520, -300 ], "parameters": { "width": 380, "height": 500, "content": "## Hacker News - Who is Hiring Scrape\n\nIn this template we set up a scraper for the monthly HN Who is Hiring post. This way we can scrape the data and transform it into a common data structure.\n\nFirst we use the [Algolia Search](https://hn.algolia.com/) provided by Hacker News to narrow down the results.\n\nWe can use the official [Hacker News API](https://github.com/HackerNews/API) to get the post data and also all the replies!\n\nThis will obviously work for any kind of post on Hacker News! 
Get creative \ud83d\ude03\n\nAll you need is an Openai Account to structure the text data and an Airtable Account (or similar) to write the results to a list.\n\nCopy my base https://airtable.com/appM2JWvA5AstsGdn/shrAuo78cJt5C2laR" }, "typeVersion": 1 }, { "id": "7466fb0c-9f0c-4adf-a6de-b2cf09032719", "name": "HI API: Get the individual job post", "type": "n8n-nodes-base.httpRequest", "position": [ 1660, -220 ], "parameters": { "url": "=https://hacker-news.firebaseio.com/v0/item/{{ $json.kids }}.json?print=pretty", "options": {} }, "typeVersion": 4.2 }, { "id": "184abccf-5838-49bf-9922-e0300c6b145e", "name": "HN API: Get Main Post", "type": "n8n-nodes-base.httpRequest", "position": [ 1260, -220 ], "parameters": { "url": "=https://hacker-news.firebaseio.com/v0/item/{{ $json.storyId }}.json?print=pretty", "options": {} }, "typeVersion": 4.2 } ], "active": false, "pinData": {}, "settings": { "executionOrder": "v1" }, "versionId": "387f7084-58fa-4643-9351-73c870d3f028", "connections": { "Split Out": { "main": [ [ { "node": "Get relevant data", "type": "main", "index": 0 } ] ] }, "Clean text": { "main": [ [ { "node": "Limit for testing (optional)", "type": "main", "index": 0 } ] ] }, "Extract text": { "main": [ [ { "node": "Clean text", "type": "main", "index": 0 } ] ] }, "Get latest post": { "main": [ [ { "node": "HN API: Get Main Post", "type": "main", "index": 0 } ] ] }, "Get relevant data": { "main": [ [ { "node": "Get latest post", "type": "main", "index": 0 } ] ] }, "OpenAI Chat Model": { "ai_languageModel": [ [ { "node": "Trun into structured data", "type": "ai_languageModel", "index": 0 } ] ] }, "HN API: Get Main Post": { "main": [ [ { "node": "Split out children (jobs)", "type": "main", "index": 0 } ] ] }, "Structured Output Parser": { "ai_outputParser": [ [ { "node": "Trun into structured data", "type": "ai_outputParser", "index": 0 } ] ] }, "Split out children (jobs)": { "main": [ [ { "node": "HI API: Get the individual job post", "type": "main", "index": 0 
} ] ] }, "Trun into structured data": { "main": [ [ { "node": "Write results to airtable", "type": "main", "index": 0 } ] ] }, "Limit for testing (optional)": { "main": [ [ { "node": "Trun into structured data", "type": "main", "index": 0 } ] ] }, "Search for Who is hiring posts": { "main": [ [ { "node": "Split Out", "type": "main", "index": 0 } ] ] }, "When clicking \u2018Test workflow\u2019": { "main": [ [ { "node": "Search for Who is hiring posts", "type": "main", "index": 0 } ] ] }, "HI API: Get the individual job post": { "main": [ [ { "node": "Extract text", "type": "main", "index": 0 } ] ] } } }