finalized web scraper
Shared 1/10/2026
2 views
Visual Workflow
JSON Code
{
"id": "jphxJZ2fJGTU537K",
"meta": {
"instanceId": "63f635ca0b113c98811dc90775e2ec5f47c911704512d89c2f0c4e2d9430580d",
"templateCredsSetupCompleted": true
},
"name": "finalized web scraper",
"tags": [],
"nodes": [
{
"id": "c934b432-0633-40da-b996-0cb8fbaff26e",
"name": "On form submission",
"type": "n8n-nodes-base.formTrigger",
"position": [
0,
0
],
"webhookId": "b58ce2b5-0ec9-449c-a5cb-76451fafce16",
"parameters": {
"options": {},
"formTitle": "Contact Information",
"formFields": {
"values": [
{
"fieldName": "name",
"fieldLabel": "Name",
"requiredField": true
},
{
"fieldName": "email",
"fieldType": "email",
"fieldLabel": "Email",
"requiredField": true
},
{
"fieldName": "url",
"fieldLabel": "Company URL",
"requiredField": true
},
{
"fieldName": "revenue",
"fieldType": "dropdown",
"fieldLabel": "Revenue",
"fieldOptions": {
"values": [
{
"option": "<$500K"
},
{
"option": "$500K-$1M"
},
{
"option": "$1M-$5M"
},
{
"option": "$5M+"
}
]
},
"requiredField": true
},
{
"fieldName": "linkedin",
"fieldLabel": "LinkedIn Profile"
}
]
},
"formDescription": "We'll get back to you soon."
},
"executeOnce": false,
"typeVersion": 2.4
},
{
"id": "53421422-8d57-4202-86b6-78fe15894b88",
"name": "Code in JavaScript",
"type": "n8n-nodes-base.code",
"position": [
208,
0
],
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "const lead_id =\n 'lead_' +\n Date.now().toString(36) +\n '_' +\n Math.random().toString(36).substring(2, 10);\n\nreturn {\n lead_id,\n name: $json.name,\n email: $json.email,\n company_url: $json.url,\n linkedin_url: $json.linkedin || '',\n revenue_range: $json.revenue,\n\n status: 'first_mail_pending',\n email_thread_id: '',\n last_email_sent_at: '',\n reply_received: 'No',\n notes: ''\n};"
},
"typeVersion": 2
},
{
"id": "15f76dc8-f3c6-4f55-9fc8-657d45687c78",
"name": "Append row in sheet",
"type": "n8n-nodes-base.googleSheets",
"position": [
384,
0
],
"parameters": {
"columns": {
"value": {
"url": "={{ $json.company_url }}",
"name": "={{ $json.name }}",
"email": "={{ $json.email }}",
"status": "={{ $json.status }}",
"lead_id": "={{ $json.lead_id }}",
"revenue": "={{ $json.revenue_range }}",
"linkedin": "={{ $json.linkedin_url }}",
"reply_received": "={{ $json.reply_received }}"
},
"schema": [
{
"id": "lead_id",
"type": "string",
"display": true,
"required": false,
"displayName": "lead_id",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "name",
"type": "string",
"display": true,
"required": false,
"displayName": "name",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "email",
"type": "string",
"display": true,
"required": false,
"displayName": "email",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "url",
"type": "string",
"display": true,
"required": false,
"displayName": "url",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "linkedin",
"type": "string",
"display": true,
"required": false,
"displayName": "linkedin",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "revenue",
"type": "string",
"display": true,
"required": false,
"displayName": "revenue",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "status",
"type": "string",
"display": true,
"required": false,
"displayName": "status",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "threadid",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "threadid",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "last_email_sent_at",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "last_email_sent_at",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "reply_received",
"type": "string",
"display": true,
"required": false,
"displayName": "reply_received",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "notes",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "notes",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "append",
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1ziPMuQrPrKJG6m-VkBvkh9zXs57cgK4Z8ylie9VFCQI/edit#gid=0",
"cachedResultName": "Sheet1"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "1ziPMuQrPrKJG6m-VkBvkh9zXs57cgK4Z8ylie9VFCQI",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1ziPMuQrPrKJG6m-VkBvkh9zXs57cgK4Z8ylie9VFCQI/edit?usp=drivesdk",
"cachedResultName": "web scrape (research)"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "vMfY8uIApdR7nuqv",
"name": "Google Sheets account"
}
},
"typeVersion": 4.7
},
{
"id": "23ecac39-fb20-4c86-893c-164daca8211e",
"name": "HTTP Request",
"type": "n8n-nodes-base.httpRequest",
"position": [
528,
176
],
"parameters": {
"url": "={{ $json.url }}",
"options": {
"response": {
"response": {
"responseFormat": "text",
"outputPropertyName": "raw_html"
}
}
}
},
"typeVersion": 4.3
},
{
"id": "34166b0f-6fe8-4aa9-8608-d97b00d4393b",
"name": "Merge",
"type": "n8n-nodes-base.merge",
"position": [
1168,
16
],
"parameters": {
"mode": "combine",
"options": {},
"combineBy": "combineAll"
},
"typeVersion": 3.2
},
{
"id": "d6a26e27-d1b1-41be-948d-e6e365bdcfa6",
"name": "HTML",
"type": "n8n-nodes-base.html",
"position": [
720,
176
],
"parameters": {
"options": {
"trimValues": false,
"cleanUpText": false
},
"operation": "extractHtmlContent",
"dataPropertyName": "raw_html",
"extractionValues": {
"values": [
{
"key": "body_html",
"cssSelector": "body",
"skipSelectors": "script, style, nav, footer, header, iframe, video, img"
}
]
}
},
"typeVersion": 1.2
},
{
"id": "8a9b9517-f80d-4e3c-995d-8d1ced499fd5",
"name": "Code in JavaScript1",
"type": "n8n-nodes-base.code",
"position": [
928,
176
],
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "const text = $json.body_html || '';\n\nconst cleaned = text\n .replace(/^>+/gm, '') // remove blockquote markers\n .replace(/-{3,}/g, '') // remove separators\n .replace(/\\n{3,}/g, '\\n\\n') // collapse newlines\n .replace(/\\s{2,}/g, ' ')\n .trim();\n\nreturn {\n website_text: cleaned.slice(0, 6000) // token safety\n};"
},
"typeVersion": 2
},
{
"id": "edf05732-0ef4-42dd-a285-2022ebc58f8f",
"name": "AI Agent",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
1376,
16
],
"parameters": {
"text": "=You are a senior growth consultant writing highly personalized outbound emails.\n\nYour tone is consultative, thoughtful, and insight-driven — never salesy.\n\nYou must:\n- Reference the provided website content to show genuine research\n- Avoid generic praise\n- Avoid LinkedIn references\n- Avoid hype or marketing buzzwords\n- Be concise and professional\n\nYou ALWAYS output valid JSON.\nYou NEVER output explanations, markdown, or code fences.\nYou NEVER wrap the output in ```.\n\nUse the following data:\n\nName: {{ $json.name }}\nCompany Website: {{ $json.url }}\nRevenue Band: {{ $json.revenue }}\n\nWebsite Content:\n{{ $json.website_text }}\n\nEmail structure:\n1. Professional opening (1–2 lines)\n2. One specific, concrete insight from the website (prove you read it)\n3. Business framing aligned with the revenue band\n4. Soft call-to-action asking for one short exploratory call\n\nFormatting rules for the email body:\n- Output the email body as clean HTML\n- Use <p> tags for paragraphs\n- Use <br> only where necessary\n- Do NOT use inline CSS\n- Do NOT use headings, lists, or emojis\n- Keep it readable and email-client safe\n\nContent rules:\n- Do NOT mention LinkedIn\n- Do NOT invent facts\n- Do NOT exaggerate results\n- Keep under 220 words\n- End with a single-question CTA\n- Address the recipient by first name\n- Write as a peer, not a salesperson\n\nOutput format (JSON only):\n\n{\n \"lead_id\": \"{{ $json.lead_id }}\",\n \"email\": \"{{ $json.email }}\",\n \"subject\": \"\",\n \"body\": \"\"\n}",
"options": {},
"promptType": "define"
},
"typeVersion": 3.1
},
{
"id": "e9e11949-d770-4937-bb88-bd9d76d470b6",
"name": "OpenAI Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
1376,
176
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-4.1-mini"
},
"options": {},
"builtInTools": {}
},
"credentials": {
"openAiApi": {
"id": "lJAUjP06iO1xOwuF",
"name": "OpenAi account"
}
},
"typeVersion": 1.3
},
{
"id": "02bed463-c7a5-4358-b09f-6760444383e6",
"name": "Code in JavaScript2",
"type": "n8n-nodes-base.code",
"position": [
1728,
16
],
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// 1. Get raw model output\nlet raw = $json.output;\n\n// 2. Remove markdown code fences if present\nraw = raw\n .replace(/```json/i, '')\n .replace(/```/g, '')\n .trim();\n\n// 3. Parse JSON safely\nlet parsed;\ntry {\n parsed = JSON.parse(raw);\n} catch (e) {\n throw new Error('Failed to parse AI JSON output: ' + raw);\n}\n\n// 4. Return clean structured data\nreturn {\n lead_id: parsed.lead_id,\n email: parsed.email,\n subject: parsed.subject,\n body: parsed.body\n};"
},
"typeVersion": 2
},
{
"id": "65ddd1b8-7ce1-4342-87d9-fe5a46c624e6",
"name": "Send a message",
"type": "n8n-nodes-base.gmail",
"position": [
1920,
176
],
"webhookId": "f4ebe655-61e7-4728-91fd-362212ba4569",
"parameters": {
"sendTo": "={{ $json.email }}",
"message": "={{ $json.body }}",
"options": {
"appendAttribution": false
},
"subject": "={{ $json.subject }}"
},
"credentials": {
"gmailOAuth2": {
"id": "bCUQol7XoOfejGRM",
"name": "Gmail account"
}
},
"typeVersion": 2.2
},
{
"id": "8ec2ad3d-9457-48f9-95f4-9d480fde5ba0",
"name": "Merge1",
"type": "n8n-nodes-base.merge",
"position": [
2112,
-48
],
"parameters": {
"mode": "combine",
"options": {},
"combineBy": "combineAll"
},
"typeVersion": 3.2
},
{
"id": "88391ee0-6acd-4947-a2ef-a0bacdffdbb6",
"name": "Update row in sheet",
"type": "n8n-nodes-base.googleSheets",
"position": [
2320,
-48
],
"parameters": {
"columns": {
"value": {
"status": "first_mail_sent",
"lead_id": "={{ $json.lead_id }}",
"threadid": "={{ $json.threadId }}",
"reply_received": "no",
"last_email_sent_at": "={{ new Date().toISOString().split('T')[0] }}"
},
"schema": [
{
"id": "lead_id",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "lead_id",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "name",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "name",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "email",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "email",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "url",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "url",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "linkedin",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "linkedin",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "revenue",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "revenue",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "status",
"type": "string",
"display": true,
"required": false,
"displayName": "status",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "threadid",
"type": "string",
"display": true,
"required": false,
"displayName": "threadid",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "last_email_sent_at",
"type": "string",
"display": true,
"required": false,
"displayName": "last_email_sent_at",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "reply_received",
"type": "string",
"display": true,
"required": false,
"displayName": "reply_received",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "notes",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "notes",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "row_number",
"type": "number",
"display": true,
"removed": true,
"readOnly": true,
"required": false,
"displayName": "row_number",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [
"lead_id"
],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "update",
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1ziPMuQrPrKJG6m-VkBvkh9zXs57cgK4Z8ylie9VFCQI/edit#gid=0",
"cachedResultName": "Sheet1"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "1ziPMuQrPrKJG6m-VkBvkh9zXs57cgK4Z8ylie9VFCQI",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1ziPMuQrPrKJG6m-VkBvkh9zXs57cgK4Z8ylie9VFCQI/edit?usp=drivesdk",
"cachedResultName": "web scrape (research)"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "vMfY8uIApdR7nuqv",
"name": "Google Sheets account"
}
},
"typeVersion": 4.7
},
{
"id": "3579d961-9234-4d53-9626-f518e1a2240f",
"name": "Gmail Trigger",
"type": "n8n-nodes-base.gmailTrigger",
"position": [
672,
624
],
"parameters": {
"filters": {
"q": "-in:sent",
"labelIds": [
"INBOX"
]
},
"pollTimes": {
"item": [
{
"mode": "everyMinute"
}
]
}
},
"credentials": {
"gmailOAuth2": {
"id": "bCUQol7XoOfejGRM",
"name": "Gmail account"
}
},
"typeVersion": 1.3
},
{
"id": "4537dc26-327f-4653-aa80-adae860005a2",
"name": "Get row(s) in sheet",
"type": "n8n-nodes-base.googleSheets",
"position": [
1216,
624
],
"parameters": {
"options": {},
"filtersUI": {
"values": [
{
"lookupValue": "first_mail_sent",
"lookupColumn": "status"
},
{
"lookupValue": "no",
"lookupColumn": "reply_received"
}
]
},
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1ziPMuQrPrKJG6m-VkBvkh9zXs57cgK4Z8ylie9VFCQI/edit#gid=0",
"cachedResultName": "Sheet1"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "1ziPMuQrPrKJG6m-VkBvkh9zXs57cgK4Z8ylie9VFCQI",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1ziPMuQrPrKJG6m-VkBvkh9zXs57cgK4Z8ylie9VFCQI/edit?usp=drivesdk",
"cachedResultName": "web scrape (research)"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "vMfY8uIApdR7nuqv",
"name": "Google Sheets account"
}
},
"typeVersion": 4.7
},
{
"id": "32525d55-4967-4462-8cd6-306a7bbf4768",
"name": "Append or update row in sheet",
"type": "n8n-nodes-base.googleSheets",
"position": [
1488,
624
],
"parameters": {
"columns": {
"value": {
"status": "Done",
"lead_id": "={{ $json.lead_id }}",
"threadid": "={{ $json.threadid }}",
"reply_received": "Yes"
},
"schema": [
{
"id": "lead_id",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "lead_id",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "name",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "name",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "email",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "email",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "url",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "url",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "linkedin",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "linkedin",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "revenue",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "revenue",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "status",
"type": "string",
"display": true,
"required": false,
"displayName": "status",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "threadid",
"type": "string",
"display": true,
"required": false,
"displayName": "threadid",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "last_email_sent_at",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "last_email_sent_at",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "reply_received",
"type": "string",
"display": true,
"required": false,
"displayName": "reply_received",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "notes",
"type": "string",
"display": true,
"required": false,
"displayName": "notes",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [
"lead_id"
],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "appendOrUpdate",
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1ziPMuQrPrKJG6m-VkBvkh9zXs57cgK4Z8ylie9VFCQI/edit#gid=0",
"cachedResultName": "Sheet1"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "1ziPMuQrPrKJG6m-VkBvkh9zXs57cgK4Z8ylie9VFCQI",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1ziPMuQrPrKJG6m-VkBvkh9zXs57cgK4Z8ylie9VFCQI/edit?usp=drivesdk",
"cachedResultName": "web scrape (research)"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "vMfY8uIApdR7nuqv",
"name": "Google Sheets account"
}
},
"typeVersion": 4.7
},
{
"id": "0c5ca914-9bcb-43d9-8ad9-8b2284360670",
"name": "Code in JavaScript3",
"type": "n8n-nodes-base.code",
"position": [
912,
624
],
"parameters": {
"jsCode": "return [{\n threadid: $json.threadId\n}];"
},
"typeVersion": 2
},
{
"id": "e49cd4ab-9efa-45bc-9ed2-a938b5a0ad4c",
"name": "Sticky Note",
"type": "n8n-nodes-base.stickyNote",
"position": [
1152,
-160
],
"parameters": {
"width": 224,
"height": 80,
"content": "First workflow: sends the first outreach email"
},
"typeVersion": 1
},
{
"id": "5c0d79ca-4f59-4e87-8e3c-867ada43a37c",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
1200,
480
],
"parameters": {
"width": 150,
"height": 80,
"content": "Second workflow: reply detection"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"availableInMCP": false,
"executionOrder": "v1"
},
"versionId": "374b5c31-effb-40c5-b75c-3941b884c49a",
"connections": {
"HTML": {
"main": [
[
{
"node": "Code in JavaScript1",
"type": "main",
"index": 0
}
]
]
},
"Merge": {
"main": [
[
{
"node": "AI Agent",
"type": "main",
"index": 0
}
]
]
},
"Merge1": {
"main": [
[
{
"node": "Update row in sheet",
"type": "main",
"index": 0
}
]
]
},
"AI Agent": {
"main": [
[
{
"node": "Code in JavaScript2",
"type": "main",
"index": 0
}
]
]
},
"HTTP Request": {
"main": [
[
{
"node": "HTML",
"type": "main",
"index": 0
}
]
]
},
"Gmail Trigger": {
"main": [
[
{
"node": "Code in JavaScript3",
"type": "main",
"index": 0
}
]
]
},
"Send a message": {
"main": [
[
{
"node": "Merge1",
"type": "main",
"index": 1
}
]
]
},
"OpenAI Chat Model": {
"ai_languageModel": [
[
{
"node": "AI Agent",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Code in JavaScript": {
"main": [
[
{
"node": "Append row in sheet",
"type": "main",
"index": 0
}
]
]
},
"On form submission": {
"main": [
[
{
"node": "Code in JavaScript",
"type": "main",
"index": 0
}
]
]
},
"Append row in sheet": {
"main": [
[
{
"node": "HTTP Request",
"type": "main",
"index": 0
},
{
"node": "Merge",
"type": "main",
"index": 0
}
]
]
},
"Code in JavaScript1": {
"main": [
[
{
"node": "Merge",
"type": "main",
"index": 1
}
]
]
},
"Code in JavaScript2": {
"main": [
[
{
"node": "Send a message",
"type": "main",
"index": 0
},
{
"node": "Merge1",
"type": "main",
"index": 0
}
]
]
},
"Code in JavaScript3": {
"main": [
[
{
"node": "Get row(s) in sheet",
"type": "main",
"index": 0
}
]
]
},
"Get row(s) in sheet": {
"main": [
[
{
"node": "Append or update row in sheet",
"type": "main",
"index": 0
}
]
]
}
}
}