Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@decodo/mcp-server",
"version": "1.2.0",
"version": "1.2.1",
"description": "Decodo MCP Server",
"bin": {
"decodo-mcp": "./build/index.js"
Expand All @@ -9,7 +9,8 @@
"mcpName": "io.github.Decodo/mcp-web-scraper",
"scripts": {
"build": "tsc && chmod 755 build/index.js build/server.js",
"dev": "nodemon --watch src -e ts --exec 'npm run build'",
"dev": "nodemon --watch src -e ts --exec 'tsx src/server.ts'",
"dev:server": "nodemon --watch src -e ts --exec 'npm run build'",
"start": "node build/server.js",
"inspect": "mcp-inspector",
"test": "jest",
Expand Down
4 changes: 2 additions & 2 deletions server.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
"url": "https://github.com/Decodo/mcp-web-scraper",
"source": "github"
},
"version": "1.2.0",
"version": "1.2.1",
"packages": [
{
"registryType": "npm",
"registryBaseUrl": "https://registry.npmjs.org",
"identifier": "@decodo/mcp-server",
"version": "1.2.0",
"version": "1.2.1",
"transport": {
"type": "stdio"
}
Expand Down
126 changes: 73 additions & 53 deletions src/clients/scraper-api-client.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import axios, { AxiosError, AxiosResponse } from 'axios';
import { ScraperApiResponseData } from './types';
import { ScraperAPIParams, ScrapingMCPParams } from 'types';
import { ProgressNotifier, ProgressExtra } from '../utils';

/**
 * Converts the raw MAX_RETRIES environment value into a non-negative retry count.
 *
 * An explicit `0` is honoured so retries can be switched off; the previous
 * `parseInt(...) || 2` form treated a parsed `0` as falsy and silently turned it
 * back into 2. Unset, empty, or non-numeric values fall back to `fallback`, and
 * negative values are clamped to 0.
 *
 * @param raw - Raw environment string, or `undefined` when the variable is unset.
 * @param fallback - Count used when `raw` does not start with an integer.
 * @returns The number of retries allowed after the initial attempt.
 */
const parseMaxRetries = (raw: string | undefined, fallback = 2): number => {
  const parsed = parseInt(raw ?? '', 10);

  // Only NaN means "no usable value"; 0 is a legitimate setting.
  return Number.isNaN(parsed) ? fallback : Math.max(0, parsed);
};

/** Maximum number of retries after the first attempt (env MAX_RETRIES, default 2). */
const MAX_RETRIES = parseMaxRetries(process.env.MAX_RETRIES);
/** HTTP status codes listed as retryable. */
const RETRYABLE_STATUS_CODES = new Set([429, 502, 503, 504]);
/** Network-level error codes listed as retryable. */
const RETRYABLE_NETWORK_CODES = new Set(['ECONNRESET', 'ETIMEDOUT', 'ECONNABORTED', 'ENOTFOUND']);
/** First argument handed to `startWaitingNotifications` by `scrape`, in milliseconds. */
const WAITING_INITIAL_DELAY_MS = 3000;
/** Second argument handed to `startWaitingNotifications` by `scrape`, in milliseconds. */
const WAITING_INTERVAL_MS = 5000;

const isRetryable = (error: AxiosError): boolean => {
if (error.response) {
Expand Down Expand Up @@ -60,68 +63,85 @@ export class ScraperApiClient {
scrape = async <T = string>({
auth,
scrapingParams,
extra,
}: {
auth: string;
scrapingParams: ScrapingMCPParams;
extra?: ProgressExtra;
}) => {
const transformedParams = this.transformScrapingParams({ scrapingParams });

const url = process.env.DECODO_SAPI_HOST || 'https://scraper-api.decodo.com';

let lastError: unknown;

for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
try {
const res = await axios.request<ScraperApiResponseData<T>>({
url: `${url}/v2/scrape`,
method: 'POST',
headers: {
authorization: `Basic ${auth}`,
'x-integration': 'mcp',
},
timeout: 180000,
data: {
...transformedParams,
},
});

return this.transformResponse({ res });
} catch (error) {
lastError = error;

if (attempt < MAX_RETRIES && axios.isAxiosError(error) && isRetryable(error)) {
const delayMs = getRetryDelay(attempt, error);
const reason = error.response
? `status ${error.response.status}`
: `network error ${error.code}`;

console.error(
`[scraper-api-client] Retry ${
attempt + 1
}/${MAX_RETRIES} after ${reason}, waiting ${Math.round(delayMs)}ms`
);

await sleep(delayMs);
continue;
}
const notifier = new ProgressNotifier(extra);

break;
}
}
try {
await notifier.notify('Submitting request to Decodo API...', 0, 1);

if (axios.isAxiosError(lastError)) {
const status = lastError.response?.status;
let errorMessage = lastError.response?.data?.message ?? lastError.message;
notifier.startWaitingNotifications(WAITING_INITIAL_DELAY_MS, WAITING_INTERVAL_MS);

if (status === 401) {
errorMessage = 'Authentication failed.';
}
if (status === 429) {
errorMessage = JSON.stringify(lastError.response?.data);
const transformedParams = this.transformScrapingParams({ scrapingParams });
const url = process.env.DECODO_SAPI_HOST || 'https://scraper-api.decodo.com';

let lastError: unknown;

for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
try {
const res = await axios.request<ScraperApiResponseData<T>>({
url: `${url}/v2/scrape`,
method: 'POST',
headers: {
authorization: `Basic ${auth}`,
'x-integration': 'mcp',
},
timeout: 180000,
data: {
...transformedParams,
},
});

notifier.stopWaitingNotifications();

await notifier.notify('Processing response...', 0.9, 1);

return this.transformResponse({ res });
} catch (error) {
lastError = error;

if (attempt < MAX_RETRIES && axios.isAxiosError(error) && isRetryable(error)) {
const delayMs = getRetryDelay(attempt, error);
const reason = error.response
? `status ${error.response.status}`
: `network error ${error.code}`;

console.error(
`[scraper-api-client] Retry ${
attempt + 1
}/${MAX_RETRIES} after ${reason}, waiting ${Math.round(delayMs)}ms`
);

await notifier.notify(`Retrying (${attempt + 1}/${MAX_RETRIES})...`, 0.1, 1);

await sleep(delayMs);
continue;
}

break;
}
}

throw new Error(`Scraper API request failed (${status}): ${errorMessage}`);
if (axios.isAxiosError(lastError)) {
const status = lastError.response?.status;
let errorMessage = lastError.response?.data?.message ?? lastError.message;

if (status === 401) {
errorMessage = 'Authentication failed.';
}
if (status === 429) {
errorMessage = JSON.stringify(lastError.response?.data);
}

throw new Error(`Scraper API request failed (${status}): ${errorMessage}`);
}
throw lastError;
} finally {
notifier.stopWaitingNotifications();
}
throw lastError;
};
}
2 changes: 1 addition & 1 deletion src/server/sapi-base-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ export class ScraperAPIBaseServer {
constructor({ auth, toolsets = [] }: { auth: string; toolsets: TOOLSET[] }) {
this.server = new McpServer({
name: 'decodo',
version: '1.2.0',
version: '1.2.1',
});
this.sapiClient = new ScraperApiClient();

Expand Down
5 changes: 3 additions & 2 deletions src/tools/amazon-bestsellers/amazon-bestsellers-tool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { ScraperAPIParams, ScrapingMCPParams } from 'types';
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
import { zodDeviceType } from '../../zod/zod-types';
import { Tool, ToolRegistrationArgs } from '../tool';
import { ProgressExtra } from '../../utils';

const zodDomain = z
.string()
Expand Down Expand Up @@ -37,14 +38,14 @@ export class AmazonBestsellersTool extends Tool {
openWorldHint: true,
},
},
async (scrapingParams: ScrapingMCPParams) => {
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
const params = {
...scrapingParams,
target: SCRAPER_API_TARGETS.AMAZON_BESTSELLERS,
parse: true,
} satisfies ScraperAPIParams;

const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });

return {
content: [
Expand Down
6 changes: 3 additions & 3 deletions src/tools/amazon-pricing/amazon-pricing-tool.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import z from 'zod';
import { ScraperAPIParams, ScrapingMCPParams } from 'types';
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
import { removeKeyFromNestedObject } from '../../utils';
import { removeKeyFromNestedObject, ProgressExtra } from '../../utils';
import { zodJsRender, zodDeviceType } from '../../zod/zod-types';
import { Tool, ToolRegistrationArgs } from '../tool';

Expand Down Expand Up @@ -51,14 +51,14 @@ export class AmazonPricingTool extends Tool {
openWorldHint: true,
},
},
async (scrapingParams: ScrapingMCPParams) => {
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
const params = {
...scrapingParams,
target: SCRAPER_API_TARGETS.AMAZON_PRICING,
parse: true,
} satisfies ScraperAPIParams;

const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });

const { data: text } = this.transformResponse({ data });

Expand Down
6 changes: 3 additions & 3 deletions src/tools/amazon-product/amazon-product-tool.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import z from 'zod';
import { ScraperAPIParams, ScrapingMCPParams } from 'types';
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
import { removeKeyFromNestedObject } from '../../utils';
import { removeKeyFromNestedObject, ProgressExtra } from '../../utils';
import { zodJsRender, zodDeviceType } from '../../zod/zod-types';
import { Tool, ToolRegistrationArgs } from '../tool';

Expand Down Expand Up @@ -45,14 +45,14 @@ export class AmazonProductTool extends Tool {
openWorldHint: true,
},
},
async (scrapingParams: ScrapingMCPParams) => {
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
const params = {
...scrapingParams,
target: SCRAPER_API_TARGETS.AMAZON_PRODUCT,
parse: true,
} satisfies ScraperAPIParams;

const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });

const { data: text } = this.transformResponse({ data });

Expand Down
6 changes: 3 additions & 3 deletions src/tools/amazon-search/amazon-search-tool.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import z from 'zod';
import { ScraperAPIParams, ScrapingMCPParams } from 'types';
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
import { removeKeyFromNestedObject } from '../../utils';
import { removeKeyFromNestedObject, ProgressExtra } from '../../utils';
import { zodGeo, zodJsRender, zodDeviceType } from '../../zod/zod-types';
import { Tool, ToolRegistrationArgs } from '../tool';

Expand Down Expand Up @@ -46,14 +46,14 @@ export class AmazonSearchTool extends Tool {
openWorldHint: true,
},
},
async (scrapingParams: ScrapingMCPParams) => {
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
const params = {
...scrapingParams,
target: SCRAPER_API_TARGETS.AMAZON_SEARCH,
parse: true,
} satisfies ScraperAPIParams;

const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });

const { data: text } = this.transformResponse({ data });

Expand Down
5 changes: 3 additions & 2 deletions src/tools/amazon-sellers/amazon-sellers-tool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { ScraperAPIParams, ScrapingMCPParams } from 'types';
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
import { zodJsRender, zodDeviceType } from '../../zod/zod-types';
import { Tool, ToolRegistrationArgs } from '../tool';
import { ProgressExtra } from '../../utils';

const zodDomain = z
.string()
Expand Down Expand Up @@ -38,14 +39,14 @@ export class AmazonSellersTool extends Tool {
openWorldHint: true,
},
},
async (scrapingParams: ScrapingMCPParams) => {
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
const params = {
...scrapingParams,
target: SCRAPER_API_TARGETS.AMAZON_SELLERS,
parse: true,
} satisfies ScraperAPIParams;

const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });

return {
content: [
Expand Down
6 changes: 3 additions & 3 deletions src/tools/bing-search/bing-search-tool.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import z from 'zod';
import { ScraperAPIParams, ScrapingMCPParams } from 'types';
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
import { removeKeyFromNestedObject } from '../../utils';
import { removeKeyFromNestedObject, ProgressExtra } from '../../utils';
import { zodGeo, zodLocale, zodJsRender, zodDeviceType } from '../../zod/zod-types';
import { Tool, ToolRegistrationArgs } from '../tool';

Expand Down Expand Up @@ -47,14 +47,14 @@ export class BingSearchTool extends Tool {
openWorldHint: true,
},
},
async (scrapingParams: ScrapingMCPParams) => {
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
const params = {
...scrapingParams,
target: SCRAPER_API_TARGETS.BING_SEARCH,
parse: true,
} satisfies ScraperAPIParams;

const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });

const { data: text } = this.transformResponse({ data });

Expand Down
5 changes: 3 additions & 2 deletions src/tools/chatgpt/chatgpt-tool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { ScraperAPIParams, ScrapingMCPParams } from 'types';
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
import { zodGeo } from '../../zod/zod-types';
import { Tool, ToolRegistrationArgs } from '../tool';
import { ProgressExtra } from '../../utils';

export class ChatGPTTool extends Tool {
toolset = TOOLSET.AI;
Expand All @@ -26,14 +27,14 @@ export class ChatGPTTool extends Tool {
openWorldHint: true,
},
},
async (scrapingParams: ScrapingMCPParams) => {
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
const params = {
...scrapingParams,
target: SCRAPER_API_TARGETS.CHATGPT,
parse: true,
} satisfies ScraperAPIParams;

const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });

const { data: text } = this.transformResponse({ data });

Expand Down
Loading
Loading