Skip to content

Commit 5f4adc9

Browse files
Add progress tracker
1 parent f44a209 commit 5f4adc9

35 files changed

Lines changed: 682 additions & 127 deletions

File tree

package.json

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,8 @@
99
"mcpName": "io.github.Decodo/mcp-web-scraper",
1010
"scripts": {
1111
"build": "tsc && chmod 755 build/index.js build/server.js",
12-
"dev": "nodemon --watch src -e ts --exec 'npm run build'",
12+
"dev": "nodemon --watch src -e ts --exec 'tsx src/server.ts'",
13+
"dev:server": "nodemon --watch src -e ts --exec 'npm run build'",
1314
"start": "node build/server.js",
1415
"inspect": "mcp-inspector",
1516
"test": "jest",

src/clients/scraper-api-client.ts

Lines changed: 73 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,13 @@
11
import axios, { AxiosError, AxiosResponse } from 'axios';
22
import { ScraperApiResponseData } from './types';
33
import { ScraperAPIParams, ScrapingMCPParams } from 'types';
4+
import { ProgressNotifier, ProgressExtra } from '../utils';
45

56
const MAX_RETRIES = Math.max(0, parseInt(process.env.MAX_RETRIES ?? '2', 10) || 2);
67
const RETRYABLE_STATUS_CODES = new Set([429, 502, 503, 504]);
78
const RETRYABLE_NETWORK_CODES = new Set(['ECONNRESET', 'ETIMEDOUT', 'ECONNABORTED', 'ENOTFOUND']);
9+
const WAITING_INITIAL_DELAY_MS = 3000;
10+
const WAITING_INTERVAL_MS = 5000;
811

912
const isRetryable = (error: AxiosError): boolean => {
1013
if (error.response) {
@@ -60,68 +63,85 @@ export class ScraperApiClient {
6063
scrape = async <T = string>({
6164
auth,
6265
scrapingParams,
66+
extra,
6367
}: {
6468
auth: string;
6569
scrapingParams: ScrapingMCPParams;
70+
extra?: ProgressExtra;
6671
}) => {
67-
const transformedParams = this.transformScrapingParams({ scrapingParams });
68-
69-
const url = process.env.DECODO_SAPI_HOST || 'https://scraper-api.decodo.com';
70-
71-
let lastError: unknown;
72-
73-
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
74-
try {
75-
const res = await axios.request<ScraperApiResponseData<T>>({
76-
url: `${url}/v2/scrape`,
77-
method: 'POST',
78-
headers: {
79-
authorization: `Basic ${auth}`,
80-
'x-integration': 'mcp',
81-
},
82-
timeout: 180000,
83-
data: {
84-
...transformedParams,
85-
},
86-
});
87-
88-
return this.transformResponse({ res });
89-
} catch (error) {
90-
lastError = error;
91-
92-
if (attempt < MAX_RETRIES && axios.isAxiosError(error) && isRetryable(error)) {
93-
const delayMs = getRetryDelay(attempt, error);
94-
const reason = error.response
95-
? `status ${error.response.status}`
96-
: `network error ${error.code}`;
97-
98-
console.error(
99-
`[scraper-api-client] Retry ${
100-
attempt + 1
101-
}/${MAX_RETRIES} after ${reason}, waiting ${Math.round(delayMs)}ms`
102-
);
103-
104-
await sleep(delayMs);
105-
continue;
106-
}
72+
const notifier = new ProgressNotifier(extra);
10773

108-
break;
109-
}
110-
}
74+
try {
75+
await notifier.notify('Submitting request to Decodo API...', 0, 1);
11176

112-
if (axios.isAxiosError(lastError)) {
113-
const status = lastError.response?.status;
114-
let errorMessage = lastError.response?.data?.message ?? lastError.message;
77+
notifier.startWaitingNotifications(WAITING_INITIAL_DELAY_MS, WAITING_INTERVAL_MS);
11578

116-
if (status === 401) {
117-
errorMessage = 'Authentication failed.';
118-
}
119-
if (status === 429) {
120-
errorMessage = JSON.stringify(lastError.response?.data);
79+
const transformedParams = this.transformScrapingParams({ scrapingParams });
80+
const url = process.env.DECODO_SAPI_HOST || 'https://scraper-api.decodo.com';
81+
82+
let lastError: unknown;
83+
84+
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
85+
try {
86+
const res = await axios.request<ScraperApiResponseData<T>>({
87+
url: `${url}/v2/scrape`,
88+
method: 'POST',
89+
headers: {
90+
authorization: `Basic ${auth}`,
91+
'x-integration': 'mcp',
92+
},
93+
timeout: 180000,
94+
data: {
95+
...transformedParams,
96+
},
97+
});
98+
99+
notifier.stopWaitingNotifications();
100+
101+
await notifier.notify('Processing response...', 0.9, 1);
102+
103+
return this.transformResponse({ res });
104+
} catch (error) {
105+
lastError = error;
106+
107+
if (attempt < MAX_RETRIES && axios.isAxiosError(error) && isRetryable(error)) {
108+
const delayMs = getRetryDelay(attempt, error);
109+
const reason = error.response
110+
? `status ${error.response.status}`
111+
: `network error ${error.code}`;
112+
113+
console.error(
114+
`[scraper-api-client] Retry ${
115+
attempt + 1
116+
}/${MAX_RETRIES} after ${reason}, waiting ${Math.round(delayMs)}ms`
117+
);
118+
119+
await notifier.notify(`Retrying (${attempt + 1}/${MAX_RETRIES})...`, 0.1, 1);
120+
121+
await sleep(delayMs);
122+
continue;
123+
}
124+
125+
break;
126+
}
121127
}
122128

123-
throw new Error(`Scraper API request failed (${status}): ${errorMessage}`);
129+
if (axios.isAxiosError(lastError)) {
130+
const status = lastError.response?.status;
131+
let errorMessage = lastError.response?.data?.message ?? lastError.message;
132+
133+
if (status === 401) {
134+
errorMessage = 'Authentication failed.';
135+
}
136+
if (status === 429) {
137+
errorMessage = JSON.stringify(lastError.response?.data);
138+
}
139+
140+
throw new Error(`Scraper API request failed (${status}): ${errorMessage}`);
141+
}
142+
throw lastError;
143+
} finally {
144+
notifier.stopWaitingNotifications();
124145
}
125-
throw lastError;
126146
};
127147
}

src/tools/amazon-bestsellers/amazon-bestsellers-tool.ts

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ import { ScraperAPIParams, ScrapingMCPParams } from 'types';
33
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
44
import { zodDeviceType } from '../../zod/zod-types';
55
import { Tool, ToolRegistrationArgs } from '../tool';
6+
import { ProgressExtra } from '../../utils';
67

78
const zodDomain = z
89
.string()
@@ -37,14 +38,14 @@ export class AmazonBestsellersTool extends Tool {
3738
openWorldHint: true,
3839
},
3940
},
40-
async (scrapingParams: ScrapingMCPParams) => {
41+
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
4142
const params = {
4243
...scrapingParams,
4344
target: SCRAPER_API_TARGETS.AMAZON_BESTSELLERS,
4445
parse: true,
4546
} satisfies ScraperAPIParams;
4647

47-
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
48+
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });
4849

4950
return {
5051
content: [

src/tools/amazon-pricing/amazon-pricing-tool.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import z from 'zod';
22
import { ScraperAPIParams, ScrapingMCPParams } from 'types';
33
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
4-
import { removeKeyFromNestedObject } from '../../utils';
4+
import { removeKeyFromNestedObject, ProgressExtra } from '../../utils';
55
import { zodJsRender, zodDeviceType } from '../../zod/zod-types';
66
import { Tool, ToolRegistrationArgs } from '../tool';
77

@@ -51,14 +51,14 @@ export class AmazonPricingTool extends Tool {
5151
openWorldHint: true,
5252
},
5353
},
54-
async (scrapingParams: ScrapingMCPParams) => {
54+
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
5555
const params = {
5656
...scrapingParams,
5757
target: SCRAPER_API_TARGETS.AMAZON_PRICING,
5858
parse: true,
5959
} satisfies ScraperAPIParams;
6060

61-
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
61+
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });
6262

6363
const { data: text } = this.transformResponse({ data });
6464

src/tools/amazon-product/amazon-product-tool.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import z from 'zod';
22
import { ScraperAPIParams, ScrapingMCPParams } from 'types';
33
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
4-
import { removeKeyFromNestedObject } from '../../utils';
4+
import { removeKeyFromNestedObject, ProgressExtra } from '../../utils';
55
import { zodJsRender, zodDeviceType } from '../../zod/zod-types';
66
import { Tool, ToolRegistrationArgs } from '../tool';
77

@@ -45,14 +45,14 @@ export class AmazonProductTool extends Tool {
4545
openWorldHint: true,
4646
},
4747
},
48-
async (scrapingParams: ScrapingMCPParams) => {
48+
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
4949
const params = {
5050
...scrapingParams,
5151
target: SCRAPER_API_TARGETS.AMAZON_PRODUCT,
5252
parse: true,
5353
} satisfies ScraperAPIParams;
5454

55-
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
55+
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });
5656

5757
const { data: text } = this.transformResponse({ data });
5858

src/tools/amazon-search/amazon-search-tool.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import z from 'zod';
22
import { ScraperAPIParams, ScrapingMCPParams } from 'types';
33
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
4-
import { removeKeyFromNestedObject } from '../../utils';
4+
import { removeKeyFromNestedObject, ProgressExtra } from '../../utils';
55
import { zodGeo, zodJsRender, zodDeviceType } from '../../zod/zod-types';
66
import { Tool, ToolRegistrationArgs } from '../tool';
77

@@ -46,14 +46,14 @@ export class AmazonSearchTool extends Tool {
4646
openWorldHint: true,
4747
},
4848
},
49-
async (scrapingParams: ScrapingMCPParams) => {
49+
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
5050
const params = {
5151
...scrapingParams,
5252
target: SCRAPER_API_TARGETS.AMAZON_SEARCH,
5353
parse: true,
5454
} satisfies ScraperAPIParams;
5555

56-
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
56+
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });
5757

5858
const { data: text } = this.transformResponse({ data });
5959

src/tools/amazon-sellers/amazon-sellers-tool.ts

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ import { ScraperAPIParams, ScrapingMCPParams } from 'types';
33
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
44
import { zodJsRender, zodDeviceType } from '../../zod/zod-types';
55
import { Tool, ToolRegistrationArgs } from '../tool';
6+
import { ProgressExtra } from '../../utils';
67

78
const zodDomain = z
89
.string()
@@ -38,14 +39,14 @@ export class AmazonSellersTool extends Tool {
3839
openWorldHint: true,
3940
},
4041
},
41-
async (scrapingParams: ScrapingMCPParams) => {
42+
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
4243
const params = {
4344
...scrapingParams,
4445
target: SCRAPER_API_TARGETS.AMAZON_SELLERS,
4546
parse: true,
4647
} satisfies ScraperAPIParams;
4748

48-
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
49+
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });
4950

5051
return {
5152
content: [

src/tools/bing-search/bing-search-tool.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import z from 'zod';
22
import { ScraperAPIParams, ScrapingMCPParams } from 'types';
33
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
4-
import { removeKeyFromNestedObject } from '../../utils';
4+
import { removeKeyFromNestedObject, ProgressExtra } from '../../utils';
55
import { zodGeo, zodLocale, zodJsRender, zodDeviceType } from '../../zod/zod-types';
66
import { Tool, ToolRegistrationArgs } from '../tool';
77

@@ -47,14 +47,14 @@ export class BingSearchTool extends Tool {
4747
openWorldHint: true,
4848
},
4949
},
50-
async (scrapingParams: ScrapingMCPParams) => {
50+
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
5151
const params = {
5252
...scrapingParams,
5353
target: SCRAPER_API_TARGETS.BING_SEARCH,
5454
parse: true,
5555
} satisfies ScraperAPIParams;
5656

57-
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
57+
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });
5858

5959
const { data: text } = this.transformResponse({ data });
6060

src/tools/chatgpt/chatgpt-tool.ts

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ import { ScraperAPIParams, ScrapingMCPParams } from 'types';
33
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
44
import { zodGeo } from '../../zod/zod-types';
55
import { Tool, ToolRegistrationArgs } from '../tool';
6+
import { ProgressExtra } from '../../utils';
67

78
export class ChatGPTTool extends Tool {
89
toolset = TOOLSET.AI;
@@ -26,14 +27,14 @@ export class ChatGPTTool extends Tool {
2627
openWorldHint: true,
2728
},
2829
},
29-
async (scrapingParams: ScrapingMCPParams) => {
30+
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
3031
const params = {
3132
...scrapingParams,
3233
target: SCRAPER_API_TARGETS.CHATGPT,
3334
parse: true,
3435
} satisfies ScraperAPIParams;
3536

36-
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
37+
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });
3738

3839
const { data: text } = this.transformResponse({ data });
3940

src/tools/google-ads/google-ads-tool.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import z from 'zod';
22
import { ScraperAPIParams, ScrapingMCPParams } from 'types';
33
import { SCRAPER_API_TARGETS, TOOLSET } from '../../constants';
4-
import { removeKeyFromNestedObject } from '../../utils';
4+
import { removeKeyFromNestedObject, ProgressExtra } from '../../utils';
55
import { zodGeo, zodLocale, zodJsRender, zodDeviceType } from '../../zod/zod-types';
66
import { Tool, ToolRegistrationArgs } from '../tool';
77

@@ -41,14 +41,14 @@ export class GoogleAdsTool extends Tool {
4141
openWorldHint: true,
4242
},
4343
},
44-
async (scrapingParams: ScrapingMCPParams) => {
44+
async (scrapingParams: ScrapingMCPParams, extra: ProgressExtra) => {
4545
const params = {
4646
...scrapingParams,
4747
target: SCRAPER_API_TARGETS.GOOGLE_ADS,
4848
parse: true,
4949
} satisfies ScraperAPIParams;
5050

51-
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params });
51+
const { data } = await sapiClient.scrape<object>({ auth, scrapingParams: params, extra });
5252

5353
const { data: text } = this.transformResponse({ data });
5454

0 commit comments

Comments
 (0)