|
1 | 1 | import axios, { AxiosError, AxiosResponse } from 'axios'; |
2 | 2 | import { ScraperApiResponseData } from './types'; |
3 | 3 | import { ScraperAPIParams, ScrapingMCPParams } from 'types'; |
| 4 | +import { ProgressNotifier, ProgressExtra } from '../utils'; |
4 | 5 |
|
/**
 * Parses the raw `MAX_RETRIES` environment value into a non-negative retry count.
 *
 * - Unset, empty, or non-numeric values fall back to the default of 2.
 * - An explicit `0` is honoured, which disables retries (a `|| 2` fallback
 *   would silently turn it back into 2).
 * - Negative values are clamped to 0.
 *
 * @param raw - The environment variable value, or `undefined` when unset.
 * @returns The number of retries to perform after the initial attempt.
 */
const parseMaxRetries = (raw: string | undefined): number => {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isNaN(parsed) ? 2 : Math.max(0, parsed);
};

/** Maximum number of retries after the first attempt; configurable via the `MAX_RETRIES` env var. */
const MAX_RETRIES = parseMaxRetries(process.env.MAX_RETRIES);

/** HTTP status codes listed as retryable (presumably consulted by `isRetryable` — confirm against its full body). */
const RETRYABLE_STATUS_CODES = new Set([429, 502, 503, 504]);

/** Network error codes listed as retryable (presumably consulted by `isRetryable` — confirm against its full body). */
const RETRYABLE_NETWORK_CODES = new Set(['ECONNRESET', 'ETIMEDOUT', 'ECONNABORTED', 'ENOTFOUND']);

/** First argument passed to `ProgressNotifier.startWaitingNotifications` (by name: delay before the first "waiting" update, in ms). */
const WAITING_INITIAL_DELAY_MS = 3000;

/** Second argument passed to `ProgressNotifier.startWaitingNotifications` (by name: interval between "waiting" updates, in ms). */
const WAITING_INTERVAL_MS = 5000;
8 | 11 |
|
9 | 12 | const isRetryable = (error: AxiosError): boolean => { |
10 | 13 | if (error.response) { |
@@ -60,68 +63,85 @@ export class ScraperApiClient { |
/**
 * Sends a scrape request to the Decodo Scraper API and returns the transformed response.
 *
 * The request is POSTed to `${DECODO_SAPI_HOST}/v2/scrape` (defaulting to
 * `https://scraper-api.decodo.com`) with a 180 s timeout. Axios errors that
 * `isRetryable` accepts are retried up to `MAX_RETRIES` times, sleeping for
 * `getRetryDelay(attempt, error)` between attempts. Progress is reported
 * through a `ProgressNotifier` built from `extra`.
 *
 * @param auth - Credentials placed verbatim after `Basic ` in the `authorization` header.
 * @param scrapingParams - MCP-level parameters, converted via `transformScrapingParams`.
 * @param extra - Optional progress context handed to `ProgressNotifier`.
 * @returns The result of `transformResponse` for the successful response.
 * @throws Error with a `Scraper API request failed (...)` message when the final
 *   failure is an Axios error; any other error is rethrown unchanged.
 */
scrape = async <T = string>({
  auth,
  scrapingParams,
  extra,
}: {
  auth: string;
  scrapingParams: ScrapingMCPParams;
  extra?: ProgressExtra;
}) => {
  const notifier = new ProgressNotifier(extra);

  // Progress updates are advisory. If `notify` rejects, it must neither discard
  // an already-successful scrape nor replace the real request error, so failures
  // are logged and swallowed here.
  const notifyBestEffort = async (
    message: string,
    progress: number,
    total: number
  ): Promise<void> => {
    try {
      await notifier.notify(message, progress, total);
    } catch (notifyError) {
      console.error('[scraper-api-client] Progress notification failed:', notifyError);
    }
  };

  try {
    await notifyBestEffort('Submitting request to Decodo API...', 0, 1);

    notifier.startWaitingNotifications(WAITING_INITIAL_DELAY_MS, WAITING_INTERVAL_MS);

    const transformedParams = this.transformScrapingParams({ scrapingParams });
    const url = process.env.DECODO_SAPI_HOST || 'https://scraper-api.decodo.com';

    let lastError: unknown;

    for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
      try {
        const res = await axios.request<ScraperApiResponseData<T>>({
          url: `${url}/v2/scrape`,
          method: 'POST',
          headers: {
            authorization: `Basic ${auth}`,
            'x-integration': 'mcp',
          },
          timeout: 180000,
          data: {
            ...transformedParams,
          },
        });

        // The response has arrived, so stop the periodic "still waiting" updates
        // before reporting the next stage.
        notifier.stopWaitingNotifications();

        await notifyBestEffort('Processing response...', 0.9, 1);

        return this.transformResponse({ res });
      } catch (error) {
        lastError = error;

        if (attempt < MAX_RETRIES && axios.isAxiosError(error) && isRetryable(error)) {
          const delayMs = getRetryDelay(attempt, error);
          const reason = error.response
            ? `status ${error.response.status}`
            : `network error ${error.code}`;

          console.error(
            `[scraper-api-client] Retry ${
              attempt + 1
            }/${MAX_RETRIES} after ${reason}, waiting ${Math.round(delayMs)}ms`
          );

          // NOTE(review): 0.1 may be lower than a value the waiting notifier has
          // already reported; confirm that consumers tolerate non-monotonic progress.
          await notifyBestEffort(`Retrying (${attempt + 1}/${MAX_RETRIES})...`, 0.1, 1);

          await sleep(delayMs);
          continue;
        }

        // Not retryable, or the retry budget is exhausted.
        break;
      }
    }

    if (axios.isAxiosError(lastError)) {
      const status = lastError.response?.status;
      let errorMessage = lastError.response?.data?.message ?? lastError.message;

      if (status === 401) {
        errorMessage = 'Authentication failed.';
      }
      if (status === 429) {
        // Rate-limit responses are surfaced in full rather than just their `message`.
        errorMessage = JSON.stringify(lastError.response?.data);
      }

      // Network-level failures carry no HTTP status; fall back to the Axios error
      // code so the message does not read "(undefined)".
      const failureLabel = status ?? lastError.code ?? 'no response';

      throw new Error(`Scraper API request failed (${failureLabel}): ${errorMessage}`);
    }
    throw lastError;
  } finally {
    // Always stop the waiting timer, on success and on every failure path.
    notifier.stopWaitingNotifications();
  }
};
127 | 147 | } |
0 commit comments