Skip to content

Commit 3d6e1d1

Browse files
Merge pull request #1566 from projectdiscovery/dev
release katana v1.5.0
2 parents a52d1da + 2f9ecf4 commit 3d6e1d1

45 files changed

Lines changed: 2576 additions & 137 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
## Proposed changes
2+
3+
<!-- Describe the overall picture of your modifications to help maintainers understand the pull request. PRs are required to be associated with their related issue tickets or feature requests. -->
4+
5+
### Proof
6+
7+
<!-- How has this been tested? Please describe the tests that you ran to verify your changes. -->
8+
9+
## Checklist
10+
11+
<!-- Put an "x" in the boxes that apply. You can also fill these out after creating the PR. If you're unsure about any of them, don't hesitate to ask. We're here to help! This is simply a reminder of what we are going to look for before merging your code. -->
12+
13+
- [ ] Pull request is created against the [dev](https://github.com/projectdiscovery/katana/tree/dev) branch
14+
- [ ] All checks passed (lint, unit/integration/regression tests etc.) with my changes
15+
- [ ] I have added tests that prove my fix is effective or that my feature works
16+
- [ ] I have added necessary documentation (if appropriate)

.github/workflows/build-test.yml

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
steps:
1717
- uses: actions/checkout@v6
1818
- uses: projectdiscovery/actions/setup/go@v1
19-
- uses: projectdiscovery/actions/golangci-lint@v1
19+
- uses: projectdiscovery/actions/golangci-lint/v2@v1
2020

2121
build:
2222
name: Test Builds
@@ -25,16 +25,13 @@ jobs:
2525
strategy:
2626
matrix:
2727
os: [ubuntu-latest, windows-latest, macOS-latest]
28-
go-version: [1.24.x]
2928
steps:
30-
- name: Set up Go
31-
uses: actions/setup-go@v6
32-
with:
33-
go-version: ${{ matrix.go-version }}
34-
3529
- name: Check out code
3630
uses: actions/checkout@v6
3731

32+
- name: Set up Go
33+
uses: projectdiscovery/actions/setup/go@v1
34+
3835
- name: Test
3936
run: go test ./...
4037
working-directory: .

.github/workflows/functional-test.yml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,12 @@ jobs:
1515
matrix:
1616
os: [ubuntu-latest, windows-latest, macOS-latest]
1717
steps:
18-
- name: Set up Go
19-
uses: actions/setup-go@v6
20-
with:
21-
go-version: 1.24.x
22-
2318
- name: Check out code
2419
uses: actions/checkout@v6
2520

21+
- name: Set up Go
22+
uses: projectdiscovery/actions/setup/go@v1
23+
2624
- name: Functional Tests
2725
run: |
2826
chmod +x run.sh

.github/workflows/release-binary.yml

Lines changed: 21 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -12,73 +12,64 @@ jobs:
1212
steps:
1313
- name: "Check out code"
1414
uses: actions/checkout@v6
15-
with:
15+
with:
1616
fetch-depth: 0
17-
17+
1818
- name: "Set up Go"
19-
uses: actions/setup-go@v6
20-
with:
21-
go-version: 1.24.x
22-
cache: true
23-
19+
uses: projectdiscovery/actions/setup/go@v1
20+
2421
- name: "Create release on GitHub"
25-
uses: goreleaser/goreleaser-action@v4
26-
with:
22+
uses: goreleaser/goreleaser-action@v7
23+
with:
2724
args: "release -f .goreleaser/mac.yml --clean"
2825
version: latest
2926
workdir: .
30-
env:
27+
env:
3128
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
3229

3330
build-windows:
3431
runs-on: windows-latest-8-cores
3532
steps:
3633
- name: "Check out code"
3734
uses: actions/checkout@v6
38-
with:
35+
with:
3936
fetch-depth: 0
40-
37+
4138
- name: "Set up Go"
42-
uses: actions/setup-go@v6
43-
with:
44-
go-version: 1.24.x
45-
cache: true
46-
39+
uses: projectdiscovery/actions/setup/go@v1
40+
4741
- name: "Create release on GitHub"
48-
uses: goreleaser/goreleaser-action@v4
49-
with:
42+
uses: goreleaser/goreleaser-action@v7
43+
with:
5044
args: "release -f .goreleaser/windows.yml --clean"
5145
version: latest
5246
workdir: .
53-
env:
47+
env:
5448
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
5549

5650
build-linux:
5751
runs-on: ubuntu-latest-16-cores
5852
steps:
5953
- name: "Check out code"
6054
uses: actions/checkout@v6
61-
with:
55+
with:
6256
fetch-depth: 0
63-
57+
6458
- name: "Set up Go"
65-
uses: actions/setup-go@v6
66-
with:
67-
go-version: 1.24.x
68-
cache: true
59+
uses: projectdiscovery/actions/setup/go@v1
6960

7061
# todo: musl compatible?
7162
- name: Install Dependences
7263
run: sudo apt install gcc-aarch64-linux-gnu
7364

7465
- name: "Create release on GitHub"
75-
uses: goreleaser/goreleaser-action@v4
76-
with:
66+
uses: goreleaser/goreleaser-action@v7
67+
with:
7768
args: "release -f .goreleaser/linux.yml --clean"
7869
version: latest
7970
workdir: .
80-
env:
71+
env:
8172
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
8273
SLACK_WEBHOOK: "${{ secrets.RELEASE_SLACK_WEBHOOK }}"
8374
DISCORD_WEBHOOK_ID: "${{ secrets.DISCORD_WEBHOOK_ID }}"
84-
DISCORD_WEBHOOK_TOKEN: "${{ secrets.DISCORD_WEBHOOK_TOKEN }}"
75+
DISCORD_WEBHOOK_TOKEN: "${{ secrets.DISCORD_WEBHOOK_TOKEN }}"

.github/workflows/release-test.yml

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,7 @@ jobs:
3737
fetch-depth: 0
3838

3939
- name: Set up Go
40-
uses: actions/setup-go@v6
41-
with:
42-
go-version: 1.24.x
40+
uses: projectdiscovery/actions/setup/go@v1
4341

4442
# todo: musl compatible?
4543
- name: Install Dependences
@@ -62,9 +60,7 @@ jobs:
6260
fetch-depth: 0
6361

6462
- name: Set up Go
65-
uses: actions/setup-go@v6
66-
with:
67-
go-version: 1.24.x
63+
uses: projectdiscovery/actions/setup/go@v1
6864

6965
- name: release test
7066
uses: projectdiscovery/actions/goreleaser@v1

.github/workflows/security-crawl-maze-score.yaml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,12 @@ jobs:
88
name: Run Scoring
99
runs-on: ubuntu-latest-16-cores
1010
steps:
11-
- name: Set up Go
12-
uses: actions/setup-go@v6
13-
with:
14-
go-version: 1.24.x
15-
1611
- name: Check out code
1712
uses: actions/checkout@v6
1813

14+
- name: Set up Go
15+
uses: projectdiscovery/actions/setup/go@v1
16+
1917
- name: Build
2018
run: go build .
2119
working-directory: cmd/katana/

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM golang:1.25.5-alpine AS build-env
1+
FROM golang:1.25.7-alpine AS build-env
22
RUN apk add --no-cache git gcc musl-dev
33
WORKDIR /app
44
COPY . /app

README.md

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040

4141
## Installation
4242

43-
katana requires Go 1.24+ to install successfully. If you encounter any installation issues, we recommend trying with the latest available version of Go, as the minimum required version may have changed. Run the command below or download a pre-compiled binary from the [release page](https://github.com/projectdiscovery/katana/releases).
43+
katana requires Go 1.25+ to install successfully. If you encounter any installation issues, we recommend trying with the latest available version of Go, as the minimum required version may have changed. Run the command below or download a pre-compiled binary from the [release page](https://github.com/projectdiscovery/katana/releases).
4444

4545
```console
4646
CGO_ENABLED=1 go install github.com/projectdiscovery/katana/cmd/katana@latest
@@ -138,8 +138,11 @@ CONFIGURATION:
138138
-flc, -field-config string path to custom field configuration file
139139
-s, -strategy string Visit strategy (depth-first, breadth-first) (default "depth-first")
140140
-iqp, -ignore-query-params Ignore crawling same path with different query-param values
141+
-fsu, -filter-similar filter crawling of similar looking URLs (e.g., /users/123 and /users/456)
142+
-fst, -filter-similar-threshold int number of distinct values before a path position is treated as parameter (default 10)
141143
-tlsi, -tls-impersonate enable experimental client hello (ja3) tls randomization
142144
-dr, -disable-redirects disable following redirects (default false)
145+
-kb, -knowledge-base enable knowledge base classification
143146

144147
DEBUG:
145148
-health-check, -hc run diagnostic check up
@@ -157,6 +160,8 @@ HEADLESS:
157160
-noi, -no-incognito start headless chrome without incognito mode
158161
-cwu, -chrome-ws-url string use chrome browser instance launched elsewhere with the debugger listening at this URL
159162
-xhr, -xhr-extraction extract xhr request url,method in jsonl output
163+
-csp, -captcha-solver-provider string captcha solver provider (e.g. capsolver)
164+
-csk, -captcha-solver-key string captcha solver provider api key
160165

161166
SCOPE:
162167
-cs, -crawl-scope string[] in scope url regex to be followed by crawler
@@ -176,6 +181,7 @@ FILTER:
176181
-mdc, -match-condition string match response with dsl based condition
177182
-fdc, -filter-condition string filter response with dsl based condition
178183
-duf, -disable-unique-filter disable duplicate content filtering
184+
-fpt, -filter-page-type string[] filter response with page type (e.g. error,captcha,parked)
179185

180186
RATE-LIMIT:
181187
-c, -concurrency int number of concurrent fetchers to use (default 10)
@@ -330,6 +336,8 @@ HEADLESS:
330336
-noi, -no-incognito start headless chrome without incognito mode
331337
-cwu, -chrome-ws-url string use chrome browser instance launched elsewhere with the debugger listening at this URL
332338
-xhr, -xhr-extraction extract xhr requests
339+
-csp, -captcha-solver-provider string captcha solver provider (e.g. capsolver)
340+
-csk, -captcha-solver-key string captcha solver provider api key
333341
```
334342

335343
*`-no-sandbox`*
@@ -361,6 +369,34 @@ katana -u https://tesla.com -headless -system-chrome -headless-options --disable
361369
```
362370

363371

372+
### Captcha Solving
373+
374+
Katana supports automatic captcha detection and solving during headless crawling. When a captcha page is encountered, katana identifies the captcha provider, solves it via an external service, and continues crawling.
375+
376+
Supported captcha types: **reCAPTCHA v2**, **reCAPTCHA v3**, **reCAPTCHA Enterprise**, **Cloudflare Turnstile**, **hCaptcha**
377+
378+
*`-captcha-solver-provider`*
379+
----
380+
381+
Option to specify the captcha solver provider. Currently supported: `capsolver`.
382+
383+
*`-captcha-solver-key`*
384+
----
385+
386+
API key for the captcha solver provider.
387+
388+
```console
389+
katana -u https://example.com -headless -csp capsolver -csk YOUR_API_KEY
390+
```
391+
392+
The provider and key can also be set via environment variables:
393+
394+
```console
395+
export CAPTCHA_SOLVER_PROVIDER=capsolver
396+
export CAPTCHA_SOLVER_KEY=YOUR_API_KEY
397+
katana -u https://example.com -headless
398+
```
399+
364400
## Scope Control
365401

366402
Crawling can be endless if not scoped; as such, katana comes with multiple options to define the crawl scope.
@@ -509,6 +545,21 @@ Automatic form filling is experimental feature.
509545
katana -u https://tesla.com -aff
510546
```
511547

548+
*`-filter-similar`*
549+
----
550+
551+
Option to filter crawling of similar-looking URLs by normalizing variable path segments. This detects IDs, UUIDs, hashes, dates, and other dynamic values, and also learns repeating patterns at runtime. For example, `/users/123` and `/users/456` are treated as the same endpoint.
552+
553+
```
554+
katana -u https://tesla.com -fsu
555+
```
556+
557+
The promotion threshold (how many distinct values at a path position before it's treated as a parameter) can be tuned with `-fst`. Lower values are more aggressive (fewer URLs crawled), higher values are more permissive. Default is `10`.
558+
559+
```
560+
katana -u https://tesla.com -fsu -fst 5
561+
```
562+
512563
## Authenticated Crawling
513564

514565
Authenticated crawling involves including custom headers or cookies in HTTP requests to access protected resources. These headers provide authentication or authorization information, allowing you to crawl authenticated content and endpoints. You can specify headers directly on the command line or provide them as a file with katana to perform authenticated crawling.
@@ -564,6 +615,9 @@ CONFIGURATION:
564615
-fc, -form-config string path to custom form configuration file
565616
-flc, -field-config string path to custom field configuration file
566617
-s, -strategy string Visit strategy (depth-first, breadth-first) (default "depth-first")
618+
-iqp, -ignore-query-params Ignore crawling same path with different query-param values
619+
-fsu, -filter-similar filter crawling of similar looking URLs (e.g., /users/123 and /users/456)
620+
-fst, -filter-similar-threshold int number of distinct values before a path position is treated as parameter (default 10)
567621
```
568622

569623
### Connecting to Active Browser Session

cmd/katana/main.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,9 +166,12 @@ pipelines offering both headless and non-headless crawling.`)
166166
flagSet.StringVarP(&options.FieldConfig, "field-config", "flc", "", "path to custom field configuration file"),
167167
flagSet.StringVarP(&options.Strategy, "strategy", "s", "depth-first", "Visit strategy (depth-first, breadth-first)"),
168168
flagSet.BoolVarP(&options.IgnoreQueryParams, "ignore-query-params", "iqp", false, "Ignore crawling same path with different query-param values"),
169+
flagSet.BoolVarP(&options.FilterSimilar, "filter-similar", "fsu", false, "filter crawling of similar looking URLs (e.g., /users/123 and /users/456)"),
170+
flagSet.IntVarP(&options.FilterSimilarThreshold, "filter-similar-threshold", "fst", 10, "number of distinct values before a path position is treated as parameter (default 10)"),
169171
flagSet.BoolVarP(&options.TlsImpersonate, "tls-impersonate", "tlsi", false, "enable experimental client hello (ja3) tls randomization"),
170172
flagSet.BoolVarP(&options.DisableRedirects, "disable-redirects", "dr", false, "disable following redirects (default false)"),
171173
flagSet.BoolVarP(&options.PathClimb, "path-climb", "pc", false, "enable path climb (auto crawl parent paths)"),
174+
flagSet.BoolVarP(&options.KnowledgeBase, "knowledge-base", "kb", false, "enable knowledge base classification"),
172175
)
173176

174177
flagSet.CreateGroup("debug", "Debug",
@@ -191,6 +194,8 @@ pipelines offering both headless and non-headless crawling.`)
191194
flagSet.BoolVarP(&options.XhrExtraction, "xhr-extraction", "xhr", false, "extract xhr request url,method in jsonl output"),
192195
flagSet.IntVarP(&options.MaxFailureCount, "max-failure-count", "mfc", 10, "maximum number of consecutive action failures before stopping"),
193196
flagSet.BoolVarP(&options.EnableDiagnostics, "enable-diagnostics", "ed", false, "enable diagnostics"),
197+
flagSet.StringVarEnv(&options.CaptchaSolverProvider, "captcha-solver-provider", "csp", "", "CAPTCHA_SOLVER_PROVIDER", "captcha solver provider (e.g. capsolver)"),
198+
flagSet.StringVarEnv(&options.CaptchaSolverAPIKey, "captcha-solver-key", "csk", "", "CAPTCHA_SOLVER_KEY", "captcha solver provider api key"),
194199
)
195200

196201
flagSet.CreateGroup("scope", "Scope",
@@ -213,6 +218,7 @@ pipelines offering both headless and non-headless crawling.`)
213218
flagSet.StringVarP(&options.OutputMatchCondition, "match-condition", "mdc", "", "match response with dsl based condition"),
214219
flagSet.StringVarP(&options.OutputFilterCondition, "filter-condition", "fdc", "", "filter response with dsl based condition"),
215220
flagSet.BoolVarP(&options.DisableUniqueFilter, "disable-unique-filter", "duf", false, "disable duplicate content filtering"),
221+
flagSet.StringSliceVarP(&options.FilterPageType, "filter-page-type", "fpt", nil, "filter response with page type (e.g. error,captcha,parked)", goflags.CommaSeparatedStringSliceOptions),
216222
)
217223

218224
flagSet.CreateGroup("ratelimit", "Rate-Limit",

0 commit comments

Comments
 (0)