projectdiscovery
diff --git a/‎.github/PULL_REQUEST_TEMPLATE.md‎
Lines changed: 16 additions & 0 deletions b/‎.github/PULL_REQUEST_TEMPLATE.md‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎.github/workflows/build-test.yml‎
Lines changed: 4 additions & 7 deletions b/‎.github/workflows/build-test.yml‎
Lines changed: 4 additions & 7 deletions
diff --git a/‎.github/workflows/functional-test.yml‎
Lines changed: 3 additions & 5 deletions b/‎.github/workflows/functional-test.yml‎
Lines changed: 3 additions & 5 deletions
diff --git a/‎.github/workflows/release-binary.yml‎
Lines changed: 21 additions & 30 deletions b/‎.github/workflows/release-binary.yml‎
Lines changed: 21 additions & 30 deletions
diff --git a/‎.github/workflows/release-test.yml‎
Lines changed: 2 additions & 6 deletions b/‎.github/workflows/release-test.yml‎
Lines changed: 2 additions & 6 deletions
diff --git a/‎.github/workflows/security-crawl-maze-score.yaml‎
Lines changed: 3 additions & 5 deletions b/‎.github/workflows/security-crawl-maze-score.yaml‎
Lines changed: 3 additions & 5 deletions
diff --git a/‎Dockerfile‎
Lines changed: 1 addition & 1 deletion b/‎Dockerfile‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md‎
Lines changed: 55 additions & 1 deletion b/‎README.md‎
Lines changed: 55 additions & 1 deletion
diff --git a/‎cmd/katana/main.go‎
Lines changed: 6 additions & 0 deletions b/‎cmd/katana/main.go‎
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,16 @@
+## Proposed changes
+
+<!-- Describe the overall picture of your modifications to help maintainers understand the pull request. PRs are required to be associated with their related issue tickets or feature requests. -->
+
+### Proof
+
+<!-- How has this been tested? Please describe the tests that you ran to verify your changes. -->
+
+## Checklist
+
+<!-- Put an "x" in the boxes that apply. You can also fill these out after creating the PR. If you're unsure about any of them, don't hesitate to ask. We're here to help! This is simply a reminder of what we are going to look for before merging your code. -->
+
+- [ ] Pull request is created against the [dev](https://github.com/projectdiscovery/katana/tree/dev) branch
+- [ ] All checks passed (lint, unit/integration/regression tests etc.) with my changes
+- [ ] I have added tests that prove my fix is effective or that my feature works
+- [ ] I have added necessary documentation (if appropriate)
@@ -16,7 +16,7 @@ jobs:
     steps:
       - uses: actions/checkout@v6
       - uses: projectdiscovery/actions/setup/go@v1
-      - uses: projectdiscovery/actions/golangci-lint@v1
+      - uses: projectdiscovery/actions/golangci-lint/v2@v1
 
   build:
     name: Test Builds
@@ -25,16 +25,13 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest, macOS-latest]
-        go-version: [1.24.x]
     steps:
-      - name: Set up Go
-        uses: actions/setup-go@v6
-        with:
-          go-version: ${{ matrix.go-version }}
-
       - name: Check out code
         uses: actions/checkout@v6
 
+      - name: Set up Go
+        uses: projectdiscovery/actions/setup/go@v1
+
       - name: Test
         run: go test ./...
         working-directory: .
 
@@ -15,14 +15,12 @@ jobs:
       matrix:
         os: [ubuntu-latest, windows-latest, macOS-latest]
     steps:
-      - name: Set up Go
-        uses: actions/setup-go@v6
-        with:
-          go-version: 1.24.x
-
       - name: Check out code
         uses: actions/checkout@v6
 
+      - name: Set up Go
+        uses: projectdiscovery/actions/setup/go@v1
+
       - name: Functional Tests
         run: |
           chmod +x run.sh
 
@@ -12,73 +12,64 @@ jobs:
     steps:
       - name: "Check out code"
         uses: actions/checkout@v6
-        with: 
+        with:
           fetch-depth: 0
-      
+
       - name: "Set up Go"
-        uses: actions/setup-go@v6
-        with: 
-          go-version: 1.24.x
-          cache: true
-      
+        uses: projectdiscovery/actions/setup/go@v1
+
       - name: "Create release on GitHub"
-        uses: goreleaser/goreleaser-action@v4
-        with: 
+        uses: goreleaser/goreleaser-action@v7
+        with:
           args: "release -f .goreleaser/mac.yml --clean"
           version: latest
           workdir: .
-        env: 
+        env:
           GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
 
   build-windows:
     runs-on: windows-latest-8-cores
     steps:
       - name: "Check out code"
         uses: actions/checkout@v6
-        with: 
+        with:
           fetch-depth: 0
-      
+
       - name: "Set up Go"
-        uses: actions/setup-go@v6
-        with: 
-          go-version: 1.24.x
-          cache: true
-      
+        uses: projectdiscovery/actions/setup/go@v1
+
       - name: "Create release on GitHub"
-        uses: goreleaser/goreleaser-action@v4
-        with: 
+        uses: goreleaser/goreleaser-action@v7
+        with:
           args: "release -f .goreleaser/windows.yml --clean"
           version: latest
           workdir: .
-        env: 
+        env:
           GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
 
   build-linux:
     runs-on: ubuntu-latest-16-cores
     steps:
       - name: "Check out code"
         uses: actions/checkout@v6
-        with: 
+        with:
           fetch-depth: 0
-      
+
       - name: "Set up Go"
-        uses: actions/setup-go@v6
-        with: 
-          go-version: 1.24.x
-          cache: true
+        uses: projectdiscovery/actions/setup/go@v1
 
       # todo: musl compatible?
       - name: Install Dependencies
         run: sudo apt install gcc-aarch64-linux-gnu
 
       - name: "Create release on GitHub"
-        uses: goreleaser/goreleaser-action@v4
-        with: 
+        uses: goreleaser/goreleaser-action@v7
+        with:
           args: "release -f .goreleaser/linux.yml --clean"
           version: latest
           workdir: .
-        env: 
+        env:
           GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
           SLACK_WEBHOOK: "${{ secrets.RELEASE_SLACK_WEBHOOK }}"
           DISCORD_WEBHOOK_ID: "${{ secrets.DISCORD_WEBHOOK_ID }}"
-          DISCORD_WEBHOOK_TOKEN: "${{ secrets.DISCORD_WEBHOOK_TOKEN }}"
+          DISCORD_WEBHOOK_TOKEN: "${{ secrets.DISCORD_WEBHOOK_TOKEN }}"
@@ -37,9 +37,7 @@ jobs:
           fetch-depth: 0
 
       - name: Set up Go
-        uses: actions/setup-go@v6
-        with:
-          go-version: 1.24.x
+        uses: projectdiscovery/actions/setup/go@v1
 
       # todo: musl compatible?
       - name: Install Dependencies
@@ -62,9 +60,7 @@ jobs:
           fetch-depth: 0
 
       - name: Set up Go
-        uses: actions/setup-go@v6
-        with:
-          go-version: 1.24.x
+        uses: projectdiscovery/actions/setup/go@v1
 
       - name: release test
         uses: projectdiscovery/actions/goreleaser@v1
 
@@ -8,14 +8,12 @@ jobs:
     name: Run Scoring
     runs-on: ubuntu-latest-16-cores
     steps:
-      - name: Set up Go
-        uses: actions/setup-go@v6
-        with:
-          go-version: 1.24.x
-
       - name: Check out code
         uses: actions/checkout@v6
 
+      - name: Set up Go
+        uses: projectdiscovery/actions/setup/go@v1
+
       - name: Build
         run: go build .
         working-directory: cmd/katana/
 
@@ -1,4 +1,4 @@
-FROM golang:1.25.5-alpine AS build-env
+FROM golang:1.25.7-alpine AS build-env
 RUN apk add --no-cache git gcc musl-dev
 WORKDIR /app
 COPY . /app
 
@@ -40,7 +40,7 @@
 
 ## Installation
 
-katana requires Go 1.24+ to install successfully. If you encounter any installation issues, we recommend trying with the latest available version of Go, as the minimum required version may have changed. Run the command below or download a pre-compiled binary from the [release page](https://github.com/projectdiscovery/katana/releases).
+katana requires Go 1.25+ to install successfully. If you encounter any installation issues, we recommend trying with the latest available version of Go, as the minimum required version may have changed. Run the command below or download a pre-compiled binary from the [release page](https://github.com/projectdiscovery/katana/releases).
 
 ```console
 CGO_ENABLED=1 go install github.com/projectdiscovery/katana/cmd/katana@latest
@@ -138,8 +138,11 @@ CONFIGURATION:
    -flc, -field-config string    path to custom field configuration file
    -s, -strategy string          Visit strategy (depth-first, breadth-first) (default "depth-first")
    -iqp, -ignore-query-params    Ignore crawling same path with different query-param values
+   -fsu, -filter-similar         filter crawling of similar looking URLs (e.g., /users/123 and /users/456)
+   -fst, -filter-similar-threshold int  number of distinct values before a path position is treated as parameter (default 10)
    -tlsi, -tls-impersonate       enable experimental client hello (ja3) tls randomization
    -dr, -disable-redirects       disable following redirects (default false)
+   -kb, -knowledge-base          enable knowledge base classification
 
 DEBUG:
    -health-check, -hc        run diagnostic check up
@@ -157,6 +160,8 @@ HEADLESS:
    -noi, -no-incognito               start headless chrome without incognito mode
    -cwu, -chrome-ws-url string       use chrome browser instance launched elsewhere with the debugger listening at this URL
    -xhr, -xhr-extraction             extract xhr request url,method in jsonl output
+   -csp, -captcha-solver-provider string  captcha solver provider (e.g. capsolver)
+   -csk, -captcha-solver-key string       captcha solver provider api key
 
 SCOPE:
    -cs, -crawl-scope string[]       in scope url regex to be followed by crawler
@@ -176,6 +181,7 @@ FILTER:
    -mdc, -match-condition string          match response with dsl based condition
    -fdc, -filter-condition string         filter response with dsl based condition
    -duf, -disable-unique-filter           disable duplicate content filtering
+   -fpt, -filter-page-type string[]      filter response with page type (e.g. error,captcha,parked)
 
 RATE-LIMIT:
    -c, -concurrency int          number of concurrent fetchers to use (default 10)
@@ -330,6 +336,8 @@ HEADLESS:
    -noi, -no-incognito               start headless chrome without incognito mode
    -cwu, -chrome-ws-url string       use chrome browser instance launched elsewhere with the debugger listening at this URL
    -xhr, -xhr-extraction             extract xhr requests
+   -csp, -captcha-solver-provider string  captcha solver provider (e.g. capsolver)
+   -csk, -captcha-solver-key string       captcha solver provider api key
 ```
 
 *`-no-sandbox`*
@@ -361,6 +369,34 @@ katana -u https://tesla.com -headless -system-chrome -headless-options --disable
 ```
 
 
+### Captcha Solving
+
+Katana supports automatic captcha detection and solving during headless crawling. When a captcha page is encountered, katana identifies the captcha provider, solves it via an external service, and continues crawling.
+
+Supported captcha types: **reCAPTCHA v2**, **reCAPTCHA v3**, **reCAPTCHA Enterprise**, **Cloudflare Turnstile**, **hCaptcha**
+
+*`-captcha-solver-provider`*
+----
+
+Option to specify the captcha solver provider. Currently supported: `capsolver`.
+
+*`-captcha-solver-key`*
+----
+
+API key for the captcha solver provider.
+
+```console
+katana -u https://example.com -headless -csp capsolver -csk YOUR_API_KEY
+```
+
+The provider and key can also be set via environment variables:
+
+```console
+export CAPTCHA_SOLVER_PROVIDER=capsolver
+export CAPTCHA_SOLVER_KEY=YOUR_API_KEY
+katana -u https://example.com -headless
+```
+
 ## Scope Control
 
 Crawling can be endless if not scoped; as such, katana comes with multiple options to define the crawl scope.
@@ -509,6 +545,21 @@ Automatic form filling is experimental feature.
 katana -u https://tesla.com -aff
 ```
 
+*`-filter-similar`*
+----
+
+Option to filter crawling of similar looking URLs by normalizing variable path segments. This detects IDs, UUIDs, hashes, dates, and other dynamic values, and also learns repeating patterns at runtime. For example, `/users/123` and `/users/456` are treated as the same endpoint.
+
+```console
+katana -u https://tesla.com -fsu
+```
+
+The promotion threshold (how many distinct values at a path position before it's treated as a parameter) can be tuned with `-fst`. Lower values are more aggressive (fewer URLs crawled), higher values are more permissive. Default is `10`.
+
+```console
+katana -u https://tesla.com -fsu -fst 5
+```
+
 ## Authenticated Crawling
 
 Authenticated crawling involves including custom headers or cookies in HTTP requests to access protected resources. These headers provide authentication or authorization information, allowing you to crawl authenticated content / endpoint. You can specify headers directly in the command line or provide them as a file with katana to perform authenticated crawling.
@@ -564,6 +615,9 @@ CONFIGURATION:
    -fc, -form-config string      path to custom form configuration file
    -flc, -field-config string    path to custom field configuration file
    -s, -strategy string          Visit strategy (depth-first, breadth-first) (default "depth-first")
+   -iqp, -ignore-query-params    Ignore crawling same path with different query-param values
+   -fsu, -filter-similar         filter crawling of similar looking URLs (e.g., /users/123 and /users/456)
+   -fst, -filter-similar-threshold int  number of distinct values before a path position is treated as parameter (default 10)
 ```
 
 ### Connecting to Active Browser Session
 
@@ -166,9 +166,12 @@ pipelines offering both headless and non-headless crawling.`)
 		flagSet.StringVarP(&options.FieldConfig, "field-config", "flc", "", "path to custom field configuration file"),
 		flagSet.StringVarP(&options.Strategy, "strategy", "s", "depth-first", "Visit strategy (depth-first, breadth-first)"),
 		flagSet.BoolVarP(&options.IgnoreQueryParams, "ignore-query-params", "iqp", false, "Ignore crawling same path with different query-param values"),
+		flagSet.BoolVarP(&options.FilterSimilar, "filter-similar", "fsu", false, "filter crawling of similar looking URLs (e.g., /users/123 and /users/456)"),
+		flagSet.IntVarP(&options.FilterSimilarThreshold, "filter-similar-threshold", "fst", 10, "number of distinct values before a path position is treated as parameter (default 10)"),
 		flagSet.BoolVarP(&options.TlsImpersonate, "tls-impersonate", "tlsi", false, "enable experimental client hello (ja3) tls randomization"),
 		flagSet.BoolVarP(&options.DisableRedirects, "disable-redirects", "dr", false, "disable following redirects (default false)"),
 		flagSet.BoolVarP(&options.PathClimb, "path-climb", "pc", false, "enable path climb (auto crawl parent paths)"),
+		flagSet.BoolVarP(&options.KnowledgeBase, "knowledge-base", "kb", false, "enable knowledge base classification"),
 	)
 
 	flagSet.CreateGroup("debug", "Debug",
@@ -191,6 +194,8 @@ pipelines offering both headless and non-headless crawling.`)
 		flagSet.BoolVarP(&options.XhrExtraction, "xhr-extraction", "xhr", false, "extract xhr request url,method in jsonl output"),
 		flagSet.IntVarP(&options.MaxFailureCount, "max-failure-count", "mfc", 10, "maximum number of consecutive action failures before stopping"),
 		flagSet.BoolVarP(&options.EnableDiagnostics, "enable-diagnostics", "ed", false, "enable diagnostics"),
+		flagSet.StringVarEnv(&options.CaptchaSolverProvider, "captcha-solver-provider", "csp", "", "CAPTCHA_SOLVER_PROVIDER", "captcha solver provider (e.g. capsolver)"),
+		flagSet.StringVarEnv(&options.CaptchaSolverAPIKey, "captcha-solver-key", "csk", "", "CAPTCHA_SOLVER_KEY", "captcha solver provider api key"),
 	)
 
 	flagSet.CreateGroup("scope", "Scope",
@@ -213,6 +218,7 @@ pipelines offering both headless and non-headless crawling.`)
 		flagSet.StringVarP(&options.OutputMatchCondition, "match-condition", "mdc", "", "match response with dsl based condition"),
 		flagSet.StringVarP(&options.OutputFilterCondition, "filter-condition", "fdc", "", "filter response with dsl based condition"),
 		flagSet.BoolVarP(&options.DisableUniqueFilter, "disable-unique-filter", "duf", false, "disable duplicate content filtering"),
+		flagSet.StringSliceVarP(&options.FilterPageType, "filter-page-type", "fpt", nil, "filter response with page type (e.g. error,captcha,parked)", goflags.CommaSeparatedStringSliceOptions),
 	)
 
 	flagSet.CreateGroup("ratelimit", "Rate-Limit",
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-FROM golang:1.25.5-alpine AS build-env`
	`1`	`+FROM golang:1.25.7-alpine AS build-env`
`2`	`2`	`RUN apk add --no-cache git gcc musl-dev`
`3`	`3`	`WORKDIR /app`
`4`	`4`	`COPY . /app`