Skip to content

Commit 766c90c

Browse files
committed
Initial commit
0 parents  commit 766c90c

7 files changed

Lines changed: 710 additions & 0 deletions

File tree

.github/workflows/ci.yml

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Continuous integration: checks the converter script compiles, builds the
# generator image, produces a small corpus and verifies its pass/error mix.
name: CI

on:
  push:
  pull_request:

jobs:
  workflow:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      # Byte-compiling catches syntax errors without running the converter.
      - name: Validate converter syntax
        run: python -m compileall container/sqlsmith_to_slt.py

      - name: Build generator image
        run: docker build -t sqlsmith-slt .

      # Small targets keep the smoke run short; the verification step below
      # checks the same limits (>= 10 passes, <= 2 errors).
      - name: Generate sample corpus
        run: |
          mkdir -p out
          docker run --rm -v "${PWD}/out":/out \
            -e SQLSMITH_PASS_TARGET=10 \
            -e SQLSMITH_MAX_ERRORS=2 \
            -e SQLSMITH_BATCH_QUERIES=50 \
            sqlsmith-slt

      - name: Verify corpus mix
        run: |
          python - <<'PY'
          from pathlib import Path
          import sys

          out_dir = Path('out')
          cases = sorted(out_dir.glob('case_*.test'))
          if not cases:
              sys.exit('no SQLLogicTest cases generated')

          passes = errors = 0
          for path in cases:
              lines = path.read_text().splitlines()
              if not lines:
                  # An empty case file would otherwise surface as a bare IndexError.
                  sys.exit(f'{path} is empty')
              # Classify each case by its first line (the SLT directive).
              head = lines[0].strip().lower()
              if head.startswith('statement error'):
                  errors += 1
              elif head.startswith('statement ok') or head.startswith('query '):
                  passes += 1

          if passes < 10:
              sys.exit(f'expected at least 10 passing cases, got {passes}')
          if errors > 2:
              sys.exit(f'expected <= 2 error cases, got {errors}')

          print(f'cases={len(cases)} passes={passes} errors={errors}')
          PY

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Generated corpus output (contents of the bind-mounted output directory)
out/*

# Python bytecode and virtual environments
__pycache__/
*.pyc
venv/

# Local environment files and OS metadata
.env
.DS_Store

Dockerfile

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Global build argument. An ARG declared before the first FROM is visible only
# to FROM instructions, so any stage that needs it has to redeclare it.
ARG DEBIAN_FRONTEND=noninteractive

# ---------------------------------------------------------------------------
# builder: compiles SQLsmith and the sqllogictest runner.
# ---------------------------------------------------------------------------
FROM debian:bookworm AS builder

# Redeclare without a value to inherit the global default inside this stage;
# otherwise apt-get below would not see DEBIAN_FRONTEND at all.
ARG DEBIAN_FRONTEND

# Branch or tag of SQLsmith to check out (passed to `git clone --branch`).
ARG SQLSMITH_REF=master

# Toolchain and development headers for both builds. python3 is needed only
# for the source patch applied to slt_sqlite.c further down.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        flex \
        git \
        libfmt-dev \
        libmariadb-dev \
        libncurses-dev \
        libpq-dev \
        libpqxx-dev \
        libreadline-dev \
        libsqlite3-dev \
        pkg-config \
        python3 \
    && rm -rf /var/lib/apt/lists/*

RUN git clone --depth 1 --branch "${SQLSMITH_REF}" https://github.com/anse1/sqlsmith /opt/sqlsmith

RUN cmake -S /opt/sqlsmith -B /opt/sqlsmith/build -DCMAKE_BUILD_TYPE=Release \
    && cmake --build /opt/sqlsmith/build --target sqlsmith -- -j"$(nproc)" \
    && cmake --install /opt/sqlsmith/build --prefix /opt/sqlsmith/install

# Build sqllogictest runner to validate generated corpora inside the image.
# The sources are fetched from the upstream trunk. The python3 step rewrites
# the block in slt_sqlite.c that unlinks the database file on connect so that
# existing database contents are preserved; the assert makes the build fail
# if that block is not found instead of silently skipping the patch.
RUN mkdir -p /tmp/sqllogictest \
    && curl -fsSL "https://sqlite.org/sqllogictest/raw/src/sqllogictest.c?ci=trunk" -o /tmp/sqllogictest/sqllogictest.c \
    && curl -fsSL "https://sqlite.org/sqllogictest/raw/src/sqllogictest.h?ci=trunk" -o /tmp/sqllogictest/sqllogictest.h \
    && curl -fsSL "https://sqlite.org/sqllogictest/raw/src/slt_sqlite.c?ci=trunk" -o /tmp/sqllogictest/slt_sqlite.c \
    && curl -fsSL "https://sqlite.org/sqllogictest/raw/src/slt_odbc3.c?ci=trunk" -o /tmp/sqllogictest/slt_odbc3.c \
    && curl -fsSL "https://sqlite.org/sqllogictest/raw/src/md5.c?ci=trunk" -o /tmp/sqllogictest/md5.c \
    && python3 -c "from pathlib import Path; path = Path('/tmp/sqllogictest/slt_sqlite.c'); text = path.read_text(); needle = ' if( zConnectStr && zConnectStr[0] ){\\n#ifndef _WIN32\\n unlink(zConnectStr);\\n#else\\n _unlink(zConnectStr);\\n#endif\\n }\\n'; replacement = ' if( zConnectStr && zConnectStr[0] ){\\n /* Preserve existing database contents for validation; no unlink */\\n }\\n'; assert needle in text, 'expected sqlite connect reset block not found'; path.write_text(text.replace(needle, replacement))" \
    && gcc -O2 -DOMIT_ODBC -I/tmp/sqllogictest \
        /tmp/sqllogictest/sqllogictest.c \
        /tmp/sqllogictest/md5.c \
        -lsqlite3 -ldl -lpthread \
        -o /usr/local/bin/sqllogictest \
    && rm -rf /tmp/sqllogictest
47+
48+
# ---------------------------------------------------------------------------
# runtime: slim image carrying only the built binaries, the helper scripts
# and the shared libraries they link against.
# ---------------------------------------------------------------------------
FROM debian:bookworm-slim AS runtime

# Build-time only; kept as ARG so it is not baked into the container env.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime shared libraries only. libpqxx-6.4 is the shared-library package
# behind bookworm's libpqxx-dev; installing the -dev package here would also
# pull headers and libpq-dev into the final image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        libfmt9 \
        libmariadb3 \
        libncurses6 \
        libpq5 \
        libpqxx-6.4 \
        libsqlite3-0 \
        python3 \
        sqlite3 \
    && rm -rf /var/lib/apt/lists/*

COPY --from=builder /opt/sqlsmith/build/sqlsmith /usr/local/bin/sqlsmith
COPY --from=builder /usr/local/bin/sqllogictest /usr/local/bin/sqllogictest

# Layer all container scripts under /usr/local/bin so nothing runs on the host.
COPY container/entrypoint.sh /usr/local/bin/generate_slt.sh
COPY container/sqlsmith_to_slt.py /usr/local/bin/sqlsmith_to_slt.py
COPY container/init.sql /usr/local/share/sqlsmith/init.sql

RUN chmod +x /usr/local/bin/generate_slt.sh

# Default configuration; override any of these with `docker run -e NAME=value`.
# ENGINE_URI and SQLITE_URI both point at the same default database file
# (the README describes SQLITE_URI as the legacy name).
ENV OUTPUT_DIR=/out \
    TARGET_ENGINE=sqlite \
    ENGINE_URI=/tmp/sqlsmith.db \
    SQLSMITH_MAX_QUERIES=1000 \
    SQLSMITH_SEED=1 \
    SQLITE_URI=/tmp/sqlsmith.db \
    OUTPUT_MODE=slt \
    SQLITE_TIMEOUT=1.0 \
    SQLLOGICTEST_ROWSORT=rowsort \
    SEED_FILENAME=seeds.sql \
    VALIDATE_SLT=1 \
    SQLITE_INIT_SQL=/usr/local/share/sqlsmith/init.sql

# Output directory; bind-mount a host directory here to collect the corpus.
VOLUME ["/out"]

# Exec form, so arguments after the image name on `docker run` are appended
# to the entrypoint script's argument list.
ENTRYPOINT ["/usr/local/bin/generate_slt.sh"]

README.md

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# SQLsmith SQLLogicTest Corpus Generator
2+
3+
This repo builds a container image that compiles [SQLsmith](https://github.com/anse1/sqlsmith) and turns its generated queries into [SQLLogicTest](https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki) cases. Everything runs inside Docker; the host machine only needs the Docker CLI.
4+
5+
## Build the image
6+
7+
```bash
8+
# from the repository root
9+
docker build -t sqlsmith-slt .
10+
```
11+
12+
The build compiles SQLsmith from the `master` branch by default (pass `--build-arg SQLSMITH_REF=<branch-or-tag>` to build a different ref) and installs the helper scripts under `/usr/local/bin` inside the image.
13+
14+
## Generate a corpus
15+
16+
```bash
17+
mkdir -p out
18+
19+
docker run --rm \
20+
-v "$(pwd)/out":/out \
21+
sqlsmith-slt
22+
```
23+
24+
The container writes results into `/out` (mapped to `./out` on the host):
25+
26+
- `seeds.sql` – raw SQLsmith statements (aggregated across all batches) for reproducibility.
27+
- `case_*.test` – SQLLogicTest files containing expected results computed by SQLite.
28+
29+
## Customize generation
30+
31+
Override the environment variables below with `-e NAME=value` flags when running the container:
32+
33+
| Variable | Default | Description |
34+
| --- | --- | --- |
35+
| `TARGET_ENGINE` | `sqlite` | Execution backend used for result materialization. Only `sqlite` is supported today. |
36+
| `SQLSMITH_BATCH_QUERIES` | `250` | Number of statements per SQLsmith batch. `SQLSMITH_MAX_QUERIES` remains as an alias. |
37+
| `SQLSMITH_SEED` | `1` | Base seed passed to SQLsmith; each batch increments it by one for variety. |
38+
| `OUTPUT_MODE` | `slt` | Set to `slt` for SQLLogicTest cases or `statements` for a plain SQL file. |
39+
| `SQLLOGICTEST_ROWSORT` | `rowsort` | Switch to `nosort` to omit the `rowsort` directive. |
40+
| `SQLITE_TIMEOUT` | `1.0` | Seconds allowed for each SQLite execution (not yet enforced). |
41+
| `SEED_FILENAME` | `seeds.sql` | Name of the raw SQL dump written to `/out`. |
42+
| `SQLITE_INIT_SQL` | `/usr/local/share/sqlsmith/init.sql` | SQL script executed once to seed the SQLite database before SQLsmith runs. Set to empty to skip. |
43+
| `SQLSMITH_PASS_TARGET` | _(unset)_ | Minimum number of passing cases (`query` + `statement ok`) to retain. When set, the container keeps running SQLsmith until the target is met. |
44+
| `SQLSMITH_MAX_ERRORS` | _(unset)_ | Maximum number of `statement error` cases to keep. Excess failures are discarded. |
45+
| `SQLSMITH_MAX_CASES` | _(unset)_ | Optional cap on the number of new cases admitted per SQLsmith batch. |
46+
47+
The SQLite connection defaults to `/tmp/sqlsmith.db`; set `ENGINE_URI` (or the legacy `SQLITE_URI`) when you need to target a different database file or URI.
48+
49+
When `SQLSMITH_PASS_TARGET` is specified the entrypoint loops, running SQLsmith in batches of `SQLSMITH_BATCH_QUERIES` statements until the accumulated corpus contains at least that many passing cases. `SQLSMITH_MAX_ERRORS` bounds how many failure cases are retained. Each batch bumps the SQLsmith seed by one to broaden coverage while keeping the run reproducible.
50+
51+
Example: generate a corpus with at least 20 passing cases and at most 3 expected failures:
52+
53+
```bash
54+
docker run --rm \
55+
-v "$(pwd)/out":/out \
56+
-e SQLSMITH_PASS_TARGET=20 \
57+
-e SQLSMITH_MAX_ERRORS=3 \
58+
-e SQLSMITH_BATCH_QUERIES=50 \
59+
sqlsmith-slt
60+
```
61+
62+
To capture non-empty result sets, point SQLsmith (and the executor) at a populated SQLite database, for example:
63+
64+
```bash
65+
docker run --rm \
66+
-v "$(pwd)/northwind.db":/data/northwind.db:ro \
67+
-v "$(pwd)/out":/out \
68+
-e ENGINE_URI="file:/data/northwind.db?mode=ro" \
69+
sqlsmith-slt
70+
```
71+
72+
By default the container seeds `/tmp/sqlsmith.db` using `SQLITE_INIT_SQL`, provisioning sample commerce-style tables so the generated queries have data to read from. Replace that script or mount your own to tailor the schema.
73+
74+
Pass extra flags directly through to SQLsmith by appending them after the image name. Example: `docker run … sqlsmith-slt --exclude-catalog`.
75+
76+
## Verify the output
77+
78+
After running the container you should see files in `out/`:
79+
80+
```bash
81+
ls out | head
82+
# case_000001.test
83+
# case_000002.test
84+
# ...
85+
# seeds.sql
86+
```
87+
88+
Each `.test` file follows SQLLogicTest formatting and can be executed with your preferred SLT runner.
89+
90+
Count the generated cases:
91+
92+
```bash
93+
ls out/case_*.test | wc -l
94+
```

0 commit comments

Comments
 (0)