From bf87e1f854fb683233e737c7e0d8e473ed186522 Mon Sep 17 00:00:00 2001 From: David Garske Date: Fri, 1 May 2026 18:50:29 -0700 Subject: [PATCH] zynq7000: add Xilinx Zynq-7000 (ZC702) port --- Makefile | 4 + arch.mk | 40 ++ config/examples/zynq7000.config | 42 ++ docs/Targets.md | 215 ++++++++ hal/zynq7000.c | 774 +++++++++++++++++++++++++++++ hal/zynq7000.h | 186 +++++++ hal/zynq7000.ld | 62 +++ src/boot_arm32.c | 8 +- src/boot_zynq7000_start.S | 208 ++++++++ test-app/ARM-zynq7000.ld | 52 ++ test-app/Makefile | 5 + test-app/app_zynq7000.c | 67 +++ tools/scripts/zc702/jtag_load.tcl | 76 +++ tools/scripts/zc702/zc702_qspi.bif | 19 + 14 files changed, 1754 insertions(+), 4 deletions(-) create mode 100644 config/examples/zynq7000.config create mode 100644 hal/zynq7000.c create mode 100644 hal/zynq7000.h create mode 100644 hal/zynq7000.ld create mode 100644 src/boot_zynq7000_start.S create mode 100644 test-app/ARM-zynq7000.ld create mode 100644 test-app/app_zynq7000.c create mode 100644 tools/scripts/zc702/jtag_load.tcl create mode 100644 tools/scripts/zc702/zc702_qspi.bif diff --git a/Makefile b/Makefile index 2986d8db51..cd9a66e3f5 100644 --- a/Makefile +++ b/Makefile @@ -285,6 +285,10 @@ ifeq ($(TARGET),sama5d3) MAIN_TARGET:=wolfboot.bin test-app/image_v1_signed.bin endif +ifeq ($(TARGET),zynq7000) + MAIN_TARGET:=wolfboot.bin test-app/image_v1_signed.bin +endif + ifeq ($(TARGET),rp2350) MAIN_TARGET:=include/target.h keytools wolfboot_signing_private_key.der pico-sdk-info endif diff --git a/arch.mk b/arch.mk index 261ace49c4..38ca8dda01 100644 --- a/arch.mk +++ b/arch.mk @@ -303,6 +303,19 @@ ifeq ($(ARCH),ARM) CFLAGS+=-DWOLFBOOT_USE_STDLIBC endif + ifeq ($(TARGET),zynq7000) + # AMD/Xilinx Zynq-7000 (Cortex-A9, ARMv7-A) - ZC702 Evaluation Kit. + # Loaded by Xilinx FSBL into DDR; see hal/zynq7000.{c,h,ld}. 
+ CORTEX_A9=1 + UPDATE_OBJS:=src/update_ram.o + CFLAGS+=-DWOLFBOOT_DUALBOOT -fno-builtin -ffreestanding + # Do NOT define WOLFBOOT_USE_STDLIBC: newlib's memcpy uses unaligned + # LDRs which fault on Cortex-A9 when MMU is off (FSBL leaves MMU off + # on Zynq-7000). Use wolfBoot's own aligned-safe memcpy from src/string.c. + # U-Boot legacy header detection for Linux/U-Boot payloads (Milestone 5) + CFLAGS+=-DWOLFBOOT_UBOOT_LEGACY + endif + ifeq ($(TARGET),va416x0) CFLAGS+=-I$(WOLFBOOT_ROOT)/hal/vorago/ \ -I$(VORAGO_SDK_DIR)/common/drivers/hdr/ \ @@ -344,6 +357,32 @@ ifeq ($(CORTEX_A5),1) -DWOLFSSL_ARM_ARCH=7 -DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON endif endif +else +ifeq ($(CORTEX_A9),1) + # Cortex-A9 (ARMv7-A, 32-bit) - Zynq-7000. + # Build in ARM state (-marm); reset vector lands in ARM mode after FSBL. + # Note: do not filter out -mthumb from CFLAGS/LDFLAGS - that converts the + # variables to simple-expansion flavor and breaks lazy $(LSCRIPT) expansion + # in test-app/Makefile. -marm appended later wins over -mthumb anyway. + FPU=-mfpu=vfp3-d16 + CFLAGS+=-mcpu=cortex-a9 -mtune=cortex-a9 -marm -static -z noexecstack \ + -mno-unaligned-access + LDFLAGS+=-mcpu=cortex-a9 -mtune=cortex-a9 -marm -static -z noexecstack + # Cortex-A9 uses boot_arm32.o (shared do_boot) + a Zynq-7000-specific + # startup (VBAR, MMU/cache disable, all-mode stacks). The shared + # boot_arm32_start.S used by SAMA5D3/Cortex-A5 lacks those. 
+ OBJS+=src/boot_arm32.o src/boot_zynq7000_start.o + ifeq ($(NO_ASM),1) + MATH_OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/sp_c32.o + else + MATH_OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/sp_arm32.o + ifneq ($(NO_ARM_ASM),1) + OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/arm/armv8-32-sha256-asm.o + OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.o + CFLAGS+=-DWOLFSSL_SP_ARM32_ASM -DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO \ + -DWOLFSSL_ARM_ARCH=7 -DWOLFSSL_ARMASM_INLINE -DWOLFSSL_ARMASM_NO_NEON + endif + endif else # All others use boot_arm.o OBJS+=src/boot_arm.o @@ -456,6 +495,7 @@ else endif endif endif +endif ## Renesas RX diff --git a/config/examples/zynq7000.config b/config/examples/zynq7000.config new file mode 100644 index 0000000000..7a9ee2b459 --- /dev/null +++ b/config/examples/zynq7000.config @@ -0,0 +1,42 @@ +ARCH?=ARM +TARGET?=zynq7000 +SIGN?=ECC256 +HASH?=SHA256 + +# Cortex-A9 (Zynq-7000) - selected automatically via TARGET=zynq7000 in arch.mk +DEBUG?=0 +DEBUG_UART?=1 +V?=0 +SPMATH?=1 + +# wolfBoot itself is loaded by Xilinx FSBL to DDR at 0x04000000 (hal/zynq7000.ld). +# WOLFBOOT_LOAD_ADDRESS is the *app* staging address: where wolfBoot copies +# the verified signed image before do_boot. Must NOT overlap wolfBoot itself +# AND src/update_ram.c expects dst > wolfBoot's _end - so place it above the +# wolfBoot region (0x04000000-0x040FFFFF), at the 256 MB DDR offset. +WOLFBOOT_LOAD_ADDRESS=0x10000000 + +# QSPI flash (16 MB N25Q128A on ZC702) via XQspiPs (hal/zynq7000.c). +# Override EXT_FLASH=0 on the make command line for JTAG-only dev builds. 
+EXT_FLASH?=1 +NO_XIP=1 + +# QSPI partition layout (16 MB total): +# 0x000000 - 0x0FFFFF BOOT.BIN (FSBL + wolfboot) +# 0x100000 - 0x6FFFFF BOOT_A (~6 MB primary) +# 0x700000 - 0xCFFFFF UPDATE_B (~6 MB update) +# 0xD00000 - 0xD0FFFF SWAP scratch (64 KB sector) +WOLFBOOT_PARTITION_BOOT_ADDRESS=0x00100000 +WOLFBOOT_PARTITION_UPDATE_ADDRESS=0x00700000 +WOLFBOOT_PARTITION_SWAP_ADDRESS=0x00D00000 +WOLFBOOT_PARTITION_SIZE=0x00600000 +WOLFBOOT_SECTOR_SIZE=0x10000 + +# DTS placeholders (used in Milestone 5 for Linux payload) +WOLFBOOT_LOAD_DTS_ADDRESS=0x00100000 +WOLFBOOT_DTS_BOOT_ADDRESS=0x00080000 +WOLFBOOT_DTS_UPDATE_ADDRESS=0x00680000 + +IMAGE_HEADER_SIZE=1024 + +CROSS_COMPILE=arm-none-eabi- diff --git a/docs/Targets.md b/docs/Targets.md index bbce15e5cb..319cd580b2 100644 --- a/docs/Targets.md +++ b/docs/Targets.md @@ -53,6 +53,7 @@ This README describes configuration of supported targets. * [TI Hercules TMS570LC435](#ti-hercules-tms570lc435) * [Vorago VA416x0](#vorago-va416x0) * [Xilinx Zynq UltraScale](#xilinx-zynq-ultrascale) +* [Xilinx Zynq-7000 (ZC702)](#xilinx-zynq-7000-zc702) * [Versal Gen 1 VMK180](#versal-gen-1-vmk180) ## STM32F4 @@ -3391,6 +3392,220 @@ Entering idle loop... ``` +## Xilinx Zynq-7000 (ZC702) + +AMD/Xilinx Zynq-7000 (XC7Z020) on the ZC702 Evaluation Kit - dual ARM Cortex-A9 (ARMv7-A 32-bit), 1 GB DDR3, 16 MB QSPI NOR (N25Q128A), SDIO, dual UART. Older sibling of the ZynqMP family - distinct silicon, different controllers (`XQspiPs` not `XQspiPsu`, Arasan SDHCI v2.0 not v3.0, no CSU/PMU/PUF, PL310 L2). + +wolfBoot is loaded by the Xilinx Zynq-7000 FSBL into DDR: +``` +BootROM -> FSBL -> wolfBoot -> signed app (or U-Boot/Linux) +``` + +The FSBL handles all PS init (DDR, MIO, clocks, QSPI ref clock); wolfBoot only initializes UART, the QSPI controller, runs the verify/swap logic, and chain-loads the next stage. 
+ +This target supports: +- **QSPI boot** (primary): `config/examples/zynq7000.config` +- **SD card boot** (Milestone 6, planned - config not included yet): `config/examples/zc702_sdcard.config` +- **JTAG-loaded dev** via Platform Cable II + xsdb (no flash required) + +### Prerequisites + +1. **Toolchain**: `arm-none-eabi-gcc` (Arm bare-metal). Tested with 13.2. +2. **Xilinx Vitis** (provides `bootgen`, `xsdb`, and `program_flash`). Source the env once per shell: + ```sh + source /opt/Xilinx/2025.2/Vitis/settings64.sh + ``` + Vivado's `settings64.sh` works equivalently if you don't have Vitis installed. +3. **Platform Cable II USB drivers** (one-time, requires root). Without these the + cable enumerates as `03fd:0013` with empty descriptors and `xsdb` reports no + JTAG targets: + ```sh + sudo /opt/Xilinx/2025.2/Vitis/data/xicom/cable_drivers/lin64/install_script/install_drivers/install_drivers + ``` + Unplug/replug the cable afterward so udev can load the firmware. +4. **Pre-built ZC702 FSBL + DTB** (clone next to your wolfBoot checkout; run these from the wolfBoot directory): + ```sh + git clone https://github.com/wolfSSL/soc-prebuilt-firmware.git ../soc-prebuilt-firmware + export PREBUILT_DIR=$(pwd)/../soc-prebuilt-firmware/zc702-zynq + ls $PREBUILT_DIR/zynq_fsbl.elf # required + ``` +5. **Hardware**: ZC702 with Platform Cable II (USB JTAG) connected to J22 and powered. + +### Configuration Options + +Key options in `config/examples/zynq7000.config`: + +- `ARCH=ARM` - 32-bit ARM +- `TARGET=zynq7000` - selects `hal/zynq7000.{c,h,ld}` and the `CORTEX_A9` arch.mk block +- `SIGN=ECC256` / `HASH=SHA256` - smaller and faster than RSA on Cortex-A9 +- `EXT_FLASH=1` - QSPI as external flash via `XQspiPs` +- `WOLFBOOT_LOAD_ADDRESS=0x10000000` - DDR offset 256 MB, where the verified app is staged before `do_boot`. Must be **above** wolfBoot's own region (`0x04000000`-`0x040FFFFF`) because `src/update_ram.c` enforces `dst > _end`. 
+- `WOLFBOOT_PARTITION_BOOT_ADDRESS=0x00100000` - 16 MB QSPI layout below +- `CROSS_COMPILE=arm-none-eabi-` + +DDR layout: + +| Region | Address range | Contents | +|---|---|---| +| App stage | `0x10000000`+ | Verified signed image, app text/data/bss/stack | +| Image header staging | `0x0FFFFC00`-`0x0FFFFFFF` | wolfBoot copies the 1 KB header here just before the load address | +| wolfBoot | `0x04000000`-`0x040FFFFF` | Loaded by FSBL, runs in place | +| FSBL/BootROM/OCM | `0x00000000`-`0x000FFFFF` | OCM low-mapped during boot | + +QSPI partition layout (16 MB on-board flash): + +| Offset | Size | Contents | +|-------------|---------|-----------------------------------| +| `0x000000` | ~512 KB | BOOT.BIN (FSBL + wolfboot) | +| `0x100000` | 6 MB | BOOT_A (signed primary image) | +| `0x700000` | 6 MB | UPDATE_B (signed update slot) | +| `0xD00000` | 64 KB | SWAP scratch sector | +| `0xD10000`+ | | reserved | + +### Building wolfBoot + +```sh +cp config/examples/zynq7000.config .config +make keysclean && make keytools +make TARGET=zynq7000 wolfboot.elf +``` + +The result is a 32-bit ARM ELF with entry point `0x04000000` and `.text` start at the same address (vector table at the load base). + +### Building BOOT.BIN (production QSPI boot) + +```sh +cp ${PREBUILT_DIR}/zynq_fsbl.elf . +bootgen -arch zynq -image tools/scripts/zc702/zc702_qspi.bif -w -o BOOT.BIN +``` + +`bootgen` ships with Vitis. The `.bif` template at `tools/scripts/zc702/zc702_qspi.bif` is the minimum bootable image; add `download.bit` and a DTB if you also need to load the PL bitstream and a Linux device tree (see Milestone 5). + +### Programming QSPI + +Set ZC702 boot mode straps to **JTAG** for programming, then either: +- Vitis: `program_flash -f BOOT.BIN -flash_type qspi_single -fsbl ${PREBUILT_DIR}/zynq_fsbl.elf` +- Vivado Hardware Manager: Tools -> Add Configuration Memory Device -> select N25Q128 -> program with BOOT.BIN at offset 0. 
+ +After programming, set boot mode to **QSPI** (SW16 - see UG850 ch.1.2.4) and power-cycle. Console comes up on UART1 (J17 USB-UART), 115200 8N1. + +### JTAG-loaded development (no flash) + +For driver bring-up or quick iteration, skip bootgen and load directly via Platform Cable II: + +```sh +source /opt/Xilinx/2025.2/Vitis/settings64.sh # once per shell +xsdb tools/scripts/zc702/jtag_load.tcl +``` + +The script runs the prebuilt FSBL (PS init: DDR/MIO/clocks/UART), then loads `wolfboot.elf` over the top, sets PC to `0x04000000` and CPSR to SVC with IRQ/FIQ masked, and resumes. Override paths via `FSBL_ELF=...` or `WOLFBOOT_ELF=...` env vars. + +With a signed image programmed at QSPI offset `0x100000` (see "Building and flashing the signed test app" below), expected UART output is: + +``` +wolfBoot Zynq-7000 (ZC702) hal_init +Versions: Boot 1, Update 0 +Trying Boot partition at 0x100000 +Loading header 1024 bytes from 0x100000 to 0xFFFFC00 +Loading image 396 bytes from 0x100400 to 0x10000000...done +Boot partition: 0xFFFFC00 (sz 396, ver 0x1, type 0x201) +Checking integrity...done +Verifying signature...done +Successfully selected image in part: 0 +Firmware Valid +Booting at 0x10000000 + +=== ZC702 test-app: BOOT OK === +wolfBoot verified + chain-loaded this image +..... +``` + +On a **blank** QSPI (no signed image yet), wolfBoot prints `Versions: Boot 0, Update 0 / No valid image found! / wolfBoot: PANIC!` instead - that is correct behavior, not a bug. + +If `xsdb` reports `no targets found` or empty `jtag servers`, either: +- Cable USB drivers not installed - see step 3 of Prerequisites, OR +- A previous run left the CPU in a stuck JTAG state - power-cycle the ZC702 (SW10, the Pi4 GPIO 20 power relay, or your PSU control) and retry. + +A separate JTAG-only dev build (no QSPI driver) can be produced with `make EXT_FLASH=0`. 
+ +### Building and flashing the signed test app + +A minimal Cortex-A9 test app lives at `test-app/app_zynq7000.c` (UART banner + heartbeat dots). The top-level `make` target produces both `wolfboot.elf` and `test-app/image_v1_signed.bin` with the keys generated under `wolfboot_signing_private_key.der`: + +```sh +cp config/examples/zynq7000.config .config +make keysclean && make # builds wolfboot.elf + test-app/image_v1_signed.bin +``` + +Program the signed image to QSPI offset `0x100000` (the BOOT_A partition): + +```sh +program_flash -f test-app/image_v1_signed.bin \ + -fsbl ${PREBUILT_DIR}/zynq_fsbl.elf \ + -flash_type qspi_single -offset 0x100000 +``` + +`program_flash` ships with Vitis. Then run wolfBoot via `xsdb tools/scripts/zc702/jtag_load.tcl` - it should verify and chain-load the test app, producing the heartbeat output above. + +### QSPI driver self-test (`TEST_EXT_FLASH`) + +To exercise the `XQspiPs` driver in isolation - read JEDEC ID, sector erase + page program + linear-mode read-back of a 256-byte pattern at `0x200000`: + +```sh +make CFLAGS_EXTRA=-DTEST_EXT_FLASH wolfboot.elf +xsdb tools/scripts/zc702/jtag_load.tcl +``` + +Expected output: + +``` +qspi: --- TEST_EXT_FLASH start --- +qspi: JEDEC ID = 0x20bb18 rc=00 <- Micron N25Q128 +qspi: read @0x100000 = 574f4c468c010000 <- "WOLF" magic from a programmed signed image +qspi: erase sector @ 0x00200000 ... +qspi: page program ... 
+qspi: post-program JEDEC = 0x20bb18 +qspi: rdback[0..7] = 0001020304050607 +qspi: --- TEST_EXT_FLASH PASS --- +``` + +### QSPI driver design + +The driver in `hal/zynq7000.c` splits read vs cmd-only paths similarly to how the ZynqMP HAL splits SDHCI CMD17 (single-block PIO) vs CMD18 (multi-block SDMA): + +| Operation | Path | Why | +|---|---|---| +| JEDEC ID, RDSR, WREN, sector erase, page program | I/O mode (TXD0/TXD1/2/3 + auto-start) | Short, command-shaped transactions; needs precise byte counts on MOSI | +| Bulk reads (signed image, partition headers) | Linear/XIP mode (`memcpy` from `0xFC000000+offset`) | Hardware-accelerated; controller drives cmd+addr+dummy and presents data through the AXI window | + +`qspi_linear_mode_setup()` configures `LQSPI_CR=0x8000010B` (single-bit `FAST_READ` 0x0B + 1 dummy byte) which avoids needing the flash QE bit set. A sacrificial first-byte read primes the linear-mode pipeline before the actual `memcpy`. + +For TX-only commands sent without RX capture, `qspi_xfer4` picks `TXD1`/`TXD2`/`TXD3` so the controller clocks exactly *N* bytes on the wire (no 4-byte padding that some flash interprets as additional commands - this caused our WREN to fail in an early iteration). + +### Boot flow notes + +- **Cortex-A9 startup**: `src/boot_zynq7000_start.S` (Z7-specific) plus shared `src/boot_arm32.c` for `do_boot()`. Sets VBAR to wolfBoot's vector table at `0x04000000`, clears `SCTLR.{A,C,I,V}`, invalidates I-cache + branch predictor + TLB, sets stack pointers for IRQ/FIQ/ABT/UND/SVC modes, then unmasks async aborts and calls `main`. +- **MMU stays ON**, inheriting FSBL's flat 1:1 DDR mapping. Disabling the MMU on Cortex-A9 makes all memory Strongly-Ordered, which traps unaligned LDR/STR and breaks any ARMv7-A unrolled `memcpy`. +- **memcpy/memset**: do **not** define `WOLFBOOT_USE_STDLIBC` for this target. 
newlib's ARMv7-A `memcpy` uses unaligned word LDRs from arbitrary alignments and faults under any code path that runs without the MMU configured for Normal memory. wolfBoot's own byte-wise / aligned-word `memcpy` in `src/string.c` is used instead. +- **`ext_flash_read` returns bytes-read** (not 0 on success): `src/update_ram.c` checks `ret != IMAGE_HEADER_SIZE` for the header read and `ret < 0` for the body read. +- **Cache teardown** in `hal_prepare_boot()`: cleans+invalidates L1 D-cache by set/way, invalidates L1 I-cache and branch predictor, then disables MMU+caches via SCTLR before `do_boot()` performs `bx r4`. +- **Register handoff**: per ARM Linux boot ABI - `r0 = 0`, `r1 = 0`, `r2 = DTB ptr` (Linux/U-Boot, Milestone 5), entry in `r4`. +- **L2 (PL310)**: not touched by wolfBoot. Stock ZC702 FSBLs do not enable PL310; if your customised FSBL does, extend `hal_prepare_boot()` with an L2x0 clean-invalidate + disable. + +### Differences from the ZynqMP port + +| Aspect | ZynqMP (`hal/zynq.c`) | Zynq-7000 (`hal/zynq7000.c`) | +|------------------|-------------------------------|------------------------------| +| CPU | Cortex-A53 quad, AArch64 | Cortex-A9 dual, ARMv7-A | +| QSPI controller | GQSPI (`XQspiPsu`) | Linear/Static (`XQspiPs`) | +| UART IP | XUartPs @ `0xFF000000` | XUartPs @ `0xE0001000` | +| SDHCI | Arasan v3.0 + Cadence shim | Arasan v2.0 (planned) | +| Crypto HW | CSU (AES-GCM, SHA3, PUF) | none (DevC AES only) | +| Boot chain | FSBL + PMUFW + BL31 + wolfBoot| FSBL + wolfBoot | +| Linux EL | EL2 (hypervisor) | SVC (no exception levels) | +| `bootgen -arch` | `zynqmp` | `zynq` | + + ## Versal Gen 1 VMK180 AMD Versal Prime Series VMK180 Evaluation Kit - Versal Prime XCVM1802-2MSEVSVA2197 Adaptive SoC - Dual ARM Cortex-A72. diff --git a/hal/zynq7000.c b/hal/zynq7000.c new file mode 100644 index 0000000000..b171bb4101 --- /dev/null +++ b/hal/zynq7000.c @@ -0,0 +1,774 @@ +/* zynq7000.c + * + * Copyright (C) 2026 wolfSSL Inc. 
+ * + * This file is part of wolfBoot. + * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifdef TARGET_zynq7000 + +#include +#include +#include +#include "image.h" +#include "printf.h" +#include "hal/zynq7000.h" + +#ifndef ARCH_ARM +# error "wolfBoot zynq7000 HAL: wrong architecture selected. Please compile with ARCH=ARM." 
+#endif
+
+#ifdef DEBUG_UART
+/* Configure the debug UART through the Z7_UART_* registers: mask and clear
+ * its interrupts, program framing, FIFO watermarks and baud divisors, then
+ * reset and enable the TX/RX paths. Takes no arguments and returns nothing. */
+void uart_init(void)
+{
+    /* Disable interrupts */
+    Z7_UART_IDR = Z7_UART_ISR_MASK;
+    /* Disable TX/RX */
+    Z7_UART_CR = (Z7_UART_CR_TX_DIS | Z7_UART_CR_RX_DIS);
+    /* Clear ISR */
+    Z7_UART_ISR = Z7_UART_ISR_MASK;
+
+    /* 8N1 */
+    Z7_UART_MR = Z7_UART_MR_8N1;
+
+    /* Half-FIFO trigger levels (XUartPs FIFO depth = 64) */
+    Z7_UART_RXWM = 32;
+    Z7_UART_TXWM = 32;
+
+    /* RX timeout disabled */
+    Z7_UART_RXTOUT = 0;
+
+    /* baud = ref_clk / (BR_GEN * (BR_DIV + 1)) */
+    Z7_UART_BR_GEN = UART_CLK_REF / (DEBUG_UART_BAUD * (DEBUG_UART_DIV + 1));
+    Z7_UART_BR_DIV = DEBUG_UART_DIV;
+
+    /* Reset TX/RX paths */
+    Z7_UART_CR = (Z7_UART_CR_TXRST | Z7_UART_CR_RXRST);
+    /* Enable TX/RX */
+    Z7_UART_CR = (Z7_UART_CR_TX_EN | Z7_UART_CR_RX_EN);
+}
+
+/* Blocking write of sz bytes from buf to the debug UART.
+ * A '\r' is inserted in front of every '\n'. Each byte waits for room in
+ * the TX FIFO, and the function only returns once the TX FIFO reports
+ * empty, so the caller may reuse buf immediately. */
+void uart_write(const char* buf, unsigned int sz)
+{
+    unsigned int pos = 0;
+    while (sz-- > 0) {
+        char c = buf[pos++];
+        if (c == '\n') {
+            /* wait for FIFO space, then send the carriage return first */
+            while (Z7_UART_SR & Z7_UART_SR_TXFULL)
+                ;
+            Z7_UART_FIFO = (uint32_t)'\r';
+        }
+        while (Z7_UART_SR & Z7_UART_SR_TXFULL)
+            ;
+        Z7_UART_FIFO = (uint32_t)c;
+    }
+    /* drain: spin until the transmitter FIFO is empty */
+    while (!(Z7_UART_SR & Z7_UART_SR_TXEMPTY))
+        ;
+}
+#endif /* DEBUG_UART */
+
+#ifdef EXT_FLASH
+/* ===================== QSPI flash driver (XQspiPs) =====================
+ * Bare-metal driver for the Zynq-7000 "Linear/Static" QSPI controller.
+ * Command-style operations (JEDEC ID, status, write enable, sector erase,
+ * page program) run in I/O mode (single-bit SPI through the TXD/RXD FIFO).
+ * Bulk reads in spi_flash_read() switch the controller to linear mode,
+ * copy from the XIP window at Z7_QSPI_LINEAR_BASE and then switch back to
+ * I/O mode, so no linear-mode state left behind by FSBL is relied upon.
+ *
+ * The driver assumes FSBL has already configured QSPI ref clock and MIO
+ * pins (typical when wolfBoot is loaded by FSBL). qspi_init() resets and
+ * reconfigures the controller itself.
*/
+
+/* JEDEC SPI-NOR command codes (subset used by wolfBoot) */
+#define SPI_CMD_RDID            0x9F
+#define SPI_CMD_RDSR            0x05
+#define SPI_CMD_WREN            0x06
+#define SPI_CMD_WRDI            0x04
+#define SPI_CMD_READ            0x03
+#define SPI_CMD_FAST_READ       0x0B /* requires 8 dummy clocks */
+#define SPI_CMD_PAGE_PROGRAM    0x02
+#define SPI_CMD_SECTOR_ERASE    0xD8 /* 64 KB erase */
+#define SPI_STATUS_WIP          0x01 /* write-in-progress */
+#define SPI_STATUS_WEL          0x02 /* write-enable latch */
+
+#define SPI_NOR_PAGE_SIZE       256U
+#define SPI_NOR_SECTOR_SIZE     0x10000U /* 64 KB */
+
+/* Pop and discard RX FIFO words until the controller reports it empty. */
+static void qspi_drain_rxfifo(void)
+{
+    while (Z7_QSPI_ISR & Z7_QSPI_ISR_RXNEMPTY)
+        (void)Z7_QSPI_RXD;
+}
+
+/* Select the flash: rewrite only the PCS field of the config register. */
+static void qspi_cs_assert(void)
+{
+    /* PCS [13:10] = 0b1110 -> CS0 asserted. */
+    Z7_QSPI_CR = (Z7_QSPI_CR & ~Z7_QSPI_CR_PCS_MASK) | Z7_QSPI_CR_PCS_CS0;
+}
+
+/* Deselect the flash by OR-ing the "no chip select" pattern into PCS. */
+static void qspi_cs_release(void)
+{
+    /* PCS [13:10] = 0b1111 -> all CS deasserted. */
+    Z7_QSPI_CR |= Z7_QSPI_CR_PCS_NONE;
+}
+
+/* Transfer up to 4 bytes. Uses TXD(n) for partial sends WITHOUT RX (so the
+ * flash sees exactly n clock cycles of MOSI), and TXD0 (4-byte) when RX is
+ * needed (so the controller pushes a full 4-byte RX FIFO entry we can
+ * decode). Mirrors u-boot zynq_qspi.c (offsets[3] when rx_buf, offsets[len-1]
+ * otherwise).
+ *
+ *   tx     - bytes to send, or NULL to send 0xFF filler
+ *   rx     - destination for received bytes, or NULL to discard them
+ *   nbytes - clamped to 4; 0 is a no-op
+ *
+ * Does not touch chip select; the caller brackets the transaction. Every
+ * TXD write is paired with one blocking RXD read, so the RX FIFO is left
+ * empty on return. */
+static void qspi_xfer4(const uint8_t *tx, uint8_t *rx, unsigned int nbytes)
+{
+    uint32_t txw = 0xFFFFFFFFU; /* bytes not supplied by tx go out as 0xFF */
+    uint32_t rxw;
+    unsigned int i;
+
+    if (nbytes > 4)
+        nbytes = 4;
+    if (nbytes == 0)
+        return;
+
+    if (tx != NULL) {
+        /* pack tx[0] into the least-significant byte, tx[1] next, ... */
+        for (i = 0; i < nbytes; i++) {
+            txw &= ~((uint32_t)0xFFU << (i * 8));
+            txw |= ((uint32_t)tx[i]) << (i * 8);
+        }
+    }
+
+    qspi_drain_rxfifo();
+
+    if (rx != NULL || nbytes == 4) {
+        /* Receive path or full 4-byte send: use TXD0. */
+        Z7_QSPI_TXD0 = txw;
+    } else {
+        /* Send-only short transfer: pick TXD1/TXD2/TXD3 to clock exactly
+         * nbytes out the wire (no padding). The TX byte(s) are at the LSB. */
+        switch (nbytes) {
+            case 1: Z7_QSPI_TXD1 = txw; break;
+            case 2: Z7_QSPI_TXD2 = txw; break;
+            case 3: Z7_QSPI_TXD3 = txw; break;
+            default: Z7_QSPI_TXD0 = txw; break;
+        }
+    }
+
+    /* Blocks until the controller has produced the matching RX word. */
+    while (!(Z7_QSPI_ISR & Z7_QSPI_ISR_RXNEMPTY))
+        ;
+    rxw = Z7_QSPI_RXD;
+
+#ifdef DEBUG_QSPI_BYTE
+    /* Trace as "[<tx hex> / <rx hex>]" on the UART, nbytes bytes each. */
+    {
+        const char hex[] = "0123456789abcdef";
+        char line[48];
+        unsigned int p = 0;
+        line[p++] = '[';
+        for (i = 0; i < nbytes; i++) {
+            uint8_t b = (uint8_t)(txw >> (i * 8));
+            line[p++] = hex[(b >> 4) & 0xF];
+            line[p++] = hex[(b >> 0) & 0xF];
+        }
+        line[p++] = ' ';
+        line[p++] = '/';
+        line[p++] = ' ';
+        for (i = 0; i < nbytes; i++) {
+            uint8_t b = (uint8_t)(rxw >> (i * 8));
+            line[p++] = hex[(b >> 4) & 0xF];
+            line[p++] = hex[(b >> 0) & 0xF];
+        }
+        line[p++] = ']';
+        line[p++] = '\n';
+        uart_write(line, p);
+    }
+#endif
+
+    if (rx != NULL) {
+        /* unpack in the same byte order used for txw */
+        for (i = 0; i < nbytes; i++)
+            rx[i] = (uint8_t)(rxw >> (i * 8));
+    }
+}
+
+/* One complete chip-select-framed transaction of len bytes, moved in
+ * chunks of at most 4 bytes through qspi_xfer4(). tx and/or rx may be NULL
+ * (filler is sent / received data is dropped). Always returns 0. */
+static int qspi_xfer(const uint8_t *tx, uint8_t *rx, unsigned int len)
+{
+    unsigned int off = 0;
+    unsigned int chunk;
+
+    qspi_cs_assert();
+    while (off < len) {
+        chunk = len - off;
+        if (chunk > 4)
+            chunk = 4;
+        qspi_xfer4((tx != NULL) ? &tx[off] : NULL,
+                   (rx != NULL) ? &rx[off] : NULL,
+                   chunk);
+        off += chunk;
+    }
+    qspi_cs_release();
+    return 0;
+}
+
+/* I/O mode: used for short cmd-only ops (JEDEC, RDSR, WREN, sector erase,
+ * page program initiation). Reads use Linear/XIP mode separately.
+ * The controller is disabled while it is reprogrammed: interrupts masked,
+ * RX FIFO drained, status cleared, linear mode switched off, thresholds set
+ * to 1, then CR rewritten with chip select released before re-enabling. */
+static void qspi_io_mode_setup(void)
+{
+    Z7_QSPI_EN = 0;
+    Z7_QSPI_IDR = Z7_QSPI_ISR_MASK;
+    qspi_drain_rxfifo();
+    Z7_QSPI_ISR = Z7_QSPI_ISR_MASK;
+    Z7_QSPI_LQSPI_CR = 0; /* leave linear mode */
+    Z7_QSPI_TXTHR = 1;
+    Z7_QSPI_RXTHR = 1;
+    Z7_QSPI_CR = Z7_QSPI_CR_IFMODE
+               | Z7_QSPI_CR_HOLD_B
+               | Z7_QSPI_CR_SSFORCE
+               | Z7_QSPI_CR_PCS_NONE
+               | Z7_QSPI_CR_FIFO_WIDTH
+               | Z7_QSPI_CR_BAUD_DIV_8
+               | Z7_QSPI_CR_MSTREN;
+    Z7_QSPI_EN = Z7_QSPI_EN_VAL;
+}
+
+/* Linear (XIP) mode: hardware-managed reads.
Controller asserts CS, sends
+ * cmd+addr+dummy, returns data via memory-mapped accesses at 0xFC000000+.
+ * Matches XQspiPs_LinearInit() in qspips_v3_14/src/xqspips_hw.c. */
+static void qspi_linear_mode_setup(void)
+{
+    Z7_QSPI_EN = 0;
+    Z7_QSPI_IDR = Z7_QSPI_ISR_MASK;
+    qspi_drain_rxfifo();
+    Z7_QSPI_ISR = Z7_QSPI_ISR_MASK;
+
+    /* CR: IFMODE=1, FIFO=32-bit, MSTREN=1, SSFORCE=1, HOLD_B=1, /4 baud,
+     * MANSTRTEN=0 (auto-start), CPHA/CPOL=0, PCS bit 10 cleared (CS0
+     * asserted - in linear mode the controller still wants this). */
+    Z7_QSPI_CR = Z7_QSPI_CR_IFMODE
+               | Z7_QSPI_CR_HOLD_B
+               | Z7_QSPI_CR_SSFORCE
+               | Z7_QSPI_CR_FIFO_WIDTH
+               | Z7_QSPI_CR_BAUD_DIV_4
+               | Z7_QSPI_CR_MSTREN;
+    /* Single-bit FAST_READ (0x0B) with 1 dummy byte. Avoids needing QE
+     * bit set in the flash status register. */
+    Z7_QSPI_LQSPI_CR = 0x8000010BU;
+    Z7_QSPI_EN = Z7_QSPI_EN_VAL;
+}
+
+/* Controller bring-up: the driver idles in I/O mode. */
+static void qspi_init(void)
+{
+    qspi_io_mode_setup();
+}
+
+/* Read the 3-byte JEDEC ID into out[0..2].
+ * The command byte plus three filler bytes are clocked in one 4-byte
+ * transfer; the first received byte lines up with the command and is
+ * skipped. Returns the qspi_xfer() result (0 on success). */
+static int spi_flash_read_id(uint8_t out[3])
+{
+    uint8_t cmd[4] = { SPI_CMD_RDID, 0, 0, 0 };
+    uint8_t rx[4] = { 0, 0, 0, 0 };
+    int rc = qspi_xfer(cmd, rx, sizeof(cmd));
+    if (rc == 0) {
+        out[0] = rx[1];
+        out[1] = rx[2];
+        out[2] = rx[3];
+    }
+    return rc;
+}
+
+/* Read the flash status register (RDSR) into *status.
+ * Returns the qspi_xfer() result (0 on success). */
+static int spi_flash_status(uint8_t *status)
+{
+    uint8_t cmd[2] = { SPI_CMD_RDSR, 0 };
+    uint8_t rx[2] = { 0, 0 };
+    int rc = qspi_xfer(cmd, rx, sizeof(cmd));
+    if (rc == 0)
+        *status = rx[1];
+    return rc;
+}
+
+/* Poll the status register until write-in-progress clears.
+ * Returns 0 when the flash is idle, -1 if a status read fails. */
+static int spi_flash_wait_ready(void)
+{
+    uint8_t status = 0xFF;
+    /* Spin until WIP clears. No timeout: a stuck flash is a board issue. */
+    do {
+        if (spi_flash_status(&status) != 0)
+            return -1;
+    } while ((status & SPI_STATUS_WIP) != 0);
+    return 0;
+}
+
+/* Send WREN and verify that the flash latched it.
+ * Returns 0 only if the status register shows WEL set afterwards; -1 (or
+ * the transfer error) otherwise. */
+static int spi_flash_write_enable(void)
+{
+    uint8_t cmd = SPI_CMD_WREN;
+    uint8_t status = 0;
+    int rc;
+
+    rc = qspi_xfer(&cmd, NULL, 1);
+    if (rc != 0)
+        return rc;
+    /* Confirm the write-enable latch really is set before the caller
+     * issues an erase/program that the flash would otherwise ignore. */
+    if (spi_flash_status(&status) != 0)
+        return -1;
+    if ((status & SPI_STATUS_WEL) == 0)
+        return -1;
+    return 0;
+}
+
+/* Highest byte address that fits the 3-byte address phase used by the
+ * erase and program commands below. */
+#define SPI_NOR_ADDR24_MAX      0x00FFFFFFU
+
+/* Erase the 64 KB sector containing address (command 0xD8) and wait for
+ * completion.
+ * Returns 0 on success, -1 if address does not fit in 24 bits (it would
+ * otherwise be truncated and erase a different sector), or the error from
+ * write-enable / transfer / ready-wait. */
+static int spi_flash_sector_erase(uint32_t address)
+{
+    uint8_t cmd[4];
+    int rc;
+
+    if (address > SPI_NOR_ADDR24_MAX)
+        return -1;
+
+    rc = spi_flash_write_enable();
+    if (rc != 0)
+        return rc;
+
+    cmd[0] = SPI_CMD_SECTOR_ERASE;
+    cmd[1] = (uint8_t)((address >> 16) & 0xFFU);
+    cmd[2] = (uint8_t)((address >> 8) & 0xFFU);
+    cmd[3] = (uint8_t)((address >> 0) & 0xFFU);
+    rc = qspi_xfer(cmd, NULL, sizeof(cmd));
+    if (rc != 0)
+        return rc;
+
+    return spi_flash_wait_ready();
+}
+
+/* Program up to one page (SPI_NOR_PAGE_SIZE bytes) starting at address.
+ *
+ *   address - 24-bit flash byte address
+ *   data    - source buffer, len bytes
+ *   len     - 1..SPI_NOR_PAGE_SIZE; address..address+len-1 must stay inside
+ *             a single page
+ *
+ * Returns 0 on success. Returns -1 without touching the flash when an
+ * argument violates the constraints above, and the underlying error when
+ * write-enable or the ready-wait fails. */
+static int spi_flash_page_program(uint32_t address,
+                                  const uint8_t *data,
+                                  unsigned int len)
+{
+    uint8_t hdr[4];
+    unsigned int off;
+    unsigned int chunk;
+    int rc;
+
+    if (data == NULL || len == 0 || len > SPI_NOR_PAGE_SIZE)
+        return -1;
+    if (address > SPI_NOR_ADDR24_MAX)
+        return -1;
+    /* Reject writes that run past the end of the page instead of letting
+     * the flash place the excess bytes somewhere the caller did not ask
+     * for (NOTE(review): N25Q parts wrap within the page - confirm against
+     * the datasheet for other devices). */
+    if ((address & (SPI_NOR_PAGE_SIZE - 1U)) + len > SPI_NOR_PAGE_SIZE)
+        return -1;
+
+    rc = spi_flash_write_enable();
+    if (rc != 0)
+        return rc;
+
+    hdr[0] = SPI_CMD_PAGE_PROGRAM;
+    hdr[1] = (uint8_t)((address >> 16) & 0xFFU);
+    hdr[2] = (uint8_t)((address >> 8) & 0xFFU);
+    hdr[3] = (uint8_t)((address >> 0) & 0xFFU);
+
+    /* Command header and payload must share one chip-select assertion, so
+     * drive qspi_xfer4() directly instead of two qspi_xfer() calls. */
+    qspi_cs_assert();
+    qspi_xfer4(hdr, NULL, 4);
+    for (off = 0; off < len; off += chunk) {
+        chunk = len - off;
+        if (chunk > 4)
+            chunk = 4;
+        qspi_xfer4(&data[off], NULL, chunk);
+    }
+    qspi_cs_release();
+
+    return spi_flash_wait_ready();
+}
+
+/* Reads use Linear/XIP mode: switch the controller to linear mode, do a
+ * memcpy from the XIP window at 0xFC000000+offset, then return the
+ * controller to I/O mode. Mirrors how the ZynqMP HAL splits CMD17 (single
+ * block PIO) vs CMD18 (multi-block DMA) on SDHCI - here, short cmd ops use
+ * I/O mode and bulk reads use linear/XIP.
+ * Copies len bytes from flash offset address into data, one byte at a
+ * time. Always returns 0 (len == 0 returns before touching the
+ * controller). */
+static int spi_flash_read(uint32_t address, uint8_t *data, unsigned int len)
+{
+    if (len == 0)
+        return 0;
+
+    qspi_linear_mode_setup();
+    {
+        const volatile uint8_t *xip =
+            (const volatile uint8_t *)(Z7_QSPI_LINEAR_BASE + address);
+        unsigned int i;
+        /* Sacrificial read: the first XIP byte after switching to linear
+         * mode is unreliable while the controller primes its read pipeline.
+         * Read one byte from the same address and discard it. */
+        (void)xip[0];
+        for (i = 0; i < len; i++)
+            data[i] = xip[i];
+    }
+    qspi_io_mode_setup();
+    return 0;
+}
+#if defined(TEST_EXT_FLASH) || defined(TEST_QSPI)
+/* QSPI self-test (enable with -DTEST_EXT_FLASH or -DTEST_QSPI):
+ *   1) Read JEDEC ID and print it
+ *   2) Erase a 64 KB sector at TEST_EXT_FLASH_ADDR (default 2 MB offset)
+ *   3) Page-program a 256 B pattern (i & 0xFF)
+ *   4) Read back and verify
+ * Mirrors the existing src/spi_flash.c test_ext_flash() logic. Output via
+ * UART. Called from hal_init() below, right after qspi_init().
*/
+#ifndef TEST_EXT_FLASH_ADDR
+#define TEST_EXT_FLASH_ADDR (2U * 1024U * 1024U)
+#endif
+
+/* Write a string literal to the UART. The length comes from the literal
+ * itself (terminating NUL excluded) so it can never drift out of sync with
+ * the text the way a hand-counted length can. */
+#define QSPI_TEST_PUTS(lit) \
+    uart_write((lit), (unsigned int)(sizeof(lit) - 1U))
+
+/* Print one byte as two lowercase hex digits. */
+static void qspi_print_hex_byte(uint8_t b)
+{
+    static const char hex[] = "0123456789abcdef";
+    char buf[2];
+    buf[0] = hex[(b >> 4) & 0xFU];
+    buf[1] = hex[(b >> 0) & 0xFU];
+    uart_write(buf, 2);
+}
+
+/* Print a 32-bit value as eight hex digits, most-significant byte first. */
+static void qspi_print_hex32(uint32_t v)
+{
+    qspi_print_hex_byte((uint8_t)(v >> 24));
+    qspi_print_hex_byte((uint8_t)(v >> 16));
+    qspi_print_hex_byte((uint8_t)(v >> 8));
+    qspi_print_hex_byte((uint8_t)(v >> 0));
+}
+
+/* Print a 32-bit word in flash byte order (least-significant byte first),
+ * i.e. the order the bytes sit in the linear window. */
+static void qspi_print_word_bytes(uint32_t w)
+{
+    qspi_print_hex_byte((uint8_t)(w >> 0));
+    qspi_print_hex_byte((uint8_t)(w >> 8));
+    qspi_print_hex_byte((uint8_t)(w >> 16));
+    qspi_print_hex_byte((uint8_t)(w >> 24));
+}
+
+/* Destructive driver self-test: JEDEC ID, sanity read of the BOOT
+ * partition, erase + program + read-back of one page at
+ * TEST_EXT_FLASH_ADDR, plus a raw 32-bit linear-window dump. Progress and
+ * the PASS / failure verdict go to the UART; nothing is returned. */
+static void qspi_selftest(void)
+{
+    uint8_t patbuf[SPI_NOR_PAGE_SIZE];
+    uint8_t rdback[SPI_NOR_PAGE_SIZE];
+    uint8_t id[3] = { 0, 0, 0 };
+    unsigned int i;
+    int rc;
+
+    /* incrementing byte pattern 00 01 02 ... ff */
+    for (i = 0; i < SPI_NOR_PAGE_SIZE; i++)
+        patbuf[i] = (uint8_t)(i & 0xFFU);
+
+    QSPI_TEST_PUTS("qspi: --- TEST_EXT_FLASH start ---\n");
+
+    /* 1) JEDEC ID */
+    rc = spi_flash_read_id(id);
+    QSPI_TEST_PUTS("qspi: JEDEC ID = 0x");
+    qspi_print_hex_byte(id[0]);
+    qspi_print_hex_byte(id[1]);
+    qspi_print_hex_byte(id[2]);
+    QSPI_TEST_PUTS(" rc=");
+    qspi_print_hex_byte((uint8_t)rc);
+    QSPI_TEST_PUTS("\n");
+    if (id[0] == 0x00 || id[0] == 0xFF) {
+        QSPI_TEST_PUTS("qspi: JEDEC read returned blank - driver broken\n");
+        return;
+    }
+
+    /* 1b) Sanity read of known-programmed area at 0x100000 (signed image
+     * staged via Vitis program_flash). Should start with 'WOLF' magic. */
+    {
+        uint8_t boot[8] = { 0 };
+        spi_flash_read(0x00100000U, boot, sizeof(boot));
+        QSPI_TEST_PUTS("qspi: read @0x100000 = ");
+        for (i = 0; i < 8; i++)
+            qspi_print_hex_byte(boot[i]);
+        QSPI_TEST_PUTS("\n");
+    }
+
+    /* 2) Erase */
+    QSPI_TEST_PUTS("qspi: erase sector @ 0x");
+    qspi_print_hex32(TEST_EXT_FLASH_ADDR);
+    QSPI_TEST_PUTS(" ...\n");
+    rc = spi_flash_sector_erase(TEST_EXT_FLASH_ADDR);
+    if (rc != 0) {
+        QSPI_TEST_PUTS("qspi: erase FAILED\n");
+        return;
+    }
+
+    /* 3) Page program */
+    QSPI_TEST_PUTS("qspi: page program ...\n");
+    rc = spi_flash_page_program(TEST_EXT_FLASH_ADDR, patbuf, SPI_NOR_PAGE_SIZE);
+    if (rc != 0) {
+        QSPI_TEST_PUTS("qspi: program FAILED\n");
+        return;
+    }
+
+    /* 4a) Re-read JEDEC ID after program to confirm controller is alive */
+    {
+        uint8_t id2[3] = { 0, 0, 0 };
+        spi_flash_read_id(id2);
+        QSPI_TEST_PUTS("qspi: post-program JEDEC = 0x");
+        qspi_print_hex_byte(id2[0]);
+        qspi_print_hex_byte(id2[1]);
+        qspi_print_hex_byte(id2[2]);
+        QSPI_TEST_PUTS("\n");
+    }
+
+    /* 4b) Read back at TEST_EXT_FLASH_ADDR + compare */
+    for (i = 0; i < SPI_NOR_PAGE_SIZE; i++)
+        rdback[i] = 0;
+    spi_flash_read(TEST_EXT_FLASH_ADDR, rdback, SPI_NOR_PAGE_SIZE);
+    QSPI_TEST_PUTS("qspi: rdback[0..7] = ");
+    for (i = 0; i < 8; i++)
+        qspi_print_hex_byte(rdback[i]);
+    QSPI_TEST_PUTS("\n");
+
+    /* 4c) Linear-mode XIP sanity check: read 32-bit words then decode bytes.
+     * Single-byte AXI accesses to the linear window confuse the controller -
+     * the burst-aware controller wants 32-bit reads. */
+    QSPI_TEST_PUTS("qspi: xip32@0x200000 = ");
+    {
+        volatile uint32_t *xipw;
+        unsigned int j;
+        Z7_QSPI_EN = 0;
+        Z7_QSPI_LQSPI_CR = 0x80000003U; /* LQ_MODE=1, INST=0x03 (READ), no dummy */
+        Z7_QSPI_EN = Z7_QSPI_EN_VAL;
+        xipw = (volatile uint32_t*)(Z7_QSPI_LINEAR_BASE + TEST_EXT_FLASH_ADDR);
+        for (j = 0; j < 2; j++)
+            qspi_print_word_bytes(xipw[j]);
+        QSPI_TEST_PUTS(" xip32@0x100000 = ");
+        xipw = (volatile uint32_t*)(Z7_QSPI_LINEAR_BASE + 0x100000U);
+        for (j = 0; j < 2; j++)
+            qspi_print_word_bytes(xipw[j]);
+        QSPI_TEST_PUTS("\n");
+        /* Restore I/O mode for any later transfers. */
+        Z7_QSPI_EN = 0;
+        Z7_QSPI_LQSPI_CR = 0;
+        Z7_QSPI_EN = Z7_QSPI_EN_VAL;
+    }
+
+    /* Verdict: the first differing byte aborts the test. */
+    for (i = 0; i < SPI_NOR_PAGE_SIZE; i++) {
+        if (rdback[i] != patbuf[i]) {
+            QSPI_TEST_PUTS("qspi: MISMATCH @ idx 0x");
+            qspi_print_hex_byte((uint8_t)(i >> 8));
+            qspi_print_hex_byte((uint8_t)(i & 0xFFU));
+            QSPI_TEST_PUTS(" got 0x");
+            qspi_print_hex_byte(rdback[i]);
+            QSPI_TEST_PUTS(" expected 0x");
+            qspi_print_hex_byte(patbuf[i]);
+            QSPI_TEST_PUTS("\n");
+            return;
+        }
+    }
+    QSPI_TEST_PUTS("qspi: --- TEST_EXT_FLASH PASS ---\n");
+}
+#endif /* TEST_EXT_FLASH || TEST_QSPI */
+
+#endif /* EXT_FLASH (qspi block) */
+
+/* Board bring-up hook: debug UART + banner (DEBUG_UART), QSPI controller
+ * (EXT_FLASH), then the optional self-test and BOOT-partition dump. */
+void hal_init(void)
+{
+#ifdef DEBUG_UART
+    uart_init();
+    {
+        const char banner[] = "wolfBoot Zynq-7000 (ZC702) hal_init\n";
+        uart_write(banner, sizeof(banner) - 1);
+    }
+#endif
+#ifdef EXT_FLASH
+    qspi_init();
+#if defined(TEST_EXT_FLASH) || defined(TEST_QSPI)
+    qspi_selftest();
+#endif
+#ifdef DEBUG_BOOTPART
+    /* Dump first 16 bytes of the BOOT partition so we can see if the
+     * QSPI driver is returning the signed-image header (magic 'WOLF'). */
+    {
+        uint8_t buf[16];
+        const char hex[] = "0123456789abcdef";
+        char line[3*16 + 2];
+        unsigned int i;
+        spi_flash_read(0x00100000U, buf, sizeof(buf));
+        /* "xx " per byte, then newline + NUL in the two spare slots */
+        for (i = 0; i < sizeof(buf); i++) {
+            line[i*3 + 0] = hex[(buf[i] >> 4) & 0xF];
+            line[i*3 + 1] = hex[(buf[i] >> 0) & 0xF];
+            line[i*3 + 2] = ' ';
+        }
+        line[sizeof(line) - 2] = '\n';
+        line[sizeof(line) - 1] = 0;
+        uart_write("QSPI[0x100000]: ", 16);
+        uart_write(line, sizeof(line) - 1);
+    }
+#endif
+#endif
+}
+
+/* Cortex-A9 cache teardown sequence used before do_boot(). Written for a
+ * previous stage that may have left the MMU and L1 caches enabled:
+ * hal_prepare_boot() cleans+invalidates the L1 D-cache, clears SCTLR.M/C/I,
+ * then invalidates the L1 I-cache and branch predictor so the next stage
+ * sees a deterministic CPU state. The PL310 L2 is NOT touched here.
+ * Order follows ARM ARM B2.2.5: clean D-cache, disable MMU, invalidate
+ * I-cache, ISB. */
+static inline void z7_dsb(void) { __asm__ volatile("dsb sy" ::: "memory"); }
+static inline void z7_isb(void) { __asm__ volatile("isb sy" ::: "memory"); }
+
+static void z7_l1_dcache_clean_invalidate_all(void)
+{
+    /* v7-A clean+invalidate by set/way - iterates the data cache levels in
+     * CLIDR and walks each (set, way) issuing DCCISW. Adapted from ARMv7-A
+     * Architecture Reference Manual B2.2.4 example.
*/ + __asm__ volatile ( + "dmb sy \n" + "mrc p15, 1, r0, c0, c0, 1 \n" /* CLIDR */ + "ands r3, r0, #0x07000000 \n" + "mov r3, r3, lsr #23 \n" + "beq 5f \n" + "mov r10, #0 \n" + "1: \n" + "add r2, r10, r10, lsr #1 \n" + "mov r1, r0, lsr r2 \n" + "and r1, r1, #7 \n" + "cmp r1, #2 \n" + "blt 4f \n" + "mcr p15, 2, r10, c0, c0, 0\n" /* CSSELR */ + "isb \n" + "mrc p15, 1, r1, c0, c0, 0 \n" /* CCSIDR */ + "and r2, r1, #7 \n" + "add r2, r2, #4 \n" /* line size */ + "ldr r4, =0x3FF \n" + "ands r4, r4, r1, lsr #3 \n" /* assoc */ + "clz r5, r4 \n" + "ldr r7, =0x7FFF \n" + "ands r7, r7, r1, lsr #13 \n" /* num sets */ + "2: \n" + "mov r9, r4 \n" + "3: \n" + "orr r11, r10, r9, lsl r5 \n" + "orr r11, r11, r7, lsl r2 \n" + "mcr p15, 0, r11, c7, c14, 2 \n" /* DCCISW */ + "subs r9, r9, #1 \n" + "bge 3b \n" + "subs r7, r7, #1 \n" + "bge 2b \n" + "4: \n" + "add r10, r10, #2 \n" + "cmp r3, r10 \n" + "bgt 1b \n" + "5: \n" + "dsb sy \n" + "isb \n" + : + : + : "r0","r1","r2","r3","r4","r5","r7","r9","r10","r11","memory","cc" + ); +} + +static void z7_l1_icache_invalidate_all(void) +{ + /* ICIALLU + branch predictor invalidate */ + __asm__ volatile ( + "mov r0, #0 \n" + "mcr p15, 0, r0, c7, c5, 0 \n" /* ICIALLU */ + "mcr p15, 0, r0, c7, c5, 6 \n" /* BPIALL */ + "dsb sy \n" + "isb \n" + : : : "r0","memory" + ); +} + +static void z7_disable_mmu_and_caches(void) +{ + /* SCTLR: clear M (bit0), C (bit2), I (bit12). Leaves Z (branch predict) + * alone since we cleared BPIALL above. */ + __asm__ volatile ( + "mrc p15, 0, r0, c1, c0, 0 \n" + "bic r0, r0, #(1 << 0) \n" + "bic r0, r0, #(1 << 2) \n" + "bic r0, r0, #(1 << 12) \n" + "mcr p15, 0, r0, c1, c0, 0 \n" + "dsb sy \n" + "isb \n" + : : : "r0","memory" + ); +} + +void hal_prepare_boot(void) +{ + /* Disable IRQ + FIQ */ + __asm__ volatile("cpsid if" ::: "memory"); + z7_dsb(); + z7_l1_dcache_clean_invalidate_all(); + z7_disable_mmu_and_caches(); + z7_l1_icache_invalidate_all(); + z7_isb(); + /* PL310 L2: leave alone for first cut. 
FSBL on ZC702 typically does + * not enable PL310 unless explicitly configured; if your FSBL does, + * extend this routine with L2x0 clean-invalidate + disable. */ +} + +/* Internal flash operations are no-ops on Zynq-7000: + * QSPI is treated as external flash via ext_flash_*. */ +int RAMFUNCTION hal_flash_write(uint32_t address, const uint8_t *data, int len) +{ + (void)address; (void)data; (void)len; + return 0; +} + +int RAMFUNCTION hal_flash_erase(uint32_t address, int len) +{ + (void)address; (void)len; + return 0; +} + +void RAMFUNCTION hal_flash_unlock(void) { } +void RAMFUNCTION hal_flash_lock(void) { } + +#ifdef EXT_FLASH +int ext_flash_read(uintptr_t address, uint8_t *data, int len) +{ + if (len <= 0) + return 0; + if (spi_flash_read((uint32_t)address, data, (unsigned int)len) != 0) + return -1; + return len; /* wolfBoot's update_ram.c expects bytes-read on success */ +} + +int ext_flash_write(uintptr_t address, const uint8_t *data, int len) +{ + /* Split writes on SPI-NOR page boundaries (256 B). */ + uint32_t addr = (uint32_t)address; + unsigned int remain = (unsigned int)((len > 0) ? len : 0); + unsigned int off = 0; + + while (remain > 0) { + unsigned int page_off = addr & (SPI_NOR_PAGE_SIZE - 1U); + unsigned int chunk = SPI_NOR_PAGE_SIZE - page_off; + if (chunk > remain) + chunk = remain; + if (spi_flash_page_program(addr, data + off, chunk) != 0) + return -1; + addr += chunk; + off += chunk; + remain -= chunk; + } + return 0; +} + +int ext_flash_erase(uintptr_t address, int len) +{ + /* Erase whole sectors covering [address, address+len). The caller is + * expected to align to WOLFBOOT_SECTOR_SIZE (= SPI_NOR_SECTOR_SIZE). 
*/ + uint32_t addr = (uint32_t)address; + int remain = len; + while (remain > 0) { + if (spi_flash_sector_erase(addr) != 0) + return -1; + addr += SPI_NOR_SECTOR_SIZE; + remain -= (int)SPI_NOR_SECTOR_SIZE; + } + return 0; +} + +void ext_flash_lock(void) { } +void ext_flash_unlock(void) { } +#endif /* EXT_FLASH */ + +#endif /* TARGET_zynq7000 */ diff --git a/hal/zynq7000.h b/hal/zynq7000.h new file mode 100644 index 0000000000..41997bfff7 --- /dev/null +++ b/hal/zynq7000.h @@ -0,0 +1,186 @@ +/* zynq7000.h + * + * Copyright (C) 2026 wolfSSL Inc. + * + * This file is part of wolfBoot. + * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Xilinx Zynq-7000 (Cortex-A9, ARMv7-A 32-bit) HAL register map. + * Reference: UG585 (Zynq-7000 TRM), UG821 (Zynq-7000 SW Dev Guide). + * Target board: ZC702 Evaluation Kit (XC7Z020). 
+ */
+
+#ifndef _ZYNQ7000_H_
+#define _ZYNQ7000_H_
+
+#include <stdint.h>
+
+/* DDR memory range (PS DDR3 on ZC702: 1 GB) */
+#define Z7_DDR_BASE             0x00000000UL
+#define Z7_DDR_HIGH             0x3FFFFFFFUL
+
+/* On-chip memory (OCM, 256 KB at high alias when remapped) */
+#define Z7_OCM_BASE             0xFFFC0000UL
+
+/* SLCR (System Level Control Registers) - UG585 ch.4
+ * Offsets are relative to Z7_SLCR_BASE. LQSPI_RST_CTRL is at 0x230;
+ * offset 0x204 is DDR_RST_CTRL and must never be used for a QSPI reset. */
+#define Z7_SLCR_BASE            0xF8000000UL
+#define Z7_SLCR_UNLOCK          (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x008)))
+#define Z7_SLCR_LOCK            (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x004)))
+#define Z7_SLCR_UART_RST        (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x228)))
+#define Z7_SLCR_LQSPI_RST       (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x230)))
+#define Z7_SLCR_UART_CLK        (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x154)))
+#define Z7_SLCR_LQSPI_CLK       (*((volatile uint32_t*)(Z7_SLCR_BASE + 0x14C)))
+#define Z7_SLCR_UNLOCK_KEY      0x0000DF0DUL
+#define Z7_SLCR_LOCK_KEY        0x0000767BUL
+
+/* UART (XUartPs) - UG585 ch.19. Same IP as ZynqMP, different base. */
+#define Z7_UART0_BASE           0xE0000000UL
+#define Z7_UART1_BASE           0xE0001000UL
+
+/* DEBUG_UART_NUM (0 or 1) selects the console UART; any other value, or no
+ * value at all, falls through to the UART1 default below. */
+#if defined(DEBUG_UART_NUM) && DEBUG_UART_NUM == 0
+    #define DEBUG_UART_BASE     Z7_UART0_BASE
+#elif defined(DEBUG_UART_NUM) && DEBUG_UART_NUM == 1
+    #define DEBUG_UART_BASE     Z7_UART1_BASE
+#endif
+#ifndef DEBUG_UART_BASE
+    /* ZC702 console is wired to UART1 (MIO48/49) */
+    #define DEBUG_UART_BASE     Z7_UART1_BASE
+#endif
+
+#define Z7_UART_CR              (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x00)))
+#define Z7_UART_MR              (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x04)))
+#define Z7_UART_IDR             (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x0C)))
+#define Z7_UART_ISR             (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x14)))
+#define Z7_UART_BR_GEN          (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x18)))
+#define Z7_UART_RXTOUT          (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x1C)))
+#define Z7_UART_RXWM            (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x20)))
+#define Z7_UART_SR              (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x2C)))
+#define Z7_UART_FIFO            (*((volatile uint32_t*)(DEBUG_UART_BASE 
+ 0x30)))
+#define Z7_UART_BR_DIV          (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x34)))
+#define Z7_UART_TXWM            (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x44)))
+
+#define Z7_UART_CR_TX_DIS       0x00000020U
+#define Z7_UART_CR_TX_EN        0x00000010U
+#define Z7_UART_CR_RX_DIS       0x00000008U
+#define Z7_UART_CR_RX_EN        0x00000004U
+#define Z7_UART_CR_TXRST        0x00000002U
+#define Z7_UART_CR_RXRST        0x00000001U
+#define Z7_UART_ISR_MASK        0x00003FFFU
+#define Z7_UART_MR_8N1          0x00000020U /* parity none, 8 data, 1 stop */
+#define Z7_UART_SR_TXFULL       0x00000010U
+#define Z7_UART_SR_TXEMPTY      0x00000008U
+
+/* PS UART_REF_CLK on ZC702 is 50 MHz (IO_PLL / 20).
+ * BR_GEN = ref / (baud * (BR_DIV + 1)). For 115200 with BR_DIV=6 -> BR_GEN=62.
+ *
+ * Each of the three values can be overridden on its own from the build
+ * flags. DEBUG_UART_DIV has its own guard so that overriding only
+ * DEBUG_UART_BAUD does not leave the divider undefined.
+ */
+#ifndef UART_CLK_REF
+    #define UART_CLK_REF        50000000U
+#endif
+#ifndef DEBUG_UART_BAUD
+    #define DEBUG_UART_BAUD     115200U
+#endif
+#ifndef DEBUG_UART_DIV
+    #define DEBUG_UART_DIV      6U
+#endif
+
+/* QSPI controller (XQspiPs - the older "Linear/Static" QSPI on Z7,
+ * NOT the GQSPI on ZynqMP). UG585 ch.12. */
+#define Z7_QSPI_BASE            0xE000D000UL
+#define Z7_QSPI_LINEAR_BASE     0xFC000000UL /* XIP window for linear-mode reads */
+
+#define Z7_QSPI_CR              (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x00)))
+#define Z7_QSPI_ISR             (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x04)))
+#define Z7_QSPI_IER             (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x08)))
+#define Z7_QSPI_IDR             (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x0C)))
+#define Z7_QSPI_IMR             (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x10)))
+#define Z7_QSPI_EN              (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x14)))
+#define Z7_QSPI_DELAY           (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x18)))
+#define Z7_QSPI_TXD0            (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x1C)))
+#define Z7_QSPI_RXD             (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x20)))
+#define Z7_QSPI_SICR            (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x24)))
+#define Z7_QSPI_TXTHR           (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x28)))
+#define Z7_QSPI_RXTHR           (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x2C)))
+#define Z7_QSPI_GPIO            (*((volatile uint32_t*)(Z7_QSPI_BASE + 
0x30))) +#define Z7_QSPI_LPBK (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x38))) +#define Z7_QSPI_TXD1 (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x80))) +#define Z7_QSPI_TXD2 (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x84))) +#define Z7_QSPI_TXD3 (*((volatile uint32_t*)(Z7_QSPI_BASE + 0x88))) +#define Z7_QSPI_LQSPI_CR (*((volatile uint32_t*)(Z7_QSPI_BASE + 0xA0))) +#define Z7_QSPI_LQSPI_STS (*((volatile uint32_t*)(Z7_QSPI_BASE + 0xA4))) +#define Z7_QSPI_MODID (*((volatile uint32_t*)(Z7_QSPI_BASE + 0xFC))) + +/* QSPI Config Register (CR) bits. + * PCS is a 4-bit slave-select decode field [13:10]: 0xF = all CS deasserted, + * 0xE = CS0 active. We mask the whole 4-bit field, not just bit 10. + */ +#define Z7_QSPI_CR_IFMODE 0x80000000U /* flash mem interface mode */ +#define Z7_QSPI_CR_HOLD_B 0x00080000U /* drive HOLD high */ +#define Z7_QSPI_CR_MANSTRT 0x00010000U /* manual start command (kick) */ +#define Z7_QSPI_CR_MANSTRTEN 0x00008000U /* manual start enable */ +#define Z7_QSPI_CR_SSFORCE 0x00004000U /* manual CS control */ +#define Z7_QSPI_CR_PCS_MASK 0x00003C00U /* PCS field [13:10] */ +#define Z7_QSPI_CR_PCS_NONE 0x00003C00U /* all CS deasserted (0xF<<10) */ +#define Z7_QSPI_CR_PCS_CS0 0x00003800U /* CS0 asserted (0xE<<10) */ +#define Z7_QSPI_CR_REF_CLK 0x00000100U +#define Z7_QSPI_CR_FIFO_WIDTH 0x000000C0U /* must be 11 (32-bit) */ +#define Z7_QSPI_CR_BAUD_DIV_MSK 0x00000038U +/* BAUDDIV field is value N in bits[5:3]; clock = ref_clk / 2^(N+1). + * N=1 -> /4, N=2 -> /8, N=3 -> /16. 
*/ +#define Z7_QSPI_CR_BAUD_DIV_4 0x00000008U /* /4 (BAUDDIV=1) */ +#define Z7_QSPI_CR_BAUD_DIV_8 0x00000010U /* /8 (BAUDDIV=2) */ +#define Z7_QSPI_CR_BAUD_DIV_16 0x00000018U /* /16 (BAUDDIV=3) */ +#define Z7_QSPI_CR_CPHA 0x00000004U +#define Z7_QSPI_CR_CPOL 0x00000002U +#define Z7_QSPI_CR_MSTREN 0x00000001U + +/* QSPI Interrupt Status Register (ISR) bits */ +#define Z7_QSPI_ISR_TXUF 0x00000040U /* TX underflow */ +#define Z7_QSPI_ISR_RXFULL 0x00000020U /* RX FIFO full */ +#define Z7_QSPI_ISR_RXNEMPTY 0x00000010U /* RX FIFO not empty */ +#define Z7_QSPI_ISR_TXFULL 0x00000008U /* TX FIFO full */ +#define Z7_QSPI_ISR_TXNFULL 0x00000004U /* TX FIFO threshold */ +#define Z7_QSPI_ISR_RXOVR 0x00000001U /* RX overrun */ +#define Z7_QSPI_ISR_MASK 0x0000007DU + +#define Z7_QSPI_EN_VAL 0x00000001U /* enable controller */ + +/* SLCR clock/reset for QSPI (FSBL normally pre-configures these) */ +#define Z7_SLCR_LQSPI_CLK_DIV_MSK 0x00003F00U +#define Z7_SLCR_LQSPI_CLK_DIV_5 0x00000500U +#define Z7_SLCR_LQSPI_CLK_SRCSEL_M 0x00000030U +#define Z7_SLCR_LQSPI_CLK_CLKACT0 0x00000001U +#define Z7_SLCR_LQSPI_RST_REF 0x00000002U +#define Z7_SLCR_LQSPI_RST_CPU 0x00000001U + +/* SDIO (Arasan SDHCI v2.0). UG585 ch.10. Filled out in Milestone 6. */ +#define Z7_SDIO0_BASE 0xE0100000UL +#define Z7_SDIO1_BASE 0xE0101000UL + +/* DevC (Device Configuration: AES + bitstream loader). UG585 ch.6. */ +#define Z7_DEVC_BASE 0xF8007000UL + +/* GIC (PL390 / GIC-400 v1) - per-CPU interface and distributor. */ +#define Z7_GIC_CPUIF_BASE 0xF8F00100UL +#define Z7_GIC_DIST_BASE 0xF8F01000UL + +/* PL310 L2 cache controller. UG585 ch.3. */ +#define Z7_PL310_BASE 0xF8F02000UL + +/* SCU + private timer/watchdog. UG585 ch.3. 
*/ +#define Z7_SCU_BASE 0xF8F00000UL +#define Z7_GTIMER_BASE 0xF8F00200UL +#define Z7_PTIMER_BASE 0xF8F00600UL + +#endif /* _ZYNQ7000_H_ */ diff --git a/hal/zynq7000.ld b/hal/zynq7000.ld new file mode 100644 index 0000000000..aeca136809 --- /dev/null +++ b/hal/zynq7000.ld @@ -0,0 +1,62 @@ +OUTPUT_FORMAT("elf32-littlearm") +OUTPUT_ARCH(arm) + +/* wolfBoot is loaded by Xilinx FSBL into DDR at 0x04000000. + * Reserve 1 MB for code/data/bss/stack. */ +MEMORY +{ + DDR_MEM(rwx): ORIGIN = 0x04000000, LENGTH = 0x00100000 +} + +ENTRY(reset_vector_entry) + +SECTIONS +{ + .text : { + _start_text = .; + KEEP(*(start)) + *(.text) + *(.text.*) + *(.rodata) + *(.rodata*) + . = ALIGN(4); + *(.glue_7) + . = ALIGN(4); + *(.eh_frame) + . = ALIGN(4); + _end_text = .; + } > DDR_MEM + + . = ALIGN(4); + .dummy : { + _edummy = .; + } > DDR_MEM + + .data : AT (LOADADDR(.dummy)) { + _start_data = .; + *(.vectors) + *(.data) + *(.data.*) + _end_data = .; + } > DDR_MEM + + .bss (NOLOAD) : { + . = ALIGN(4); + _start_bss = .; + *(.bss) + *(.bss.*) + *(COMMON) + _end_bss = .; + _end = .; + } > DDR_MEM +} + +kernel_addr = 0x00100000; +update_addr = 0x00700000; +dts_addr = 0x00000000; + +_romsize = _end_data - _start_text; +_sramsize = _end_bss - _start_text; +END_STACK = _start_text; +_stack_top = ORIGIN(DDR_MEM) + LENGTH(DDR_MEM); +end = .; diff --git a/src/boot_arm32.c b/src/boot_arm32.c index dedd1e4375..256e065806 100644 --- a/src/boot_arm32.c +++ b/src/boot_arm32.c @@ -68,13 +68,13 @@ void RAMFUNCTION do_boot(const uint32_t *app_offset) /* Move the dts pointer to r5 (as first argument) */ asm volatile("mov r5, %0" : : "r"(dts_offset)); #else - asm volatile("mov r5, 0"); + asm volatile("mov r5, #0"); #endif /* Zero registers r1, r2, r3 */ - asm volatile("mov r3, 0"); - asm volatile("mov r2, 0"); - asm volatile("mov r1, 0"); + asm volatile("mov r3, #0"); + asm volatile("mov r2, #0"); + asm volatile("mov r1, #0"); /* Move the dts pointer to r0 (as first argument) */ asm volatile("mov r0, r5"); 
diff --git a/src/boot_zynq7000_start.S b/src/boot_zynq7000_start.S
new file mode 100644
index 0000000000..f98bcb8d10
--- /dev/null
+++ b/src/boot_zynq7000_start.S
@@ -0,0 +1,208 @@
+/* boot_zynq7000_start.S
+ *
+ * Cortex-A9 (ARMv7-A 32-bit) startup for Xilinx Zynq-7000.
+ *
+ * Replaces src/boot_arm32_start.S for the zynq7000 target. Performs the
+ * minimum CPU setup that every Zynq-7000 standalone image needs before
+ * running C code, modeled after the Xilinx standalone BSP boot.S
+ * (data/embeddedsw/lib/bsp/standalone_v9_4/src/arm/cortexa9/gcc/boot.S).
+ *
+ *  1. mask IRQ + FIQ and force SVC mode
+ *  2. set VBAR to wolfBoot's vector table (so aborts route to us, not
+ *     to whatever FSBL/BootROM left vectors pointing at)
+ *  3. keep the MMU on (FSBL's flat mapping); clear SCTLR V, A, C and I bits
+ *  4. invalidate TLB, I-cache and branch predictor (D-cache is left alone)
+ *  5. set up stack pointer for SVC/IRQ/FIQ/ABT/UND modes
+ *  6. clear .bss, jump to main()
+ *
+ * Copyright (C) 2026 wolfSSL Inc.
+ *
+ * This file is part of wolfBoot.
+ *
+ * wolfBoot is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfBoot is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + + .arm + .section start, "ax" + + .globl reset_vector_entry + .align 4 +reset_vector_entry: +_vector_table: + b isr_reset /* 0x00 reset */ + b isr_undef /* 0x04 undefined */ + b isr_swi /* 0x08 swi */ + b isr_pabt /* 0x0C prefetch abort */ + b isr_dabt /* 0x10 data abort */ + .word _romsize /* 0x14 (size word, kept for FSBL header parity) */ + b isr_irq /* 0x18 IRQ */ + b isr_fiq /* 0x1C FIQ */ + +isr_undef: b isr_undef +isr_swi: b isr_swi +isr_pabt: b isr_pabt +isr_dabt: b isr_dabt +isr_irq: b isr_irq +isr_fiq: b isr_fiq + +isr_reset: + /* 1. Mask IRQ + FIQ, force SVC mode */ + cpsid if + mrs r0, cpsr + bic r0, r0, #0x1f + orr r0, r0, #0x13 /* SVC mode */ + msr cpsr_c, r0 + + /* 2. Set VBAR to our vector table (the load address). */ + ldr r0, =_vector_table + mcr p15, 0, r0, c12, c0, 0 + /* Adjust SCTLR: keep MMU (M) ON to inherit FSBL's flat DDR mapping - + * disabling MMU would treat all memory as Strongly-Ordered, causing + * unaligned LDR/STR (e.g. newlib memcpy from 2-byte aligned strings) + * to fault. Clear A (alignment check), V (high vectors), and the + * cache enables for deterministic behavior. */ + mrc p15, 0, r1, c1, c0, 0 + bic r1, r1, #(1 << 13) /* V bit (high vectors) */ + bic r1, r1, #(1 << 1) /* A bit (alignment fault check) */ + bic r1, r1, #(1 << 2) /* C bit (D-cache) */ + bic r1, r1, #(1 << 12) /* I bit (I-cache) */ + mcr p15, 0, r1, c1, c0, 0 + dsb + isb + + /* 4. Invalidate TLB, I-cache, branch predictor. Leave D-cache alone - + * FSBL left it consistent with DDR; cleaning by set/way here is risky + * since 'dirty' lines from FSBL must be cleaned, not just invalidated. 
*/
+    mov     r0, #0
+    mcr     p15, 0, r0, c8, c7, 0       /* TLBIALL */
+    mcr     p15, 0, r0, c7, c5, 0       /* ICIALLU */
+    mcr     p15, 0, r0, c7, c5, 6       /* BPIALL */
+    dsb
+    isb
+
+    /* 5. Set stack pointers for IRQ/FIQ/ABT/UND/SVC modes.
+     * The four exception modes get a 1 KB slice each at the very top of
+     * the stack region. SVC starts below them and grows down toward .bss,
+     * so the main stack never runs through an exception-mode stack:
+     *   _stack_top - 0x0000   IRQ
+     *   _stack_top - 0x0400   FIQ
+     *   _stack_top - 0x0800   ABT
+     *   _stack_top - 0x0C00   UND
+     *   _stack_top - 0x1000   SVC - main wolfBoot stack (largest)
+     */
+    mrs     r0, cpsr
+    bic     r0, r0, #0x1f               /* keep I/F masks, clear mode bits */
+
+    orr     r1, r0, #0x12               /* IRQ */
+    msr     cpsr_c, r1
+    ldr     sp, =_stack_top
+
+    orr     r1, r0, #0x11               /* FIQ */
+    msr     cpsr_c, r1
+    ldr     sp, =(_stack_top - 0x400)
+
+    orr     r1, r0, #0x17               /* ABT */
+    msr     cpsr_c, r1
+    ldr     sp, =(_stack_top - 0x800)
+
+    orr     r1, r0, #0x1b               /* UND */
+    msr     cpsr_c, r1
+    ldr     sp, =(_stack_top - 0xC00)
+
+    orr     r1, r0, #0x13               /* SVC (where main runs) */
+    msr     cpsr_c, r1
+    ldr     sp, =(_stack_top - 0x1000)
+
+    /* Save BootROM r4 (unused now but kept for parity with arm32 path). */
+    push    {r4}
+
+    /* 6. .data needs no copy: the image is linked to and loaded into DDR,
+     * so LMA == VMA in our linker script. The previous copy loop loaded
+     * three words from the two-word _lp_data table, which made it copy
+     * from _start_data into the gap between _end_data and _start_bss
+     * rather than do nothing, so it has been removed. */
+
+    /* Zero .bss */
+    adr     r2, _lp_bss
+    ldmia   r2, {r3, r4}                /* r3 = _start_bss, r4 = _end_bss */
+    mov     r2, #0
+1:  cmp     r3, r4
+    strcc   r2, [r3], #4
+    bcc     1b
+
+    /* Enable async-abort delivery so we get an exception now, not later */
+    mrs     r0, cpsr
+    bic     r0, r0, #(1 << 8)           /* clear A bit -> async aborts unmasked */
+    msr     cpsr_xsf, r0
+
+    /* Jump to main(). lr is set to the instruction after bx, so a return
+     * from main() falls into _panic. */
+    ldr     r4, =main
+    mov     lr, pc
+    bx      r4
+
+_panic:
+    b       _panic
+
+/* L1 D-cache invalidate by set/way (DCISW: lines are discarded, not
+ * written back). Not called from the reset path above. 
*/
+/* Walks the cache levels reported by CLIDR (up to the Level of Coherency)
+ * and issues DCISW on every set/way of each data or unified level.
+ * r4-r11 are saved and restored; CSSELR is left at 0 on exit.
+ * Local labels: 1 = per-level loop, 2 = per-set loop, 3 = per-way loop,
+ * 4 = skip to next level, 5 = done. Each label is defined once so that
+ * "bgt 1b" really returns to the level-loop top. */
+_z7_invalidate_dcache_all:
+    push    {r4-r11, lr}
+    dmb     sy
+    mrc     p15, 1, r0, c0, c0, 1       /* CLIDR */
+    ands    r3, r0, #0x07000000
+    mov     r3, r3, lsr #23             /* r3 = LoC * 2 */
+    beq     5f
+    mov     r10, #0                     /* r10 = current level * 2 */
+1:  add     r2, r10, r10, lsr #1
+    mov     r1, r0, lsr r2
+    and     r1, r1, #7                  /* cache type at this level */
+    cmp     r1, #2
+    blt     4f                          /* no data cache here: next level */
+    mcr     p15, 2, r10, c0, c0, 0      /* CSSELR */
+    isb
+    mrc     p15, 1, r1, c0, c0, 0       /* CCSIDR */
+    and     r2, r1, #7
+    add     r2, r2, #4                  /* line size offset */
+    ldr     r4, =0x3FF
+    ands    r4, r4, r1, lsr #3          /* assoc */
+    clz     r5, r4
+    ldr     r7, =0x7FFF
+    ands    r7, r7, r1, lsr #13         /* num sets */
+2:  mov     r9, r4
+3:  orr     r11, r10, r9, lsl r5
+    orr     r11, r11, r7, lsl r2
+    mcr     p15, 0, r11, c7, c6, 2      /* DCISW */
+    subs    r9, r9, #1
+    bge     3b
+    subs    r7, r7, #1
+    bge     2b
+4:  add     r10, r10, #2
+    cmp     r3, r10
+    bgt     1b
+5:  mov     r10, #0
+    mcr     p15, 2, r10, c0, c0, 0      /* CSSELR */
+    dsb     sy
+    isb
+    pop     {r4-r11, pc}
+
+    .align
+_lp_data:
+    .word _start_data
+    .word _end_data
+_lp_bss:
+    .word _start_bss
+    .word _end_bss
diff --git a/test-app/ARM-zynq7000.ld b/test-app/ARM-zynq7000.ld
new file mode 100644
index 0000000000..363d0cf6c4
--- /dev/null
+++ b/test-app/ARM-zynq7000.ld
@@ -0,0 +1,52 @@
+OUTPUT_FORMAT("elf32-littlearm")
+OUTPUT_ARCH(arm)
+
+/* App is staged by wolfBoot to DDR at WOLFBOOT_LOAD_ADDRESS=0x10000000.
+ * Stack carved out from the half-MB region just above. */
+MEMORY
+{
+    DDR_MEM(rwx)  : ORIGIN = 0x10000000, LENGTH = 0x00080000 /* 512 KB code/data/bss */
+    STACK_MEM(rw) : ORIGIN = 0x10080000, LENGTH = 0x00080000 /* 512 KB stack */
+}
+
+ENTRY(reset_vector_entry)
+SECTIONS
+{
+    .text : AT (ORIGIN(DDR_MEM)) {
+        _start_text = .;
+        *(.iv)
+        *(.text)
+        *(.rodata)
+        *(.rodata*)
+        . = ALIGN(4);
+        *(.glue_7)
+        . = ALIGN(4);
+        *(.eh_frame)
+        . = ALIGN(4);
+        _end_text = .;
+    }
+
+    . = ALIGN(4);
+    .dummy : {
+        _edummy = .;
+    }
+
+    .data : AT (LOADADDR(.dummy)) {
+        _start_data = .;
+        *(.vectors)
+        *(.data)
+        _end_data = .;
+    }
+
+    .bss (NOLOAD) : {
+        . 
= ALIGN(4); + _start_bss = .; + *(.bss) + _end_bss = .; + } +} +_romsize = _end_data - _start_text; +_sramsize = _end_bss - _start_text; +END_STACK = _start_text; +_stack_top = ORIGIN(STACK_MEM) + LENGTH(STACK_MEM); +end = .; diff --git a/test-app/Makefile b/test-app/Makefile index e5b06108c3..17868942dc 100644 --- a/test-app/Makefile +++ b/test-app/Makefile @@ -413,6 +413,11 @@ ifeq ($(TARGET),sama5d3) LSCRIPT_TEMPLATE:=$(ARCH)-$(TARGET).ld endif +ifeq ($(TARGET),zynq7000) + APP_OBJS+=./boot_arm32_start.o + LSCRIPT_TEMPLATE:=$(ARCH)-$(TARGET).ld +endif + ifeq ($(TARGET),stm32l4) APP_OBJS+=$(STM32CUBE)/Drivers/STM32L4xx_HAL_Driver/Src/stm32l4xx_hal_flash.o APP_OBJS+=$(STM32CUBE)/Drivers/STM32L4xx_HAL_Driver/Src/stm32l4xx_hal_flash_ex.o diff --git a/test-app/app_zynq7000.c b/test-app/app_zynq7000.c new file mode 100644 index 0000000000..5b765ad566 --- /dev/null +++ b/test-app/app_zynq7000.c @@ -0,0 +1,67 @@ +/* app_zynq7000.c + * + * Bare-metal Cortex-A9 test app for the Zynq-7000 ZC702. Prints a banner + * on UART1 and a heartbeat character so the user can see do_boot() landed. + * + * Copyright (C) 2026 wolfSSL Inc. + * + * This file is part of wolfBoot. + * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+#include <stdint.h>
+
+#ifdef TARGET_zynq7000
+
+/* UART1 TX FIFO and channel status registers, accessed directly. */
+#define UART1_FIFO       (*(volatile uint32_t*)0xE0001030U)
+#define UART1_SR         (*(volatile uint32_t*)0xE000102CU)
+#define UART_SR_TXFULL   0x10U
+#define UART_SR_TXEMPTY  0x08U
+
+/* Busy-wait until the TX FIFO has room, then queue one character. */
+static void uart_putc(char c)
+{
+    while (UART1_SR & UART_SR_TXFULL)
+        ;
+    UART1_FIFO = (uint32_t)(uint8_t)c;
+}
+
+/* Write a NUL-terminated string, emitting "\r\n" for every '\n'. */
+static void uart_puts(const char *s)
+{
+    while (*s) {
+        if (*s == '\n')
+            uart_putc('\r');
+        uart_putc(*s++);
+    }
+}
+
+/* Crude busy-wait of n loop iterations; n is volatile so the loop is not
+ * optimized away. */
+static void delay(volatile uint32_t n)
+{
+    while (n--) {
+        __asm__ volatile("nop");
+    }
+}
+
+/* Entry point: print the banner once, then a '.' heartbeat forever. */
+void main(void)
+{
+    uart_puts("\n=== ZC702 test-app: BOOT OK ===\n");
+    uart_puts("wolfBoot verified + chain-loaded this image\n");
+    while (1) {
+        uart_putc('.');
+        delay(2000000);
+    }
+}
+
+#endif /* TARGET_zynq7000 */
diff --git a/tools/scripts/zc702/jtag_load.tcl b/tools/scripts/zc702/jtag_load.tcl
new file mode 100644
index 0000000000..f359d56f34
--- /dev/null
+++ b/tools/scripts/zc702/jtag_load.tcl
@@ -0,0 +1,76 @@
+# jtag_load.tcl - load wolfboot.elf onto a ZC702 via Xilinx Platform Cable II.
+#
+# Uses the prebuilt Zynq-7000 FSBL (zynq_fsbl.elf) to bring DDR / MIO /
+# clocks / UART up, then loads wolfboot.elf over the top and starts it.
+#
+# Usage:
+#   source /opt/Xilinx/2025.2/Vitis/settings64.sh
+#   xsdb tools/scripts/zc702/jtag_load.tcl
+#
+# Set the JTAG boot mode straps on the ZC702 (SW16 = all OFF) before use.
+# After this script runs the board may need a power-cycle to recover the
+# CPU into a JTAG-loadable state again.
+#
+# Override paths via env:
+#   FSBL_ELF=...      FSBL ELF path
+#   WOLFBOOT_ELF=... 
wolfboot ELF path + +set fsbl_default "$::env(HOME)/GitHub/soc-prebuilt-firmware/zc702-zynq/zynq_fsbl.elf" +set wolfboot_default "[file dirname [info script]]/../../../wolfboot.elf" + +if {[info exists ::env(FSBL_ELF)]} { set fsbl_elf $::env(FSBL_ELF) } \ + else { set fsbl_elf $fsbl_default } +if {[info exists ::env(WOLFBOOT_ELF)]} { set wolfboot_elf $::env(WOLFBOOT_ELF) } \ + else { set wolfboot_elf $wolfboot_default } + +if {![file exists $fsbl_elf]} { + puts "ERROR: FSBL not found at $fsbl_elf" + puts "Clone wolfSSL/soc-prebuilt-firmware next to wolfboot or set FSBL_ELF." + exit 1 +} +if {![file exists $wolfboot_elf]} { + puts "ERROR: wolfboot.elf not found at $wolfboot_elf" + exit 1 +} + +connect + +# Sometimes the chain comes up empty if the previous run left the CPU in an +# off-chain state (e.g. WFI with clock gated). Retry the target lookup. +for {set i 0} {$i < 5} {incr i} { + catch {targets -set -filter {name =~ "ARM Cortex-A9 MPCore #0"}} rc + if {[string first "no targets" $rc] < 0} { break } + puts "Cortex-A9 not on chain yet, retry $i ..." + after 500 +} +if {[string first "no targets" $rc] >= 0} { + puts "ERROR: no Cortex-A9 targets visible after retries." + puts "Power-cycle the ZC702 (SW10) and try again." + exit 1 +} + +# Full PS reset, then wait for BootROM to enter JTAG-mode poll loop. +rst -system +after 1500 +targets -set -filter {name =~ "ARM Cortex-A9 MPCore #0"} + +# Run FSBL to completion. It does ps7_init (DDR/MIO/clocks/UART), then +# parks itself since no bundled second-stage exists. 2-3s is plenty. +puts "Loading FSBL: $fsbl_elf" +dow $fsbl_elf +con +after 3000 + +# Stop where FSBL parked, but do NOT rst -processor here - that would drop +# us back into BootROM and lose FSBL's PS state. +stop + +# Load wolfBoot at its DDR address. xsdb's `dow` does NOT consistently set +# PC after a second target dow, so set PC and CPSR explicitly. 
+puts "Loading wolfBoot: $wolfboot_elf"
+dow $wolfboot_elf
+rwr pc 0x04000000
+rwr cpsr 0xD3 ;# SVC mode, IRQ+FIQ masked
+
+puts "Resuming - watch UART1 (115200 8N1) for the wolfBoot banner."
+con
diff --git a/tools/scripts/zc702/zc702_qspi.bif b/tools/scripts/zc702/zc702_qspi.bif
new file mode 100644
index 0000000000..cfb730a556
--- /dev/null
+++ b/tools/scripts/zc702/zc702_qspi.bif
@@ -0,0 +1,19 @@
+// bootgen image descriptor for ZC702 QSPI boot.
+//
+// Pairs the prebuilt Zynq-7000 FSBL from
+// ${PREBUILT_DIR}/zynq_fsbl.elf (default ../soc-prebuilt-firmware/zc702-zynq)
+// with wolfboot.elf produced by `make TARGET=zynq7000`.
+//
+// Usage (run from the directory that holds wolfboot.elf):
+//   PREBUILT_DIR=$HOME/GitHub/soc-prebuilt-firmware/zc702-zynq
+//   cp "${PREBUILT_DIR}/zynq_fsbl.elf" .
+//   bootgen -arch zynq -image tools/scripts/zc702/zc702_qspi.bif -w -o BOOT.BIN
+//
+// Then program BOOT.BIN to QSPI offset 0 with `program_flash` (Vitis) or
+// Vivado Hardware Manager. Set ZC702 SW16 to QSPI boot and power-cycle.
+
+the_ROM_image:
+{
+    [bootloader] zynq_fsbl.elf
+    wolfboot.elf
+}