Graeme Winter

PIO debugging

Debugging PIO programs is hard, particularly from Python, particularly when you have a complex problem to solve like building a motor control and encoder readback system. Therefore, a useful intermediate step is to build a debugger running on the rp2040 to allow stepping through the program one instruction at a time, printing the instructions as you go.

Instruction disassembly

PIO instructions are written as up to 32 uint16_t records, which in the µPython PIO implementation are filled from the highest address (i.e. the last instruction is written at position 0x1f). There is no way to read back from the program registers, but the current instruction can be read from a register for each SM, along with the program counter:

from machine import mem32

# Base address of the PIO0 register block.
PIO0_BASE = 0x50200000
# State machine 0 program counter register (offset 0xD4 from the PIO0 base).
SM0_ADDR = PIO0_BASE | 0xD4
# State machine 0 current instruction register (offset 0xD8 from the PIO0 base).
SM0_INSTR = PIO0_BASE | 0xD8

# Read the raw instruction word for state machine 0 and show it in hex.
print(hex(mem32[SM0_INSTR]))

This will print the currently executing instruction, as a number. Printing the equivalent assembly involves essentially unrolling §3.4 of the rp2040 data sheet which defines the instruction encoding. This is boring but necessary work which has been done by multiple people:

# The 3 MSBs [15:13] select the instruction; push and pull share one opcode and
# are told apart by bit [7] (0 = push, 1 = pull).
INSTRUCTIONS = ["jmp", "wait", "in", "out", "pushpull", "mov", "irq", "set"]

# Lookup tables for the various bit fields: the list index is the raw field
# value and the entry is the assembler mnemonic for it.
# JMP condition, bits [7:5]; the empty string is the unconditional jump and
# "!osre" is "output shift register not empty".
JMP_CND = ["", "!x", "x--", "!y", "y--", "x!=y", "pin", "!osre"]
# WAIT source, bits [6:5]
WAIT_SRC = ["gpio", "pin", "irq", "reserved"]
# IN source, bits [7:5]
IN_SRC = ["pins", "x", "y", "null", "reserved", "reserved", "isr", "osr"]
# OUT destination, bits [7:5]
OUT_DST = ["pins", "x", "y", "null", "pindirs", "pc", "isr", "exec"]
# MOV destination, bits [7:5]
MOV_DST = ["pins", "x", "y", "reserved", "exec", "pc", "isr", "osr"]
# MOV operation, bits [4:3]: none, invert, bit-reverse
MOV_OP = ["", "!", "::", "reserved"]
# MOV source, bits [2:0]
MOV_SRC = ["pins", "x", "y", "null", "reserved", "status", "isr", "osr"]
# SET destination, bits [7:5]
SET_DST = ["pins", "x", "y", "reserved", "pindirs", "reserved", "reserved", "reserved"]

# parsing code for individual instructions


def _jmp(instr):
    """Disassemble a JMP: bits [7:5] pick the condition, bits [4:0] the target address."""
    target = hex(instr & 0x1F)
    condition = JMP_CND[(instr >> 5) & 0x7]
    # an empty condition string marks the unconditional jump
    pieces = ["jmp", condition, target] if condition else ["jmp", target]
    return " ".join(pieces)


def _wait(instr):
    """Disassemble a WAIT: polarity in bit [7], source in bits [6:5], index in bits [4:0]."""
    polarity = (instr >> 7) & 1
    source = WAIT_SRC[(instr >> 5) & 0x3]
    index = instr & 0x1F
    return f"wait {polarity} {source} {index}"


def _in(instr):
    """Disassemble an IN: source in bits [7:5], bit count in bits [4:0]."""
    source = IN_SRC[(instr >> 5) & 0x7]
    # a bit count field of zero is reported as 32
    count = (instr & 0x1F) or 32
    return f"in {source} {count}"


def _out(instr):
    """Disassemble an OUT: destination in bits [7:5], bit count in bits [4:0]."""
    destination = OUT_DST[(instr >> 5) & 0x7]
    # a bit count field of zero is reported as 32
    count = (instr & 0x1F) or 32
    return f"out {destination} {count}"


def _pull(instr):
    res = "pull "
    if (instr >> 6) & 0b1:
        res += "ifempty "
    if (instr >> 5) & 0b1:
        res += "block"
    else:
        res += "noblock"
    return res


def _push(instr):
    res = "push "
    if (instr >> 6) & 0b1:
        res += "iffull "
    if (instr >> 5) & 0b1:
        res += "block"
    else:
        res += "noblock"
    return res


def _pushpull(instr):
    """Dispatch the shared push/pull opcode: bit [7] set means pull, clear means push."""
    decoder = _pull if instr & 0x80 else _push
    return decoder(instr)


def _mov(instr):
    """Disassemble a MOV: destination [7:5], operation [4:3], source [2:0]."""
    destination = MOV_DST[(instr >> 5) & 0x7]
    operation = MOV_OP[(instr >> 3) & 0x3]
    source = MOV_SRC[instr & 0x7]
    # the operation prefix ("", "!" or "::") is glued directly onto the source
    return f"mov {destination} {operation}{source}"


def _irq(instr):
    idx = instr & 0b11111
    wait = (instr >> 5) & 0b1
    clr = (instr >> 6) & 0b1

    # no support for rel
    assert not idx & 0x10

    res = "irq "
    if wait:
        res += "wait "
    if clr:
        res += "clear "
    res += f"{idx}"

    return res


def _set(instr):
    """Disassemble a SET: destination in bits [7:5], immediate value in bits [4:0]."""
    destination = SET_DST[(instr >> 5) & 0x7]
    value = instr & 0x1F
    return f"set {destination} {value}"


# Decoder functions indexed by the 3-bit opcode in instr[15:13]; the order
# matches the INSTRUCTIONS table.
interps = [_jmp, _wait, _in, _out, _pushpull, _mov, _irq, _set]


def _sideset(instr, sideset, sideset_en):
    dss = (instr >> 8) & 0b11111

    # defaults widths for sideset, delay
    s = 0
    d = 5

    if sideset > 0:
        s = sideset
        if sideset_en:
            s += 1
        d -= s

    # masks to parse dss bits
    d_mask = (1 << d) - 1

    delay = dss & d_mask

    # Parse sideset
    s_en = 0
    if sideset_en:
        s_en = (dss >> 4) & 0b1
        dss &= 0b01111
    else:
        if (dss >> d) > 0:
            s_en = 1

    sideset = dss >> d

    res = ""
    if s_en:
        res += "side " + str(sideset)

    if delay > 0:
        if len(res) > 0:
            res += " "
        res += "[" + str(delay) + "]"

    return res


def disasm(instr, sideset, sideset_en):
    """Disassemble one instruction word.

    Returns a 2-tuple: the instruction text chosen by the opcode in bits
    [15:13], and the delay/side-set text (possibly empty) for the given
    side-set configuration.
    """
    opcode = (instr >> 13) & 0b111
    decode = interps[opcode]
    return decode(instr), _sideset(instr, sideset, sideset_en)

This depends on the sideset configuration to properly decode the instructions, as the same bits are used as a delay if the sideset is not configured. In a µPython environment the value of sideset here is the length of the sideset_init tuple, e.g. 2 for sideset_init=(rp2.PIO.OUT_LOW, rp2.PIO.OUT_LOW). Essentially the program consists of a collection of lists which are indexed by the bit patterns, so we know that an instruction starting 010 is in; the sources etc. are defined in a similar manner.

Using this is a slightly different matter.

Debugging

The essence of debugging is tron i.e. trace on, tracing what instructions the computer is performing - in a proper debugger you can also inspect the registers etc. but that is work for another day (for a PIO it is perfectly possible but annoying work). The key to this debugger is that you never start the PIO in the main loop: we do that from some inline assembly for just long enough to tick one instruction. Since a memory write on Cortex M0+ takes two ticks the PIO clock divider needs to be 2.

from machine import mem32, Pin
import rp2

from disasm import disasm

# Base address of the PIO0 register block.
PIO0_BASE = 0x50200000
# State machine 0 program counter register (offset 0xD4 from the PIO0 base).
SM0_ADDR = PIO0_BASE | 0xD4
# State machine 0 current instruction register (offset 0xD8 from the PIO0 base).
SM0_INSTR = PIO0_BASE | 0xD8

# GPIO 0, 1 and 25 configured as outputs, for the state machine to drive.
pins = [Pin(j, Pin.OUT) for j in (0, 1, 25)]

# Inline Thumb assembly: store 0xF and then 0x0 to the word at the address in
# r0, back to back. The driver passes PIO0_BASE, i.e. the PIO CTRL register,
# so the state machines are enabled only for the gap between the two stores.
# NOTE(review): the low four CTRL bits being the per-SM enables is taken from
# the rp2040 datasheet, not from this file.
@micropython.asm_thumb
def advance(r0):
    mov(r1, 0xF)  # value for the first store
    mov(r2, 0x0)  # value for the second store
    str(r1, [r0, 0])  # first store: 0xF
    str(r2, [r0, 0])  # second store, immediately after: 0x0

# Trivial PIO test program with one set pin and two side-set pins: it steps
# the side-set value through 0..3, sets the set pin to 1, steps through 0..3
# again, sets the pin to 0 and jumps back to the start.
@rp2.asm_pio(set_init=rp2.PIO.OUT_LOW,
             sideset_init=(rp2.PIO.OUT_LOW, rp2.PIO.OUT_LOW))
def junk():
    label("start")
    # first pass over the side-set values
    nop().side(0)
    nop().side(1)
    nop().side(2)
    nop().side(3)
    set(pins, 1)  # no side-set on this instruction
    # second pass over the side-set values
    nop().side(0)
    nop().side(1)
    nop().side(2)
    nop().side(3)
    set(pins, 0)  # no side-set on this instruction
    jmp("start")

# Build state machine 0 around the test program but never start it from
# Python: it is only ever run for an instant at a time by advance().
# The set pin is pins[2] (GPIO 25); the side-set pins start at pins[0] (GPIO 0).
sm = rp2.StateMachine(0, junk, freq=62_500_000, set_base=pins[2], sideset_base=pins[0])

# reset all the clocks
# NOTE(review): 0xFF0 sets bits [11:4] of the register at PIO0_BASE; per the
# rp2040 datasheet those are the clock-divider and state-machine restart bits.
mem32[PIO0_BASE] = 0xFF0

print("Cycle   addr    assm")

# Trace 32 steps: read the current instruction and program counter,
# disassemble and print them, then advance the state machine.
for j in range(0x20):
    i = mem32[SM0_INSTR]
    a = mem32[SM0_ADDR]
    # two side-set pins, with the optional side-set enable bit in use
    instr, side = disasm(i, 2, True)
    line = instr
    if side:
        line += f" {side}"
    print(f"0x{j:04x} [0x{a:02x}] {line}")
    advance(PIO0_BASE)

This will step through the PIO program for 32 cycles, printing the cycle number, address and instruction being executed viz:

Cycle   addr    assm
0x0000 [0x15] mov y y side 0
0x0001 [0x16] mov y y side 1
0x0002 [0x17] mov y y side 2
0x0003 [0x18] mov y y side 3
0x0004 [0x19] set pins 1
0x0005 [0x1a] mov y y side 0
0x0006 [0x1b] mov y y side 1
0x0007 [0x1c] mov y y side 2
0x0008 [0x1d] mov y y side 3
0x0009 [0x1e] set pins 0
0x000a [0x1f] jmp 0x15
0x000b [0x15] mov y y side 0
0x000c [0x16] mov y y side 1
0x000d [0x17] mov y y side 2
0x000e [0x18] mov y y side 3
0x000f [0x19] set pins 1
0x0010 [0x1a] mov y y side 0
0x0011 [0x1b] mov y y side 1
0x0012 [0x1c] mov y y side 2
0x0013 [0x1d] mov y y side 3
0x0014 [0x1e] set pins 0
0x0015 [0x1f] jmp 0x15
0x0016 [0x15] mov y y side 0
0x0017 [0x16] mov y y side 1
0x0018 [0x17] mov y y side 2
0x0019 [0x18] mov y y side 3
0x001a [0x19] set pins 1
0x001b [0x1a] mov y y side 0
0x001c [0x1b] mov y y side 1
0x001d [0x1c] mov y y side 2
0x001e [0x1d] mov y y side 3
0x001f [0x1e] set pins 0

This should be helpful when debugging a less trivial PIO program…