; This is designed to be serial-line downloaded to cdcode.
;
; Our memory map:
;
;	[8c000000,8c010000)	Stack (r15 set by cdcode)
;	[8c010000,8c01????)	cdcode
;	[8c020000,8c02????)	Us

; This code is heavy on the magic numbers.  All the other BBA-driving
; code I've found is at least as bad. :-(

	.include "regs.s"
	.include "maple-bits.s"

; Layout within the gapspci bridge bounce buffer area.
	. = 0x01840000
rxbuff_bounce:
	.space 16384 + 16
txbuff_bounce:
	.space 2000

	. = 0x8c020000
	.sz any
	.pr any

; Entry point: the first thing in the image is a jump over the data
; segment to main.
	SETS.L #main,r0
	jmp @r0
	nop
	SETCONST

; Our "data segment".

MIN_ETHER_LEN = 32 ; I think 64 is the actual value, not sure
MAX_ETHER_LEN = 1600

	.align 4
rxcfg:	.space 4
top_sp:	.space 4		; main-level r15; `done' reloads r15 from here
ipkt:	.space 4+MAX_ETHER_LEN	; received packet: length word, then data
opkt_0:	.space 4+MAX_ETHER_LEN
opkt_1:	.space 4+MAX_ETHER_LEN
opkt_2:	.space 4+MAX_ETHER_LEN
opkt_3:	.space 4+MAX_ETHER_LEN
opkt_busy:
	.space 1
MAC:	.space 6		; printed by report_mac

; End of "data segment".

; Our "text segment".

; The only things startup.s sets up that cdcode hasn't already done for
; us are (1) fpscr and (2) clearing bss.  We don't have bss because we
; aren't linked by a conventional linker, and we don't use floating
; point so we don't care about fpscr.  We do, however, need to set up
; the VBR.  We also save r10-r14 against returning to cdcode.

	.align 2
main:
	; Save r14..r10; `done' pops them in the reverse order.
	mov.l r14,@-r15
	mov.l r13,@-r15
	mov.l r12,@-r15
	mov.l r11,@-r15
	mov.l r10,@-r15
	; The serial I/O routines address the SCIF registers relative
	; to gbr.
	; NOTE(review): this relies on cdcode handing us the SCIF base
	; in r14 - confirm.
	ldc r14,gbr
	; Clear FD, RB and BL in SR.
	stc sr,r1
	SETS.L #~[SR_FD|SR_RB|SR_BL],r2
	and r2,r1
	ldc r1,sr
	; Note that r0-r7 may have just changed if we switched banks.
	mov #0,r1
	lds r1,fpscr
	.sz 0
	.pr 0
	SETS.L #intvec,r0
	ldc r0,vbr

; Real code begins here.

	; Make sure all interrupts are blocked.
	stc sr,r0
	or #0xf0,r0
	ldc r0,sr

	; Record the main-level stack pointer before anything that can
	; branch to `fail'.  fail falls through into done, which reloads
	; r15 from top_sp before popping r10-r14; without this store a
	; setup failure would unwind through whatever top_sp happened to
	; contain.
	SETS.L #top_sp,r0
	mov.l r15,@r0

	; Set up the PCI bridge.
	bsr init_gapspci
	nop
	bt fail

	; Initialize the Ethernet itself.
; Continuation of main: bring up the Ethernet chip, then poll forever.
; Each "bt fail" branches to the failure path when the preceding call
; returns with T set.
	bsr init_ether
	nop
	bt fail
	bsr await_link
	nop
	bsr init_ether
	nop
	bt fail
	bsr report_mac
	nop
	bsr await_link
	nop
;	bsr enable_interrupts
;	nop
	; `done' reloads r15 from top_sp, so top_sp must hold the
	; main-level stack pointer before `done' can run.
	SETS.L #top_sp,r0
	mov.l r15,@r0
	bsr init_data
	nop
	; Main loop: alternate between polling for received packets and
	; polling the serial line.  check_serial leaves via `done' when a
	; character has arrived.
1:	bsr check_recv_packet
	nop
	bsr check_serial
	nop
	bra 1b
	nop

failmsg:
	.asciz "Setup failed"(13,10)
	.align 2

; fail: print the failure message, then fall through into done.
fail:
	SETS.L #failmsg,r1
	bsr putstr
	nop

; done: print CR LF, unwind the stack to the level recorded in top_sp,
; restore r10-r14 and return.
done:
	bsr putchar
	mov #13,r1
	bsr putchar
	mov #10,r1
	SETS.L #top_sp,r0
	mov.l @r0,r15
	mov.l @r15+,r10
	mov.l @r15+,r11
	; NOTE(review): pr is taken from the just-restored r11; main never
	; saves pr itself, so presumably cdcode keeps our return address
	; in r11 - confirm.
	lds r11,pr
	mov.l @r15+,r12
	mov.l @r15+,r13
	mov.l @r15+,r14
	rts
	nop

; report_mac: print the six bytes at MAC as two-digit hex values
; separated by colons, followed by CR LF.
; Uses r0-r4; pr is kept in r4 instead of on the stack (printhexN
; saves and restores r2-r4, putchar touches only r0).
report_mac:
	sts pr,r4
	SETS.L #5,r3		; five bytes get a trailing ':'
	SETS.L #MAC,r2
2:	mov.b @r2+,r1
	bsr printhexN
	mov #2,r0		; (delay slot) two hex digits
	bsr putchar
	mov #':,r1
1:	dt r3
	bf 2b
	; Sixth byte: no colon after it.
	mov.b @r2+,r1
	bsr printhexN
	mov #2,r0
	bsr putchar
	mov #13,r1
	; Tail call: putchar's rts returns straight to our caller.
	lds r4,pr
	bra putchar
	mov #10,r1

g2_lock:
	; This is conceptually what NetBSD and libronin call G2_LOCK().
	; It returns a cookie in r0 which needs to be passed to
	; g2_unlock.
	;
	; This saves and restores all other registers it uses.
	;
	mov.l r1,@-r15
	stc sr,r0
	mov.l r0,@-r15
	; NOTE(review): the next line is truncated - the rest of this
	; .if expression, the remainder of g2_lock and the start of the
	; following routine(s) appear to be missing from this copy of
	; the file.  Restore from a clean copy.
	.if @IS_UB[SR_IMASK_MASK<,r1

	; NOTE(review): from here on this is evidently the interior of the
	; receive-polling routine (presumably check_recv_packet); its
	; entry label, the push of pr that matches the "lds.l @r15+,pr"
	; at its exit, and the local label 4: it loops back to are not
	; visible here.
	bsr putchar
	mov #'.,r1
	mov.l @r15+,r0
	sub r3,r0
	and r2,r0
	mov r0,r4		; r4 = (popped value - r3) & r2
	; Debug trace: "[" r3 "," r4 "]."
	bsr putchar
	mov #'[,r1
	bsr printdec
	mov r3,r1
	bsr putchar
	mov #',,r1
	bsr printdec
	mov r4,r1
	bsr putchar
	mov #'],r1
	bsr putchar
	mov #'.,r1
	; In the code below:
	;	r0 = scratch
	;	r1 = scratch
	;	r2 = buffersize mask (16K-1), already set up
	;	r3 = index into ring buffer, already set up
	;	r4 = max bytes to read, already set up
	;	r5 = base of bounce buffer
	;	r6 = 0xfff0 "unfinished packet" length value
	;	r7 = scratch / ipkt pointer
	;	r8 = length/status word
	SETS.L #rxbuff_bounce|P2_BITS,r5
	SETS.L #0xfff0,r6
	; Fetch the length/status word at the current ring index and
	; trace it in hex.
	mov r5,r0
	mov.l @(r0,r3),r8
	bsr printhex8
	mov r8,r1
	bsr putchar2
	mov #'.,r1
	; r1 = length field (upper 16 bits of r8).
	mov r8,r1
	SHLR #16,r1/r7
	cmp/eq r1,r6
	; Each check below has a silent form (.if 0) and a live form that
	; traces a letter before taking the same branch, preserving r1
	; across the trace.
	.if 0
	bt 1f ; if unfinished packet
	.else
	bf 7f
	mov.l r1,@-r15
	bsr putchar2
	mov #'U,r1
	bra 1f
	mov.l @r15+,r1
7:
	.endif
	add #4+3,r1 ; 4 for len/stat, 3 for roundup
	shlr2 r1
	shll2 r1
	; r1 = bytes this entry occupies in the ring, rounded up to a
	; longword.
	cmp/hi r4,r1
	.if 0
	bt 1f ; if overruns available space
	.else
	bf 7f
	mov.l r1,@-r15
	; Trace "O" r1 ">" r4 "..".
	bsr putchar
	mov #'O,r1
	bsr printdec
	mov.l @r15,r1
	bsr putchar
	mov #'>,r1
	bsr printdec
	mov r4,r1
	bsr putchar2
	mov #'.,r1
	bra 1f
	mov.l @r15+,r1
7:
	.endif
	SETS.L #BBA_RTK_RXSTAT_RXOK,r0
	tst r0,r8		; T = 1 when the RXOK bit is clear
	.if 0
	bt 2f ; if RXOK not set
	.else
	bf 7f
	mov.l r1,@-r15
	bsr putchar2
	mov #'N,r1
	bra 2f
	mov.l @r15+,r1
7:
	.endif
	; Prepare to copy it to ibuf.
	add #4,r3
	and r2,r3
	add #-4,r4
	add #-4,r1
	shlr2 r1		; r1 = longwords of payload
	SETS.L #MIN_ETHER_LEN>>2,r0
	cmp/hi r1,r0
	.if 0
	bt 1f ; if too short
	.else
	bf 7f
	mov.l r1,@-r15
	mov.l r0,@-r15
	; Trace "S" minimum ">" actual "..", both in longwords.
	bsr putchar
	mov #'S,r1
	bsr printdec
	mov.l @r15+,r1
	bsr putchar
	mov #'>,r1
	bsr printdec
	mov.l @r15,r1
	bsr putchar2
	mov #'.,r1
	bra 1f
	mov.l @r15+,r1
7:
	.endif
	SETS.L #MAX_ETHER_LEN>>2,r0
	cmp/hi r0,r1
	.if 0
	bt 1f ; if too long
	.else
	bf 7f
	mov.l r1,@-r15
	mov.l r0,@-r15
	; Trace "L" maximum "<" actual "..", both in longwords.
	bsr putchar
	mov #'L,r1
	bsr printdec
	mov.l @r15+,r1
	bsr putchar
	mov #'<,r1
	bsr printdec
	mov.l @r15,r1
	bsr putchar2
	mov #'.,r1
	bra 1f
	mov.l @r15+,r1
7:
	.endif
	; Trace "CC" (copying), preserving r1.
	mov.l r1,@-r15
	bsr putchar2
	mov #'C,r1
	mov.l @r15+,r1
	; Now, r1 has longs-to-copy; r3/r4 have been updated to skip
	; the header word.
	; First longword of ipkt receives the length field of r8.
	SETS.L #ipkt,r7
	mov r8,r0
	SHLR #16,r0
	mov.l r0,@r7
	; Copy r1 longwords from the ring (wrapping r3 with the mask in
	; r2) to ipkt+4 onward; r4 drops by 4 per longword in the
	; bf/s delay slot.
3:	mov r5,r0
	add #4,r7
	mov.l @(r0,r3),r0
	mov.l r0,@r7
	add #4,r3
	and r2,r3
	dt r1
	bf/s 3b
	add #-4,r4
	; Packet copied to ibuf.  Update the chip's registers, process
	; the packet, and go back to see if we (then) have another
	; packet.
; Tail of the receive-polling routine.  The packet has been copied to
; ipkt: trace "..", publish the new read position (8f), trace "PP",
; process the packet, trace "..", then loop back to 4: (defined earlier
; in the routine) to look for another one.
	bsr putchar2
	mov #'.,r1
	bsr 8f
	nop
	bsr putchar2
	mov #'P,r1
	bsr process_packet
	nop
	bsr putchar2
	mov #'.,r1
	bra 4b
	nop

	; 1: nothing (more) to take from the ring - publish the read
	; position and return to the caller.
1:	bsr 8f
	nop
9:	lds.l @r15+,pr
;	bra putchar
;	mov #'z,r1
	rts
	nop

	; 2: the entry at the current ring index does not have RXOK set;
	; step over it without copying.
	;
	; The length must be derived from r8 *after* the debug trace:
	; putchar (and therefore putchar2) overwrites r0 on every call, so
	; a length placed in r0 before the trace would be gone by the time
	; it is used.  "mov r8,r0 / SHLR #16,r0" is the same idiom the
	; copy path uses to extract the length field.
2:	bsr putchar2
	mov #'S,r1
	mov r8,r0
	SHLR #16,r0
	add #4+3,r0 ; 4 for len/stat, 3 for roundup
	shlr2 r0
	shll2 r0
	add r0,r3		; advance the ring index past the entry...
	and r2,r3		; ...wrapping at the buffer size
	sub r0,r4		; that many fewer bytes are available
	bsr putchar2
	mov #'.,r1
	bsr 8f
	nop
	bra 4b
	nop

	; 8: local subroutine.  Subtracts 16 from r3 (r3 itself is
	; modified), masks a copy into r0 and tail-calls wr_w with
	; r1 = BBA_RTK_CUR_PKT_READ.
	; NOTE(review): wr_w is not visible here; presumably it writes the
	; 16-bit value in r0 to the chip register selected by r1.
8:	add #-16,r3
	mov r3,r0
	and r2,r0
	bra wr_w
	mov #BBA_RTK_CUR_PKT_READ,r1

; process_packet: dump the packet held in ipkt to the serial line.
; Prints the length word (first longword of ipkt) in decimal, a colon,
; then that many bytes as " xx" in hex, then CR LF.
; Uses r0-r3.  The byte loop is dt/bf, so the length must be nonzero.
process_packet:
	sts.l pr,@-r15
	SETS.L #ipkt,r2
	mov.l @r2+,r3		; r3 = byte count, r2 -> first data byte
	bsr printdec
	mov r3,r1
	bsr putchar
	mov #':,r1
1:	bsr putchar
	mov #' ,r1
	mov.b @r2+,r1
	bsr printhexN
	mov #2,r0		; (delay slot) two hex digits
	dt r3
	bf 1b
	bsr putchar
	mov #13,r1
	; Tail call: putchar's rts returns to our caller.
	lds.l @r15+,pr
	bra putchar
	mov #10,r1

; check_serial: poll the serial line.  If a character has arrived
; (nbgetchar returned a non-negative value) leave the program via
; `done'; otherwise return.  The pr pushed here is abandoned on the
; `done' path, which reloads r15 from top_sp.
check_serial:
	sts.l pr,@-r15
	bsr nbgetchar
	nop
	cmp/pz r0
	bf 1f
	bra done
	nop
1:	lds.l @r15+,pr
	rts
	nop

; Input number is in r1.  Trashes r1 and r0; preserves others.
; Prints r1 as an unsigned decimal number.  Digits are produced least
; significant first and pushed on the stack above a -1 sentinel, then
; popped and printed.
printdec:
	mov.l r2,@-r15
	mov.l r3,@-r15
	mov.l r4,@-r15
	sts.l pr,@-r15
	mov #-1,r0
	mov.l r0,@-r15		; sentinel marking the end of the digits
	SETS.L #10,r0
	; Dividing by 10 is a pain.  But multiplying by 0xcccccccd and
	; then shifting right by 35 bits produces the same result and
	; is much less of a pain - and is lots faster, too.
; printdec, continued.  On entry to this part r0 = 10, r1 = the number,
; and a -1 sentinel is on top of the stack.
	SETS.L #0xcccccccd,r2
	; Peel digits off until the remaining value is below 10.
1:	cmp/hi r1,r0		; T = (10 > r1)
	bt 1f
	dmulu.l r2,r1
	sts mach,r3		; high word = product >> 32
	SHLR #3,r3 ; r3 is now floor(r1/10)
	dmulu.l r3,r0
	sts macl,r4		; r4 = 10 * quotient
	sub r4,r1		; r1 = remainder (this digit)
	mov.l r1,@-r15
	bra 1b
	mov r3,r1		; (delay slot) continue with the quotient
	; r1 is the leading digit.  Print it, then pop and print the
	; pushed digits until the negative sentinel comes off the stack.
1:	bsr putchar
	add #'0,r1
	mov.l @r15+,r1
	cmp/pz r1
	bt 1b
	lds.l @r15+,pr
	mov.l @r15+,r4
	mov.l @r15+,r3
	rts
	mov.l @r15+,r2

; printhex8: print r1 as eight hex digits.
; printhexN: print the low r0 hex digits of r1, most significant first.
; Both trash r0 and r1; r2-r4 and pr are saved and restored.
printhex8:
	mov #8,r0
printhexN:
	mov.l r4,@-r15
	mov r0,r4		; r4 = digit count
	; Shift r1 left by 4*(8-N) bits so the first digit to print sits
	; in the top nibble.
	add #-8,r0
	neg r0,r0
	SHLL #2,r0
	shld r0,r1
	mov.l r3,@-r15
	mov.l r2,@-r15
	sts.l pr,@-r15
	mova 9f,r0
	mov r0,r3		; r3 -> digit table
	mov r1,r2
1:	mov r2,r0
	SHLR #28,r0		; r0 = top nibble
	SHLL #4,r2		; bring the next nibble to the top
	add r3,r0
	bsr putchar
	mov.b @r0,r1		; (delay slot) fetch the digit character
	dt r4
	bf 1b
	lds.l @r15+,pr
	mov.l @r15+,r2
	mov.l @r15+,r3
	rts
	mov.l @r15+,r4
	.align 4
9:	.ascii "0123456789abcdef"
	.align 2

; putchar2: send the character in r1 twice - once via an explicit call
; to putchar, and once more by falling through into putchar.
; Trashes r0; r1 is preserved across the first call.
putchar2:
	sts.l pr,@-r15
	bsr putchar
	mov.l r1,@-r15
	mov.l @r15+,r1
	lds.l @r15+,pr
; putchar: send the character in r1 to the serial port, whose registers
; are addressed relative to gbr.  Trashes r0.
putchar:
	; Wait while the TX count field of SCFDR2 reads 16.
1:	mov.w @(SCFDR2-SCIF_BASE,gbr),r0
	SHXR #SCFDR2_TX_SHIFT,r0
	and #SCFDR2_TX_MASK,r0
	cmp/eq #16,r0
	bt 1b
	mov r1,r0
	mov.b r0,@(SCFTDR2-SCIF_BASE,gbr)
	; Wait for the TX count to drop to zero before returning.
1:	mov.w @(SCFDR2-SCIF_BASE,gbr),r0
	SHXR #SCFDR2_TX_SHIFT,r0
	tst #SCFDR2_TX_MASK,r0
	bf 1b
	rts
	nop

; putstr: send the NUL-terminated string at r1 to the serial port.
; Trashes r0; r1 is left pointing just past the terminating NUL.
putstr:
1:	mov.w @(SCFDR2-SCIF_BASE,gbr),r0
	SHXR #SCFDR2_TX_SHIFT,r0
	and #SCFDR2_TX_MASK,r0
	cmp/eq #16,r0
	bt 1b
	mov.b @r1+,r0
	tst r0,r0
	bt 1f
	bra 1b
	mov.b r0,@(SCFTDR2-SCIF_BASE,gbr)	; (delay slot) queue the byte
1:	; don't bother waiting for drain here; we do a putchar call,
	; which will drain everything, after all putstr calls and
	; before anything for which it matters.
	rts
	nop

; nbgetchar: non-blocking serial read.  Returns the received byte
; (zero-extended) in r0, or -1 in r0 if the RX count field of SCFDR2 is
; zero.  Trashes r1.
nbgetchar:
	mov.w @(SCFDR2-SCIF_BASE,gbr),r0
	SHXR #SCFDR2_RX_SHIFT,r0/r1
	tst #SCFDR2_RX_MASK,r0
	bt 1f
	mov.b @(SCFRDR2-SCIF_BASE,gbr),r0
	extu.b r0,r1
	; Read SCLSR2, then write zero back to it.
	; NOTE(review): presumably this clears a latched receive-overrun
	; flag - confirm against the SCIF documentation.
	mov.w @(SCLSR2-SCIF_BASE,gbr),r0
	mov #0,r0
	mov.w r0,@(SCLSR2-SCIF_BASE,gbr)
	rts
	mov r1,r0
1:	rts
	mov #-1,r0

	SETCONST

	; Not sure we actually need to align the VBR; the only reason I
	; have to suspect we might is that it's the kind of thing I've
	; seen relatively often before - interrupt/trap vector tables
	; often need to be aligned, not infrequently to a remarkably
	; strict boundary.  I see no indication in the manuals that
	; the SH requires _any_ alignment, but it's easy to do and
	; definitely won't hurt anything.  (No explicit indication,
	; that is.  It is implicit in the execution of code at
	; VBR+0x100, VBR+0x400, and VBR+0x600 that VBR must be even.)
	.align 0x10000

; Exception handling consists of:
; - Save PC and SR in SPC and SSR
; - Set SR bit BL to 1 (block exceptions/interrupts)
; - Set SR bit MD to 1 (privileged mode)
; - Set SR bit RB to 1 (r0-r7 bank 1)
; - Write code to EXPEVT or INTEVT
; - Set PC to vector addr, resume execution

intvec = .

; Each of the three vector entries does the same thing: r2 = the vector
; offset, r3 = contents of EXPEVT, r4 = contents of INTEVT (loaded in
; the jmp delay slot), then jump to regdump.

	. = intvec + 0x100
	SETS.L #0x100,r2
	SETS.L #EXPEVT,r0
	mov.l @r0,r3
	SETS.L #INTEVT,r0
	SETS.L #regdump,r1
	jmp @r1
	mov.l @r0,r4
	SETCONST

	. = intvec + 0x400
	SETS.L #0x400,r2
	SETS.L #EXPEVT,r0
	mov.l @r0,r3
	SETS.L #INTEVT,r0
	SETS.L #regdump,r1
	jmp @r1
	mov.l @r0,r4
	SETCONST

	. = intvec + 0x600
	SETS.L #0x600,r2
	SETS.L #EXPEVT,r0
	mov.l @r0,r3
	SETS.L #INTEVT,r0
	SETS.L #regdump,r1
	jmp @r1
	mov.l @r0,r4
	SETCONST

	. = intvec + 0x1000

; Labels for the register dump, in the order regdump prints them.
crash_msg_0:
	.asciz (13,10,10)"FATAL TRAP"(13,10)"R0 "
crash_msg_1:
	.asciz " R1 "
crash_msg_2:
	.asciz " R2 "
crash_msg_3:
	.asciz " R3 "
crash_msg_4:
	.asciz (13,10)"R4 "
crash_msg_5:
	.asciz " R5 "
crash_msg_6:
	.asciz " R6 "
crash_msg_7:
	.asciz " R7 "
crash_msg_8:
	.asciz (13,10)"R8 "
crash_msg_9:
	.asciz " R9 "
crash_msg_10:
	.asciz " R10 "
crash_msg_11:
	.asciz " R11 "
crash_msg_12:
	.asciz (13,10)"R12 "
crash_msg_13:
	.asciz " R13 "
crash_msg_14:
	.asciz " R14 "
crash_msg_15:
	.asciz " R15 "
crash_msg_gbr:
	.asciz (13,10)"GBR "
crash_msg_sr:
	.asciz " SR "
crash_msg_pc:
	.asciz " PC "
crash_msg_mach:
	.asciz (13,10)"MACH"
crash_msg_macl:
	.asciz " MACL"
crash_msg_pr:
	.asciz " PR "
crash_msg_vec:
	.asciz (13,10)"vector"
crash_msg_expevt:
	.asciz " EXPEVT"
crash_msg_intevt:
	.asciz " INTEVT"
crash_msg_done:
	.asciz (13,10)
crash_msg_equal:
	.asciz " = "

	.align 4
; Zero-terminated table of label pointers, walked by regdump.
crash_msgs:
	.long crash_msg_0
	.long crash_msg_1
	.long crash_msg_2
	.long crash_msg_3
	.long crash_msg_4
	.long crash_msg_5
	.long crash_msg_6
; crash_msgs, continued: entries from R7 onward, ending with a zero
; terminator.
	.long crash_msg_7
	.long crash_msg_8
	.long crash_msg_9
	.long crash_msg_10
	.long crash_msg_11
	.long crash_msg_12
	.long crash_msg_13
	.long crash_msg_14
	.long crash_msg_15
	.long crash_msg_gbr
	.long crash_msg_sr
	.long crash_msg_pc
	.long crash_msg_mach
	.long crash_msg_macl
	.long crash_msg_pr
	.long crash_msg_vec
	.long crash_msg_expevt
	.long crash_msg_intevt
	.long 0

	.align 2
; regdump: common fatal-trap handler, entered from the three vector
; stubs with
;	r2 = vector offset (0x100, 0x400 or 0x600)
;	r3 = contents of EXPEVT
;	r4 = contents of INTEVT
; Switches to a private stack, pushes the trapped context in the
; reverse of crash_msgs order, prints one "label = value" item per
; crash_msgs entry, and finally jumps to the reset vector.  Never
; returns.
regdump:
	mov r15,r5		; keep the trapped r15 for the dump
	SETS.L #intstacktop,r15
	; Push in reverse print order: the last item printed (INTEVT)
	; goes on first.
	mov.l r4,@-r15
	mov.l r3,@-r15
	mov.l r2,@-r15
	sts.l pr,@-r15
	sts.l macl,@-r15
	sts.l mach,@-r15
	stc.l spc,@-r15		; printed as PC
	stc.l ssr,@-r15		; printed as SR
	stc.l gbr,@-r15
	mov.l r5,@-r15		; printed as R15
	mov.l r14,@-r15
	mov.l r13,@-r15
	mov.l r12,@-r15
	mov.l r11,@-r15
	mov.l r10,@-r15
	mov.l r9,@-r15
	mov.l r8,@-r15
	; r0-r7 of the interrupted code are in the other register bank.
	stc.l r7_bank,@-r15
	stc.l r6_bank,@-r15
	stc.l r5_bank,@-r15
	stc.l r4_bank,@-r15
	stc.l r3_bank,@-r15
	stc.l r2_bank,@-r15
	stc.l r1_bank,@-r15
	stc.l r0_bank,@-r15
	; putstr, putchar and the printhex routines reach the SCIF
	; registers via gbr-relative addressing, so point gbr at the SCIF
	; ourselves rather than trusting whatever the trapped code left
	; there.  The trapped gbr value is already on the stack above, so
	; the dump still shows it.
	SETS.L #SCIF_BASE,r14
	ldc r14,gbr
	SETS.L #crash_msgs,r9
	SETS.L #putstr,r8
	SETS.L #printhex8,r7
	SETS.L #putchar,r6
	; For each table entry: print the label, " = ", then the next
	; saved value popped off the stack.
1:	mov.l @r9+,r1
	tst r1,r1
	bt 1f			; zero entry ends the table
	jsr @r8
	nop
	SETS.L #crash_msg_equal,r1
	jsr @r8
	nop
	jsr @r7
	mov.l @r15+,r1		; (delay slot) next saved value
	bra 1b
	nop
1:	SETS.L #crash_msg_done,r1
	jsr @r8
	nop
	; One putchar call so that everything queued by putstr has
	; drained before we reset (putchar waits for the transmit count
	; to reach zero).
	jsr @r6
	mov #0,r1
	SETS.L #0xa0000000,r0 ; hard-reset vector
	jmp @r0
	nop
	SETCONST

; Stack used by regdump; grows down from intstacktop.
	.align 4
	.space 0x1000
intstacktop = .