; Payload meant to be downloaded over the serial line to cdcode and
; started from there.
;
; Memory map:
;
;   [8c000000,8c010000)  Stack (r15 set by cdcode)
;   [8c010000,8c01????)  cdcode
;   [8c020000,8c0?????)  Us

TRIG_TABLE_SIZE = 2048                  ; must be a power of two

        .include "regs.s"

        . = 0x8c020000

; Image entry: hop over the data that follows and continue at main.
        SETS.L  #main,r0
        jmp     @r0
        nop                             ; (delay slot)
        SETCONST

; 2*pi as an IEEE double, sign/exponent word first.
        .align  8
twopi:
        .long   0x401921fb              ; s=0, exp=1023+2, mant=(1.)921fbxxxxxxxx
        .long   0x54442d18              ; mant=(1.)xxxxx54442d18(469898cc...)

; TRIG_TABLE_SIZE single-precision entries, filled in by init_trig.
        .align  4
sincos_table:
        .space  TRIG_TABLE_SIZE*4

;-----------------------------------------------------------------------
; main
;   Pushes r10-r14, copies r14 into GBR (the serial routines below
;   address the SCIF registers relative to GBR), clears SR.FD, SR.RB
;   and SR.BL, zeroes FPSCR, installs intvec as VBR, runs init_trig,
;   emits CR LF, sets SR.BL again and returns.
;
;   The return address is taken from r11 after r11 has been popped;
;   PR itself is never saved here.  Presumably that is cdcode's calling
;   convention -- confirm against cdcode.
;-----------------------------------------------------------------------
        .align  2
main:
        mov.l   r14,@-r15               ; push r14..r10
        mov.l   r13,@-r15
        mov.l   r12,@-r15
        mov.l   r11,@-r15
        mov.l   r10,@-r15

        ldc     r14,gbr                 ; GBR = r14 as handed to us

        stc     sr,r1                   ; SR &= ~(FD|RB|BL)
        SETS.L  #~[SR_FD|SR_RB|SR_BL],r2
        and     r2,r1
        ldc     r1,sr
        ; If that write switched register banks, r0-r7 are now
        ; different registers; nothing below relies on their old values.

        mov     #0,r1                   ; FPSCR = 0
        lds     r1,fpscr

        SETS.L  #intvec,r0              ; traps now go to our vectors
        ldc     r0,vbr

        ; The actual work.
        bsr     init_trig
        nop                             ; (delay slot)
        bsr     putchar                 ; CR
        mov     #13,r1                  ; (delay slot)
        bsr     putchar                 ; LF
        mov     #10,r1                  ; (delay slot)

        ; cdcode gets SR.BL set (again) before we hand control back.
        stc     sr,r1
        SETS.L  #SR_BL,r2
        or      r2,r1
        ldc     r1,sr

        mov.l   @r15+,r10               ; pop r10..r13
        mov.l   @r15+,r11
        mov.l   @r15+,r12
        mov.l   @r15+,r13
        lds     r11,pr                  ; return address comes from r11
        rts
        mov.l   @r15+,r14               ; (delay slot) pop r14

;-----------------------------------------------------------------------
; init_trig
;   Fills sincos_table with sin(k * 2pi / TRIG_TABLE_SIZE) for
;   k = 0 .. TRIG_TABLE_SIZE-1, then prints a comparison of every entry
;   against the fsca instruction (see the dump loop further down).
;
;   Each sine is obtained by summing its power series until adding the
;   next term no longer changes the sum.  The sum is carried in double
;   precision and only converted to single for the table; the extra
;   precision is just there to ensure single-float accuracy.
;
;   Where the series comes from:
;
;       e^x = 1 + x + x^2/2! + x^3/3! + x^4/4! + x^5/5! + ...
;               (which can be deduced from d/dx e^x = e^x)
;
;       e^(ia) = cos(a) + i sin(a)      (Euler's formula)
;
;   Substituting x = ia and separating real and imaginary parts:
;
;       cos(a) = 1 - a^2/2! + a^4/4! - a^6/6! + a^8/8! - a^10/10! + ...
;       sin(a) = a - a^3/3! + a^5/5! - a^7/7! + a^9/9! - a^11/11! + ...
;
;   Only the sin(a) series is evaluated here.
;
;   Pseudocode:
;       for i = TRIG_TABLE_SIZE .. 1
;           x = ((i-1) * 2pi) / TRIG_TABLE_SIZE
;           x2 = - x * x
;           s = x
;           p = x
;           n = 2
;           do
;               prevs = s
;               p = (p * x2) / (n * (n+1))
;               s += p
;               n += 2
;           while s != prevs
;           table[i-1] is in s
;
;   x is never kept in the inner loop, because it is dead once x2, s
;   and p have been set up; its value is developed directly in p.
;
;   Running with PR=1 leaves 8 floating-point registers:
;       dr0     scratch
;       dr2     p
;       dr4     s
;       dr6     x2
;       dr8     1
;       dr10    n
;       dr12    2pi/TRIG_TABLE_SIZE
;       dr14    prevs
;   Integer registers:
;       r0      scratch
;       r1      scratch
;       r2      i
;       r3      ptr to table[i]
;
;   On exit FPSCR is 0.  r0-r4 and the floating-point registers used
;   above (plus fr14/fr15 in the dump) are left modified.
;-----------------------------------------------------------------------
init_trig:
        SETS.L  #FPSCR_SZ|FPSCR_PR,r0   ; FPSCR = SZ|PR for the table build
        lds     r0,fpscr
        .pr     1
        .sz     1

        SETS.L  #twopi,r0               ; dr12 = 2pi ...
        fmov    @r0,dr12
        SETS.L  #TRIG_TABLE_SIZE,r0
        lds     r0,fpul
        float   fpul,dr0
        fdiv    dr0,dr12                ; ... / TRIG_TABLE_SIZE

        ; fldi1 only exists for single precision, so build 1.0 by hand.
        SETS.L  #1,r2                   ; dr8 = 1
        lds     r2,fpul
        float   fpul,dr8

        SETS.L  #TRIG_TABLE_SIZE,r2     ; i = TRIG_TABLE_SIZE
        SETS.L  #[sincos_table+[4*TRIG_TABLE_SIZE]],r3  ; one past the last entry

trig_angle_loop:
        mov     r2,r0                   ; p = (i-1) * (2pi / TRIG_TABLE_SIZE)
        add     #-1,r0
        lds     r0,fpul
        float   fpul,dr2
        fmul    dr12,dr2
        fmov    dr2,dr6                 ; x2 = - p * p
        fmul    dr2,dr6
        fneg    dr6
        fmov    dr2,dr4                 ; s = p
        SETS.L  #2,r0                   ; n = 2
        lds     r0,fpul
        float   fpul,dr10

trig_term_loop:
        fmov    dr4,dr14                ; prevs = s
        fmul    dr6,dr2                 ; p *= x2
        fmov    dr10,dr0                ; dr0 = n
        fadd    dr8,dr10                ; n -> n+1
        fmul    dr10,dr0                ; dr0 = n * (n+1)
        fadd    dr8,dr10                ; n -> n+2, ready for the next term
        fdiv    dr0,dr2                 ; p /= n * (n+1)
        fadd    dr2,dr4                 ; s += p
        fcmp/eq dr4,dr14                ; did the sum stop changing?
        bf      trig_term_loop

        fcnvds  dr4,fpul                ; *--r3 = s, narrowed to single
        sts.l   fpul,@-r3
        dt      r2                      ; next lower i, until it reaches 0
        bf      trig_angle_loop

; Table dump.  For every entry, in table order, one line is printed:
;
;     <table value> <fsca value> <fsca - table> <table bits XOR fsca bits>
;
; the last column as 8 hex digits.  fr14 = 0.0 and fr15 = 10.0 are the
; constants print_float expects.  r2 counts entries down, r3 walks the
; table, r4 is the entry index.
trig_dump:
        SETS.L  #0,r0                   ; FPSCR = 0 for the dump
        lds     r0,fpscr
        .pr     0
        .sz     0
        sts.l   pr,@-r15                ; calls follow from here on
        SETS.L  #TRIG_TABLE_SIZE,r2
        SETS.L  #sincos_table,r3
        SETS.L  #0,r4
        SETS.L  #10,r0                  ; fr15 = 10.0
        lds     r0,fpul
        float   fpul,fr15
        fldi0   fr14                    ; fr14 = 0.0

trig_dump_loop:
        fmov.s  @r3+,fr2                ; fr2 = table entry
        mov     r4,r0
        SHLL    #16-11,r0,r1            ; index in 2K steps -> angle in 64K steps
        lds     r0,fpul
        fsca    fpul,fr4                ; fr4 = fsca's value for that angle

        bsr     print_float             ; column 1: table value
        fmov    fr2,fr0                 ; (delay slot)
        bsr     putchar
        mov     #' ,r1                  ; (delay slot)

        bsr     print_float             ; column 2: fsca value
        fmov    fr4,fr0                 ; (delay slot)
        bsr     putchar
        mov     #' ,r1                  ; (delay slot)

        fmov    fr4,fr0                 ; column 3: fsca - table
        bsr     print_float
        fsub    fr2,fr0                 ; (delay slot)
        bsr     putchar
        mov     #' ,r1                  ; (delay slot)

        flds    fr2,fpul                ; column 4: XOR of the raw bits
        sts     fpul,r1
        flds    fr4,fpul
        sts     fpul,r0
        bsr     printhex8
        xor     r0,r1                   ; (delay slot)

        bsr     putchar                 ; CR
        mov     #13,r1                  ; (delay slot)
        bsr     putchar                 ; LF
        mov     #10,r1                  ; (delay slot)

        dt      r2
        bf/s    trig_dump_loop
        add     #1,r4                   ; (delay slot) next index

        lds.l   @r15+,pr
        rts
        nop                             ; (delay slot)

;-----------------------------------------------------------------------
; print_float
;   In:    fr0  = value to print
;          fr14 = 0.0, fr15 = 10.0 (set up by the caller)
;   Out:   text on the serial port: '-' if fr0 < 0, exactly one
;          integer digit, '.', then fraction digits until the
;          remaining fraction compares equal to fr14 (at least one
;          fraction digit is always produced).
;   Clobb: r0 (inside putchar), r1, fr0, fr1, fpul
;   Note:  only one integer digit is emitted, so this suits values
;          whose magnitude is below 10 (as the callers above pass).
;-----------------------------------------------------------------------
print_float:
        sts.l   pr,@-r15
        fcmp/gt fr0,fr14                ; T = (fr14 > fr0), i.e. negative
        bf      pf_magnitude
        bsr     putchar
        mov     #'-,r1                  ; (delay slot)
        fneg    fr0                     ; carry on with |value|
pf_magnitude:
        ftrc    fr0,fpul                ; integer part
        float   fpul,fr1
        fsub    fr1,fr0                 ; fr0 = fraction
        sts     fpul,r1
        bsr     putchar
        add     #'0,r1                  ; (delay slot) digit -> character
        bsr     putchar
        mov     #'.,r1                  ; (delay slot)
pf_fraction_loop:
        fmul    fr15,fr0                ; shift the next decimal digit up
        ftrc    fr0,fpul
        float   fpul,fr1
        sts     fpul,r1
        bsr     putchar
        add     #'0,r1                  ; (delay slot)
        fsub    fr1,fr0                 ; drop the digit just printed
        fcmp/eq fr0,fr14                ; stop once nothing is left
        bf      pf_fraction_loop
        lds.l   @r15+,pr
        rts
        nop                             ; (delay slot)

        SETCONST

;-----------------------------------------------------------------------
; printhex8 / printhexN
;   printhex8: print r1 as 8 hex digits.
;   printhexN: print the low r0 hex digits of r1, most significant
;              first.
;   In:    r1 = value (printhexN: r0 = digit count)
;   Clobb: r0, r1; r2-r4 and PR are saved and restored.
;-----------------------------------------------------------------------
printhex8:
        mov     #8,r0
printhexN:
        mov.l   r4,@-r15
        mov     r0,r4                   ; r4 = digits still to print
        add     #-8,r0                  ; r0 = (8 - N) * 4 ...
        neg     r0,r0
        SHLL    #2,r0
        shld    r0,r1                   ; ... left-justify the wanted digits
        mov.l   r3,@-r15
        mov.l   r2,@-r15
        sts.l   pr,@-r15
        mova    hex_digits,r0
        mov     r0,r3                   ; r3 = digit character table
        mov     r1,r2                   ; r2 = remaining bits
hex_digit_loop:
        mov     r2,r0
        SHLR    #28,r0,r1               ; r0 = top nibble
        SHLL    #4,r2                   ; bring up the next nibble
        add     r3,r0                   ; r0 = &hex_digits[nibble]
        bsr     putchar
        mov.b   @r0,r1                  ; (delay slot) fetch its character
        dt      r4
        bf      hex_digit_loop
        lds.l   @r15+,pr
        mov.l   @r15+,r2
        mov.l   @r15+,r3
        rts
        mov.l   @r15+,r4                ; (delay slot)

        .align  4
hex_digits:
        .ascii  "0123456789abcdef"

;-----------------------------------------------------------------------
; putchar2 / putchar
;   putchar:  wait until the transmit count field of SCFDR2 is not 16,
;             write the byte in r1 to SCFTDR2, then wait until that
;             count field reads zero.
;   putchar2: calls putchar and then falls into it, so the character
;             in r1 is sent twice.
;   In:    r1 = character;  GBR = SCIF_BASE
;   Clobb: r0
;-----------------------------------------------------------------------
        .align  2
putchar2:
        sts.l   pr,@-r15
        bsr     putchar                 ; first copy
        mov.l   r1,@-r15                ; (delay slot) keep the character
        mov.l   @r15+,r1
        lds.l   @r15+,pr
        ; fall through for the second copy
putchar:
putchar_wait_room:
        mov.w   @(SCFDR2-SCIF_BASE,gbr),r0
        SHXR    #SCFDR2_TX_SHIFT,r0
        and     #SCFDR2_TX_MASK,r0
        cmp/eq  #16,r0                  ; count of 16: no room yet
        bt      putchar_wait_room
        mov     r1,r0
        mov.b   r0,@(SCFTDR2-SCIF_BASE,gbr)
putchar_wait_drain:
        mov.w   @(SCFDR2-SCIF_BASE,gbr),r0
        SHXR    #SCFDR2_TX_SHIFT,r0
        tst     #SCFDR2_TX_MASK,r0      ; loop until the count is zero
        bf      putchar_wait_drain
        rts
        nop                             ; (delay slot)

;-----------------------------------------------------------------------
; putstr
;   Sends the NUL-terminated string at r1.  Before each byte it waits
;   until the transmit count field of SCFDR2 is not 16.
;   In:    r1 = string address;  GBR = SCIF_BASE
;   Out:   r1 = address just past the terminating NUL
;   Clobb: r0
;-----------------------------------------------------------------------
putstr:
putstr_wait_room:
        mov.w   @(SCFDR2-SCIF_BASE,gbr),r0
        SHXR    #SCFDR2_TX_SHIFT,r0
        and     #SCFDR2_TX_MASK,r0
        cmp/eq  #16,r0
        bt      putstr_wait_room
        mov.b   @r1+,r0
        tst     r0,r0                   ; NUL ends the string
        bt      putstr_done
        bra     putstr_wait_room
        mov.b   r0,@(SCFTDR2-SCIF_BASE,gbr)     ; (delay slot) send it
putstr_done:
        ; No wait for the transmitter to drain here: a putchar call,
        ; which does drain everything, always follows the putstr calls
        ; before anything for which it matters.
        rts
        nop                             ; (delay slot)

;-----------------------------------------------------------------------
; nbgetchar
;   Non-blocking read of one received byte.
;   In:    GBR = SCIF_BASE
;   Out:   r0 = byte (zero-extended), or -1 if the receive count field
;          of SCFDR2 is zero.
;   Clobb: r1
;   When a byte is returned, SCLSR2 is read and then written with 0.
;-----------------------------------------------------------------------
nbgetchar:
        mov.w   @(SCFDR2-SCIF_BASE,gbr),r0
        SHXR    #SCFDR2_RX_SHIFT,r0,r1
        tst     #SCFDR2_RX_MASK,r0
        bt      nbgetchar_empty
        mov.b   @(SCFRDR2-SCIF_BASE,gbr),r0
        extu.b  r0,r1                   ; r1 = received byte, 0..255
        mov.w   @(SCLSR2-SCIF_BASE,gbr),r0      ; read, then write 0
        mov     #0,r0
        mov.w   r0,@(SCLSR2-SCIF_BASE,gbr)
        rts
        mov     r1,r0                   ; (delay slot) result
nbgetchar_empty:
        rts
        mov     #-1,r0                  ; (delay slot) nothing available

        SETCONST

; Not sure we actually need to align the VBR; the only reason I
; have to suspect we might is that it's the kind of thing I've
; seen relatively often before - interrupt/trap vector tables
; often need to be aligned, not infrequently to a remarkably
; strict boundary.
; I see no indication in the manuals that
; the SH requires _any_ alignment, but it's easy to do and
; definitely won't hurt anything.  (No explicit indication,
; that is.  It is implicit in the execution of code at
; VBR+0x100, VBR+0x400, and VBR+0x600 that VBR must be even.)
        .align  0x10000

; Exception handling consists of:
; - Save PC and SR in SPC and SSR
; - Set SR bit BL to 1 (block exceptions/interrupts)
; - Set SR bit MD to 1 (privileged mode)
; - Set SR bit RB to 1 (r0-r7 bank 1)
; - Write code to EXPEVT or INTEVT
; - Set PC to vector addr, resume execution
;
; Each of the three entry points below does the same thing: it records
; which vector was taken in r2, reads EXPEVT into r3 and INTEVT into
; r4, and jumps to regdump.  Because RB is 1 at this point, these
; r0-r4 are the bank-1 registers; the trapped code's r0-r7 are left
; untouched for regdump to save.  The SETCONST after each stub
; presumably places the constants its SETS.L lines need right behind
; it.
intvec = .

        . = intvec + 0x100
        SETS.L  #0x100,r2               ; r2 = vector offset taken
        SETS.L  #EXPEVT,r0
        mov.l   @r0,r3                  ; r3 = EXPEVT
        SETS.L  #INTEVT,r0
        SETS.L  #regdump,r1
        jmp     @r1
        mov.l   @r0,r4                  ; (delay slot) r4 = INTEVT
        SETCONST

        . = intvec + 0x400
        SETS.L  #0x400,r2
        SETS.L  #EXPEVT,r0
        mov.l   @r0,r3
        SETS.L  #INTEVT,r0
        SETS.L  #regdump,r1
        jmp     @r1
        mov.l   @r0,r4                  ; (delay slot)
        SETCONST

        . = intvec + 0x600
        SETS.L  #0x600,r2
        SETS.L  #EXPEVT,r0
        mov.l   @r0,r3
        SETS.L  #INTEVT,r0
        SETS.L  #regdump,r1
        jmp     @r1
        mov.l   @r0,r4                  ; (delay slot)
        SETCONST

        . = intvec + 0x1000

; Labels printed in front of each dumped value, in dump order.
crash_msg_0:      .asciz (13,10,10)"FATAL TRAP"(13,10)"R0 "
crash_msg_1:      .asciz " R1 "
crash_msg_2:      .asciz " R2 "
crash_msg_3:      .asciz " R3 "
crash_msg_4:      .asciz (13,10)"R4 "
crash_msg_5:      .asciz " R5 "
crash_msg_6:      .asciz " R6 "
crash_msg_7:      .asciz " R7 "
crash_msg_8:      .asciz (13,10)"R8 "
crash_msg_9:      .asciz " R9 "
crash_msg_10:     .asciz " R10 "
crash_msg_11:     .asciz " R11 "
crash_msg_12:     .asciz (13,10)"R12 "
crash_msg_13:     .asciz " R13 "
crash_msg_14:     .asciz " R14 "
crash_msg_15:     .asciz " R15 "
crash_msg_gbr:    .asciz (13,10)"GBR "
crash_msg_sr:     .asciz " SR "
crash_msg_pc:     .asciz " PC "
crash_msg_mach:   .asciz (13,10)"MACH"
crash_msg_macl:   .asciz " MACL"
crash_msg_pr:     .asciz " PR "
crash_msg_vec:    .asciz (13,10)"vector"
crash_msg_expevt: .asciz " EXPEVT"
crash_msg_intevt: .asciz " INTEVT"
crash_msg_done:   .asciz (13,10)
crash_msg_equal:  .asciz " = "

; Zero-terminated list of the labels above.  The order must match the
; order in which regdump pops the saved values off its stack.
        .align  4
crash_msgs:
        .long   crash_msg_0
        .long   crash_msg_1
        .long   crash_msg_2
        .long   crash_msg_3
        .long   crash_msg_4
        .long   crash_msg_5
        .long   crash_msg_6
        .long   crash_msg_7
        .long   crash_msg_8
        .long   crash_msg_9
        .long   crash_msg_10
        .long   crash_msg_11
        .long   crash_msg_12
        .long   crash_msg_13
        .long   crash_msg_14
        .long   crash_msg_15
        .long   crash_msg_gbr
        .long   crash_msg_sr
        .long   crash_msg_pc
        .long   crash_msg_mach
        .long   crash_msg_macl
        .long   crash_msg_pr
        .long   crash_msg_vec
        .long   crash_msg_expevt
        .long   crash_msg_intevt
        .long   0

;-----------------------------------------------------------------------
; regdump - common tail of the three trap stubs.  Never returns.
;   In:    r2 = vector offset, r3 = EXPEVT, r4 = INTEVT (from the stub)
;   Does:  switches to the private trap stack, pushes the trapped state
;          so that it pops in crash_msgs order, prints every value as
;          "<label> = <8 hex digits>", drains the serial port and jumps
;          to 0xa0000000.
;
;   Push order (first pushed = last printed):
;       INTEVT, EXPEVT, vector, PR, MACL, MACH, SPC, SSR, GBR,
;       trapped r15, r14..r8, r7_bank..r0_bank
;   The rN_bank names refer to the register bank that is not currently
;   selected, i.e. presumably the trapped code's r0-r7 (assuming it ran
;   with RB=0, as main arranges).
;
;   Registers held across the print loop: r6 = putchar, r7 = printhex8,
;   r8 = putstr, r9 = cursor into crash_msgs.  Those routines only
;   modify r0/r1 (printhex8 saves and restores r2-r4).
;-----------------------------------------------------------------------
        .align  2
regdump:
        mov     r15,r5                  ; remember the trapped stack pointer
        SETS.L  #intstacktop,r15        ; and move to our own stack
        mov.l   r4,@-r15                ; INTEVT
        mov.l   r3,@-r15                ; EXPEVT
        mov.l   r2,@-r15                ; vector offset
        sts.l   pr,@-r15
        sts.l   macl,@-r15
        sts.l   mach,@-r15
        stc.l   spc,@-r15               ; printed as PC
        stc.l   ssr,@-r15               ; printed as SR
        stc.l   gbr,@-r15
        mov.l   r5,@-r15                ; trapped r15
        mov.l   r14,@-r15
        mov.l   r13,@-r15
        mov.l   r12,@-r15
        mov.l   r11,@-r15
        mov.l   r10,@-r15
        mov.l   r9,@-r15
        mov.l   r8,@-r15
        stc.l   r7_bank,@-r15
        stc.l   r6_bank,@-r15
        stc.l   r5_bank,@-r15
        stc.l   r4_bank,@-r15
        stc.l   r3_bank,@-r15
        stc.l   r2_bank,@-r15
        stc.l   r1_bank,@-r15
        stc.l   r0_bank,@-r15

        ; putstr, printhex8 and putchar reach the SCIF registers only
        ; through GBR.  The trapped GBR has been saved above, so point
        ; GBR at the SCIF ourselves instead of trusting whatever value
        ; the faulting code left there.
        SETS.L  #SCIF_BASE,r14
        ldc     r14,gbr

        SETS.L  #crash_msgs,r9
        SETS.L  #putstr,r8
        SETS.L  #printhex8,r7
        SETS.L  #putchar,r6

1:      mov.l   @r9+,r1                 ; next label, 0 ends the list
        tst     r1,r1
        bt      1f
        jsr     @r8                     ; putstr(label)
        nop                             ; (delay slot)
        SETS.L  #crash_msg_equal,r1
        jsr     @r8                     ; putstr(" = ")
        nop                             ; (delay slot)
        jsr     @r7                     ; printhex8(next saved value)
        mov.l   @r15+,r1                ; (delay slot) pop it
        bra     1b
        nop                             ; (delay slot)

1:      SETS.L  #crash_msg_done,r1
        jsr     @r8                     ; final CR LF
        nop                             ; (delay slot)
        jsr     @r6                     ; putchar(0): putchar waits for the
        mov     #0,r1                   ; (delay slot) transmitter to drain
        SETS.L  #0xa0000000,r0          ; hard-reset vector
        jmp     @r0
        nop                             ; (delay slot)

        SETCONST

; Private stack for regdump; it grows down from intstacktop.
        .align  4
        .space  0x1000
intstacktop = .