. = 0x8c010000

.include "regs.s"

; Bring-up test program: paints a small glyph of "dots" into video RAM,
; enables the cache, initializes the SCIF for 8N1 at BAUDRATE, and then
; transmits the printable ASCII characters forever.  A progress dot is
; painted after each step so a hang can be localized by eye.

DC_PCLOCK = 49900000 ; Hz
BAUDRATE = 57600

VIDBEG = 0xa520a000
VIDSTRIDE = 640 * 4
DOTWD = 16 ; bytes (implicitly known in putdot)
DOTHT = 5 ; lines
XDOT = VIDSTRIDE / DOTWD

	.entry .
; The stack pointer is loaded with this address (see stackptr); nothing
; in this file pushes to the stack.
stacktop:

; r0 = scratch
; r1 = video ram base
; r2 = row stride, in bytes
; r3 = pointer to data for next dot to store
; r4 = number of dots remaining to store
; r5 = dot X
; r6 = dot Y
; r7 = dot V
; r8 = scratch
; r9 = scratch
; r10 = scratch
; r11 = scratch
; r12 = scratch

	mov.l	vidbeg,r1
	mov.l	vidstride,r2
	mov.l	dotptr,r3
	mov.l	ndots,r4
	; Paint every (x, y, v) triple from the dots table.
1:
	mov.b	@r3+,r5
	mov.b	@r3+,r6
	bsr	putdot
	mov.b	@r3+,r7		; delay slot: v is loaded before putdot runs
	dt	r4
	bf	1b
	bsr	progress
	mov	#33,r0		; delay slot: progress column
	; We want to be running in P2, to fiddle CCR.  (The hardware
	; PDF, page 77, says that "CCR modifications must only be made
	; by a program in the non-cached P2 area".)
	mova	enable_cache,r0
	mov.l	px_mask,r8	; r8 is reused by enable_cache; keep it live
	mov.l	p2_bits,r9
	and	r8,r0
	or	r9,r0
	jmp	@r0
	nop

	.align 4
vidbeg:
	.long VIDBEG
vidstride:
	.long VIDSTRIDE
dotptr:
	.long dots
ndots:
	.long [enddots-dots]/3
px_mask:
	; mask off Px-selecting bits
	.long 0x1fffffff
p2_bits:
	; bits for P2
	.long 0xa0000000

	.align 4
enable_cache:
	; Now running in P2, so we can turn on the cache.
	; Expects r8 = px_mask, as left by the entry code.
	mov.l	ccr_addr,r0
	mov.l	ccr_bits,r9
	mov.l	r9,@r0
	; The hardware PDF, page 77, says that "After CCR is updated,
	; an instruction that performs data access to the P0, P1, P3,
	; or U0 area should be located at least four instructions after
	; the CCR update instruction.  Also, a branch instruction to
	; the P0, P1, P3, or U0 area should be located at least eight
	; instructions after the CCR update instruction."  It doesn't
	; say why this is "should" rather than "must", nor does it
	; describe the consequences if this is not done, nor does it
	; say whether this "after" refers to address space or
	; instruction execution order (eg, does a three-instruction
	; loop that's executed three times count as nine instructions
	; or three?  does a branch seven instructions forward count?).
	; We treat it pessimistically, making sure we burn eight
	; instructions by any of these measures.
	;
	; Gotta love incomplete doc.
	;
	mova	cacheon,r0		; #1
	mov.l	p0_bits,r9		; #2
	and	r8,r0			; #3
	or	r9,r0			; #4
	mov.l	stackptr,r15		; #5
	nop				; #6
	nop				; #7
	nop				; #8
	jmp	@r0
	nop

	.align 4
ccr_addr:
	.long CCR
ccr_bits:
	; Not set: IIX OIX ORA WT
	.long CCR_ICI | CCR_ICE | CCR_OCI | CCR_CB | CCR_OCE
p0_bits:
	; Bits selecting the cached alias we return to.  Despite the
	; label name, 0x80000000 is the P1 segment on SH-4 (P0 is
	; 0x00000000-0x7fffffff); it matches the load address above.
	.long 0x80000000
stackptr:
	.long stacktop

	.align 2
progress:
	; Paints a lit progress dot: passes (x = r0, y = 8, v = 1) to
	; putdot and returns through putdot's rts (tail call).
	; Destroys whatever putdot destroys, plus r5, r6, r7.
	mov	r0,r5
	mov	#8,r6
	bra	putdot
	mov	#1,r7		; delay slot

	.align 4
cacheon:
	bsr	progress
	mov	#9,r0
	; Initialize the SCIF.  Mostly follows hardware PDF figure
	; 16.6, but not entirely (eg, 16.6 shows turning on CKE1, but
	; we don't want external clock, so we don't).
	mov.l	scif_base,r0
	ldc	r0,gbr
	bsr	progress
	mov	#11,r0
	; Clear SCSCR2 (in particular, clear TE and RE).
	mov	#0,r0
	mov.w	r0,@(SCSCR2-SCIF_BASE,gbr)
	bsr	progress
	mov	#13,r0
	; Clear out the FIFOs.
	mov.w	reset_fifos,r0
	mov.w	r0,@(SCFCR2-SCIF_BASE,gbr)
	bsr	progress
	mov	#15,r0
	; Configure for 8N1.
	mov.w	config_8n1,r0
	mov.w	r0,@(SCMSR2-SCIF_BASE,gbr)
	bsr	progress
	mov	#17,r0
	; Set the bit rate.  With the divide-by-1 clock source chosen
	; in config_8n1, the asynchronous-mode formula is
	;	N = Pclk / (32 * baud) - 1
	; We round Pclk/(32*baud) to nearest (the *2 ... +1 ... /2
	; dance) and then subtract the 1; omitting the -1 makes the
	; line run a few percent slow.  For 49.9MHz and 57600 this
	; yields 26.
	mov	#[[[[[DC_PCLOCK*2]/[32*BAUDRATE]]+1]/2]-1],r0
	mov.b	r0,@(SCBRR2-SCIF_BASE,gbr)
	bsr	progress
	mov	#19,r0
	; Delay at least one bit time.  We wait for two R64CNT changes;
	; presumably the first wait may end almost immediately, so only
	; the second is guaranteed to span a full tick.
	.if BAUDRATE < 64
	.error Code assumes BAUDRATE is at least 64
	.endif
	bsr	await_tick
	nop
	bsr	progress
	mov	#21,r0
	bsr	await_tick
	nop
	bsr	progress
	mov	#23,r0
	; Set the FIFO interrupt trigger points and clear the reset
	; bits.  We don't actually care about the trigger points,
	; because we don't use interrupts; we might be able to skip
	; this step, but it's easy and harmless.
	mov.w	set_triggers,r0
	mov.w	r0,@(SCFCR2-SCIF_BASE,gbr)
	bsr	progress
	mov	#25,r0
	; Setup complete.  Enable transmitter and receiver.
	mov.w	enable_tx_rx,r0
	mov.w	r0,@(SCSCR2-SCIF_BASE,gbr)
	bsr	progress
	mov	#27,r0
	; Flush any lingering statuses (read each status register,
	; then write zero to it).
	mov.w	@(SCFSR2-SCIF_BASE,gbr),r0
	mov	#0,r0
	mov.w	r0,@(SCFSR2-SCIF_BASE,gbr)
	mov.w	@(SCLSR2-SCIF_BASE,gbr),r0
	bsr	progress
	mov	#29,r0
	mov	#0,r0
	mov.w	r0,@(SCLSR2-SCIF_BASE,gbr)
	bsr	progress
	mov	#31,r0
	; Transmit forever: 95 consecutive characters starting at
	; space, then start over.
	;	r8 = next character to send
	;	r9 = characters remaining in this pass
2:
	mov	#' ,r8
	mov	#95,r9
1:
	; Spin while the transmit FIFO count reads 16 (presumably
	; "full").
	mov.w	@(SCFDR2-SCIF_BASE,gbr),r0
	SHXR	#SCFDR2_TX_SHIFT,r0
	and	#SCFDR2_TX_MASK,r0
	cmp/eq	#16,r0
	bt	1b
	mov	r8,r0
	mov.b	r0,@(SCFTDR2-SCIF_BASE,gbr)
	add	#1,r8
	dt	r9
	bf	1b
	bra	2b
	nop

	.align 4
reset_fifos:
	.word SCFCR2_TFRST | SCFCR2_RFRST
set_triggers:
	.word SCFCR2_RXT_8 | SCFCR2_TXT_8
config_8n1:
	.word SCMSR2_CHR_8 | SCMSR2_PE_DIS | SCMSR2_STOP_1 | SCMSR2_CKS_DIV1
enable_tx_rx:
	.word SCSCR2_TE | SCSCR2_RE
scif_base:
	.long SCIF_BASE

await_tick:
	; Returns once the value read from R64CNT has changed.
	; GBR and PR are preserved (parked in r8 and r9 meanwhile).
	; Destroys r0, r8, r9, r10, r11.
	stc	gbr,r8
	sts	pr,r9
	mov.l	rtc_base,r0
	ldc	r0,gbr
	bsr	read_r64cnt
	nop
	mov	r10,r11		; r11 = reference reading
1:
	bsr	read_r64cnt
	nop
	cmp/eq	r10,r11
	bt	1b		; unchanged: keep polling
	lds	r9,pr
	ldc	r8,gbr
	rts
	nop

	; Retry path for read_r64cnt: write the zero in r0 to RCR1,
	; then fall through and read again.
1:
	mov.b	r0,@(RCR1-RTC_BASE,gbr)
read_r64cnt:
	; Reads R64CNT into r10; expects GBR = RTC_BASE.
	; If bit 0x80 of RCR1 is set afterwards (presumably the carry
	; flag, meaning the reading may be torn), RCR1 is zeroed and
	; the read is repeated.  Returns with r0 = 0.
	mov.b	@(R64CNT-RTC_BASE,gbr),r0
	mov	r0,r10
	mov.b	@(RCR1-RTC_BASE,gbr),r0
	tst	#0x80,r0	; T = 1 when the bit is clear
	mov	#0,r0
	bf	1b
	rts
	nop

	.align 4
rtc_base:
	.long RTC_BASE

putdot:
; Stores dot of all 0s or all 1s, depending on low bit of r7, at (r5,r6)
; The dot is DOTWD bytes wide and DOTHT lines tall; the position is
; offset by one dot cell in each direction (r5 and r6 are incremented).
; Expects r1 = video ram base, r2 = row stride in bytes.
; Destroys r0, r8, r9; also modifies r5, r6 and MACL.
	add	#1,r5
	add	#1,r6
	mov	#DOTWD,r0
	mul.l	r0,r5
	sts	macl,r8		; X offset in bytes
	mov.l	9f,r0
	mul.l	r0,r6
	sts	macl,r0		; Y offset in bytes
	add	r8,r0
	add	r1,r0		; r0 = address of the dot's first line
	mov	r7,r9
	shlr	r9		; T = low bit of v
	bt	1f
	bra	2f
	mov	#0,r8		; delay slot: v even -> all 0s
1:
	mov	#-1,r8		; v odd -> all 1s
2:
	mov	#DOTHT,r9	; r9 = lines remaining
3:
	; Expands to one longword store per 4 bytes of DOTWD.
	.macro foo b, o
	.if $(b) >= 4
	mov.l	r8,@($(o),r0)
	foo $(b)-4,$(o)+4
	.endif
	.endm
	foo DOTWD,0
	dt	r9
	bf/s	3b
	add	r2,r0		; delay slot: advance to the next line
	rts
	nop

	.align 4
9:
	.long DOTHT * VIDSTRIDE

dots:
; y\x	0 1 2 3 4 5 6 7
;  0	. . . . . . . .
;  1	. * * * * * * .
;  2	. * . . . * . .
;  3	. . . . * . . .
;  4	. . . . * . . .
;  5	. . . . * . . .
;  6	. * * * * * * .
;  7	. . . . . . . .
	;     x, y, v
	.byte 0, 0, 0
	.byte 1, 0, 0
	.byte 2, 0, 0
	.byte 3, 0, 0
	.byte 4, 0, 0
	.byte 5, 0, 0
	.byte 6, 0, 0
	.byte 7, 0, 0
	.byte 0, 1, 0
	.byte 1, 1, 1
	.byte 2, 1, 1
	.byte 3, 1, 1
	.byte 4, 1, 1
	.byte 5, 1, 1
	.byte 6, 1, 1
	.byte 7, 1, 0
	.byte 0, 2, 0
	.byte 1, 2, 1
	.byte 2, 2, 0
	.byte 3, 2, 0
	.byte 4, 2, 0
	.byte 5, 2, 1
	.byte 6, 2, 0
	.byte 7, 2, 0
	.byte 0, 3, 0
	.byte 1, 3, 0
	.byte 2, 3, 0
	.byte 3, 3, 0
	.byte 4, 3, 1
	.byte 5, 3, 0
	.byte 6, 3, 0
	.byte 7, 3, 0
	.byte 0, 4, 0
	.byte 1, 4, 0
	.byte 2, 4, 0
	.byte 3, 4, 0
	.byte 4, 4, 1
	.byte 5, 4, 0
	.byte 6, 4, 0
	.byte 7, 4, 0
	.byte 0, 5, 0
	.byte 1, 5, 0
	.byte 2, 5, 0
	.byte 3, 5, 0
	.byte 4, 5, 1
	.byte 5, 5, 0
	.byte 6, 5, 0
	.byte 7, 5, 0
	.byte 0, 6, 0
	.byte 1, 6, 1
	.byte 2, 6, 1
	.byte 3, 6, 1
	.byte 4, 6, 1
	.byte 5, 6, 1
	.byte 6, 6, 1
	.byte 7, 6, 0
	.byte 0, 7, 0
	.byte 1, 7, 0
	.byte 2, 7, 0
	.byte 3, 7, 0
	.byte 4, 7, 0
	.byte 5, 7, 0
	.byte 6, 7, 0
	.byte 7, 7, 0
	; Blank out the row that progress draws on (y = 8).
	.byte 8, 8, 0
	.byte 9, 8, 0
	.byte 10, 8, 0
	.byte 11, 8, 0
	.byte 12, 8, 0
	.byte 13, 8, 0
	.byte 14, 8, 0
	.byte 15, 8, 0
	.byte 16, 8, 0
	.byte 17, 8, 0
	.byte 18, 8, 0
	.byte 19, 8, 0
	.byte 20, 8, 0
	.byte 21, 8, 0
	.byte 22, 8, 0
	.byte 23, 8, 0
	.byte 24, 8, 0
	.byte 25, 8, 0
	.byte 26, 8, 0
	.byte 27, 8, 0
	.byte 28, 8, 0
	.byte 29, 8, 0
	.byte 30, 8, 0
	.byte 31, 8, 0
	.byte 32, 8, 0
	.byte 33, 8, 0
	.byte 34, 8, 0
enddots: