; SuperH assembly (delay-slot branches; ';' starts a comment that runs to
; end of line).  One statement per line: a ';' comment swallows anything
; that follows it on the same physical line.
;
; Overview:
;   1. Draw the dot pattern from the `dots' table into video RAM.
;   2. Jump to the uncached P2 alias of this code and enable the caches.
;   3. Jump back to the cached alias and display a free-running 32-bit
;      counter as a row of dots, forever.

. = 0x8c010000

.include "regs.s"

DC_PCLOCK = 49900000            ; Hz
BAUDRATE = 57600

VIDBEG = 0xa520a000
VIDSTRIDE = 640 * 4

DOTWD = 16                      ; bytes per dot line.  putdot stores 4 bytes
                                ; at a time, so a remainder below 4 bytes
                                ; would not be written.
DOTHT = 5                       ; lines
XDOT = VIDSTRIDE / DOTWD        ; dots per video row (not referenced in the
                                ; code visible in this file)

        .entry  .
stacktop:                       ; same address as the entry point; loaded
                                ; into r15 by enable_cache

; Register use:
; r0 = scratch
; r1 = video ram base
; r2 = row stride, in bytes
; r3 = pointer to data for next dot to store
; r4 = number of dots remaining to store
; r5 = dot X (putdot adds 1 to it)
; r6 = dot Y (putdot adds 1 to it)
; r7 = dot V (only the low bit is used)
; r8 = scratch (holds px_mask from the jump to P2 until cacheon)
; r9 = scratch
; r10 = scratch (cacheon: the counter being displayed)
; r11 = scratch (cacheon: copy of r10 being shifted out)
; r12 = scratch (cacheon: bit counter / dot X, 32 down to 1)
; r15 = set to stacktop in enable_cache

        mov.l   vidbeg,r1
        mov.l   vidstride,r2
        mov.l   dotptr,r3
        mov.l   ndots,r4
; Each table entry is three bytes: x, y, v.
1:      mov.b   @r3+,r5
        mov.b   @r3+,r6
        bsr     putdot
        mov.b   @r3+,r7         ; delay slot: v is loaded before putdot runs
        dt      r4
        bf      1b

; We want to be running in P2, to fiddle CCR.  (The hardware
; PDF, page 77, says that "CCR modifications must only be made
; by a program in the non-cached P2 area".)
        mova    enable_cache,r0
        mov.l   px_mask,r8
        mov.l   p2_bits,r9
        and     r8,r0           ; strip the segment-selecting top bits
        or      r9,r0           ; ...and substitute the P2 ones
        jmp     @r0
        nop                     ; delay slot

        .align  4
vidbeg:
        .long   VIDBEG
vidstride:
        .long   VIDSTRIDE
dotptr:
        .long   dots
ndots:
        .long   [enddots-dots]/3        ; three bytes per table entry
px_mask:                        ; mask off Px-selecting bits
        .long   0x1fffffff
p2_bits:                        ; bits for P2
        .long   0xa0000000

        .align  4
enable_cache:
; Now running in P2, so we can turn on the cache.
        mov.l   ccr_addr,r0
        mov.l   ccr_bits,r9
        mov.l   r9,@r0          ; the CCR update
; The hardware PDF, page 77, says that "After CCR is updated,
; an instruction that performs data access to the P0, P1, P3,
; or U0 area should be located at least four instructions after
; the CCR update instruction.  Also, a branch instruction to
; the P0, P1, P3, or U0 area should be located at least eight
; instructions after the CCR update instruction."  It doesn't
; say why this is "should" rather than "must", nor does it
; describe the consequences if this is not done, nor does it
; say whether this "after" refers to address space or
; instruction execution order (eg, does a three-instruction
; loop that's executed three times count as nine instructions
; or three? does a branch seven instructions forward count?).
; We treat it pessimistically, making sure we burn eight
; instructions by any of these measures.
;
; Gotta love incomplete doc.
;
; The two mov.l loads below are PC-relative, and the PC is in P2 here,
; so they read through P2.  r8 still holds px_mask from before the jump.
        mova    cacheon,r0      ; #1
        mov.l   p0_bits,r9      ; #2
        and     r8,r0           ; #3
        or      r9,r0           ; #4
        mov.l   stackptr,r15    ; #5
        nop                     ; #6
        nop                     ; #7
        nop                     ; #8
        jmp     @r0             ; back to the cached alias of cacheon
        nop                     ; delay slot

        .align  4
ccr_addr:
        .long   CCR
ccr_bits:                       ; Not set: IIX OIX ORA WT
        .long   CCR_ICI | CCR_ICE | CCR_OCI | CCR_CB | CCR_OCE
p0_bits:                        ; bits for the cached segment this file is
                                ; loaded in (see ". = 0x8c010000").  Despite
                                ; the label name, 0x80000000 is the base of
                                ; P1 on SH-4, not P0.
        .long   0x80000000
stackptr:
        .long   stacktop

        .align  2
progress:
; Draws a set dot at X = r0, Y = 8 and returns to progress's caller
; (tail call into putdot).  Nothing in this file calls it.
        mov     r0,r5
        mov     #8,r6
        bra     putdot
        mov     #1,r7           ; delay slot

        .align  4
cacheon:
; Runs forever: shows r10 as 32 dots on row 10, bit 0 at X = 32 and
; bit 31 at X = 1, then busy-waits and increments r10.
; NOTE(review): r10 is not initialized anywhere in this file, so the
; count starts from whatever r10 held on entry.
2:      mov     r10,r11
        mov     #32,r12
3:      shlr    r11             ; T = next bit of the counter, LSB first
        rotcl   r7              ; ...moved into the low bit of r7
        mov     r12,r5
        bsr     putdot
        mov     #10,r6          ; delay slot
        dt      r12
        bf      3b
        mov.l   delay,r0
1:      dt      r0              ; busy-wait between updates
        bf      1b
        bra     2b
        add     #1,r10          ; delay slot

        .align  4
delay:
        .long   1<<16

putdot:
; Stores dot of all 0s or all 1s, depending on low bit of r7, at (r5,r6).
; The dot is DOTWD bytes wide and DOTHT lines high, and it is drawn one
; cell right of and one cell below (r5,r6): its first byte is at
;   r1 + (r5+1)*DOTWD + (r6+1)*DOTHT*VIDSTRIDE
; Inputs:   r1 = video ram base, r2 = row stride in bytes,
;           r5 = X, r6 = Y, r7 = value (low bit)
; Destroys: r0, r8, r9, MACL, T; r5 and r6 are each left incremented by 1.
; r7 is not modified.
        add     #1,r5
        add     #1,r6
        mov     #DOTWD,r0
        mul.l   r0,r5
        sts     macl,r8         ; X offset in bytes
        mov.l   9f,r0
        mul.l   r0,r6
        sts     macl,r0         ; Y offset in bytes
        add     r8,r0
        add     r1,r0           ; r0 = address of the dot's first line
        mov     r7,r9
        shlr    r9              ; T = low bit of r7
        bt      1f
        bra     2f
        mov     #0,r8           ; delay slot: clear dot => all 0s
1:      mov     #-1,r8          ; set dot => all 1s
2:      mov     #DOTHT,r9
3:
; foo expands to one 4-byte store per 4 bytes of b, at offsets o, o+4, ...
        .macro  foo b, o
        .if     $(b) >= 4
        mov.l   r8,@($(o),r0)
        foo     $(b)-4,$(o)+4
        .endif
        .endm
        foo     DOTWD,0
        dt      r9
        bf/s    3b
        add     r2,r0           ; delay slot: advance to the next video line
        rts
        nop                     ; delay slot

        .align  4
9:      .long   DOTHT * VIDSTRIDE       ; bytes per row of dots

dots:
; y\x 0 1 2 3 4 5 6 7
;  0  . . . . . . . .
;  1  . * * * * * * .
;  2  . * . . . * . .
;  3  . . . . * . . .
;  4  . . . . * . . .
;  5  . . . . * . . .
;  6  . * * * * * * .
;  7  . . . . . . . .
;       x, y, v
        .byte   0, 0, 0
        .byte   1, 0, 0
        .byte   2, 0, 0
        .byte   3, 0, 0
        .byte   4, 0, 0
        .byte   5, 0, 0
        .byte   6, 0, 0
        .byte   7, 0, 0

        .byte   0, 1, 0
        .byte   1, 1, 1
        .byte   2, 1, 1
        .byte   3, 1, 1
        .byte   4, 1, 1
        .byte   5, 1, 1
        .byte   6, 1, 1
        .byte   7, 1, 0

        .byte   0, 2, 0
        .byte   1, 2, 1
        .byte   2, 2, 0
        .byte   3, 2, 0
        .byte   4, 2, 0
        .byte   5, 2, 1
        .byte   6, 2, 0
        .byte   7, 2, 0

        .byte   0, 3, 0
        .byte   1, 3, 0
        .byte   2, 3, 0
        .byte   3, 3, 0
        .byte   4, 3, 1
        .byte   5, 3, 0
        .byte   6, 3, 0
        .byte   7, 3, 0

        .byte   0, 4, 0
        .byte   1, 4, 0
        .byte   2, 4, 0
        .byte   3, 4, 0
        .byte   4, 4, 1
        .byte   5, 4, 0
        .byte   6, 4, 0
        .byte   7, 4, 0

        .byte   0, 5, 0
        .byte   1, 5, 0
        .byte   2, 5, 0
        .byte   3, 5, 0
        .byte   4, 5, 1
        .byte   5, 5, 0
        .byte   6, 5, 0
        .byte   7, 5, 0

        .byte   0, 6, 0
        .byte   1, 6, 1
        .byte   2, 6, 1
        .byte   3, 6, 1
        .byte   4, 6, 1
        .byte   5, 6, 1
        .byte   6, 6, 1
        .byte   7, 6, 0

        .byte   0, 7, 0
        .byte   1, 7, 0
        .byte   2, 7, 0
        .byte   3, 7, 0
        .byte   4, 7, 0
        .byte   5, 7, 0
        .byte   6, 7, 0
        .byte   7, 7, 0

; Clear X = 8..34 on row 8 (the row that `progress' draws into).
        .byte   8, 8, 0
        .byte   9, 8, 0
        .byte   10, 8, 0
        .byte   11, 8, 0
        .byte   12, 8, 0
        .byte   13, 8, 0
        .byte   14, 8, 0
        .byte   15, 8, 0
        .byte   16, 8, 0
        .byte   17, 8, 0
        .byte   18, 8, 0
        .byte   19, 8, 0
        .byte   20, 8, 0
        .byte   21, 8, 0
        .byte   22, 8, 0
        .byte   23, 8, 0
        .byte   24, 8, 0
        .byte   25, 8, 0
        .byte   26, 8, 0
        .byte   27, 8, 0
        .byte   28, 8, 0
        .byte   29, 8, 0
        .byte   30, 8, 0
        .byte   31, 8, 0
        .byte   32, 8, 0
        .byte   33, 8, 0
        .byte   34, 8, 0
enddots: