; Debugging flags. Set these to 1 to turn on various debugging output. ; set_params debug_set_params = 0 ; rendering cycle kickoff debug_start_render = 0 ; texture setup debug_texture = 0 ; TA command commits debug_ta_commit = 0 ; This is designed to be serial-line downloaded to cdcode. ; ; This matters mostly in the interfaces it means we expect. In ; particular, we are not called with a bsr/jsr; we are entered with a ; jmp, and our return-to address, to the extent that we have one, is ; in r11, not pr. We also expect registers set up the way cdcode sets ; them; in particular, we expect r15 to be set to point to a stack, ; 8c010000 (or a little below that if cdcode happens to have anything ; on the stack), and r14 set to the SCIF's base address. If we return ; to cdcode, it expects those two, and r11 and r10, to be preserved. ; It doesn't mind if we trash r12/r13, but we preserve them too. ; ; Our memory map: ; ; [8c000000,8c010000) Stack (r15 set by cdcode) ; [8c010000,8c01????) cdcode ; [8c020000,8c0?????) Us ; ; Our entry point is 8c020000. We don't set an entry point here with ; .entry because then send-s would jump to us directly, and I'd rather ; do that manually. ; Throughout this file, MC, used as a name in the comments, means ; Marcus Comstedt, and tatest is a C program of his which he ; distributes as an example of a 3D rendering program. This file ; started out as tatest.s, a (manual) rewriting in assembly of tatest, ; but it's evolving in its own directions. 
.include "regs.s" .include "ta-cmds.s" .include "maple-bits.s" VRAM_BASE_32 = 0xa5000000 VRAM_BASE_64 = 0xa4000000 VRAM_SIZE = 8 << 20 STOREQ_BASE = 0xe0000000 VIDREG_BASE = 0xa05f0000 X_SIZE = 640 Y_SIZE = 480 VBLANK_REG = VIDREG_BASE + 0x6900 VBLANK_VBIT = 0x08 DISPLAY_VRAM = VIDREG_BASE + 0x8050 SHORT_FRAME_OFFSET = X_SIZE*2 ; X_SIZE pixels at two bytes each COT_FOVY = 0f1.73 ; cot(FOVy/2), field-of-view angle figure ZNEAR = 0f1 ZFAR = 0f100 BUTTON_FACTOR = 0f3 DISTANCE = 0f15 ; Layout of some things in video RAM. We double-buffer, so there are ; two of most of these. _a and _b suffixes indicate the pairs. ; ; We copy tatest's layout in video RAM. This means we put rendered ; scenes at [a5000000,a512c000) and TA tile buffers, tile descriptors, ; and command lists at [a5400000,a550df00) (these values are for ; X_SIZE=640 Y_SIZE=480), with textures in [a4400000-a4420000), the ; textures using the same memory as [a5200000-a5210000) and ; [a5600000-a5610000). ; ; Unfortunately the rendering and video hardware aren't capable of ; making distinctions equivalent to the difference between the CPU's ; a4xxxxxx and a5xxxxxx views of video RAM (textures always come from ; a4xxxxxx, or, to be more precise, always access video RAM in a way ; compatible with the CPU's a4xxxxxx view, whereas everything else ; comes from a5xxxxxx). So we're stuck jigsawing together a4xxxxxx ; allocations for textures and a5xxxxxx allocations for other stuff. ; Texture space. . = VRAM_BASE_64 + 0x00400000 ; Cube faces. Each one uses 256*256 bytes. texture_cubeface_1: .space 65536 texture_cubeface_2: .space 65536 ; Characters. We need only 0 and 1. Each one is an 8x8 ; texture, taking 64 bytes. texture_font_0: .space 64 texture_font_1: .space 64 ; Space to render into. Each field takes up X_SIZE*Y_SIZE ; pixels at two bytes per pixel. (If it's displayed ; interlaced, this is handled with the display hardware; in ; memory it's totally non-interlaced.) render_buf_size = X_SIZE * Y_SIZE * 2 . 
= VRAM_BASE_32 render_buf_a: .space render_buf_size render_buf_b: .space render_buf_size ; Tile descriptors. There is one of these, at 6 longs, per ; tile; there is also a 24-long header. Each tile is 32x32 ; pixels. So for a 640x480 screen, we need ; 24+(6*(640/32)*(480/32)) longs of space. (I don't know what ; happens if the screen width or height is not a multiple of ; 32.) Each tile also uses 64 bytes of buffer space. ta_buffers_size_cmd_list = 512 * 1024 ta_buffers_size_tile_buffer = 64 * [X_SIZE/32] * [Y_SIZE/32] ta_buffers_size_tile_descriptor = 4 * [24 + [6 * [X_SIZE/32] * [Y_SIZE/32]]] . = VRAM_BASE_32 + 0x00400000 ta_buffers_cmd_list_a: .space ta_buffers_size_cmd_list ta_buffers_cmd_list_b: .space ta_buffers_size_cmd_list ta_buffers_tile_buffer_a: .space ta_buffers_size_tile_buffer ta_buffers_tile_buffer_b: .space ta_buffers_size_tile_buffer ta_buffers_tile_descriptor_a: .space ta_buffers_size_tile_descriptor ta_buffers_tile_descriptor_b: .space ta_buffers_size_tile_descriptor ; End of layout of video RAM. . = 0x8c020000 .sz any .pr any SETS.L #main,r0 jmp @r0 nop SETCONST ; Our "data segment". We don't really have segments the way ; the term implies. The data is here rather than at the end ; so the symbols' values are known by the time the assembler ; sees them later. This is not critical, but does produce ; slightly better code. ; ; Things here are ordered approximately by decreasing alignment ; requirement. Not essential, just avoids needless gaps. ; The maple command and response buffers. The hardware ; requires they be aligned on 32-byte boundaries. .align 32 maple_cmd: .long XDESC_LAST | [0 << XDESC_PORTSHIFT] | [1 << XDESC_LENSHIFT] .long maple_resp & DMA_ADDRMASK MapleFrame CMD_GETCOND, 0, ADDR_MAIN, 0, 0, 1 .long @BSL[FUNC_CONTROLLER] .align 32 ; 1024 is the largest the hardware supports, so it's a safe ; limit. (The amount actually used is usually fairly small.) 
maple_resp: .space 1024 ; The base matrix (composition of screenview, projection, and ; translation). .align 8 base_matrix: .space 16*4 ; Font setup table. font_setup: .long init_font_0, texture_font_0 1: .long init_font_1, texture_font_1 n_font_setup = [. - font_setup] / [1b - font_setup] ; The current and previous controller input state. The ; patterns we initialize this to are what the controller sends ; when it's not being touched. .align 4 curistate: .long 0x0000ffff, 0x80808080 previstate: .long 0x0000ffff, 0x80808080 ; Pointers to the texture memory. We have two different ; cube-face textures (each of which is used with three ; different palettes, though that doesn't matter here), and ; two font character textures. .align 4 textures: .long texture_font_0, TA_POLYMODE2_U_SIZE_8 | TA_POLYMODE2_V_SIZE_8 .long texture_font_1, TA_POLYMODE2_U_SIZE_8 | TA_POLYMODE2_V_SIZE_8 .long texture_cubeface_1, TA_POLYMODE2_U_SIZE_256 | TA_POLYMODE2_V_SIZE_256 .long texture_cubeface_2, TA_POLYMODE2_U_SIZE_256 | TA_POLYMODE2_V_SIZE_256 ; Pointers to the two places screens get rendered into. .align 4 render_buf: .long render_buf_a .long render_buf_b ; Cookies to pass to the hardware (void *tiles[2] in tatest) .align 4 tiledesc_cookies: .space 2*4 ; Command lists (ta_buffers cmd_list arrays in tatest; we point ; to them rather than using a struct to generate offsets) .align 4 cmdlists: .long ta_buffers_cmd_list_a .long ta_buffers_cmd_list_b ; Tile buffers (the 64-bytes-per-tile work space) .align 4 tilebuffers: .long ta_buffers_tile_buffer_a .long ta_buffers_tile_buffer_b ; Tile descriptors (the spaces in which the descriptors are ; built) .align 4 tiledescs: .long ta_buffers_tile_descriptor_a .long ta_buffers_tile_descriptor_b ; Current orientation, stored in the form of the world axes in ; eye coordinates. This is just the rotation. .align 4 eye_x: .long 0f1, 0f0, 0f0 eye_y: .long 0f0, 0f1, 0f0 eye_z: .long 0f0, 0f0, 0f1 ; Texture modes. 
.align 4 texture_mode_base = TA_POLYMODE2_BLEND_DEFAULT|TA_POLYMODE2_FOG_DISABLED|TA_POLYMODE2_BILINEAR_FILTER|TA_POLYMODE2_MIPMAP_D_1_00|TA_POLYMODE2_TEXTURE_REPLACE cur_texture_mode: .long texture_mode_base ; Scene corner coordinates. .align 4 vertex_coords: .long -0f1, -0f1, -0f1 ; 0 .long -0f1, -0f1, 0f1 ; 1 .long -0f1, 0f1, 0f1 ; 2 .long -0f1, 0f1, -0f1 ; 3 .long 0f1, 0f1, -0f1 ; 4 .long 0f1, 0f1, 0f1 ; 5 .long 0f1, -0f1, 0f1 ; 6 .long 0f1, -0f1, -0f1 ; 7 .long -0f3, -0f3, -0f3 ; 8 .long 0f3, -0f3, -0f3 ; 9 .long -0f3, -0f3, -0f2 ; 10 .long 0f3, -0f3, -0f2 ; 11 .long -0f3, -0f2, -0f2 ; 12 .long 0f3, -0f2, -0f2 ; 13 .long -0f3, -0f2, -0f3 ; 14 .long 0f3, -0f2, -0f3 ; 15 n_vertex_coords = [. - vertex_coords] / [3*4] xform_coords: .space n_vertex_coords*3*4 ; These vertices don't get transformed; they are fixed in terms ; of screen location, not in terms of 3D scene location. The ; z coordinate (0f11) here is noncritical; it just needs to be ; larger than anything the scene produces. (The right fix ; would be to store a flag indicating use of Z_ALWAYS rather ; than Z_GREATER, but that's more hair than I want to bother ; with.) 
.long [0f16* 0]+ 0, 0f20, 0f11 ; 16 .long [0f16* 0]+ 0, 0f32, 0f11 ; 17 .long [0f16* 1]+ 0, 0f20, 0f11 ; 18 .long [0f16* 1]+ 0, 0f32, 0f11 ; 19 .long [0f16* 2]+ 0, 0f20, 0f11 ; 20 .long [0f16* 2]+ 0, 0f32, 0f11 ; 21 .long [0f16* 3]+ 0, 0f20, 0f11 ; 22 .long [0f16* 3]+ 0, 0f32, 0f11 ; 23 .long [0f16* 4]+ 0, 0f20, 0f11 ; 24 .long [0f16* 4]+ 0, 0f32, 0f11 ; 25 .long [0f16* 5]+ 0, 0f20, 0f11 ; 26 .long [0f16* 5]+ 0, 0f32, 0f11 ; 27 .long [0f16* 6]+ 0, 0f20, 0f11 ; 28 .long [0f16* 6]+ 0, 0f32, 0f11 ; 29 .long [0f16* 7]+ 0, 0f20, 0f11 ; 30 .long [0f16* 7]+ 0, 0f32, 0f11 ; 31 .long [0f16* 8]+ 0, 0f20, 0f11 ; 32 .long [0f16* 8]+ 0, 0f32, 0f11 ; 33 .long [0f16* 8]+ 8, 0f20, 0f11 ; 34 .long [0f16* 8]+ 8, 0f32, 0f11 ; 35 .long [0f16* 9]+ 8, 0f20, 0f11 ; 36 .long [0f16* 9]+ 8, 0f32, 0f11 ; 37 .long [0f16*10]+ 8, 0f20, 0f11 ; 38 .long [0f16*10]+ 8, 0f32, 0f11 ; 39 .long [0f16*11]+ 8, 0f20, 0f11 ; 40 .long [0f16*11]+ 8, 0f32, 0f11 ; 41 .long [0f16*12]+ 8, 0f20, 0f11 ; 42 .long [0f16*12]+ 8, 0f32, 0f11 ; 43 .long [0f16*13]+ 8, 0f20, 0f11 ; 44 .long [0f16*13]+ 8, 0f32, 0f11 ; 45 .long [0f16*14]+ 8, 0f20, 0f11 ; 46 .long [0f16*14]+ 8, 0f32, 0f11 ; 47 .long [0f16*15]+ 8, 0f20, 0f11 ; 48 .long [0f16*15]+ 8, 0f32, 0f11 ; 49 .long [0f16*16]+ 8, 0f20, 0f11 ; 50 .long [0f16*16]+ 8, 0f32, 0f11 ; 51 .long [0f16*16]+16, 0f20, 0f11 ; 52 .long [0f16*16]+16, 0f32, 0f11 ; 53 .long [0f16*17]+16, 0f20, 0f11 ; 54 .long [0f16*17]+16, 0f32, 0f11 ; 55 .long [0f16*18]+16, 0f20, 0f11 ; 56 .long [0f16*18]+16, 0f32, 0f11 ; 57 .long [0f16*19]+16, 0f20, 0f11 ; 58 .long [0f16*19]+16, 0f32, 0f11 ; 59 .long [0f16*20]+16, 0f20, 0f11 ; 60 .long [0f16*20]+16, 0f32, 0f11 ; 61 .long [0f16*21]+16, 0f20, 0f11 ; 62 .long [0f16*21]+16, 0f32, 0f11 ; 63 .long [0f16*22]+16, 0f20, 0f11 ; 64 .long [0f16*22]+16, 0f32, 0f11 ; 65 .long [0f16*23]+16, 0f20, 0f11 ; 66 .long [0f16*23]+16, 0f32, 0f11 ; 67 .long [0f16*24]+16, 0f20, 0f11 ; 68 .long [0f16*24]+16, 0f32, 0f11 ; 69 .long [0f16*24]+24, 0f20, 0f11 ; 70 .long [0f16*24]+24, 
0f32, 0f11 ; 71 .long [0f16*25]+24, 0f20, 0f11 ; 72 .long [0f16*25]+24, 0f32, 0f11 ; 73 .long [0f16*26]+24, 0f20, 0f11 ; 74 .long [0f16*26]+24, 0f32, 0f11 ; 75 .long [0f16*27]+24, 0f20, 0f11 ; 76 .long [0f16*27]+24, 0f32, 0f11 ; 77 .long [0f16*28]+24, 0f20, 0f11 ; 78 .long [0f16*28]+24, 0f32, 0f11 ; 79 .long [0f16*29]+24, 0f20, 0f11 ; 80 .long [0f16*29]+24, 0f32, 0f11 ; 81 .long [0f16*30]+24, 0f20, 0f11 ; 82 .long [0f16*30]+24, 0f32, 0f11 ; 83 .long [0f16*31]+24, 0f20, 0f11 ; 84 .long [0f16*31]+24, 0f32, 0f11 ; 85 .long [0f16*32]+24, 0f20, 0f11 ; 86 .long [0f16*32]+24, 0f32, 0f11 ; 87 ; Coordinate numbers of the various faces' corners, with ; palette numbers and texture numbers. .macro face c1,c1u,c1v,c2,c2u,c2v,c3,c3u,c3v,c4,c4u,c4v,pal,tex .long $(c2),@FLOAT[$(c2u)],@FLOAT[$(c2v)] .long $(c1),@FLOAT[$(c1u)],@FLOAT[$(c1v)] .long $(c3),@FLOAT[$(c3u)],@FLOAT[$(c3v)] .long $(c4),@FLOAT[$(c4u)],@FLOAT[$(c4v)] .long $(pal),$(tex) .endm .align 4 scene_faces: face 0,0,0, 1,1,0, 2,1,1, 3,0,1, 0, 2 1: face 0,0,0, 7,1,0, 6,1,1, 1,0,1, 1, 2 face 0,0,0, 3,1,0, 4,1,1, 7,0,1, 2, 2 face 5,0,0, 6,1,0, 7,1,1, 4,0,1, 0, 3 face 5,0,0, 4,1,0, 3,1,1, 2,0,1, 1, 3 face 5,0,0, 2,1,0, 1,1,1, 6,0,1, 2, 3 face 8,0,0, 10,1,0, 12,1,1, 14,0,1, 0, 2 face 8,0,0, 9,6,0, 11,6,1, 10,0,1, 1, 2 face 8,0,0, 14,1,0, 15,1,6, 9,0,6, 2, 2 face 13,0,0, 11,1,0, 9,1,1, 15,0,1, 0, 3 face 13,0,0, 15,1,0, 14,1,6, 12,0,6, 1, 3 face 13,0,0, 12,6,0, 10,6,1, 11,0,1, 2, 3 face 16,0,0, 17,1,0, 19,1,1, 18,0,1, 0, 0 bits_base_bit = . - 4 bits_base_pal = . 
- 8 bits_inc = 1b - scene_faces face 18,0,0, 19,1,0, 21,1,1, 20,0,1, 0, 0 face 20,0,0, 21,1,0, 23,1,1, 22,0,1, 0, 0 face 22,0,0, 23,1,0, 25,1,1, 24,0,1, 0, 0 face 24,0,0, 25,1,0, 27,1,1, 26,0,1, 0, 0 face 26,0,0, 27,1,0, 29,1,1, 28,0,1, 0, 0 face 28,0,0, 29,1,0, 31,1,1, 30,0,1, 0, 0 face 30,0,0, 31,1,0, 33,1,1, 32,0,1, 0, 0 face 34,0,0, 35,1,0, 37,1,1, 36,0,1, 0, 0 face 36,0,0, 37,1,0, 39,1,1, 38,0,1, 0, 0 face 38,0,0, 39,1,0, 41,1,1, 40,0,1, 0, 0 face 40,0,0, 41,1,0, 43,1,1, 42,0,1, 0, 0 face 42,0,0, 43,1,0, 45,1,1, 44,0,1, 0, 0 face 44,0,0, 45,1,0, 47,1,1, 46,0,1, 0, 0 face 46,0,0, 47,1,0, 49,1,1, 48,0,1, 0, 0 face 48,0,0, 49,1,0, 51,1,1, 50,0,1, 0, 0 face 52,0,0, 53,1,0, 55,1,1, 54,0,1, 0, 0 face 54,0,0, 55,1,0, 57,1,1, 56,0,1, 0, 0 face 56,0,0, 57,1,0, 59,1,1, 58,0,1, 0, 0 face 58,0,0, 59,1,0, 61,1,1, 60,0,1, 0, 0 face 60,0,0, 61,1,0, 63,1,1, 62,0,1, 0, 0 face 62,0,0, 63,1,0, 65,1,1, 64,0,1, 0, 0 face 64,0,0, 65,1,0, 67,1,1, 66,0,1, 0, 0 face 66,0,0, 67,1,0, 69,1,1, 68,0,1, 0, 0 face 70,0,0, 71,1,0, 73,1,1, 72,0,1, 0, 0 face 72,0,0, 73,1,0, 75,1,1, 74,0,1, 0, 0 face 74,0,0, 75,1,0, 77,1,1, 76,0,1, 0, 0 face 76,0,0, 77,1,0, 79,1,1, 78,0,1, 0, 0 face 78,0,0, 79,1,0, 81,1,1, 80,0,1, 0, 0 face 80,0,0, 81,1,0, 83,1,1, 82,0,1, 0, 0 face 82,0,0, 83,1,0, 85,1,1, 84,0,1, 0, 0 face 84,0,0, 85,1,0, 87,1,1, 86,0,1, 0, 0 n_scene_faces = [. - scene_faces] / [1b - scene_faces] ; A command to be sent to the TA. There are two kinds of ; commands, one 32 bytes and one 64 bytes. We reserve space ; for the larger against future need (we don't currently use ; 64-byte commands). .align 4 ta_cmd: .space 64 ; These palettes are straight from tatest; I've just ; reformatted them from C to assembly. It doesn't say where, ; if anywhere, they came from. They're small enough I haven't ; bothered trying to compress them. 
.align 4 palette_0: .long 0xff000000,0xff3c3c3c,0xff413c3c,0xff493c3c,0xff4d3838,0xff553838,0xff593434,0xff613434 .long 0xff653030,0xff6d3030,0xff712c2c,0xff792c2c,0xff822828,0xff862828,0xff8e2424,0xff922424 .long 0xff9a2020,0xff9e2020,0xffa61c1c,0xffaa1c1c,0xffb21818,0xffb61818,0xffbe1414,0xffc71414 .long 0xffcb1010,0xffd31010,0xffd70c0c,0xffdf0c0c,0xffe30808,0xffeb0808,0xffef0404,0xfff70404 .long 0xffff0000,0xffff0400,0xffff0c00,0xffff1400,0xffff1c00,0xffff2400,0xffff2c00,0xffff3400 .long 0xffff3c00,0xffff4500,0xffff4d00,0xffff5500,0xffff5d00,0xffff6500,0xffff6d00,0xffff7500 .long 0xffff7d00,0xffff8600,0xffff8e00,0xffff9600,0xffff9e00,0xffffa600,0xffffae00,0xffffb600 .long 0xffffbe00,0xffffc700,0xffffcf00,0xffffd700,0xffffdf00,0xffffe700,0xffffef00,0xfffff700 .long 0xffffff00,0xffffff04,0xffffff0c,0xffffff14,0xffffff1c,0xffffff24,0xffffff2c,0xffffff34 .long 0xffffff3c,0xffffff45,0xffffff4d,0xffffff55,0xffffff5d,0xffffff65,0xffffff6d,0xffffff75 .long 0xffffff7d,0xffffff86,0xffffff8e,0xffffff96,0xffffff9e,0xffffffa6,0xffffffae,0xffffffb6 .long 0xffffffbe,0xffffffc7,0xffffffcf,0xffffffd7,0xffffffdf,0xffffffe7,0xffffffef,0xfffffff7 .long 0xffffffff,0xffffffff,0xfffffbfb,0xfffffbf7,0xfffff7f3,0xfffff7ef,0xfffff3eb,0xfffff3e7 .long 0xffffefe3,0xffffefdf,0xffffebdb,0xffffebd7,0xffffe7d3,0xffffe7cf,0xffffe3cb,0xffffe3c7 .long 0xffffdfc3,0xffffdfbe,0xffffdbba,0xffffdbb6,0xffffd7b2,0xffffd7ae,0xffffd3aa,0xffffd3a6 .long 0xffffcfa2,0xffffcf9e,0xffffcb9a,0xffffcb96,0xffffc792,0xffffc78e,0xffffc38a,0xffffc386 .long 0xffffbe82,0xffffba7d,0xffffba79,0xffffb675,0xffffb671,0xffffb26d,0xffffb269,0xffffae65 .long 0xffffae61,0xffffaa5d,0xffffaa59,0xffffa655,0xffffa651,0xffffa24d,0xffffa249,0xffff9e45 .long 0xffff9e41,0xffff9a3c,0xffff9a38,0xffff9634,0xffff9630,0xffff922c,0xffff9228,0xffff8e24 .long 0xffff8e20,0xffff8a1c,0xffff8a18,0xffff8614,0xffff8610,0xffff820c,0xffff8208,0xffff7d04 .long 0xffff7900,0xffff7900,0xffff7500,0xffff7100,0xffff6d00,0xffff6900,0xffff6500,0xffff6100 .long 
0xffff5d00,0xffff5900,0xffff5500,0xffff5100,0xffff4d00,0xffff4900,0xffff4500,0xffff4100 .long 0xffff3c00,0xffff3c00,0xffff3800,0xffff3400,0xffff3000,0xffff2c00,0xffff2800,0xffff2400 .long 0xffff2000,0xffff1c00,0xffff1800,0xffff1400,0xffff1000,0xffff0c00,0xffff0800,0xffff0400 .long 0xffff0000,0xffff0000,0xfffb0000,0xfff70000,0xfff70000,0xfff30000,0xffef0000,0xffeb0000 .long 0xffeb0000,0xffe70000,0xffe30000,0xffe30000,0xffdf0000,0xffdb0000,0xffd70000,0xffd70000 .long 0xffd30000,0xffcf0000,0xffcf0000,0xffcb0000,0xffc70000,0xffc30000,0xffc30000,0xffbe0000 .long 0xffba0000,0xffba0000,0xffb60000,0xffb20000,0xffae0000,0xffae0000,0xffaa0000,0xffa60000 .long 0xffa20000,0xffa20000,0xff9e0404,0xff9a0404,0xff960808,0xff920808,0xff8e0c0c,0xff8e0c0c .long 0xff8a1010,0xff861010,0xff821414,0xff7d1414,0xff791818,0xff791818,0xff751c1c,0xff711c1c .long 0xff6d2020,0xff692020,0xff652424,0xff652424,0xff612828,0xff5d2828,0xff592c2c,0xff552c2c .long 0xff513030,0xff513030,0xff4d3434,0xff493434,0xff453838,0xff413838,0xff3c3c3c,0xff3c3c3c palette_1: .long 0xff000000,0xff000000,0xff000004,0xff00000c,0xff000010,0xff000018,0xff000020,0xff000024 .long 0xff00002c,0xff000030,0xff000038,0xff000041,0xff000045,0xff00004d,0xff000051,0xff000059 .long 0xff000061,0xff000065,0xff00006d,0xff000075,0xff000079,0xff000082,0xff000086,0xff00008e .long 0xff000096,0xff00009a,0xff0000a2,0xff0000a6,0xff0000ae,0xff0000b6,0xff0000ba,0xff0000c3 .long 0xff0000cb,0xff0004cb,0xff000ccb,0xff0010cf,0xff0018cf,0xff001cd3,0xff0024d3,0xff0028d3 .long 0xff0030d7,0xff0038d7,0xff003cdb,0xff0045db,0xff0049db,0xff0051df,0xff0055df,0xff005de3 .long 0xff0065e3,0xff0069e3,0xff0071e7,0xff0075e7,0xff007deb,0xff0082eb,0xff008aeb,0xff008eef .long 0xff0096ef,0xff009ef3,0xff00a2f3,0xff00aaf3,0xff00aef7,0xff00b6f7,0xff00bafb,0xff00c3fb .long 0xff00cbff,0xff04cbff,0xff0ccbff,0xff14cfff,0xff1ccfff,0xff24d3ff,0xff2cd3ff,0xff34d3ff .long 0xff3cd7ff,0xff45d7ff,0xff4ddbff,0xff55dbff,0xff5ddbff,0xff65dfff,0xff6ddfff,0xff75e3ff .long 
0xff7de3ff,0xff86e3ff,0xff8ee7ff,0xff96e7ff,0xff9eebff,0xffa6ebff,0xffaeebff,0xffb6efff .long 0xffbeefff,0xffc7f3ff,0xffcff3ff,0xffd7f3ff,0xffdff7ff,0xffe7f7ff,0xffeffbff,0xfff7fbff .long 0xffffffff,0xfffbffff,0xfff7ffff,0xfff3ffff,0xffebffff,0xffe7ffff,0xffe3ffff,0xffdbffff .long 0xffd7ffff,0xffd3ffff,0xffcbffff,0xffc7ffff,0xffc3ffff,0xffbaffff,0xffb6ffff,0xffb2ffff .long 0xffaaffff,0xffa6ffff,0xffa2ffff,0xff9effff,0xff96ffff,0xff92ffff,0xff8effff,0xff86ffff .long 0xff82ffff,0xff7dffff,0xff75ffff,0xff71ffff,0xff6dffff,0xff65ffff,0xff61ffff,0xff5dffff .long 0xff55ffff,0xff51ffff,0xff4dffff,0xff49ffff,0xff41ffff,0xff3cffff,0xff38ffff,0xff30ffff .long 0xff2cffff,0xff28ffff,0xff20ffff,0xff1cffff,0xff18ffff,0xff10ffff,0xff0cffff,0xff08ffff .long 0xff00ffff,0xff00fbff,0xff00f7ff,0xff00f3ff,0xff00ebff,0xff00e7ff,0xff00e3ff,0xff00dbff .long 0xff00d7ff,0xff00d3ff,0xff00cbff,0xff00c7ff,0xff00c3ff,0xff00baff,0xff00b6ff,0xff00b2ff .long 0xff00aaff,0xff00a6ff,0xff00a2ff,0xff009eff,0xff0096ff,0xff0092ff,0xff008eff,0xff0086ff .long 0xff0082ff,0xff007dff,0xff0075ff,0xff0071ff,0xff006dff,0xff0065ff,0xff0061ff,0xff005dff .long 0xff0055ff,0xff0051ff,0xff004dff,0xff0049ff,0xff0041ff,0xff003cff,0xff0038ff,0xff0030ff .long 0xff002cff,0xff0028ff,0xff0020ff,0xff001cff,0xff0018ff,0xff0010ff,0xff000cff,0xff0008ff .long 0xff0000ff,0xff0000fb,0xff0000f7,0xff0000f3,0xff0000ef,0xff0000eb,0xff0000e7,0xff0000e3 .long 0xff0000df,0xff0000db,0xff0000d7,0xff0000d3,0xff0000cf,0xff0000cb,0xff0000c7,0xff0000c3 .long 0xff0000be,0xff0000ba,0xff0000b6,0xff0000b2,0xff0000ae,0xff0000aa,0xff0000a6,0xff0000a2 .long 0xff00009e,0xff00009a,0xff000096,0xff000092,0xff00008e,0xff00008a,0xff000086,0xff000082 .long 0xff00007d,0xff000079,0xff000075,0xff000071,0xff00006d,0xff000069,0xff000065,0xff000061 .long 0xff00005d,0xff000059,0xff000055,0xff000051,0xff00004d,0xff000049,0xff000045,0xff000041 .long 0xff00003c,0xff000038,0xff000034,0xff000030,0xff00002c,0xff000028,0xff000024,0xff000020 .long 
0xff00001c,0xff000018,0xff000014,0xff000010,0xff00000c,0xff000008,0xff000000,0xff000000 palette_2: .long 0xff000000,0xff9208e7,0xff9208e3,0xff9608e3,0xff9a04df,0xff9e04df,0xff9e04db,0xffa204db .long 0xffa600d7,0xffaa00d7,0xffaa00d3,0xffae00cf,0xffb200cf,0xffb600cb,0xffb600c7,0xffba00c7 .long 0xffbe00c3,0xffbe00be,0xffc300be,0xffc700ba,0xffc700b6,0xffcb00b6,0xffcf00b2,0xffcf00ae .long 0xffd300aa,0xffd700aa,0xffd700a6,0xffdb04a2,0xffdb049e,0xffdf049e,0xffdf049a,0xffe30896 .long 0xffe30892,0xffe70892,0xffe7088e,0xffeb0c8a,0xffeb0c86,0xffef0c82,0xffef1082,0xffef107d .long 0xfff31479,0xfff31475,0xfff31475,0xfff71871,0xfff7186d,0xfff71c69,0xfffb1c65,0xfffb2065 .long 0xfffb2061,0xfffb245d,0xffff2859,0xffff2859,0xffff2c55,0xffff2c51,0xffff304d,0xffff344d .long 0xffff3449,0xffff3845,0xffff3c45,0xffff3c41,0xffff413c,0xffff453c,0xffff4538,0xffff4934 .long 0xffff4d34,0xffff4d30,0xffff512c,0xffff552c,0xffff5928,0xffff5928,0xfffb5d24,0xfffb6120 .long 0xfffb6520,0xfffb651c,0xfff7691c,0xfff76d18,0xfff77118,0xfff37514,0xfff37514,0xfff37914 .long 0xffef7d10,0xffef8210,0xffef820c,0xffeb860c,0xffeb8a0c,0xffe78e08,0xffe79208,0xffe39208 .long 0xffe39608,0xffdf9a04,0xffdf9e04,0xffdb9e04,0xffdba204,0xffd7a600,0xffd7aa00,0xffd3aa00 .long 0xffcfae00,0xffcfb200,0xffcbb600,0xffc7b600,0xffc7ba00,0xffc3be00,0xffbebe00,0xffbec300 .long 0xffbac700,0xffb6c700,0xffb6cb00,0xffb2cf00,0xffaecf00,0xffaad300,0xffaad700,0xffa6d700 .long 0xffa2db04,0xff9edb04,0xff9edf04,0xff9adf04,0xff96e308,0xff92e308,0xff92e708,0xff8ee708 .long 0xff8aeb0c,0xff86eb0c,0xff82ef0c,0xff82ef10,0xff7def10,0xff79f314,0xff75f314,0xff75f314 .long 0xff71f718,0xff6df718,0xff69f71c,0xff65fb1c,0xff65fb20,0xff61fb20,0xff5dfb24,0xff59ff28 .long 0xff59ff28,0xff55ff2c,0xff51ff2c,0xff4dff30,0xff4dff34,0xff49ff34,0xff45ff38,0xff45ff3c .long 0xff41ff3c,0xff3cff41,0xff3cff45,0xff38ff45,0xff34ff49,0xff34ff4d,0xff30ff4d,0xff2cff51 .long 0xff2cff55,0xff28ff59,0xff28ff59,0xff24fb5d,0xff20fb61,0xff20fb65,0xff1cfb65,0xff1cf769 .long 
0xff18f76d,0xff18f771,0xff14f375,0xff14f375,0xff14f379,0xff10ef7d,0xff10ef82,0xff0cef82 .long 0xff0ceb86,0xff0ceb8a,0xff08e78e,0xff08e792,0xff08e392,0xff08e396,0xff04df9a,0xff04df9e .long 0xff04db9e,0xff04dba2,0xff00d7a6,0xff00d7aa,0xff00d3aa,0xff00cfae,0xff00cfb2,0xff00cbb6 .long 0xff00c7b6,0xff00c7ba,0xff00c3be,0xff00bebe,0xff00bec3,0xff00bac7,0xff00b6c7,0xff00b6cb .long 0xff00b2cf,0xff00aecf,0xff00aad3,0xff00aad7,0xff00a6d7,0xff04a2db,0xff049edb,0xff049edf .long 0xff049adf,0xff0896e3,0xff0892e3,0xff0892e7,0xff088ee7,0xff0c8aeb,0xff0c86eb,0xff0c82ef .long 0xff1082ef,0xff107def,0xff1479f3,0xff1475f3,0xff1475f3,0xff1871f7,0xff186df7,0xff1c69f7 .long 0xff1c65fb,0xff2065fb,0xff2061fb,0xff245dfb,0xff2859ff,0xff2859ff,0xff2c55ff,0xff2c51ff .long 0xff304dff,0xff344dff,0xff3449ff,0xff3845ff,0xff3c45ff,0xff3c41ff,0xff413cff,0xff453cff .long 0xff4538ff,0xff4934ff,0xff4d34ff,0xff4d30ff,0xff512cff,0xff552cff,0xff5928ff,0xff5928ff .long 0xff5d24fb,0xff6120fb,0xff6520fb,0xff651cfb,0xff691cf7,0xff6d18f7,0xff7118f7,0xff7514f3 .long 0xff7514f3,0xff7914f3,0xff7d10ef,0xff8210ef,0xff820cef,0xff860ceb,0xff8a0ceb,0xff8e08e7 ; Video initialization parameters. Most of these I don't ; understand; what documentation I have has been saved here as ; comments. The comment "magic" means "meaning unknown". ; ; These lists are taken pretty much directly from tatest, which ; says of them "These values mainly from Dans 3dtest ; program...". ; ; Since these are longwords stores, the offset must always be ; multiples of 4; the terminator is any value which isn't. ; (We use 1, but set_params accepts anything whose low two ; bits are nonzero.) ; .macro param offset, value .word $(offset) .long $(value) .endm .macro endparam .word 1 .endm .align 2 three_d_params: param 0x80a8, 0x15d1c951 ; magic param 0x80a0, 0x00000020 ; magic param 0x8008, 0x00000000 ; TA out of reset param 0x8048, 0x00000009 ; "alpha config" - ? 
param 0x8068, [X_SIZE<<16]|0 ; pixel clipping x param 0x806c, [Y_SIZE<<16]|0 ; pixel clipping y param 0x8110, 0x00093f39 ; magic param 0x8098, 0x00800408 ; magic param 0x804c, [X_SIZE*2]/8 ; "display align" - ? param 0x8078, 0f1.0 param 0x8084, 0x00000000 ; magic param 0x8030, 0x00000101 ; magic param 0x80b0, 0x007f7f7f ; fog table colour param 0x80b4, 0x007f7f7f ; fog vertex colour param 0x80c0, 0x00000000 ; colour clamp min param 0x80bc, 0xffffffff ; colour clamp max param 0x8080, 0x00000007 ; magic param 0x8074, 0x00000001 ; "cheap shadow" - ? param 0x807c, 0x0027df77 ; magic param 0x8008, 0x00000001 ; TA into reset param 0x8008, 0x00000000 ; TA out of reset param 0x80e4, 0x00000000 ; "stride width" - ? param 0x6884, 0x00000000 ; disable all interrupt enables param 0x6930, 0x00000000 param 0x6938, 0x00000000 param 0x6900, 0xffffffff ; reset all pending interrupts param 0x6908, 0xffffffff param 0x6930, 0x002807ec ; re-enable some events (which?) param 0x6938, 0x0000000e param 0x80b8, 0x0000ff07 ; fog density (meanings?) param 0x80b4, 0x007f7f7f ; fog vertex colour param 0x80b0, 0x007f7f7f ; fog table colour param 0x8108, 0x00000003 ; 32bit palette (?) endparam screen_params: param 0x80e8, 0x00160000 ; screen control (?) param 0x8044, 0x00800000 ; pixel mode ("vb+0x11" - ?) param 0x805c, 0x00000000 ; size modulo and display lines ("vb+0x17" - ?) param 0x80d0, 0x00000100 ; interlace flags (bit meanings?) param 0x80d8, 0x020c0359 ; magic param 0x80cc, 0x001501fe ; magic param 0x80d4, 0x007e0345 ; horizontal border (meaning? - see below) param 0x80dc, 0x00240204 ; vertical position (meaning?) param 0x80e0, 0x07d6c63f ; sync control (meaning?) param 0x80ec, 0x000000a4 ; horizontal position (meaning?) param 0x80f0, 0x00120012 ; vertical border (meanings?) param 0x80c8, 0x03450000 ; "set to same as border H in 80d4" - ? 
param 0x8068, [X_SIZE-1]<<16 ; (X resolution - 1) << 16 param 0x806c, [Y_SIZE-1]<<16 ; (Y resolution - 1) << 16 param 0x804c, 0x000000a0 ; "display align" - ? param 0x8118, 0x00008040 ; magic param 0x80f4, 0x00000401 ; "anti-aliasing" - ? param 0x8048, 0x00000009 ; "alpha config" - ? param 0x7814, 0x00000000 ; "more interrupt control stuff" - ? param 0x7834, 0x00000000 param 0x7854, 0x00000000 param 0x7874, 0x00000000 param 0x78bc, 0x4659404f param 0x8040, 0x00000000 ; border colour endparam ; "???" here means "not documented in tatest at all" ; The "2" in these is the offset from the beginning of the ; param to the place where we store the (longword) value. cmdlist_params: param 0x8008, 0x00000001 ; TA into reset param 0x8008, 0x00000000 ; TA out of reset cmdlist_param_tilebuf_a = 2 + . - cmdlist_params param 0x8124, 0 param 0x812c, 0 ; ??? cmdlist_param_cmdlist = 2 + . - cmdlist_params param 0x8128, 0 param 0x8130, 0 ; ??? param 0x813c, [[[Y_SIZE/32]-1]<<16] | [[X_SIZE/32]-1] cmdlist_param_tilebuf_b = 2 + . - cmdlist_params param 0x8164, 0 param 0x8140, 0x00100002 ; ??? param 0x8144, 0x80000000 ; confirm settings endparam ; Texture twiddling table. Why "twiddle"? That's the term ; used in tatest's comments. It appears to be interleaving ; the bits of the numbers that form texture coordinates, so ; that the texels conceptually at (x,y) and (x+1,y), where ; x=ABCDEFG0 and y=abcdefgh (say), are stored at offsets ; aAbBcCdDeEfFgGh0 (x) and aAbBcCdDeEfFgGh1 (x+1). ; ; Why do it? Because, in the words of another tatest comment, ; "palette based textures can not be non-twiddled". Why ; design hardware that way? MC, in email, passed along an ; explanation from someone who worked on the hardware, saying ; that twiddled textures provide higher performance, so the ; designers figured the only reason to use non-twiddled ; textures was to use a rendered frame as a texture (for, eg, ; reflections). Since the renderer output is always ; true-colour, that's all they implemented. 
(The ; "non-twiddled" bit got reused for a different meaning for ; palette-based textures.) ; ; tatest generates a 1024-entry table. We reserve (and set up) ; that much space, but as of this writing use only 256 entries ; of it. ; ; One possible note to beware of is that this may not apply to ; the large dimension of non-square textures. Done naïvely, ; doing this for non-square textures could use excessive ; amounts of memory; it would appear, for example, that an ; 8x256 texture would take up almost as much memory space as a ; 128x256 one (because of all the gaps between the address ; bits). But it may be smarter than that; when I mentioned ; that in mail to MC, he said he had a fuzzy memory that the ; high bits of non-square textures aren't twiddled, that, eg, ; an 8x256 texture in memory consists of 32 consecutive 8x8 ; (twiddled) blocks. But he also warned that memory could be ; wrong, so test this before depending on it. ; .align 2 twiddles: .space 1024*2 ; Current double-buffering buffer number. Always 0 or 1. curbuf: .space 1 ; When set, this causes printing of debugging info, but for ; only one cycle; it's cleared when the info is printed. debug: .byte 0 ; Character bitmaps. These are used to set up texture_font_0 ; and texture_font_1. These are non-twiddled versions; they ; also contain 0 and 1, one bit per texel, whereas the ; versions in video RAM contain 0 and 96, one byte per texel. ; (0 and 96 because those are where the palettes keep the ; colours we use for font characters.) 
; fontrow a..h: emit one byte describing one 8-texel row of a glyph.
; Argument a lands in bit 0, b in bit 1, ... h in bit 7; init_textures
; reads bit N of a row as the texel at x = N.
	.macro	fontrow a,b,c,d,e,f,g,h
	.byte	[$(a)<<0] | [$(b)<<1] | [$(c)<<2] | [$(d)<<3] | [$(e)<<4] | [$(f)<<5] | [$(g)<<6] | [$(h)<<7]
	.endm

; Glyph "0": eight rows, row y = 0 first.
init_font_0:
	fontrow	0,0,1,1,1,0,0,0
	fontrow	0,1,0,0,0,1,0,0
	fontrow	0,1,0,0,0,1,0,0
	fontrow	0,1,0,0,0,1,0,0
	fontrow	0,1,0,0,0,1,0,0
	fontrow	0,1,0,0,0,1,0,0
	fontrow	0,1,0,0,0,1,0,0
	fontrow	0,0,1,1,1,0,0,0

; Glyph "1": eight rows, row y = 0 first.
init_font_1:
	fontrow	0,0,0,1,0,0,0,0
	fontrow	0,0,1,1,0,0,0,0
	fontrow	0,0,0,1,0,0,0,0
	fontrow	0,0,0,1,0,0,0,0
	fontrow	0,0,0,1,0,0,0,0
	fontrow	0,0,0,1,0,0,0,0
	fontrow	0,0,0,1,0,0,0,0
	fontrow	0,0,1,1,1,0,0,0

texture_bit_cursor:
	.byte	0

	.align	2
main:
; Entry from cdcode.
;
; cdcode has already done everything startup.s would do for us except
; (1) setting up fpscr and (2) clearing bss.  There is no bss, since no
; conventional linker is involved, so only FPSCR remains; the VBR needs
; setting as well.  FD, RB, and BL must be clear in SR.  r10-r15 need
; no copying across the possible bank switch because only r0-r7 are
; banked.  r10-r14 are pushed so that we may use them freely; their
; values matter only when we go back to cdcode, and that happens only
; under our control.
;
; Push order is r14, r13, r12, r11, r10; the exit path pops in the
; reverse order.
	mov.l	r14,@-r15
	mov.l	r13,@-r15
	mov.l	r12,@-r15
	mov.l	r11,@-r15
	mov.l	r10,@-r15
	ldc	r14,gbr			; GBR = r14 (the SCIF base cdcode hands us)

	; SR &= ~(FD | RB | BL)
	stc	sr,r1
	SETS.L	#~[SR_FD|SR_RB|SR_BL],r2
	and	r2,r1
	ldc	r1,sr
	; If that switched register banks, r0-r7 are now different
	; registers; nothing loaded above may be assumed to survive.

	; FPSCR = 0, and tell the assembler so.
	mov	#0,r1
	lds	r1,fpscr
	.sz	0
	.pr	0

	; Install our exception/interrupt vector base.
	SETS.L	#intvec,r0
	ldc	r0,vbr

	; One-time initialization, in this order.
	bsr	clear_vram
	nop
	bsr	init_maple
	nop
	bsr	init_powervr
	nop
	bsr	init_video
	nop
	bsr	init_palette
	nop
	bsr	init_twiddling
	nop
	bsr	init_textures
	nop
	bsr	init_tiledesc
	nop
	bsr	init_3dvalues
	nop

	; Main loop: draw a frame, then poll for a character.  A
	; negative r0 from nbgetchar sends us round again; 'd' goes to
	; setdebug (which re-enters the loop); any other character
	; falls through to done.
1:	bsr	one_frame
	nop
	bsr	nbgetchar
	nop
	cmp/pz	r0
	bf	1b
	cmp/eq	#'d,r0
	bt	setdebug

done:
	; Emit CR, LF.
	bsr	putchar
	mov	#13,r1
	bsr	putchar
	mov	#10,r1
	; Turn SR.BL (back) on before returning to cdcode.
; Exit path of main: set SR.BL, restore r10-r14 from the stack, and
; return through the address cdcode left in r11.
	stc	sr,r1
	SETS.L	#SR_BL,r2
	or	r2,r1
	ldc	r1,sr
	mov.l	@r15+,r10
	mov.l	@r15+,r11
	mov.l	@r15+,r12
	mov.l	@r15+,r13
	lds	r11,pr			; return address lives in r11, not pr
	rts
	mov.l	@r15+,r14		; (delay slot) last pop

; setdebug: set the one-shot debug flag, then rejoin main's frame loop.
; The "1b" below is the loop label inside main, so no "1:" label may be
; introduced between that loop and this point.
setdebug:
	SETS.L	#debug,r1
	SETS.L	#1,r0
	bra	1b
	mov.b	r0,@r1			; (delay slot) debug = 1

; clear_vram: zero all VRAM_SIZE bytes of video RAM through the SH-4
; store queues.
;
; In:	nothing
; Out:	nothing
; Uses:	r0-r5; no stack
;
; Both QACR registers are pointed at the 64MB area holding
; VRAM_BASE_64, the 64 bytes of store-queue data are zeroed, and then
; one pref is issued per 32 bytes of video RAM.  Per the SH7750
; store-queue description, each pref bursts one 32-byte queue out to
; memory, address bit 5 selects which of the two queues is used, and a
; store to a queue waits for that queue's pending burst to finish.
clear_vram:
	SETS.L	#QACR0,r1
	SETS.L	#QACR1,r2
	SETS.L	#[[VRAM_BASE_64>>26]&7]<<2,r3
	SETS.L	#STOREQ_BASE+[4*16],r4	; one past the end of both queues
	SETS.L	#0,r5
	mov.l	r3,@r1
	mov.l	r3,@r2
	; Zero 16 longwords, working down from r4.  The delay-slot store
	; runs on every pass including the last, so exactly 16 stores
	; happen and r4 finishes at STOREQ_BASE.
	SETS.L	#16,r0
1:	dt	r0
	bf/s	1b
	mov.l	r5,@-r4
	; Burst the zeroed queues over the whole of video RAM.
	SETS.L	#VRAM_SIZE/32,r1
	SETS.L	#[VRAM_BASE_64&0x03ffffc0]|0xe0000000,r2
1:	pref	@r2
	dt	r1
	bf/s	1b
	add	#32,r2
	; Wait for both queues to drain: one dummy store to each.  The
	; second store must land at offset 32 (4*8), which is in the
	; second queue; offset 64 (4*16) has address bit 5 clear and so
	; would hit the first queue a second time, leaving the second
	; queue's final burst unwaited-for.
	mov.l	r5,@r4			; queue 0 (STOREQ_BASE + 0)
	add	#4*8,r4
	rts
	mov.l	r5,@r4			; (delay slot) queue 1 (STOREQ_BASE + 32)

; set_params: write a table of register values into the video
; register block.
;
; In:	r1 = pointer to a table built with the param/endparam macros
; Out:	nothing
; Uses:	r0-r4 (and, when debug_set_params is set, the stack plus
;	whatever putchar and printhex8 use)
;
; Each entry is a 16-bit offset followed by a 32-bit value.  Entries
; are only 2-byte aligned, so the value is fetched as two 16-bit
; halves, low half first.  An offset whose low two bits are not both
; zero ends the table.  Values are stored at VIDREG_BASE + offset.
set_params:
	.if debug_set_params
	sts.l	pr,@-r15
	.endif
	; r1 points to params table
	SETS.L	#VIDREG_BASE,r2
1:	mov.w	@r1+,r0			; offset (sign-extended by the load)
	tst	#3,r0
	bf/s	1f			; low bits set: terminator
	extu.w	r0,r0			; (delay slot) undo the sign extension
	mov.w	@r1+,r3			; value, low half
	mov.w	@r1+,r4			; value, high half
	SHLL	#16,r4
	extu.w	r3,r3
	or	r3,r4			; r4 = full 32-bit value
	add	r2,r0			; r0 = register address
	.if debug_set_params
	; Print "*<address>=<value>" CR LF.  r0 and r1 are saved
	; around the calls; r2 and r4 are presumably preserved by
	; putchar/printhex8 - NOTE(review): confirm, they are defined
	; outside the text reviewed here.
	mov.l	r0,@-r15
	mov.l	r1,@-r15
	sts.l	pr,@-r15
	bsr	putchar
	mov	#'*,r1
	bsr	printhex8
	mov.l	@(8,r15),r1		; the saved r0 (register address)
	bsr	putchar
	mov	#'=,r1
	bsr	printhex8
	mov	r4,r1
	bsr	putchar
	mov	#13,r1
	bsr	putchar
	mov	#10,r1
	lds.l	@r15+,pr
	mov.l	@r15+,r1
	mov.l	@r15+,r0
	.endif
	bra	1b
	mov.l	r4,@r0			; (delay slot) store the value
1:
	.if debug_set_params
	lds.l	@r15+,pr
	.endif
	rts
	nop

; init_maple: walk the table at 9: below, which holds (address, value)
; longword pairs, storing each value at its address.  An address of
; zero ends the walk.
;
; In:	nothing
; Out:	nothing
; Uses:	r0-r2
;
; NOTE(review): in the copy of this file that was reviewed, the table
; breaks off partway through its third entry and the text resumes in
; the middle of the comment that introduces init_textures.  The lines
; from the third .long onward are kept exactly as found; restore the
; missing text from the original source before relying on them.
init_maple:
	mova	9f,r0
1:	mov.l	@r0+,r1
	tst	r1,r1
	bt	1f
	mov.l	@r0+,r2
	bra	1b
	mov.l	r2,@r1
1:	rts
	nop
	.align	4
9:	.long	BUS_RESET, BUS_RESET_VALUE
	.long	BUS_RESET2, BUS_RESET2_VALUE
	.long	BUS_SPEED, SPEED_2MBPS|[50000<>1)] =
;	    compute_texture(i, j, 0) | (compute_texture(i, j+1, 0)<<8);
;	/* Texture 1 = Julia */
;	tex[1][twiddletab[i]|(twiddletab[j]>>1)] =
;	    compute_texture(i, j, 1) | (compute_texture(i, j+1, 1)<<8);
; }
;
; We change some names (eg, compute_texture_a and compute_texture_b
; rather than a third arg to compute_texture), but it's otherwise
; pretty similar.  We keep a lot of stuff on the stack rather than in
; registers; while we might have enough registers, this means I don't
; have to think about register allocation as much.  It also means the
; texture computation functions have a much freer hand with registers.
;
; Arguably we should write these through 0x84000000 and then flush the
; d$, but this is initialization code and hence uncached performance
; is acceptable here.
;
; init_textures, first half: fill the two 256x256 cube-face textures.
; For every y in 0..255 and every even x in 0..254, the values at
; (x,y) and (x+1,y) are computed with compute_texture_a and
; compute_texture_b, packed into 16 bits (x in the low byte, x+1 in
; the high byte), and stored at the twiddled offset
; twiddles[x] | (twiddles[y] << 1) in texture_cubeface_1 (texture A)
; and texture_cubeface_2 (texture B).  All loop state lives on the
; stack, so the compute routines may use any register.
init_textures:
	sts.l pr,@-r15
	; Cube-face textures.
	SETS.L #twiddles,r7
	mov.l r7,@-r15
	SETS.L #texture_cubeface_1,r8
	SETS.L #texture_cubeface_2,r6
	mov.l r6,@-r15
	mov.l r8,@-r15
	mov #0,r0
	mov.l r0,@-r15			; y = 0
2:	mov #0,r0
	mov.l r0,@-r15			; x = 0
	; stack = x y tex0 tex1 twiddles
1:	mov.l @r15,r1			; x
	bsr compute_texture_a
	mov.l @(4,r15),r2		; y
	mov.l r0,@-r15			; valA(x,y)
	mov.l @(4,r15),r1		; x
	mov.l @(8,r15),r2		; y
	bsr compute_texture_a
	add #1,r1			; r0=valA(x+1,y)
	mov.l @r15+,r1			; valA(x,y)
	SHLL #8,r0
	or r1,r0			; combined vals
	mov.l r0,@-r15
	mov.l @(4,r15),r1		; x
	bsr compute_texture_b
	mov.l @(8,r15),r2		; y
	mov.l r0,@-r15			; valB(x,y)
	mov.l @(8,r15),r1		; x
	mov.l @(12,r15),r2		; y
	bsr compute_texture_b
	add #1,r1			; r0=valB(x+1,y)
	mov.l @r15+,r1			; valB(x,y)
	SHLL #8,r0
	or r1,r0			; combined vals
	mov.l r0,@-r15
	; stack = valsB valsA x y tex0 tex1 twiddles
	mov.l @(24,r15),r2		; twiddles
	mov.l @(8,r15),r1		; x
	SHLL #1,r1			; the table is indexed by 16-bit word
	add r2,r1
	mov.w @r1,r1
	mov.l @(12,r15),r3		; y
	SHLL #1,r3
	add r2,r3
	mov.w @r3,r3
	SHLL #1,r3
	or r1,r3
	; r3 now holds twiddled texture offset
	mov.l @(16,r15),r2		; tex0
	add r3,r2
	.if debug_texture
	; Prints x, y, the tex0 store address and valsA.
	bsr printhex8
	mov.l @(8,r15),r1
	bsr putchar
	mov #' ,r1
	bsr printhex8
	mov.l @(12,r15),r1
	bsr putchar2
	mov #' ,r1
	bsr printhex8
	mov r2,r1
	bsr putchar
	mov #' ,r1
	bsr printhex8
	mov.l @(4,r15),r1
	bsr putchar2
	mov #' ,r1
	.endif
	mov.l @(4,r15),r0		; valA
	mov.w r0,@r2
	mov.l @(20,r15),r2		; tex1
	add r3,r2
	.if debug_texture
	; Prints the tex1 store address and valsB, then CR LF.
	bsr printhex8
	mov r2,r1
	bsr putchar
	mov #' ,r1
	bsr printhex8
	mov.l @r15,r1
	bsr putchar
	mov #13,r1
	bsr putchar
	mov #10,r1
	.endif
	mov.l @r15,r0			; valB
	mov.w r0,@r2
	add #8,r15			; pop valA, valB
	SETS.L #256,r1
	mov.l @r15,r0			; x
	add #2,r0			; two pixels were written per pass
	cmp/hs r1,r0
	bf/s 1b
	mov.l r0,@r15
	add #4,r15			; pop x
	mov.l @r15,r0			; y
	add #1,r0
	cmp/hs r1,r0
	bf/s 2b
	mov.l r0,@r15
	add #16,r15			; pop remaining
	; Font textures.
; init_textures, second half.  font_setup holds n_font_setup pairs of
; longwords (init data pointer, texture area pointer).  For each pair
; the 8x8 one-bit bitmap is expanded to an 8-bit twiddled texture, two
; horizontally adjacent pixels per 16-bit store.  A set bit becomes
; the byte value 96 and a clear bit becomes 0.  x runs 8,6,4,2 and y
; runs 8..1; the pixels handled are (x-2,y-1) and (x-1,y-1).
	SETS.L #font_setup,r9
	SETS.L #n_font_setup,r8
	SETS.L #twiddles,r7
5:	mov.l @r9+,r1			; init data
	mov.l @r9+,r2			; texture area
	SETS.L #8,r3			; x
4:	SETS.L #8,r4			; y
3:	mov r1,r0
	add #-1,r0
	mov.b @(r0,r4),r0		; bitmap row y-1
	SHLL #2,r0
	neg r3,r5
	shld r5,r0			; (row<<2)>>x: bit 0 = pixel x-2, bit 1 = pixel x-1
	tst #1,r0
	bt 1f
	bra 2f
	mov #96,r5
1:	mov #0,r5
2:	tst #2,r0
	bt 1f
	bra 2f
	mov #96,r0
1:	mov #0,r0
2:
	; r0 holds (x-1,y-1); r5 holds (x-2,y-1)
	SHLL #8,r0
	or r0,r5
	; r5 now holds combined (x-2,y-1) and (x-1,y-1) values
	mov r3,r0
	add #-2,r0
	SHLL #1,r0
	mov.w @(r0,r7),r6		; twiddled x-2
	mov r4,r0
	add #-1,r0
	SHLL #1,r0
	mov.w @(r0,r7),r0		; twiddled y-1
	SHLL #1,r0
	or r6,r0
	dt r4
	bf/s 3b
	mov.w r5,@(r0,r2)
	dt r3
	dt r3
	bf 4b
	dt r8
	bf 5b
	lds.l @r15+,pr
	rts
	nop

; Texture A is diagonal stripes; texture B is concentric circles
; centred on (0,80).

; compute_texture_a: r1 = x, r2 = y; returns 255&(x+y) in r0.
; Modifies r1.
compute_texture_a:
	; return(255&(x+y))
	add r2,r1
	rts
	extu.b r1,r0

; compute_texture_b: r1 = x, r2 = y; returns the low 8 bits of the
; truncated distance from (x,y) to (0,80) in r0.
; Modifies r2, fr0, fr1 and fpul.
compute_texture_b:
	; return(255&(int)hypot(x,y-80))
	lds r1,fpul
	float fpul,fr0
	add #-80,r2
	lds r2,fpul
	float fpul,fr1
	fmul fr0,fr0
	fmul fr1,fr1
	fadd fr1,fr0
	fsqrt fr0
	ftrc fr0,fpul
	sts fpul,r0
	rts
	extu.b r0,r0

	SETCONST

; init_tiledesc: call setup_tiledesc once per buffer, passing
; tiledescs[i] in r2 and tilebuffers[i] in r3, and store each returned
; r0 into tiledesc_cookies[i] (i = 0, 1).  Finally set curbuf to 0.
init_tiledesc:
	sts.l pr,@-r15
	SETS.L #tiledesc_cookies,r4
	SETS.L #tilebuffers,r5
	SETS.L #tiledescs,r6
	; Park what the second call needs on the stack, since
	; setup_tiledesc is free to use r0-r9.
	mov.l r4,@-r15
	mov.l @(4,r5),r0
	mov.l r0,@-r15
	mov.l @(4,r6),r0
	mov.l r0,@-r15
	mov.l @r6,r2
	bsr setup_tiledesc
	mov.l @r5,r3
	mov.l @(8,r15),r4
	mov.l r0,@r4
	mov.l @r15+,r2
	bsr setup_tiledesc
	mov.l @r15+,r3
	mov.l @r15+,r4
	mov.l r0,@(4,r4)
	SETS.L #curbuf,r1
	mov #0,r0
	lds.l @r15+,pr
	rts
	mov.b r0,@r1

setup_tiledesc:
	; in tatest terms, this is ta_create_tile_descriptors.  ptr is
	; r2, buf is r3, w is X_SIZE/32, and h is Y_SIZE/32.  No
	; registers r0-r9 are important upon return; they all are
	; available to us.
	; vr = ptr
	mov r2,r4			; vr is r4
	; bf = ((unsigned int)buf)&0x007fffff (buf is dead after this)
	SETS.L #0x007fffff,r0
	and r0,r3			; bf is r3 from here on
	; strbase = (((unsigned int)ptr)&0x007fffff)|0x80000000
	; ptr is _not_ dead here, but 0x007fffff is.
; setup_tiledesc, continued: finish computing strbase, then write the
; descriptor header (18 zero longwords, 0x10000000, and 0x80000000
; five times).
	SETS.L #0x80000000,r7
	and r2,r0
	or r0,r7			; strbase is r7
	; for (18 loops) *vr++ = 0
	mov #18,r1
	mov #0,r0
1:	mov.l r0,@r4
	dt r1
	bf/s 1b
	add #4,r4
	; *vr++ = 0x10000000
	; *vr++ = 0x80000000 (five times)
	SETS.L #0x10000000,r1
	mov.l r1,@r4
	SETS.L #0x80000000,r1
	mov.l r1,@(4,r4)
	mov.l r1,@(8,r4)
	mov.l r1,@(12,r4)
	mov.l r1,@(16,r4)
	mov.l r1,@(20,r4)
	add #24,r4
	SETS.L #X_SIZE/32,r8		; w is r8
	SETS.L #Y_SIZE/32,r9		; h is r9
	; NOTE(review): the comment line below is reproduced as found.
	; Text from the '<' of the loop condition up to the end of a
	; later quoted string appears to have been lost, taking with it
	; the rest of setup_tiledesc and whatever followed it.  Restore
	; it from a pristine copy of the file.
	; for (x=0;x "
	.align 2

; setup_cmd_list: patch the current buffer's tile-buffer and
; command-list addresses into the cmdlist_params table and apply the
; table with set_params, then read the register at VIDREG_BASE+0x8144
; (the value read is left in r0).
setup_cmd_list:
	; In tatest terms, this is ta_set_target, but with args
	; computed here based on curbuf rather than being passed in.
	sts.l pr,@-r15
	SETS.L #curbuf,r1
	mov.b @r1,r1
	SHLL #2,r1			; curbuf as a longword index
	SETS.L #cmdlists,r2
	SETS.L #tilebuffers,r3
	add r1,r2
	mov.l @r2,r2			; cmdlists[curbuf]
	add r1,r3
	mov.l @r3,r3			; tilebuffers[curbuf]
	SETS.L #0x007fffff,r4
	and r4,r2
	and r4,r3
	; set_params tables hold each value as low word, then high word.
	swap.w r2,r4			; low 16 bits of r4 = high half of cmdlist
	swap.w r3,r5			; low 16 bits of r5 = high half of tilebuf
	SETS.L #cmdlist_params,r0
	SETS.L #cmdlist_param_tilebuf_a,r1
	mov.w r3,@(r0,r1)
	add #2,r1
	mov.w r5,@(r0,r1)
	SETS.L #cmdlist_param_tilebuf_b,r1
	mov.w r3,@(r0,r1)
	add #2,r1
	mov.w r5,@(r0,r1)
	SETS.L #cmdlist_param_cmdlist,r1
	mov.w r2,@(r0,r1)
	add #2,r1
	mov.w r4,@(r0,r1)
	bsr set_params
	mov r0,r1
	SETS.L #VIDREG_BASE+0x8144,r0
	mov.l @r0,r0
	lds.l @r15+,pr
	rts
	nop

; setup_bits: unpack the 32 bits of cur_texture_mode, most significant
; first, into an array of longwords starting at bits_base_bit with a
; stride of bits_inc bytes (each element 0 or 1).  The parallel array
; at bits_base_pal is filled with 2, and then the element selected by
; texture_bit_cursor is overwritten with 0.  Uses r0-r8 and macl.
setup_bits:
	SETS.L #bits_base_bit,r1
	SETS.L #bits_base_pal,r2
	SETS.L #bits_inc,r3
	SETS.L #cur_texture_mode,r4
	mov.l @r4,r4
	SETS.L #32,r5
	SETS.L #2,r6
	SETS.L #bits_inc,r7
	SETS.L #0,r8
1:	shll r4				; next bit -> T
	movt r0
	mov.l r0,@r1
	mov.l r6,@r2
	add r3,r1
	dt r5
	bf/s 1b
	add r3,r2
	SETS.L #texture_bit_cursor,r5
	mov.b @r5,r0
	mul.l r0,r7
	sts macl,r0			; cursor * bits_inc
	SETS.L #bits_base_pal,r2
	add r2,r0
	rts
	mov.l r8,@r0

; draw_scene: submit every face in scene_faces to the TA.
; Each face record is 56 bytes: four vertices of three longwords each
; (index into xform_coords, u, v) at offsets 0, 12, 24 and 36, then
; the palette number at 48 and the texture number at 52.  For each
; face a polygon header and four vertex commands are built in ta_cmd
; and sent with commit_ta_cmd; the fourth vertex carries
; TA_CMD_VERTEX_EOS.  After the last face an all-zero command is
; committed as well.
; Register use: r9 = face pointer, r8 = faces remaining, r7 = ta_cmd,
; r6 = 0, r5 = 0f1, r4 = xform_coords, r3 = 12 (bytes per coordinate
; triple).  commit_ta_cmd overwrites r0, r1 and r10-r14.
draw_scene:
	sts.l pr,@-r15
	SETS.L #scene_faces,r9
	SETS.L #n_scene_faces,r8
	SETS.L #ta_cmd,r7
	SETS.L #0,r6
	SETS.L #0f1,r5
	SETS.L #xform_coords,r4
	SETS.L #3*4,r3
	; --- polygon header ---
1:	SETS.L #TA_CMD_POLYGON|TA_CMD_POLYGON_TYPE_OPAQUE|TA_CMD_POLYGON_SUBLIST|TA_CMD_POLYGON_STRIPLENGTH_2|TA_CMD_POLYGON_TEXTURED,r0
	mov.l r0,@r7			; cmd
	SETS.L #TA_POLYMODE1_Z_GREATER|TA_POLYMODE1_CULL_CCW,r0
	mov.l r0,@(4,r7)		; mode1
	SETS.L #TA_TEXTUREMODE_CLUT8,r1
	mov.l @(48,r9),r0		; palette number
	SHLL #TA_TEXTUREMODE_CLUTBANK8_SHIFT,r0,r2
	or r0,r1
	mov.l @(52,r9),r0		; texture number
	SETS.L #textures,r2
	SHLL #3,r0			; textures[] entries are 8 bytes
	add r0,r2
	mov.l @r2,r0			; texture pointer
	mov.l @(4,r2),r2		; size bits
	SETS.L #cur_texture_mode,r10
	mov.l @r10,r10
	or r2,r10
	mov.l r10,@(8,r7)		; mode2
	SHXR #TA_TEXTUREMODE_ADDRESS_SHIFT,r0
	SETS.L #TA_TEXTUREMODE_ADDRESS_MASK,r2
	and r2,r0
	or r0,r1
	mov.l r1,@(12,r7)		; texture
	mov.l r6,@(16,r7)		; alpha
	mov.l r6,@(20,r7)		; red
	mov.l r6,@(24,r7)		; green
	bsr commit_ta_cmd
	mov.l r6,@(28,r7)		; blue
	; --- vertex 1 ---
	SETS.L #TA_CMD_VERTEX,r1
	mov.l r1,@r7			; cmd
	mov.l r6,@(28,r7)		; ocolour
	not r6,r1
	mov.l r1,@(24,r7)		; colour
	mov.l @r9,r1
	mulu.w r1,r3
	sts macl,r0
	add r4,r0			; &xform_coords[index]
	mov.l @r0,r2
	mov.l r2,@(4,r7)		; x
	mov.l @(4,r0),r2
	mov.l r2,@(8,r7)		; y
	mov.l @(8,r0),r2
	mov.l r2,@(12,r7)		; z
	mov.l @(4,r9),r0
	mov.l r0,@(16,r7)		; u
	mov.l @(8,r9),r0
	bsr commit_ta_cmd
	mov.l r0,@(20,r7)		; v
	; --- vertex 2 ---
	mov.l @(12,r9),r0
	mulu.w r0,r3
	sts macl,r0
	add r4,r0
	mov.l @r0,r2
	mov.l r2,@(4,r7)		; x
	mov.l @(4,r0),r2
	mov.l r2,@(8,r7)		; y
	mov.l @(8,r0),r2
	mov.l r2,@(12,r7)		; z
	mov.l @(16,r9),r0
	mov.l r0,@(16,r7)		; u
	mov.l @(20,r9),r0
	bsr commit_ta_cmd
	mov.l r0,@(20,r7)		; v
	; --- vertex 3 ---
	mov.l @(24,r9),r0
	mulu.w r0,r3
	sts macl,r0
	add r4,r0
	mov.l @r0,r2
	mov.l r2,@(4,r7)		; x
	mov.l @(4,r0),r2
	mov.l r2,@(8,r7)		; y
	mov.l @(8,r0),r2
	mov.l r2,@(12,r7)		; z
	mov.l @(28,r9),r0
	mov.l r0,@(16,r7)		; u
	mov.l @(32,r9),r0
	bsr commit_ta_cmd
	mov.l r0,@(20,r7)		; v
	; --- vertex 4 (end of strip) ---
	mov.l @(36,r9),r0
	mulu.w r0,r3
	sts macl,r0
	add r4,r0
	mov.l @r0,r2
	mov.l r2,@(4,r7)		; x
	mov.l @(4,r0),r2
	mov.l r2,@(8,r7)		; y
	mov.l @(8,r0),r2
	mov.l r2,@(12,r7)		; z
	mov.l @(40,r9),r0
	mov.l r0,@(16,r7)		; u
	mov.l @(44,r9),r0
	mov.l r0,@(20,r7)		; v
	SETS.L #TA_CMD_VERTEX|TA_CMD_VERTEX_EOS,r1
	bsr commit_ta_cmd
	mov.l r1,@r7			; cmd
	dt r8
	bf/s 1b
	add #56,r9			; on to the next face record
	; making this a loop saves only one instruction and adds time.
; draw_scene epilogue: fill all eight longwords of ta_cmd (r7) with
; r6, which draw_scene loaded with 0, commit that, and return.
	mov.l r6,@r7
	mov.l r6,@(4,r7)
	mov.l r6,@(8,r7)
	mov.l r6,@(12,r7)
	mov.l r6,@(16,r7)
	mov.l r6,@(20,r7)
	mov.l r6,@(24,r7)
	bsr commit_ta_cmd
	mov.l r6,@(28,r7)
	lds.l @r15+,pr
	rts
	nop

; commit_ta_cmd: send the eight longwords at ta_cmd to the TA through
; a store queue.  QACR0 is loaded with the area bits of TA_CMD_BASE,
; the command is copied to STOREQ_BASE, and a pref on that address
; starts the transfer.
; Overwrites r0, r1 and r10-r14; r2-r9 are left alone.
; With debug_ta_commit set, the command is also printed as eight hex
; words bracketed by "((" and "))" (putchar2 prints its character
; twice).
commit_ta_cmd:
	; In tatest terms, this is ta_commit_list(), with the argument
	; always being ta_cmd.
	.if debug_ta_commit
	sts.l pr,@-r15
	bsr putchar2
	mov #'(,r1
	.endif
	SETS.L #QACR0,r1
	SETS.L #STOREQ_BASE,r14
	SETS.L #[[TA_CMD_BASE>>26]&7]<<2,r13
	SETS.L #ta_cmd,r12
	SETS.L #8,r11
	.if debug_ta_commit
	; printhex8 takes its argument in r1, so park the QACR0 pointer.
	mov.l r1,@-r15
	bsr printhex8
	mov.l @r12+,r1
	bsr putchar
	mov #' ,r1
	bsr printhex8
	mov.l @r12+,r1
	bsr putchar
	mov #' ,r1
	bsr printhex8
	mov.l @r12+,r1
	bsr putchar
	mov #' ,r1
	bsr printhex8
	mov.l @r12+,r1
	bsr putchar
	mov #' ,r1
	bsr printhex8
	mov.l @r12+,r1
	bsr putchar
	mov #' ,r1
	bsr printhex8
	mov.l @r12+,r1
	bsr putchar
	mov #' ,r1
	bsr printhex8
	mov.l @r12+,r1
	bsr putchar
	mov #' ,r1
	bsr printhex8
	mov.l @r12+,r1
	add #-8*4,r12			; rewind to the start of ta_cmd
	mov.l @r15+,r1
	.endif
	mov.l r13,@r1
	mov r14,r10			; keep the queue base for the pref
1:	mov.l @r12+,r0
	dt r11
	mov.l r0,@r14
	bf/s 1b
	add #4,r14
	.if debug_ta_commit
	pref @r10
	bsr putchar2
	mov #'),r1
	lds.l @r15+,pr
	rts
	nop
	.else
	rts
	pref @r10
	.endif

; handle_maple: wait until BUS_STATE no longer shows
; BUS_STATE_RUNNING, then copy the longwords at offsets 8 and 12 of
; maple_resp into curistate.  Uses r0-r3.
handle_maple:
	SETS.L #BUS_STATE,r3
1:	mov.l @r3,r0
	tst #BUS_STATE_RUNNING,r0
	bf 1b
	SETS.L #maple_resp,r0
	; We ocbi only one cache line, because the parts of the
	; response we care about fit in a single cache line.  The
	; hardware's alignment requirements for maple buffers match
	; cache line alignments, and we access only 8 bytes of it at
	; low offsets.
	;
	; We arguably should ocbi the line back just before we kick off
	; the maple operation rather than waiting until here.  Since
	; we never write to this cache line, the only difference I see
	; is whether it sits around in the cache in the interim.  This
	; might conceivably affect something, but even if it does I
	; have trouble seeing the difference being more than one cache
	; line fill penalty.
; handle_maple, continued: r0 = maple_resp.  Invalidate its cache
; line, then copy the response longwords at offsets 8 and 12 into
; curistate.
	ocbi @r0
	mov.l @(8,r0),r1
	mov.l @(12,r0),r2
	SETS.L #curistate,r0
	mov.l r1,@r0
	mov.l r2,@(4,r0)
	rts
	nop

; await_video: spin until TA_RENDER_BIT appears in TA_RENDER_EVENT and
; write the bit back; then write VBLANK_VBIT to VBLANK_REG, spin until
; that bit reads as set, and write it back again.  Uses r0-r2.
await_video:
	; In tatest terms, this is everything in the main loop after
	; the call to ta_commit_end().
	; ta_wait_render()
	SETS.L #TA_RENDER_EVENT,r1
	SETS.L #TA_RENDER_BIT,r2
1:	mov.l @r1,r0
	tst r2,r0
	bt 1b
	mov.l r2,@r1
	; wait_bovp()
	SETS.L #VBLANK_REG,r1
	SETS.L #VBLANK_VBIT,r2
	mov.l r2,@r1
1:	mov.l @r1,r0
	tst r2,r0
	bt 1b
	rts
	mov.l r2,@r1

; next_frame: show render_buf[curbuf], start a render into the other
; render buffer using cmdlists[curbuf] and tiledesc_cookies[curbuf],
; then toggle curbuf between 0 and 1.
; Uses r0-r6 and r10-r12.  With debug_start_render set, every
; register write in the launch sequence is printed first.
next_frame:
	.if debug_start_render
	sts.l pr,@-r15
	.endif
	; Switch to the previously-rendered screen
	SETS.L #curbuf,r10
	SETS.L #render_buf,r11
	mov.b @r10,r0
	SHLL #2,r0
	mov.l @(r0,r11),r1
	SETS.L #0x007fffff,r12
	SETS.L #DISPLAY_VRAM,r3
	and r12,r1
	mov.l r1,@r3
	SETS.L #SHORT_FRAME_OFFSET,r0
	add r0,r1
	mov.l r1,@(4,r3)		; second address, one scanline further on
	; Kick off rendering to the screen we just stopped displaying
	; In tatest terms, this is ta_begin_render.
	mov.b @r10,r0			; curbuf
	SETS.L #cmdlists,r1
	SHLL #2,r0
	SETS.L #tiledesc_cookies,r2
	mov.l @(r0,r1),r1		; cmdlist
	mov.l @(r0,r2),r2		; tiles
	xor #4,r0			; index of the other buffer
	mov.l @(r0,r11),r3		; scrn
	SETS.L #VIDREG_BASE+0x8138,r4
	SETS.L #0x12,r5
	SETS.L #0,r6
	mov.l @r4,r4
	SETS.L #VRAM_BASE_32,r0
	or r0,r4			; taend
	; Zero 0x12 longwords starting at taend, then step r4 back to
	; taend.
1:	mov.l r6,@r4
	dt r5
	bf/s 1b
	add #4,r4
	add #-0x12*4,r4
	; We could use set_params here, but between the number of
	; values to store and the need to break longs into two words,
	; it's less pain to do it this way.
	;
	; Do we have to do all these in exactly this order?  I suspect
	; not, but, absent documentation, it's hard to tell how much
	; deviation is OK.  We stick strictly to tatest's order.
; next_frame, continued: the register writes that launch the render.
; On entry here r1 = cmdlist, r2 = tiles, r3 = scrn, r4 = taend,
; r10 = &curbuf and r12 = 0x007fffff.  r5 walks from register to
; register by relative adds; the comment on each store gives the
; absolute register address.  In debug builds the local routine 9:
; prints r5 and r0 before each store.
	SETS.L #VIDREG_BASE+0x802c,r5
	and r12,r2
	.if debug_start_render
	bsr 9f
	mov r2,r0
	.endif
	mov.l r2,@r5			; 0xa05f802c
	add #0x8020-0x802c,r5
	mov r1,r0
	and r12,r0
	.if debug_start_render
	bsr 9f
	nop
	.endif
	mov.l r0,@r5			; 0xa05f8020
	add #0x8060-0x8020,r5
	and r12,r3
	.if debug_start_render
	bsr 9f
	mov r3,r0
	.endif
	mov.l r3,@r5			; 0xa05f8060
	add #0x808c-0x8060,r5
	sub r1,r4			; taend - cmdlist
	SHLL #1,r4
	SETS.L #0x01000000,r0
	or r4,r0
	.if debug_start_render
	bsr 9f
	nop
	.endif
	mov.l r0,@r5			; 0xa05f808c
	add #0x8088-0x808c,r5
	SETS.L #0x3e4cccc0,r0		; tatest says "zclip"
	.if debug_start_render
	bsr 9f
	nop
	.endif
	mov.l r0,@r5			; 0xa05f8088
	add #0x8068-0x8088,r5
	SETS.L #[X_SIZE-1]<<16,r0	; tatest calls it "clipw"
	.if debug_start_render
	bsr 9f
	nop
	.endif
	mov.l r0,@r5			; 0xa05f8068
	add #0x806c-0x8068,r5
	SETS.L #[Y_SIZE-1]<<16,r0	; tatest calls it "cliph"
	.if debug_start_render
	bsr 9f
	nop
	.endif
	mov.l r0,@r5			; 0xa05f806c
	add #0x804c-0x806c,r5
	SETS.L #[X_SIZE*2]>>3,r0	; tatest calls it "modulo"
	.if debug_start_render
	bsr 9f
	nop
	.endif
	mov.l r0,@r5			; 0xa05f804c
	add #0x8048-0x804c,r5
	SETS.L #TA_PIXFMT_RGB565|TA_PIXFMT_DITHER,r0 ; tatest calls it "pixfmt"
	.if debug_start_render
	bsr 9f
	nop
	.endif
	mov.l r0,@r5			; 0xa05f8048
	add #0x8014-0x8048,r5
	SETS.L #0xffffffff,r0		; tatest says "Launch!"
	.if debug_start_render
	bsr 9f
	nop
	.endif
	mov.l r0,@r5			; 0xa05f8014
	; curbuf = ! curbuf
	mov.b @r10,r0
	tst r0,r0
	bt/s 1f
	add #1,r0			; was 0: becomes 1 and the branch is taken
	mov #0,r0			; was nonzero: becomes 0
1:
	.if debug_start_render
	lds.l @r15+,pr
	.endif
	rts
	mov.b r0,@r10

	.if debug_start_render
	; Debug helper for next_frame: prints "<r5>=<r0>" and CR LF.
9:
	; about to mov.l r0,@r5; print it
	; must preserve all input registers except pr
	mov.l r0,@-r15
	mov.l r1,@-r15
	sts.l pr,@-r15
	bsr printhex8
	mov r5,r1
	bsr putchar
	mov #'=,r1
	bsr printhex8
	mov.l @(8,r15),r1		; the saved r0
	bsr putchar
	mov #13,r1
	bsr putchar
	mov #10,r1
	lds.l @r15+,pr
	mov.l @r15+,r1
	rts
	mov.l @r15+,r0
	.endif

	SETCONST

; computes (fr3,fr4,fr5) × (fr0,fr1,fr2) -> (fr0,fr1,fr2)
; uses fr6 as temporary; destroys fr3/fr4/fr5 inputs too
; The result is
;	fr0 = B = A - C, where A = fr4*fr2 and C = fr5*fr1
;	fr1 = E = D - F, where D = fr5*fr0 and F = fr3*fr2
;	fr2 = H = G - I, where G = fr3*fr1 and I = fr4*fr0
; The six products are formed in an order that lets each one
; overwrite an input that is no longer needed.
crossproduct:
	fmov fr0,fr6
	fmul fr5,fr6			; D
	fmul fr1,fr5			; C, input fr1 now dead
	fmul fr3,fr1			; G, input fr3 now dead
	fmul fr2,fr3			; F, input fr2 now dead
	fmul fr4,fr2			; A, input fr4 now dead
	fmul fr0,fr4			; I, input fr0 and fr5 now dead
	fmov fr2,fr0			; A, temporary fr2 now dead
	fsub fr5,fr0			; B, A and C now dead
	fmov fr1,fr2			; G, temporary fr1 now dead
	fsub fr4,fr2			; H, G and I now dead
	fmov fr6,fr1			; D, temporary fr6 now dead
	rts
	fsub fr3,fr1			; E, D and F now dead

; Rotate (fr0,fr1,fr2) by fpul fsca units around axis (fr4,fr5,fr6).
; The axis vector must be normalized already.
; Output in (fr0,fr1,fr2).
; Preserves fr4-fr6, fr13-fr15, fpul, all CPU registers.
; Destroys fr3, fr7-fr12.
; Let s = sin(fpul), c = cos(fpul); output in terms of input is
;
; fr0 = (fr0 * ((fr4 * fr4 * (1-c)) + c)) +		A
;	(fr1 * ((fr4 * fr5 * (1-c)) - (fr6 * s))) +	B
;	(fr2 * ((fr4 * fr6 * (1-c)) + (fr5 * s)))	C
;
; fr1 = (fr0 * ((fr5 * fr4 * (1-c)) + (fr6 * s))) +	D
;	(fr1 * ((fr5 * fr5 * (1-c)) + c)) +		E
;	(fr2 * ((fr5 * fr6 * (1-c)) - (fr4 * s)))	F
;
; fr2 = (fr0 * ((fr6 * fr4 * (1-c)) - (fr5 * s))) +	G
;	(fr1 * ((fr6 * fr5 * (1-c)) + (fr4 * s))) +	H
;	(fr2 * ((fr6 * fr6 * (1-c)) + c))		I
;
; The letters name the nine terms in the instruction comments below.
; Outputs fr0 and fr1 are built up in fr12 and fr11, because the
; original fr0, fr1 and fr2 are needed until every term that uses
; them has been computed.
rotate_around_axis:
	fsca fpul,fr8			; fr8 = s, fr9 = c
	fldi1 fr3
	fsub fr9,fr3			; fr3 = 1-c

	; ---- output fr0 = A + B + C, accumulated in fr12 ----
	fmov fr4,fr7			; fr4
	fmul fr4,fr7			; fr4 * fr4
	fmul fr3,fr7			; fr4 * fr4 * (1-c)
	fadd fr9,fr7			; (fr4 * fr4 * (1-c)) + c
	fmul fr0,fr7			; A
	fmov fr4,fr10			; fr4
	fmul fr5,fr10			; fr4 * fr5
	fmul fr3,fr10			; fr4 * fr5 * (1-c)
	fmov fr6,fr11			; fr6
	fmul fr8,fr11			; fr6 * s
	fsub fr11,fr10			; (fr4 * fr5 * (1-c)) - (fr6 * s)
	fmul fr1,fr10			; B
	fadd fr10,fr7			; A + B
	fmov fr4,fr12			; fr4
	fmul fr6,fr12			; fr4 * fr6
	fmul fr3,fr12			; fr4 * fr6 * (1-c)
	fmov fr5,fr11			; fr5
	fmul fr8,fr11			; fr5 * s
	fadd fr11,fr12			; (fr4 * fr6 * (1-c)) + (fr5 * s)
	fmul fr2,fr12			; C
	fadd fr7,fr12			; output fr0

	; ---- output fr1 = D + E + F: D first, then the start of F ----
	fmov fr5,fr7			; fr5
	fmul fr4,fr7			; fr5 * fr4
	fmul fr3,fr7			; fr5 * fr4 * (1-c)
	fmov fr6,fr10			; fr6
	fmul fr8,fr10			; fr6 * s
	fadd fr10,fr7			; (fr5 * fr4 * (1-c)) + (fr6 * s)
	fmul fr0,fr7			; D
	fmov fr5,fr11			; fr5
	fmul fr6,fr11			; fr5 * fr6
	fmul fr3,fr11			; fr5 * fr6 * (1-c)
	fmov fr4,fr10			; fr4
	fmul fr8,fr10			; fr4 * s
	; This is our point of maximum register use.
; rotate_around_axis, second half.
	; We have the following, all live, at this point:
	;	fr0,fr1,fr2 = input values
	;	fr3 = 1-c
	;	fr4,fr5,fr6,fpul = input values to be preserved
	;	fr7 = D
	;	fr8 = s
	;	fr9 = c
	;	fr10 = fr4 * s
	;	fr11 = fr5 * fr6 * (1-c)
	;	fr12 = output fr0

	; ---- finish output fr1 = D + F + E, accumulated in fr11 ----
	fsub fr10,fr11			; (fr5 * fr6 * (1-c)) - (fr4 * s)
	fmul fr2,fr11			; F
	fadd fr7,fr11			; D + F
	fmov fr5,fr10			; fr5
	fmul fr5,fr10			; fr5 * fr5
	fmul fr3,fr10			; fr5 * fr5 * (1-c)
	fadd fr9,fr10			; (fr5 * fr5 * (1-c)) + c
	fmul fr1,fr10			; E
	fadd fr10,fr11			; output fr1

	; ---- output fr2 = I + H + G, accumulated in place in fr2 ----
	fmov fr6,fr7			; fr6
	fmul fr6,fr7			; fr6 * fr6
	fmul fr3,fr7			; fr6 * fr6 * (1-c)
	fadd fr9,fr7			; (fr6 * fr6 * (1-c)) + c [fr9 dead]
	fmul fr7,fr2			; I [fr2 dead]
	fmov fr6,fr7			; fr6
	fmul fr5,fr7			; fr6 * fr5
	fmul fr3,fr7			; fr6 * fr5 * (1-c)
	fmov fr4,fr10			; fr4
	fmul fr8,fr10			; fr4 * s
	fadd fr10,fr7			; (fr6 * fr5 * (1-c)) + (fr4 * s)
	fmul fr1,fr7			; H [fr1 dead]
	fadd fr7,fr2			; H + I
	fmov fr6,fr7			; fr6
	fmul fr4,fr7			; fr6 * fr4
	fmul fr3,fr7			; fr6 * fr4 * (1-c) [fr3 dead]
	fmul fr5,fr8			; fr5 * s [fr8 dead]
	fsub fr8,fr7			; (fr6 * fr4 * (1-c)) - (fr5 * s)
	fmul fr0,fr7			; G [fr0 dead]
	fadd fr7,fr2			; output fr2

	; Move the other two results into place.
	fmov fr11,fr1			; output fr1
	rts
	fmov fr12,fr0			; output fr0

; Modifies (fr0,fr1,fr2) by subtracting off the component in the
; direction of (fr8,fr9,fr10).
; (fr8,fr9,fr10) must be normalized already.
; Output in (fr0,fr1,fr2).
; Preserves fr4-fr6, fr8-fr15, fpul, all CPU registers.
; Destroys fr3, fr7.
; Output in terms of input is
;
; Let dp = (fr0 * fr8) + (fr1 * fr9) + (fr2 * fr10)
;
; fr0 = fr0 - (dp * fr8)
; fr1 = fr1 - (dp * fr9)
; fr2 = fr2 - (dp * fr10)
subtract_component:
	; fipr works on four-element vectors and leaves its result in
	; the last element of the destination (fr3 here); clearing fr3
	; first keeps the fourth product, fr3*fr11, out of the sum.
	fldi0 fr3
	fipr fv8,fv0			; fr3 = dp
	fmov fr3,fr7
	fmul fr8,fr7
	fsub fr7,fr0
	fmov fr3,fr7
	fmul fr9,fr7
	fsub fr7,fr1
	fmov fr3,fr7
	fmul fr10,fr7
	rts
	fsub fr7,fr2

; Normalize the vector in (fr0,fr1,fr2).
; Output in (fr0,fr1,fr2).
; Preserves fr4-fr15, fpul, all integer registers.
; Destroys fr3.
normalize:
	; fr3 = 0 so that fipr's fourth product contributes nothing;
	; fipr then leaves the squared length in fr3 and fsrra turns it
	; into 1/sqrt(length^2).
	fldi0 fr3
	fipr fv0,fv0
	fsrra fr3
	fmul fr3,fr0
	fmul fr3,fr1
	rts
	fmul fr3,fr2

; printhex8: print r1 as eight lowercase hex digits.
; printhexN: print the low r0 hex digits of r1 (entered with the
;            digit count in r0).
; Both overwrite r0 and r1; r2-r4 and pr are saved and restored.
printhex8:
	mov #8,r0
printhexN:
	mov.l r4,@-r15
	mov r0,r4			; r4 = digits still to print
	add #-8,r0
	neg r0,r0
	SHLL #2,r0			; (8-N)*4
	shld r0,r1			; bring the first wanted digit to the top nibble
	mov.l r3,@-r15
	mov.l r2,@-r15
	sts.l pr,@-r15
	mova 9f,r0
	mov r0,r3			; r3 = digit table
	mov r1,r2			; r2 = remaining value
1:	mov r2,r0
	SHLR #28,r0,r1			; r0 = top nibble
	SHLL #4,r2
	add r3,r0
	bsr putchar
	mov.b @r0,r1
	dt r4
	bf 1b
	lds.l @r15+,pr
	mov.l @r15+,r2
	mov.l @r15+,r3
	rts
	mov.l @r15+,r4

	.align 4
9:	.ascii "0123456789abcdef"

	.align 2
; putchar2: print the character in r1 twice.  It calls putchar once
; and then falls through into putchar for the second copy.
; r1 is preserved; r0 is overwritten.
putchar2:
	sts.l pr,@-r15
	bsr putchar
	mov.l r1,@-r15
	mov.l @r15+,r1
	lds.l @r15+,pr
; putchar: send the character in r1 to the SCIF, whose registers are
; addressed relative to GBR.  Waits while the transmit FIFO count
; reads 16, writes the byte, then waits for the count to reach zero.
; Overwrites r0 only.
putchar:
1:	mov.w @(SCFDR2-SCIF_BASE,gbr),r0
	SHXR #SCFDR2_TX_SHIFT,r0
	and #SCFDR2_TX_MASK,r0
	cmp/eq #16,r0
	bt 1b
	mov r1,r0
	mov.b r0,@(SCFTDR2-SCIF_BASE,gbr)
1:	mov.w @(SCFDR2-SCIF_BASE,gbr),r0
	SHXR #SCFDR2_TX_SHIFT,r0
	tst #SCFDR2_TX_MASK,r0
	bf 1b
	rts
	nop

; putstr: send the NUL-terminated string at r1 to the SCIF.  Before
; each byte it waits while the transmit FIFO count reads 16.
; Overwrites r0; r1 is left pointing just past the terminator.
putstr:
1:	mov.w @(SCFDR2-SCIF_BASE,gbr),r0
	SHXR #SCFDR2_TX_SHIFT,r0
	and #SCFDR2_TX_MASK,r0
	cmp/eq #16,r0
	bt 1b
	mov.b @r1+,r0
	tst r0,r0
	bt 1f
	bra 1b
	mov.b r0,@(SCFTDR2-SCIF_BASE,gbr)
1:
	; don't bother waiting for drain here; we do a putchar call,
	; which will drain everything, after all putstr calls and
	; before anything for which it matters.
	rts
	nop

; print_float: print the single-precision value whose bit pattern is
; in r1, in decimal, with as many fraction digits as it takes for the
; remainder to reach exactly zero.
; NOTE(review): the scaling loop leaves only when 10 > value, so an
; infinity or NaN input would never leave it - confirm callers never
; pass one.
print_float:
	; float in r1
	; uses r0, r1, r2, fr0, fr1, fr2, fpul
	sts.l pr,@-r15
	; check for negative; if so, print - and negate
	lds r1,fpul
	fsts fpul,fr0
	fldi0 fr1
	fcmp/gt fr0,fr1			; T = (0 > value)
	bf 1f
	bsr putchar
	mov #'-,r1
	fneg fr0
1:
	; divide by 10 until it's less than 10, and keep count
	mov #10,r0
	lds r0,fpul
	float fpul,fr1			; fr1 = 10.0 from here on
	mov #0,r2
1:	fcmp/gt fr0,fr1			; T = (10 > value)
	bt 1f
	fdiv fr1,fr0
	bra 1b
	add #1,r2
1:
	; now fr0 < 10 and r2 is the number of divisions we did
	; print the first (possibly only) digit before the .
	ftrc fr0,fpul
	sts fpul,r1
	bsr putchar
	add #'0,r1
	float fpul,fr2
	fsub fr2,fr0
	; now, for r2 loops, print next digit
1:	cmp/pl r2
	bf 1f
	fmul fr1,fr0
	ftrc fr0,fpul
	sts fpul,r1
	float fpul,fr2
	bsr putchar
	add #'0,r1
	fsub fr2,fr0
	bra 1b
	add #-1,r2
1:
	; print as many digits as necessary to reach 0
	; print a . before the first one, if there are any
	mov #'.,r1
	SETS.L #0f0,r0
	lds r0,fpul
1:
	; Invariants at this point:
	; - fpul contains integer part to be subtracted from fr0
	; - r1 contains next character to print
	; - loop if fr0 != 0 at this point
	fldi0 fr2
	fcmp/eq fr0,fr2
	bt 2f
	float fpul,fr2
	fsub fr2,fr0
	fmul fr1,fr0
	bsr putchar
	ftrc fr0,fpul
	sts fpul,r1
	bra 1b
	add #'0,r1
2:
	; Done.
	lds.l @r15+,pr
	rts
	nop

; nbgetchar: non-blocking read from the SCIF.
; Returns the received byte (0..255) in r0, or -1 if the receive
; count in SCFDR2 is zero.  After reading a byte it reads SCLSR2 and
; then writes 0 to it.  Overwrites r0 and r1.
nbgetchar:
	mov.w @(SCFDR2-SCIF_BASE,gbr),r0
	SHXR #SCFDR2_RX_SHIFT,r0,r1
	tst #SCFDR2_RX_MASK,r0
	bt 1f
	mov.b @(SCFRDR2-SCIF_BASE,gbr),r0
	extu.b r0,r1
	mov.w @(SCLSR2-SCIF_BASE,gbr),r0
	mov #0,r0
	mov.w r0,@(SCLSR2-SCIF_BASE,gbr)
	rts
	mov r1,r0
1:	rts
	mov #-1,r0

	SETCONST

	; Not sure we actually need to align the VBR; the only reason I
	; have to suspect we might is that it's the kind of thing I've
	; seen relatively often before - interrupt/trap vector tables
	; often need to be aligned, not infrequently to a remarkably
	; strict boundary.  I see no indication in the manuals that
	; the SH requires _any_ alignment, but it's easy to do and
	; definitely won't hurt anything.  (No explicit indication,
	; that is.  It is implicit in the execution of code at
	; VBR+0x100, VBR+0x400, and VBR+0x600 that VBR must be even.)
	.align 0x10000

; Exception handling consists of:
; - Save PC and SR in SPC and SSR
; - Set SR bit BL to 1 (block exceptions/interrupts)
; - Set SR bit MD to 1 (privileged mode)
; - Set SR bit RB to 1 (r0-r7 bank 1)
; - Write code to EXPEVT or INTEVT
; - Set PC to vector addr, resume execution
;
; Each of the three entry points below loads its own vector offset
; into r2, EXPEVT into r3 and INTEVT into r4 (the latter in the jmp
; delay slot), then jumps to regdump.
intvec = .

. = intvec + 0x100
	SETS.L #0x100,r2
	SETS.L #EXPEVT,r0
	mov.l @r0,r3
	SETS.L #INTEVT,r0
	SETS.L #regdump,r1
	jmp @r1
	mov.l @r0,r4
	SETCONST

. = intvec + 0x400
	SETS.L #0x400,r2
	SETS.L #EXPEVT,r0
	mov.l @r0,r3
	SETS.L #INTEVT,r0
	SETS.L #regdump,r1
	jmp @r1
	mov.l @r0,r4
	SETCONST

. = intvec + 0x600
	SETS.L #0x600,r2
	SETS.L #EXPEVT,r0
	mov.l @r0,r3
	SETS.L #INTEVT,r0
	SETS.L #regdump,r1
	jmp @r1
	mov.l @r0,r4
	SETCONST

. = intvec + 0x1000

; Labels for the crash dump, in the order regdump prints them.
; NOTE(review): these strings are reproduced exactly as found; if
; they once contained runs of spaces for column alignment, those may
; have been collapsed - compare against a pristine copy.
crash_msg_0:	.asciz (13,10,10)"FATAL TRAP"(13,10)"R0 "
crash_msg_1:	.asciz " R1 "
crash_msg_2:	.asciz " R2 "
crash_msg_3:	.asciz " R3 "
crash_msg_4:	.asciz (13,10)"R4 "
crash_msg_5:	.asciz " R5 "
crash_msg_6:	.asciz " R6 "
crash_msg_7:	.asciz " R7 "
crash_msg_8:	.asciz (13,10)"R8 "
crash_msg_9:	.asciz " R9 "
crash_msg_10:	.asciz " R10 "
crash_msg_11:	.asciz " R11 "
crash_msg_12:	.asciz (13,10)"R12 "
crash_msg_13:	.asciz " R13 "
crash_msg_14:	.asciz " R14 "
crash_msg_15:	.asciz " R15 "
crash_msg_gbr:	.asciz (13,10)"GBR "
crash_msg_sr:	.asciz " SR "
crash_msg_pc:	.asciz " PC "
crash_msg_mach:	.asciz (13,10)"MACH"
crash_msg_macl:	.asciz " MACL"
crash_msg_pr:	.asciz " PR "
crash_msg_vec:	.asciz (13,10)"vector"
crash_msg_expevt: .asciz " EXPEVT"
crash_msg_intevt: .asciz " INTEVT"
crash_msg_done:	.asciz (13,10)
crash_msg_equal: .asciz " = "

	.align 4
; Zero-terminated list of label pointers.  The order has to match the
; order in which regdump's pushes come back off the stack.
crash_msgs:
	.long crash_msg_0
	.long crash_msg_1
	.long crash_msg_2
	.long crash_msg_3
	.long crash_msg_4
	.long crash_msg_5
	.long crash_msg_6
	.long crash_msg_7
	.long crash_msg_8
	.long crash_msg_9
	.long crash_msg_10
	.long crash_msg_11
	.long crash_msg_12
	.long crash_msg_13
	.long crash_msg_14
	.long crash_msg_15
	.long crash_msg_gbr
	.long crash_msg_sr
	.long crash_msg_pc
	.long crash_msg_mach
	.long crash_msg_macl
	.long crash_msg_pr
	.long crash_msg_vec
	.long crash_msg_expevt
	.long crash_msg_intevt
	.long 0

	.align 2
; regdump: common exception handler.  Entered from the vector stubs
; with r2 = vector offset, r3 = EXPEVT, r4 = INTEVT (bank 1
; registers, since the exception set SR.RB).
; Switches to the private stack ending at intstacktop and pushes
; every value to be shown in reverse display order, so that the print
; loop can pop them in step with the crash_msgs list: R0-R7 (the
; interrupted code's bank), R8-R14, the old R15, GBR, SSR, SPC, MACH,
; MACL, PR, vector, EXPEVT, INTEVT.  After the dump it jumps to the
; reset vector; it never returns.
regdump:
	mov r15,r5			; the interrupted code's stack pointer
	SETS.L #intstacktop,r15
	mov.l r4,@-r15
	mov.l r3,@-r15
	mov.l r2,@-r15
	sts.l pr,@-r15
	sts.l macl,@-r15
	sts.l mach,@-r15
	stc.l spc,@-r15
	stc.l ssr,@-r15
	stc.l gbr,@-r15
	mov.l r5,@-r15
	mov.l r14,@-r15
	mov.l r13,@-r15
	mov.l r12,@-r15
	mov.l r11,@-r15
	mov.l r10,@-r15
	mov.l r9,@-r15
	mov.l r8,@-r15
	stc.l r7_bank,@-r15
	stc.l r6_bank,@-r15
	stc.l r5_bank,@-r15
	stc.l r4_bank,@-r15
	stc.l r3_bank,@-r15
	stc.l r2_bank,@-r15
	stc.l r1_bank,@-r15
	stc.l r0_bank,@-r15
	SETS.L #SCIF_BASE,r14
	; putstr, printhex8 and putchar reach the SCIF through GBR, not
	; r14.  The interrupted GBR is already saved above, so reload it
	; here; otherwise a crash that happened with GBR clobbered would
	; send the dump to the wrong address.
	ldc r14,gbr
	SETS.L #crash_msgs,r9
	SETS.L #putstr,r8
	SETS.L #printhex8,r7
	SETS.L #putchar,r6
	; For each label in crash_msgs: print the label...
1:	mov.l @r9+,r1
	tst r1,r1
	bt 1f
	jsr @r8
	nop
; regdump print loop, continued.  r9 walks crash_msgs; r8, r7 and r6
; hold the addresses of putstr, printhex8 and putchar.  Having printed
; the label, print " = " and then the next saved value, popped from
; the dump stack in the jsr delay slot.
	SETS.L #crash_msg_equal,r1
	jsr @r8
	nop
	jsr @r7
	mov.l @r15+,r1
	bra 1b
	nop
	; End of list: finish the line, then send one more character
	; through putchar, which waits for the transmit FIFO to empty.
1:	SETS.L #crash_msg_done,r1
	jsr @r8
	nop
	jsr @r6
	mov #0,r1
	SETS.L #0xa0000000,r0		; hard-reset vector
	jmp @r0
	nop

	SETCONST

	.align 4
; Stack used by regdump: 0x1000 bytes, growing down from intstacktop.
	.space 0x1000
intstacktop = .