; This is designed to be serial-line downloaded to cdcode. ; ; Our memory map: ; ; [8c000000,8c010000) Stack (r15 set by cdcode) ; [8c010000,8c01????) cdcode ; [8c020000,8c02????) Us .include "regs.s" .include "ta-cmds.s" .include "maple-bits.s" VRAM_BASE_32 = 0xa5000000 VRAM_BASE_64 = 0xa4000000 VRAM_SIZE = 8 << 20 STOREQ_BASE = 0xe0000000 VIDREG_BASE = 0xa05f0000 FRAME_X = 640 FRAME_Y = 480 VBLANK_REG = VIDREG_BASE + 0x6900 VBLANK_VBIT = 0x08 DISPLAY_VRAM = VIDREG_BASE + 0x8050 SHORT_FRAME_OFFSET = FRAME_X*2 ; FRAME_X pixels at two bytes each COT_FOVY = 0f1.73 ; cot(FOVy/2), field-of-view angle figure ZNEAR = 0f1 ZFAR = 0f100 DISTANCE = 0f15 BUTTON_FACTOR = 0f3 ;BIT_YMIN = 0f40 ;BIT_YMAX = 0f52 ; Layout of the stuff we keep in video RAM. We double-buffer, so there ; are two of most of these. _a and _b suffixes indicate the pairs. ; ; Unfortunately the rendering and video hardware aren't capable of ; making distinctions equivalent to the difference between the CPU's ; a4xxxxxx and a5xxxxxx views of video RAM (textures always come from ; a4xxxxxx, or, to be more precise, always access video RAM in a way ; compatible with the CPU's a4xxxxxx view, whereas everything else ; comes from a5xxxxxx). So we're stuck jigsawing together a4xxxxxx ; allocations for textures and a5xxxxxx allocations for other stuff. . = VRAM_BASE_64 ; Textures. We have only one texture, used for maze walls, ; which we use with three different palettes depending on the ; orientation of the wall in question. It's an 8x8 texture, ; occupying 64 bytes. texture__base = . texture_wall: .space 64 texture__end = . . = VRAM_BASE_32 + [[texture__end - texture__base] / 2] .align 8 ; Space to render into. Each field takes up FRAME_X*FRAME_Y ; pixels at two bytes per pixel. (If it's displayed ; interlaced, this is handled with the display hardware; in ; memory it's totally non-interlaced.) 
render_buf_size = FRAME_X * FRAME_Y * 2 render_buf_a: .space render_buf_size render_buf_b: .space render_buf_size ; Tile descriptors. There is one of these, at 6 longs, per ; tile; there is also a 24-long header. Each tile is 32x32 ; pixels. So for a 640x480 screen, we need ; 24+(6*(640/32)*(480/32)) longs of space. (I don't know what ; happens if the screen width or height is not a multiple of ; 32.) Each tile also uses 64 bytes of buffer space. ta_buffers_size_cmd_list = 512 * 1024 ta_buffers_size_tile_buffer = 64 * [FRAME_X/32] * [FRAME_Y/32] ta_buffers_size_tile_descriptor = 4 * [24 + [6 * [FRAME_X/32] * [FRAME_Y/32]]] . = VRAM_BASE_32 + 0x00400000 ta_buffers_cmd_list_a: .space ta_buffers_size_cmd_list ta_buffers_cmd_list_b: .space ta_buffers_size_cmd_list ta_buffers_tile_buffer_a: .space ta_buffers_size_tile_buffer ta_buffers_tile_buffer_b: .space ta_buffers_size_tile_buffer ta_buffers_tile_descriptor_a: .space ta_buffers_size_tile_descriptor ta_buffers_tile_descriptor_b: .space ta_buffers_size_tile_descriptor ; End of layout of video RAM. . = 0x8c020000 .sz any .pr any SETS.L #main,r0 jmp @r0 nop SETCONST .sz 0 .pr 0 ; Our "data segment". ; The maple command and response buffers. The hardware ; requires these be aligned on 32-byte boundaries. .align 32 maple_cmd: .long XDESC_LAST | [0 << XDESC_PORTSHIFT] | [1 << XDESC_LENSHIFT] .long maple_resp & DMA_ADDRMASK MapleFrame CMD_GETCOND, 0, ADDR_MAIN, 0, 0, 1 .long @BSL[FUNC_CONTROLLER] .align 32 ; 1024 is the largest the hardware supports, so it's a safe ; limit. (The amount actually used is usually fairly small.) maple_resp: .space 1024 ; The base matrix (composition of screenview, projection, and ; translation). .align 8 base_matrix: .space 16*4 ; The current and previous controller input state. The ; patterns we initialize this to are what the controller sends ; when it's not being touched. 
.align 4 curistate: .long 0x0000ffff, 0x80808080 previstate: .long 0x0000ffff, 0x80808080 ; Stack pointer for abrupt return to cdcode. .align 4 throw_sp: .space 4 ; RNG state. .align 4 RNG_STATE_WORDS = 64 rng_state: .space 4 * RNG_STATE_WORDS rng_hand: .space 1 ; The maze itself. MZX = 3 MZY = 3 MZZ = 3 MZXY = MZX * MZY MZWALLS = [MZX * MZY * MZZ * 3] - [MZX * MZY] - [MZY * MZZ] - [MZZ * MZX] MZCELLS = MZX * MZY * MZZ MZMAX = MZX .if MZY > MZMAX MZMAX = MZY .endif .if MZZ > MZMAX MZMAX = MZZ .endif MZC_PX = 0x0001 MZC_PY = 0x0002 MZC_PZ = 0x0004 MZC_S = 0x0008 MZC_MX = 0x0010 MZC_MY = 0x0020 MZC_MZ = 0x0040 MZC_G = 0x0080 MZC_P = 0x0100 .align 2 maze: .space 2 * MZCELLS .align 4 mwalls: .space 4 * MZWALLS .align 2 mcells: .space 2 * MZCELLS ; Cookies to pass to the hardware (void *tiles[2] in tatest) .align 4 tiledesc_cookies: .space 2*4 ; Tile buffers (the 64-bytes-per-tile work space) .align 4 tilebuffers: .long ta_buffers_tile_buffer_a .long ta_buffers_tile_buffer_b ; Tile descriptors (the spaces in which the descriptors are ; built) .align 4 tiledescs: .long ta_buffers_tile_descriptor_a .long ta_buffers_tile_descriptor_b ; Camera location and orientation. Location is (X,Y,Z); ; orientation is stored in the form of each of the world axes ; as a normalized vector in the camera's coordinate system. .align 4 eye_loc: .long 0f0.5, 0f0.5, 0f0.5 eye_x: .long 0f1, 0f0, 0f0 eye_y: .long 0f0, 0f1, 0f0 eye_z: .long 0f0, 0f0, 0f1 ; Scene corner coordinates. .align 4 vertex_coords: .macro vtx_x y, z .long 0f0, $(y), $(z) .long 0f1, $(y), $(z) .long 0f2, $(y), $(z) .long 0f3, $(y), $(z) .endm .macro vtx_y z vtx_x 0f0, $(z) vtx_x 0f1, $(z) vtx_x 0f2, $(z) vtx_x 0f3, $(z) .endm vtx_y 0f0 vtx_y 0f1 vtx_y 0f2 vtx_y 0f3 n_vertex_coords = [. - vertex_coords] / [3*4] xform_coords: .space n_vertex_coords*3*4 ; Texture twiddling table. Why "twiddle"? That's the term ; used in tatest's comments. 
It appears to be interleaving ; the bits of the numbers that form texture coordinates, so ; that the texels conceptually at (x,y) and (x+1,y), where ; x=ABCDEFG0 and y=abcdefgh (say), are stored at offsets ; aAbBcCdDeEfFgGh0 (x) and aAbBcCdDeEfFgGh1 (x+1). ; ; Why do it? Because, in the words of another tatest comment, ; "palette based textures can not be non-twiddled". Why ; design hardware that way? MC, in email, passed along an ; explanation from someone who worked on the hardware, saying ; that twiddled textures provide higher performance, so the ; designers figured the only reason to use non-twiddled ; textures was to use a rendered frame as a texture (for, eg, ; reflections). Since the renderer output is always ; true-colour, that's all they implemented. (The ; "non-twiddled" bit got reused for a different meaning for ; palette-based textures.) ; ; tatest generates a 1024-entry table. We reserve (and set up) ; that much space, but as of this writing use only 256 entries ; of it. ; ; One possible note to beware of is that this may not apply to ; the large dimension of non-square textures. Done naïvely, ; doing this for non-square textures could use excessive ; amounts of memory; it would appear, for example, that an ; 8x256 texture would take up almost as much memory space as a ; 128x256 one (because of all the gaps between the address ; bits). But it may be smarter than that; when I mentioned ; that in mail to MC, he said he had a fuzzy memory that the ; high bits of non-square textures aren't twiddled, that, eg, ; an 8x256 texture in memory consists of 32 consecutive 8x8 ; (twiddled) blocks. But he also warned that memory could be ; wrong, so test this before depending on it. ; .align 2 twiddles: .space 1024*2 ; Current double-buffering buffer number. Always 0 or 1. curbuf: .space 1 ; When set, this causes printing of debugging info, but for ; only one cycle; it's cleared when the info is printed. debug: .byte 0 ; Our "text segment". 
; These palettes are straight from tatest; I've just ; reformatted them from C to assembly. It doesn't say where, ; if anywhere, they came from. They're small enough I haven't ; bothered trying to compress them. .align 4 palette_0: .long 0xff000000,0xff3c3c3c,0xff413c3c,0xff493c3c,0xff4d3838,0xff553838,0xff593434,0xff613434 .long 0xff653030,0xff6d3030,0xff712c2c,0xff792c2c,0xff822828,0xff862828,0xff8e2424,0xff922424 .long 0xff9a2020,0xff9e2020,0xffa61c1c,0xffaa1c1c,0xffb21818,0xffb61818,0xffbe1414,0xffc71414 .long 0xffcb1010,0xffd31010,0xffd70c0c,0xffdf0c0c,0xffe30808,0xffeb0808,0xffef0404,0xfff70404 .long 0xffff0000,0xffff0400,0xffff0c00,0xffff1400,0xffff1c00,0xffff2400,0xffff2c00,0xffff3400 .long 0xffff3c00,0xffff4500,0xffff4d00,0xffff5500,0xffff5d00,0xffff6500,0xffff6d00,0xffff7500 .long 0xffff7d00,0xffff8600,0xffff8e00,0xffff9600,0xffff9e00,0xffffa600,0xffffae00,0xffffb600 .long 0xffffbe00,0xffffc700,0xffffcf00,0xffffd700,0xffffdf00,0xffffe700,0xffffef00,0xfffff700 .long 0xffffff00,0xffffff04,0xffffff0c,0xffffff14,0xffffff1c,0xffffff24,0xffffff2c,0xffffff34 .long 0xffffff3c,0xffffff45,0xffffff4d,0xffffff55,0xffffff5d,0xffffff65,0xffffff6d,0xffffff75 .long 0xffffff7d,0xffffff86,0xffffff8e,0xffffff96,0xffffff9e,0xffffffa6,0xffffffae,0xffffffb6 .long 0xffffffbe,0xffffffc7,0xffffffcf,0xffffffd7,0xffffffdf,0xffffffe7,0xffffffef,0xfffffff7 .long 0xffffffff,0xffffffff,0xfffffbfb,0xfffffbf7,0xfffff7f3,0xfffff7ef,0xfffff3eb,0xfffff3e7 .long 0xffffefe3,0xffffefdf,0xffffebdb,0xffffebd7,0xffffe7d3,0xffffe7cf,0xffffe3cb,0xffffe3c7 .long 0xffffdfc3,0xffffdfbe,0xffffdbba,0xffffdbb6,0xffffd7b2,0xffffd7ae,0xffffd3aa,0xffffd3a6 .long 0xffffcfa2,0xffffcf9e,0xffffcb9a,0xffffcb96,0xffffc792,0xffffc78e,0xffffc38a,0xffffc386 .long 0xffffbe82,0xffffba7d,0xffffba79,0xffffb675,0xffffb671,0xffffb26d,0xffffb269,0xffffae65 .long 0xffffae61,0xffffaa5d,0xffffaa59,0xffffa655,0xffffa651,0xffffa24d,0xffffa249,0xffff9e45 .long 
0xffff9e41,0xffff9a3c,0xffff9a38,0xffff9634,0xffff9630,0xffff922c,0xffff9228,0xffff8e24 .long 0xffff8e20,0xffff8a1c,0xffff8a18,0xffff8614,0xffff8610,0xffff820c,0xffff8208,0xffff7d04 .long 0xffff7900,0xffff7900,0xffff7500,0xffff7100,0xffff6d00,0xffff6900,0xffff6500,0xffff6100 .long 0xffff5d00,0xffff5900,0xffff5500,0xffff5100,0xffff4d00,0xffff4900,0xffff4500,0xffff4100 .long 0xffff3c00,0xffff3c00,0xffff3800,0xffff3400,0xffff3000,0xffff2c00,0xffff2800,0xffff2400 .long 0xffff2000,0xffff1c00,0xffff1800,0xffff1400,0xffff1000,0xffff0c00,0xffff0800,0xffff0400 .long 0xffff0000,0xffff0000,0xfffb0000,0xfff70000,0xfff70000,0xfff30000,0xffef0000,0xffeb0000 .long 0xffeb0000,0xffe70000,0xffe30000,0xffe30000,0xffdf0000,0xffdb0000,0xffd70000,0xffd70000 .long 0xffd30000,0xffcf0000,0xffcf0000,0xffcb0000,0xffc70000,0xffc30000,0xffc30000,0xffbe0000 .long 0xffba0000,0xffba0000,0xffb60000,0xffb20000,0xffae0000,0xffae0000,0xffaa0000,0xffa60000 .long 0xffa20000,0xffa20000,0xff9e0404,0xff9a0404,0xff960808,0xff920808,0xff8e0c0c,0xff8e0c0c .long 0xff8a1010,0xff861010,0xff821414,0xff7d1414,0xff791818,0xff791818,0xff751c1c,0xff711c1c .long 0xff6d2020,0xff692020,0xff652424,0xff652424,0xff612828,0xff5d2828,0xff592c2c,0xff552c2c .long 0xff513030,0xff513030,0xff4d3434,0xff493434,0xff453838,0xff413838,0xff3c3c3c,0xff3c3c3c palette_1: .long 0xff000000,0xff000000,0xff000004,0xff00000c,0xff000010,0xff000018,0xff000020,0xff000024 .long 0xff00002c,0xff000030,0xff000038,0xff000041,0xff000045,0xff00004d,0xff000051,0xff000059 .long 0xff000061,0xff000065,0xff00006d,0xff000075,0xff000079,0xff000082,0xff000086,0xff00008e .long 0xff000096,0xff00009a,0xff0000a2,0xff0000a6,0xff0000ae,0xff0000b6,0xff0000ba,0xff0000c3 .long 0xff0000cb,0xff0004cb,0xff000ccb,0xff0010cf,0xff0018cf,0xff001cd3,0xff0024d3,0xff0028d3 .long 0xff0030d7,0xff0038d7,0xff003cdb,0xff0045db,0xff0049db,0xff0051df,0xff0055df,0xff005de3 .long 0xff0065e3,0xff0069e3,0xff0071e7,0xff0075e7,0xff007deb,0xff0082eb,0xff008aeb,0xff008eef .long 
0xff0096ef,0xff009ef3,0xff00a2f3,0xff00aaf3,0xff00aef7,0xff00b6f7,0xff00bafb,0xff00c3fb .long 0xff00cbff,0xff04cbff,0xff0ccbff,0xff14cfff,0xff1ccfff,0xff24d3ff,0xff2cd3ff,0xff34d3ff .long 0xff3cd7ff,0xff45d7ff,0xff4ddbff,0xff55dbff,0xff5ddbff,0xff65dfff,0xff6ddfff,0xff75e3ff .long 0xff7de3ff,0xff86e3ff,0xff8ee7ff,0xff96e7ff,0xff9eebff,0xffa6ebff,0xffaeebff,0xffb6efff .long 0xffbeefff,0xffc7f3ff,0xffcff3ff,0xffd7f3ff,0xffdff7ff,0xffe7f7ff,0xffeffbff,0xfff7fbff .long 0xffffffff,0xfffbffff,0xfff7ffff,0xfff3ffff,0xffebffff,0xffe7ffff,0xffe3ffff,0xffdbffff .long 0xffd7ffff,0xffd3ffff,0xffcbffff,0xffc7ffff,0xffc3ffff,0xffbaffff,0xffb6ffff,0xffb2ffff .long 0xffaaffff,0xffa6ffff,0xffa2ffff,0xff9effff,0xff96ffff,0xff92ffff,0xff8effff,0xff86ffff .long 0xff82ffff,0xff7dffff,0xff75ffff,0xff71ffff,0xff6dffff,0xff65ffff,0xff61ffff,0xff5dffff .long 0xff55ffff,0xff51ffff,0xff4dffff,0xff49ffff,0xff41ffff,0xff3cffff,0xff38ffff,0xff30ffff .long 0xff2cffff,0xff28ffff,0xff20ffff,0xff1cffff,0xff18ffff,0xff10ffff,0xff0cffff,0xff08ffff .long 0xff00ffff,0xff00fbff,0xff00f7ff,0xff00f3ff,0xff00ebff,0xff00e7ff,0xff00e3ff,0xff00dbff .long 0xff00d7ff,0xff00d3ff,0xff00cbff,0xff00c7ff,0xff00c3ff,0xff00baff,0xff00b6ff,0xff00b2ff .long 0xff00aaff,0xff00a6ff,0xff00a2ff,0xff009eff,0xff0096ff,0xff0092ff,0xff008eff,0xff0086ff .long 0xff0082ff,0xff007dff,0xff0075ff,0xff0071ff,0xff006dff,0xff0065ff,0xff0061ff,0xff005dff .long 0xff0055ff,0xff0051ff,0xff004dff,0xff0049ff,0xff0041ff,0xff003cff,0xff0038ff,0xff0030ff .long 0xff002cff,0xff0028ff,0xff0020ff,0xff001cff,0xff0018ff,0xff0010ff,0xff000cff,0xff0008ff .long 0xff0000ff,0xff0000fb,0xff0000f7,0xff0000f3,0xff0000ef,0xff0000eb,0xff0000e7,0xff0000e3 .long 0xff0000df,0xff0000db,0xff0000d7,0xff0000d3,0xff0000cf,0xff0000cb,0xff0000c7,0xff0000c3 .long 0xff0000be,0xff0000ba,0xff0000b6,0xff0000b2,0xff0000ae,0xff0000aa,0xff0000a6,0xff0000a2 .long 0xff00009e,0xff00009a,0xff000096,0xff000092,0xff00008e,0xff00008a,0xff000086,0xff000082 .long 
0xff00007d,0xff000079,0xff000075,0xff000071,0xff00006d,0xff000069,0xff000065,0xff000061 .long 0xff00005d,0xff000059,0xff000055,0xff000051,0xff00004d,0xff000049,0xff000045,0xff000041 .long 0xff00003c,0xff000038,0xff000034,0xff000030,0xff00002c,0xff000028,0xff000024,0xff000020 .long 0xff00001c,0xff000018,0xff000014,0xff000010,0xff00000c,0xff000008,0xff000000,0xff000000 palette_2: .long 0xff000000,0xff9208e7,0xff9208e3,0xff9608e3,0xff9a04df,0xff9e04df,0xff9e04db,0xffa204db .long 0xffa600d7,0xffaa00d7,0xffaa00d3,0xffae00cf,0xffb200cf,0xffb600cb,0xffb600c7,0xffba00c7 .long 0xffbe00c3,0xffbe00be,0xffc300be,0xffc700ba,0xffc700b6,0xffcb00b6,0xffcf00b2,0xffcf00ae .long 0xffd300aa,0xffd700aa,0xffd700a6,0xffdb04a2,0xffdb049e,0xffdf049e,0xffdf049a,0xffe30896 .long 0xffe30892,0xffe70892,0xffe7088e,0xffeb0c8a,0xffeb0c86,0xffef0c82,0xffef1082,0xffef107d .long 0xfff31479,0xfff31475,0xfff31475,0xfff71871,0xfff7186d,0xfff71c69,0xfffb1c65,0xfffb2065 .long 0xfffb2061,0xfffb245d,0xffff2859,0xffff2859,0xffff2c55,0xffff2c51,0xffff304d,0xffff344d .long 0xffff3449,0xffff3845,0xffff3c45,0xffff3c41,0xffff413c,0xffff453c,0xffff4538,0xffff4934 .long 0xffff4d34,0xffff4d30,0xffff512c,0xffff552c,0xffff5928,0xffff5928,0xfffb5d24,0xfffb6120 .long 0xfffb6520,0xfffb651c,0xfff7691c,0xfff76d18,0xfff77118,0xfff37514,0xfff37514,0xfff37914 .long 0xffef7d10,0xffef8210,0xffef820c,0xffeb860c,0xffeb8a0c,0xffe78e08,0xffe79208,0xffe39208 .long 0xffe39608,0xffdf9a04,0xffdf9e04,0xffdb9e04,0xffdba204,0xffd7a600,0xffd7aa00,0xffd3aa00 .long 0xffcfae00,0xffcfb200,0xffcbb600,0xffc7b600,0xffc7ba00,0xffc3be00,0xffbebe00,0xffbec300 .long 0xffbac700,0xffb6c700,0xffb6cb00,0xffb2cf00,0xffaecf00,0xffaad300,0xffaad700,0xffa6d700 .long 0xffa2db04,0xff9edb04,0xff9edf04,0xff9adf04,0xff96e308,0xff92e308,0xff92e708,0xff8ee708 .long 0xff8aeb0c,0xff86eb0c,0xff82ef0c,0xff82ef10,0xff7def10,0xff79f314,0xff75f314,0xff75f314 .long 0xff71f718,0xff6df718,0xff69f71c,0xff65fb1c,0xff65fb20,0xff61fb20,0xff5dfb24,0xff59ff28 .long 
0xff59ff28,0xff55ff2c,0xff51ff2c,0xff4dff30,0xff4dff34,0xff49ff34,0xff45ff38,0xff45ff3c .long 0xff41ff3c,0xff3cff41,0xff3cff45,0xff38ff45,0xff34ff49,0xff34ff4d,0xff30ff4d,0xff2cff51 .long 0xff2cff55,0xff28ff59,0xff28ff59,0xff24fb5d,0xff20fb61,0xff20fb65,0xff1cfb65,0xff1cf769 .long 0xff18f76d,0xff18f771,0xff14f375,0xff14f375,0xff14f379,0xff10ef7d,0xff10ef82,0xff0cef82 .long 0xff0ceb86,0xff0ceb8a,0xff08e78e,0xff08e792,0xff08e392,0xff08e396,0xff04df9a,0xff04df9e .long 0xff04db9e,0xff04dba2,0xff00d7a6,0xff00d7aa,0xff00d3aa,0xff00cfae,0xff00cfb2,0xff00cbb6 .long 0xff00c7b6,0xff00c7ba,0xff00c3be,0xff00bebe,0xff00bec3,0xff00bac7,0xff00b6c7,0xff00b6cb .long 0xff00b2cf,0xff00aecf,0xff00aad3,0xff00aad7,0xff00a6d7,0xff04a2db,0xff049edb,0xff049edf .long 0xff049adf,0xff0896e3,0xff0892e3,0xff0892e7,0xff088ee7,0xff0c8aeb,0xff0c86eb,0xff0c82ef .long 0xff1082ef,0xff107def,0xff1479f3,0xff1475f3,0xff1475f3,0xff1871f7,0xff186df7,0xff1c69f7 .long 0xff1c65fb,0xff2065fb,0xff2061fb,0xff245dfb,0xff2859ff,0xff2859ff,0xff2c55ff,0xff2c51ff .long 0xff304dff,0xff344dff,0xff3449ff,0xff3845ff,0xff3c45ff,0xff3c41ff,0xff413cff,0xff453cff .long 0xff4538ff,0xff4934ff,0xff4d34ff,0xff4d30ff,0xff512cff,0xff552cff,0xff5928ff,0xff5928ff .long 0xff5d24fb,0xff6120fb,0xff6520fb,0xff651cfb,0xff691cf7,0xff6d18f7,0xff7118f7,0xff7514f3 .long 0xff7514f3,0xff7914f3,0xff7d10ef,0xff8210ef,0xff820cef,0xff860ceb,0xff8a0ceb,0xff8e08e7 ; Video initialization parameters. Most of these I don't ; understand; what documentation I have has been saved here as ; comments. The comment "magic" means "meaning unknown". ; ; These lists are taken pretty much directly from tatest, which ; says of them "These values mainly from Dans 3dtest ; program...". ; ; Since these are longwords stores, the offset must always be ; multiples of 4; the terminator is any value which isn't. ; (We use 1, but set_params accepts anything whose low two ; bits are nonzero.) 
; .macro param offset, value .word $(offset) .long $(value) .endm .macro endparam .word 1 .endm .align 2 three_d_params: param 0x80a8, 0x15d1c951 ; magic param 0x80a0, 0x00000020 ; magic param 0x8008, 0x00000000 ; TA out of reset param 0x8048, 0x00000009 ; "alpha config" - ? param 0x8068, [FRAME_X<<16]|0 ; pixel clipping x param 0x806c, [FRAME_Y<<16]|0 ; pixel clipping y param 0x8110, 0x00093f39 ; magic param 0x8098, 0x00800408 ; magic param 0x804c, [FRAME_X*2]/8 ; "display align" - ? param 0x8078, 0f1.0 param 0x8084, 0x00000000 ; magic param 0x8030, 0x00000101 ; magic param 0x80b0, 0x007f7f7f ; fog table colour param 0x80b4, 0x007f7f7f ; fog vertex colour param 0x80c0, 0x00000000 ; colour clamp min param 0x80bc, 0xffffffff ; colour clamp max param 0x8080, 0x00000007 ; magic param 0x8074, 0x00000001 ; "cheap shadow" - ? param 0x807c, 0x0027df77 ; magic param 0x8008, 0x00000001 ; TA into reset param 0x8008, 0x00000000 ; TA out of reset param 0x80e4, 0x00000000 ; "stride width" - ? param 0x6884, 0x00000000 ; disable all interrupt enables param 0x6930, 0x00000000 param 0x6938, 0x00000000 param 0x6900, 0xffffffff ; reset all pending interrupts param 0x6908, 0xffffffff param 0x6930, 0x002807ec ; re-enable some events (which?) param 0x6938, 0x0000000e param 0x80b8, 0x0000ff07 ; fog density (meanings?) param 0x80b4, 0x007f7f7f ; fog vertex colour param 0x80b0, 0x007f7f7f ; fog table colour param 0x8108, 0x00000003 ; 32bit palette (?) endparam screen_params: param 0x80e8, 0x00160000 ; screen control (?) param 0x8044, 0x00800000 ; pixel mode ("vb+0x11" - ?) param 0x805c, 0x00000000 ; size modulo and display lines ("vb+0x17" - ?) param 0x80d0, 0x00000100 ; interlace flags (bit meanings?) param 0x80d8, 0x020c0359 ; magic param 0x80cc, 0x001501fe ; magic param 0x80d4, 0x007e0345 ; horizontal border (meaning? - see below) param 0x80dc, 0x00240204 ; vertical position (meaning?) param 0x80e0, 0x07d6c63f ; sync control (meaning?) 
param 0x80ec, 0x000000a4 ; horizontal position (meaning?) param 0x80f0, 0x00120012 ; vertical border (meanings?) param 0x80c8, 0x03450000 ; "set to same as border H in 80d4" - ? param 0x8068, [FRAME_X-1]<<16 ; (X resolution - 1) << 16 param 0x806c, [FRAME_Y-1]<<16 ; (Y resolution - 1) << 16 param 0x804c, 0x000000a0 ; "display align" - ? param 0x8118, 0x00008040 ; magic param 0x80f4, 0x00000401 ; "anti-aliasing" - ? param 0x8048, 0x00000009 ; "alpha config" - ? param 0x7814, 0x00000000 ; "more interrupt control stuff" - ? param 0x7834, 0x00000000 param 0x7854, 0x00000000 param 0x7874, 0x00000000 param 0x78bc, 0x4659404f param 0x8040, 0x00000000 ; border colour endparam ; "???" here means "not documented in tatest at all" ; The "2" in these is the offset from the beginning of the ; param to the place where we store the (longword) value. cmdlist_params: param 0x8008, 0x00000001 ; TA into reset param 0x8008, 0x00000000 ; TA out of reset cmdlist_param_tilebuf_a = 2 + . - cmdlist_params param 0x8124, 0 param 0x812c, 0 ; ??? cmdlist_param_cmdlist = 2 + . - cmdlist_params param 0x8128, 0 param 0x8130, 0 ; ??? param 0x813c, [[[FRAME_Y/32]-1]<<16] | [[FRAME_X/32]-1] cmdlist_param_tilebuf_b = 2 + . - cmdlist_params param 0x8164, 0 param 0x8140, 0x00100002 ; ??? param 0x8144, 0x80000000 ; confirm settings endparam .align 2 main: ; Things we need to do here: ; - Set up the FPU (including clearing SR.FD) ; - Set up the VBR (including clearing SR.BL and SR.RB) ; - Save r10-r15 against a possible return to cdcode ; Things we do not need to do because cdcode has done them: ; - Set up the SCIF (r14 is the SCIF's base address) ; - Set up a stack (r15 is cdcode's stack pointer) ; We do, though, need to put r14 in the gbr; all we use the gbr for is ; access to the SCIF. We could use other addressing modes, but the ; gbr-relative modes have 8 bits of offset, whereas the other ; register-plus-offset modes have only 4. 
; ---------------------------------------------------------------------
; main, continued: instructions following the "main:" label.
; Pushes r14..r10 (restored by throw_out), loads GBR/FPSCR/VBR/SR,
; records the stack pointer in throw_sp, runs the one-time
; initialization calls, then renders frames until nbgetchar reports
; input.
; ---------------------------------------------------------------------
        mov.l   r14,@-r15
        mov.l   r13,@-r15
        mov.l   r12,@-r15
        mov.l   r11,@-r15
        mov.l   r10,@-r15
        ldc     r14,gbr                 ; GBR = r14 (SCIF base, per the notes at main)
        SETS.L  #0,r1
        lds     r1,fpscr                ; FPSCR = 0
        SETS.L  #intvec,r0
        ldc     r0,vbr
        stc     sr,r1
        SETS.L  #~[SR_FD|SR_RB|SR_BL],r2
        and     r2,r1
        ldc     r1,sr                   ; clear SR.FD, SR.RB and SR.BL
        ; Note that r0-r7 may have changed if we switched banks.
        SETS.L  #throw_sp,r0
        mov.l   r15,@r0                 ; remembered for throw_out

        ; Application code begins here.
        bsr     init_rng
        nop
        bsr     init_maze
        nop
        bsr     print_maze
        nop
        bsr     clear_vram
        nop
        bsr     init_maple
        nop
        bsr     init_powervr
        nop
        bsr     init_video
        nop
        bsr     init_palette
        nop
        bsr     init_twiddling
        nop
        bsr     init_textures
        nop
        bsr     init_tiledesc
        nop
        bsr     init_3dvalues
        nop

        ; Frame loop: keep going while nbgetchar returns a negative r0.
1:      bsr     one_frame
        nop
        SETS.L  #nbgetchar,r0
        jsr     @r0
        nop
        cmp/pz  r0
        bf      1b

; throw_out: abrupt return to cdcode.  Reloads r15 from throw_sp, pops
; r10..r14 in the reverse of the order main pushed them, and jumps
; through r11.
; NOTE(review): r11 is presumably cdcode's re-entry address - confirm
; against cdcode.
throw_out:
        SETS.L  #throw_sp,r0
        mov.l   @r0,r15
        mov.l   @r15+,r10
        mov.l   @r15+,r11
        mov.l   @r15+,r12
        mov.l   @r15+,r13
        mov.l   @r15+,r14
        jmp     @r11
        nop

; print_maze: dump the maze, one cell per line, x varying fastest.
; Each line is "(x,y,z)" followed by six binary digits giving the
; cell's MZ MY MX PZ PY PX bits in that order (bit 3, MZC_S, is
; skipped), then CR LF.
; Register roles: r8=maze, r9=MZX, r10=MZY, r11=MZZ, r12=x, r13=y,
; r14=z, r2=cell word being shifted out.
; Assumes putchar and printdec take their argument in r1 and preserve
; r2 and r8-r14 - TODO confirm against their definitions.
print_maze:
        sts.l   pr,@-r15
        SETS.L  #maze,r8
        SETS.L  #MZX,r9
        SETS.L  #MZY,r10
        SETS.L  #MZZ,r11
        SETS.L  #0,r14                  ; z
3:      SETS.L  #0,r13                  ; y
2:      SETS.L  #0,r12                  ; x
1:      bsr     putchar
        mov     #'(,r1
        bsr     printdec
        mov     r12,r1
        bsr     putchar
        mov     #',,r1
        bsr     printdec
        mov     r13,r1
        bsr     putchar
        mov     #',,r1
        bsr     printdec
        mov     r14,r1
        bsr     putchar
        mov     #'),r1
        ; r0 = ((z*MZY)+y)*MZX + x, then doubled to a byte offset
        mul.l   r10,r14
        sts     macl,r0
        add     r13,r0
        mul.l   r9,r0
        sts     macl,r0
        add     r12,r0
        SHLL    #1,r0
        mov.w   @(r0,r8),r2
        SHLL    #25,r2                  ; bit 6 (MZC_MZ) is now bit 31
        shll    r2                      ; T = MZ
        movt    r1
        bsr     putchar
        add     #'0,r1
        shll    r2                      ; T = MY
        movt    r1
        bsr     putchar
        add     #'0,r1
        shll    r2                      ; T = MX
        movt    r1
        bsr     putchar
        add     #'0,r1
        shll    r2                      ; discard bit 3 (MZC_S)
        shll    r2                      ; T = PZ
        movt    r1
        bsr     putchar
        add     #'0,r1
        shll    r2                      ; T = PY
        movt    r1
        bsr     putchar
        add     #'0,r1
        shll    r2                      ; T = PX
        movt    r1
        bsr     putchar
        add     #'0,r1
        bsr     putchar
        mov     #13,r1
        bsr     putchar
        mov     #10,r1
        add     #1,r12
        cmp/hi  r12,r9                  ; MZX > x ?
        bt      1b
        add     #1,r13
        cmp/hi  r13,r10                 ; MZY > y ?
        bt      2b
        add     #1,r14
        cmp/hi  r14,r11                 ; MZZ > z ?
        bt      3b
        lds.l   @r15+,pr
        rts
        nop

; We assume MZC_Px can be converted to MZC_Mx by shifting by four bits.
.if [[MZC_PX << 4] != MZC_MX] | [[MZC_PY << 4] != MZC_MY] | [[MZC_PZ << 4] != MZC_MZ]
.error Update init_maze, or fix MZC_Px and MZC_Mx!
.endif

; init_maze: generate a random maze.
;   1. Zero maze[] (per-cell opening bits) and mcells[] (per-cell
;      clump IDs; 0 means "not yet touched").
;   2. Fill mwalls[] with one longword per interior wall,
;        (z << 24) | (y << 16) | (x << 8) | MZC_Mx bit,
;      describing the wall on the minus side of cell (x,y,z).  The
;      count is cross-checked against MZWALLS; a mismatch panics.
;   3. Repeatedly pick a random remaining wall (random_mod), remove it
;      from the list, and open it unless both neighbouring cells are
;      already in the same clump.  When two clumps meet, the greater ID
;      is renumbered to the lesser.  Finished when every cell's clump
;      ID is 1.
; Uses r0-r14 freely; calls random_mod and panic.
init_maze:
        sts.l   pr,@-r15
        ; Clear all maze cells and all cell clump IDs.
        SETS.L  #MZCELLS,r1
        SETS.L  #0,r2
        SETS.L  #maze+[2*MZCELLS],r3
        SETS.L  #mcells+[2*MZCELLS],r4
1:      dt      r1
        mov.w   r2,@-r3
        bf/s    1b
        mov.w   r2,@-r4

        ; Initialize the walls.
        ; r11 = next free mwalls slot, r10 = MZWALLS+1 countdown,
        ; r14/r13/r12 = x/y/z counting down to 0.
        SETS.L  #mwalls,r11
        SETS.L  #MZWALLS+1,r10
        SETS.L  #MZX-1,r14
4:      SETS.L  #MZY-1,r13
3:      SETS.L  #MZZ-1,r12
2:      mov     r12,r2
        SHLL    #8,r2
        or      r13,r2
        SHLL    #8,r2
        or      r14,r2
        SHLL    #8,r2                   ; r2 = (z<<24)|(y<<16)|(x<<8)
        tst     r12,r12
        bt      1f                      ; z == 0: no wall on the -z side
        bsr     9f
        mov     #MZC_MZ,r3
1:      tst     r13,r13
        bt      1f
        bsr     9f
        mov     #MZC_MY,r3
1:      tst     r14,r14
        bt      1f
        bsr     9f
        mov     #MZC_MX,r3
1:      add     #-1,r12
        cmp/pz  r12
        bt      2b
        add     #-1,r13
        cmp/pz  r13
        bt      3b
        add     #-1,r14
        cmp/pz  r14
        bt      4b
        dt      r10                     ; must reach exactly 0 here
        bt      1f
8:      bsr     panic
        nop

        ; Local subroutine: append wall (r2 | r3) to mwalls.
        ; Panics if more than MZWALLS walls are generated.
9:      dt      r10
        bt/s    8b
        or      r2,r3
        mov.l   r3,@r11
        rts
        add     #4,r11

1:      SETS.L  #1,r14                  ; next clump ID
        SETS.L  #MZCELLS-1,r13          ; highest cell index not yet seen in clump 1
        SETS.L  #mcells,r12
        SETS.L  #MZWALLS,r11            ; number of walls remaining
        SETS.L  #mwalls,r10
        SETS.L  #maze,r9
6:
        ; Scan downward from cell r13 for a cell whose clump ID is not 1.
1:      mov     r13,r0
        SHLL    #1,r0
        mov.w   @(r0,r12),r1
        dt      r1                      ; T = (clump ID == 1)
        bf      1f
        add     #-1,r13
        cmp/pz  r13
        bt      1b
        ; No cells with non-1 clump IDs - all done!
        lds.l   @r15+,pr
        rts
        nop

2:      bsr     panic                   ; out of walls but maze not connected
        nop

1:      cmp/pl  r11
        bf      2b
        bsr     random_mod
        mov     r11,r0
        mov     r0,r8                   ; random index to look at
        SHLL    #2,r0
        mov.l   @(r0,r10),r1            ; wall in question
        ; Remove it from the list: move the last entry into its slot
        ; unless it already was the last entry.
        add     #-1,r11
        cmp/hi  r8,r11
        bf      1f
        mov     r11,r2
        SHLL    #2,r2
        add     r10,r2
        mov.l   @r2,r3
        mov.l   r3,@(r0,r10)
        ; Unpack x/y/z from the wall word and linearize.
1:      mov     r1,r4
        SHLR    #8,r4                   ; low byte = x
        mov     r4,r3
        SHLR    #8,r3                   ; low byte = y
        mov     r3,r2
        SHLR    #8,r2                   ; z
        SETS.L  #MZY,r5
        mul.l   r2,r5
        sts     macl,r6
        extu.b  r3,r3
        add     r3,r6
.if MZX != MZY
        SETS.L  #MZX,r5
.endif
        mul.l   r6,r5
        sts     macl,r6
        extu.b  r4,r4
        add     r4,r6                   ; linear index of maze cell
        ; linear index of cell to look at is in r6
        ; walls element is in r1
        mov     r6,r7
        extu.b  r1,r2
        mov     r2,r0
        cmp/eq  #MZC_MX,r0
        bf      1f
        bra     2f
        add     #-1,r7
1:      cmp/eq  #MZC_MY,r0
        bf      1f
        bra     2f
        add     #-MZX,r7
1:      cmp/eq  #MZC_MZ,r0
        bf      1f
        SETS.L  #MZX*MZY,r0
        bra     2f
        sub     r0,r7
1:      bsr     panic                   ; wall word carries no known direction bit
        nop
2:      ; randomly-chosen cell index is in r6
        ; adjacent cell's index is in r7
        ; walls element is in r1
        ; bit is in r2
        mov     r6,r0
        SHLL    #1,r0
        mov.w   @(r0,r12),r3            ; chosen cell clump ID
        mov     r7,r0
        SHLL    #1,r0
        mov.w   @(r0,r12),r4            ; adjacent cell clump ID
        tst     r3,r3
        bt      1f
        tst     r4,r4
        bt      2f
        cmp/eq  r3,r4
        bt      9f                      ; both cells are the same clump; do nothing
        ; opening between two different clumps
        ; renumber the greater to match the lesser
        cmp/hi  r3,r4
        bt      3f
        xor     r3,r4                   ; swap r3 and r4 so r4 is the greater
        xor     r4,r3
        xor     r3,r4
3:      mov     r3,r1
        SETS.L  #MZCELLS,r8
        mov     r12,r5
5:      mov.w   @r5,r0
        cmp/eq  r0,r4
        bf      4f
        mov.w   r3,@r5
4:      dt      r8
        bf/s    5b
        add     #2,r5
        bra     8f
        nop
2:      ; opening into new space - just store new clump ID
        ; (r0 still holds the adjacent cell's byte offset)
        bra     8f
        mov.w   r3,@(r0,r12)
1:      tst     r4,r4
        bt      2f
        ; opening from new space - just store new clump ID
        mov     r6,r0
        SHLL    #1,r0
        bra     8f
        mov.w   r4,@(r0,r12)
2:      ; opening between two hitherto untouched cells - new clump
        mov.w   r14,@(r0,r12)           ; adjacent cell (r0 = r7*2)
        mov     r6,r0
        SHLL    #1,r0
        mov.w   r14,@(r0,r12)           ; chosen cell
        add     #1,r14
8:      ; bookkeeping done; just open wall
        mov     r6,r0
        SHLL    #1,r0
        mov.w   @(r0,r9),r1
        or      r2,r1
        mov.w   r1,@(r0,r9)
        ; This shift assumes that MZC_Mx can be converted into MZC_Px
        ; by a right shift of four bits.
        SHLR    #4,r2
        mov     r7,r0
        SHLL    #1,r0
        mov.w   @(r0,r9),r1
        or      r2,r1
        mov.w   r1,@(r0,r9)
9:      ; all done, including any opening
        bra     6b
        nop

        SETCONST

; Returns a random integer modulo r0.
; In:   r0 = modulus.
; Out:  r0 = (state word >> 1) mod modulus, computed as
;       v - trunc(v / m) * m in double precision.
; Writes r0-r4, fpul, dr0 and dr2 (the original header listed "dr1"
; and macl; keep treating macl as clobbered in case a macro expansion
; uses it).  FPSCR is restored on exit.  May push onto the stack.
;
; One state word is consumed per call, indexed downward by rng_hand.
; When rng_hand is 0 the state is re-stirred via stir_rng_preserving.
; PR must be saved around that call: bsr overwrites PR, and without the
; save the final rts would return into this routine instead of to the
; caller.
random_mod:
        lds     r0,fpul
        sts     fpscr,r1
        mov     r1,r2                   ; r2 = caller's FPSCR
        SETS.L  #FPSCR_PR,r0
        or      r0,r1
        lds     r1,fpscr                ; switch to double precision
        .pr 1
        float   fpul,dr0                ; dr0 = (double)modulus
        SETS.L  #rng_hand,r1
        mov.b   @r1,r4
        SETS.L  #rng_state,r3
        mov     r4,r0
        SHLL    #2,r0
        mov.l   @(r0,r3),r0             ; r0 = rng_state[rng_hand]
        cmp/pl  r4
        bt      1f
        ; rng_hand == 0: state exhausted, refill it (this also resets
        ; rng_hand).  r0-r7 are preserved by the callee.
        sts.l   pr,@-r15                ; bsr below clobbers PR
        bsr     stir_rng_preserving
        nop
        lds.l   @r15+,pr
        bra     2f
        nop
1:      add     #-1,r4
        mov.b   r4,@r1
2:      SHLR    #1,r0                   ; drop to 31 bits so the value is non-negative
        sts     fpul,r1
        lds     r0,fpul
        float   fpul,dr2                ; dr2 = (double)v
        fdiv    dr0,dr2                 ; v / m
        ftrc    dr2,fpul                ; trunc(v / m)
        float   fpul,dr2
        fmul    dr0,dr2                 ; trunc(v / m) * m
        ftrc    dr2,fpul
        sts     fpul,r3
        lds     r2,fpscr                ; restore caller's FPSCR
        .pr 0
        rts
        sub     r3,r0                   ; r0 = v - trunc(v / m) * m

; We deliberately don't initialize rng_state here; we stir the area first
; thing, and this way we may get some extra entropy from whatever is
; lying around in that memory.  We don't need cryptographic levels of
; unguessability here.
; init_rng: seed the generator.  Stirs whatever already sits in the
; state area, folds in RTC-derived values (rng_rtc), and stirs again.
init_rng:
        sts.l   pr,@-r15
        bsr     stir_rng
        nop
        bsr     rng_rtc
        nop
        bsr     stir_rng
        nop
        lds.l   @r15+,pr
        rts
        nop

; rng_rtc: mix real-time-clock readings into the first two words of
; rng_state.
;   - waits until G2DRAIN_BIT is clear at G2DRAIN_ADDR;
;   - reads the RTC until three successive reads all equal a reference
;     read (r6);
;   - then counts (r5) how many further reads it takes before the RTC
;     value changes;
;   - adds the stable RTC value to rng_state[0] and the count to
;     rng_state[1].
; PR is parked in r3 and the routine returns with jmp @r3, so it needs
; no stack.  Uses r0-r7.
rng_rtc:
        sts     pr,r3
        SETS.L  #G2DRAIN_ADDR,r1
        SETS.L  #G2DRAIN_BIT,r2
1:      mov.l   @r1,r0
        tst     r2,r0
        bf      1b
        SETS.L  #G2RTC_BASE,r7
        SETS.L  #0,r1
1:      SETS.L  #3,r5                   ; need three matching reads
        mov     r1,r6                   ; reference value
2:      bsr     read_rtc_once
        nop                             ; delay slot: the compare must follow the read
        cmp/eq  r1,r6                   ; fresh read vs. reference
        bf      1b                      ; mismatch: restart with the fresh read
        dt      r5
        bf      2b
        mov     r1,r4                   ; r4 = verified RTC value
        SETS.L  #0,r5
1:      bsr     read_rtc_once
        add     #1,r5                   ; count this read
        cmp/eq  r1,r4
        bt      1b                      ; spin until the RTC value changes
        SETS.L  #rng_state,r1
        mov.l   @r1,r0
        add     r4,r0
        mov.l   r0,@r1
        mov.l   @(4,r1),r0
        add     r5,r0
        jmp     @r3
        mov.l   r0,@(4,r1)

; Assumes G2RTC_BASE is in r7 and r0-r2 are scratch.
; Returned value is in r1.  Disturbs nothing else.
; The result is (long at @r7) << 16 | (low 16 bits of long at @(4,r7)).
read_rtc_once:
        mov.l   @r7,r1
        mov.l   @(4,r7),r2
        SHLL    #16,r1/r0
        extu.w  r2,r2
        rts
        or      r2,r1

; stir_rng_preserving: call stir_rng while preserving r0-r7 and PR
; (all saved on the stack and restored in reverse order).
stir_rng_preserving:
        mov.l   r0,@-r15
        mov.l   r1,@-r15
        mov.l   r2,@-r15
        mov.l   r3,@-r15
        sts.l   pr,@-r15
        mov.l   r4,@-r15
        mov.l   r5,@-r15
        mov.l   r6,@-r15
        bsr     stir_rng
        mov.l   r7,@-r15
        mov.l   @r15+,r7
        mov.l   @r15+,r6
        mov.l   @r15+,r5
        mov.l   @r15+,r4
        lds.l   @r15+,pr
        mov.l   @r15+,r3
        mov.l   @r15+,r2
        mov.l   @r15+,r1
        rts
        mov.l   @r15+,r0

; Uses r0-r7.  Disturbs nothing else.
stir_rng: SETS.L #0x12345678,r1 SETS.L #0x04c11db7,r2 ; edb88320 bit-reversed SETS.L #rng_state,r3 SETS.L #RNG_STATE_WORDS,r4 mov r4,r5 mov r3,r6 3: mov.l @r6+,r7 xor r7,r1 SETS.L #32,r7 2: shll r1 bf 1f xor r2,r1 1: dt r7 bf 2b dt r5 bf 3b 3: mov.l @r3,r7 mov.l r1,@r3 add r7,r1 SETS.L #8,r7 2: shll r1 bf 1f xor r2,r1 1: dt r7 bf 2b dt r4 bf/s 3b add #4,r3 .if rng_hand == rng_state+[4*RNG_STATE_WORDS] ; nothing .elif @IS_SB[rng_hand-[rng_state+[4*RNG_STATE_WORDS]]] add #rng_hand-[rng_state+[4*RNG_STATE_WORDS]],r3 .else SETS.L #rng_hand,r3 .endif SETS.L #RNG_STATE_WORDS-1,r0 rts mov.b r0,@r3 clear_vram: SETS.L #QACR0,r1 SETS.L #QACR1,r2 SETS.L #[[VRAM_BASE_64>>26]&7]<<2,r3 SETS.L #STOREQ_BASE+[4*16],r4 SETS.L #0,r5 mov.l r3,@r1 mov.l r3,@r2 SETS.L #16,r0 1: dt r0 bf/s 1b mov.l r5,@-r4 SETS.L #VRAM_SIZE/32,r1 SETS.L #[VRAM_BASE_64&0x03ffffc0]|0xe0000000,r2 1: pref @r2 dt r1 bf/s 1b add #32,r2 mov.l r5,@r4 add #4*16,r4 rts mov.l r5,@r4 set_params: ; r1 points to params table SETS.L #VIDREG_BASE,r2 1: mov.w @r1+,r0 tst #3,r0 bf/s 1f extu.w r0,r0 mov.w @r1+,r3 mov.w @r1+,r4 SHLL #16,r4 extu.w r3,r3 or r3,r4 add r2,r0 bra 1b mov.l r4,@r0 1: rts nop init_maple: mova 9f,r0 1: mov.l @r0+,r1 tst r1,r1 bt 1f mov.l @r0+,r2 bra 1b mov.l r2,@r1 1: rts nop .align 4 9: .long BUS_RESET, BUS_RESET_VALUE .long BUS_RESET2, BUS_RESET2_VALUE .long BUS_SPEED, SPEED_2MBPS|[50000<>1)] = ; compute_texture(i, j, 0) | (compute_texture(i, j+1, 0)<<8); ; /* Texture 1 = Julia */ ; tex[1][twiddletab[i]|(twiddletab[j]>>1)] = ; compute_texture(i, j, 1) | (compute_texture(i, j+1, 1)<<8); ; } ; ; We change some names, but it's otherwise pretty similar. We keep a ; lot of stuff on the stack rather than in registers; while we might ; have enough registers, this means I don't have to think about ; register allocation as much. It also means the texture computation ; functions have a much freer hand with registers. 
;
; Arguably we should write these through 0x84000000 and then flush the
; d$, but this is initialization code and hence uncached performance
; is acceptable here.
;
; init_textures: fill texture_wall with the 8x8 wall texture.
; For each y in 0..7 and each even x in 0..6, the texels at (x,y) and
; (x+1,y) are computed with compute_texture, packed as
; val(x,y) | (val(x+1,y) << 8), and stored as one 16-bit word at byte
; offset twiddles[x] | (twiddles[y] << 1) from texture_wall.
; Loop state lives on the stack; from the top it is
;   [vals] x y tex twiddles saved-PR.
init_textures:
        sts.l   pr,@-r15
        ; Wall texture.
        SETS.L  #twiddles,r7
        mov.l   r7,@-r15                ; push twiddles
        SETS.L  #texture_wall,r8
        mov.l   r8,@-r15                ; push tex
        mov     #0,r0
        mov.l   r0,@-r15                ; push y = 0
2:      mov     #0,r0
        mov.l   r0,@-r15                ; push x = 0
        ; stack = x y tex twiddles
1:      mov.l   @r15,r1                 ; x
        bsr     compute_texture
        mov.l   @(4,r15),r2             ; y
        mov.l   r0,@-r15                ; valA(x,y)
        mov.l   @(4,r15),r1             ; x
        mov.l   @(8,r15),r2             ; y
        bsr     compute_texture
        add     #1,r1                   ; r0=valA(x+1,y)
        mov.l   @r15+,r1                ; valA(x,y)
        SHLL    #8,r0
        or      r1,r0                   ; combined vals
        mov.l   r0,@-r15
        ; stack = vals x y tex twiddles
        mov.l   @(16,r15),r2            ; twiddles
        mov.l   @(4,r15),r1             ; x
        SHLL    #1,r1
        add     r2,r1
        mov.w   @r1,r1                  ; twiddles[x]
        mov.l   @(8,r15),r3             ; y
        SHLL    #1,r3
        add     r2,r3
        mov.w   @r3,r3                  ; twiddles[y]
        SHLL    #1,r3
        or      r1,r3
        ; r3 now holds twiddled texture offset
        mov.l   @(12,r15),r2            ; tex
        add     r3,r2
        mov.l   @r15,r0                 ; val
        mov.w   r0,@r2
        add     #4,r15                  ; pop vals
        SETS.L  #8,r1
        mov.l   @r15,r0                 ; x
        add     #2,r0
        cmp/hs  r1,r0                   ; x >= 8 ?
        bf/s    1b
        mov.l   r0,@r15
        add     #4,r15                  ; pop x
        mov.l   @r15,r0                 ; y
        add     #1,r0
        cmp/hs  r1,r0                   ; y >= 8 ?  (r1 is still 8)
        bf/s    2b
        mov.l   r0,@r15
        ; y, tex and twiddles are all still on the stack here; all
        ; three must go so that the next pop really is the saved PR.
        add     #12,r15                 ; pop y, tex, twiddles
        lds.l   @r15+,pr
        rts
        nop

; compute_texture: one texel value.
; In:   r1 = x, r2 = y (integers).
; Out:  r0 = 255 & (int)(20 * hypot(x - 3.5, y - 3.5)).
; Writes r0, fpul, fr0-fr3; r1 and r2 are not modified.
compute_texture:
        ; return(255&(int)(20*hypot(x-3.5,y-3.5)))
        SETS.L  #@FLOAT[0f3.5],r0
        lds     r0,fpul
        fsts    fpul,fr2                ; fr2 = 3.5
        mov     #20,r0
        lds     r0,fpul
        float   fpul,fr3                ; fr3 = 20.0
        lds     r1,fpul
        float   fpul,fr0
        fsub    fr2,fr0                 ; fr0 = x - 3.5
        lds     r2,fpul
        float   fpul,fr1
        fsub    fr2,fr1                 ; fr1 = y - 3.5
        fmul    fr0,fr0
        fmul    fr1,fr1
        fadd    fr1,fr0
        fsqrt   fr0
        fmul    fr3,fr0
        ftrc    fr0,fpul
        sts     fpul,r0
        rts
        extu.b  r0,r0

; init_tiledesc: build both tile-descriptor sets.
; Calls setup_tiledesc once per buffer (r2 = descriptor area from
; tiledescs[], r3 = tile buffer from tilebuffers[]) and stores each
; returned r0 in tiledesc_cookies[0] and [1].  Finally sets curbuf to 0.
; The arguments for the second call and the cookie pointer are parked
; on the stack across the first call.
init_tiledesc:
        sts.l   pr,@-r15
        SETS.L  #tiledesc_cookies,r4
        SETS.L  #tilebuffers,r5
        SETS.L  #tiledescs,r6
        mov.l   r4,@-r15                ; push &tiledesc_cookies
        mov.l   @(4,r5),r0
        mov.l   r0,@-r15                ; push tilebuffers[1]
        mov.l   @(4,r6),r0
        mov.l   r0,@-r15                ; push tiledescs[1]
        mov.l   @r6,r2                  ; ptr = tiledescs[0]
        bsr     setup_tiledesc
        mov.l   @r5,r3                  ; buf = tilebuffers[0]
        mov.l   @(8,r15),r4
        mov.l   r0,@r4                  ; tiledesc_cookies[0]
        mov.l   @r15+,r2                ; ptr = tiledescs[1]
        bsr     setup_tiledesc
        mov.l   @r15+,r3                ; buf = tilebuffers[1]
        mov.l   @r15+,r4
        mov.l   r0,@(4,r4)              ; tiledesc_cookies[1]
        SETS.L  #curbuf,r1
        mov     #0,r0
        lds.l   @r15+,pr
        rts
        mov.b   r0,@r1                  ; curbuf = 0

setup_tiledesc:
        ; in tatest terms, this is ta_create_tile_descriptors.
; ptr is r2, buf is r3, w is FRAME_X/32, and h is FRAME_Y/32.  No
; registers r0-r9 are important upon return; they all are
; available to us.
	; vr = ptr
	mov	r2,r4			; vr is r4
	; bf = ((unsigned int)buf)&0x007fffff (buf is dead after this)
	SETS.L	#0x007fffff,r0
	and	r0,r3			; bf is r3 from here on
	; strbase = (((unsigned int)ptr)&0x007fffff)|0x80000000
	; ptr is _not_ dead here, but 0x007fffff is.
	SETS.L	#0x80000000,r7
	and	r2,r0
	or	r0,r7			; strbase is r7
	; for (18 loops) *vr++ = 0
	mov	#18,r1
	mov	#0,r0
1:	mov.l	r0,@r4
	dt	r1
	bf/s	1b
	add	#4,r4			; (delay slot) vr++
	; *vr++ = 0x10000000
	; *vr++ = 0x80000000 (five times)
	SETS.L	#0x10000000,r1
	mov.l	r1,@r4
	SETS.L	#0x80000000,r1
	mov.l	r1,@(4,r4)
	mov.l	r1,@(8,r4)
	mov.l	r1,@(12,r4)
	mov.l	r1,@(16,r4)
	mov.l	r1,@(20,r4)
	add	#24,r4
	SETS.L	#FRAME_X/32,r8		; w is r8
	SETS.L	#FRAME_Y/32,r9		; h is r9
	; NOTE(review): this copy of the file is damaged at this point.
	; Everything between the "x" on the next line and the stray
	; closing quote after it has been lost, so the rest of
	; setup_tiledesc (its per-tile loop, return value and return)
	; and anything that sat between it and setup_cmd_list are
	; missing.  Restore them from a good copy; the fragment is kept
	; exactly as found.
	; for (x=0;x
"
	.align	2

; setup_cmd_list: patch the current buffer's addresses into the
; cmdlist_params table and hand that table to set_params.
;
; cmdlists[curbuf] and tilebuffers[curbuf] are each masked to their low
; 23 bits and stored as two 16-bit halves (low half first).  The tile
; buffer address goes to both the cmdlist_param_tilebuf_a and
; cmdlist_param_tilebuf_b offsets, the command list address to
; cmdlist_param_cmdlist.  set_params is then called with
; r1 = cmdlist_params, and VIDREG_BASE+0x8144 is read afterwards (the
; value is left in r0).
; Uses r0-r5, plus whatever set_params uses (not visible here).
setup_cmd_list:
	; In tatest terms, this is ta_set_target, but with args
	; computed here based on curbuf rather than being passed in.
	sts.l	pr,@-r15
	SETS.L	#curbuf,r1
	mov.b	@r1,r1
	SHLL	#2,r1			; curbuf as an offset into tables of longs
	SETS.L	#cmdlists,r2
	SETS.L	#tilebuffers,r3
	add	r1,r2
	mov.l	@r2,r2			; cmdlists[curbuf]
	add	r1,r3
	mov.l	@r3,r3			; tilebuffers[curbuf]
	SETS.L	#0x007fffff,r4
	and	r4,r2
	and	r4,r3
	swap.w	r2,r4			; high half of cmdlist address, in low word
	swap.w	r3,r5			; high half of tile buffer address, in low word
	SETS.L	#cmdlist_params,r0
	SETS.L	#cmdlist_param_tilebuf_a,r1
	mov.w	r3,@(r0,r1)
	add	#2,r1
	mov.w	r5,@(r0,r1)
	SETS.L	#cmdlist_param_tilebuf_b,r1
	mov.w	r3,@(r0,r1)
	add	#2,r1
	mov.w	r5,@(r0,r1)
	SETS.L	#cmdlist_param_cmdlist,r1
	mov.w	r2,@(r0,r1)
	add	#2,r1
	mov.w	r4,@(r0,r1)
	bsr	set_params
	mov	r0,r1			; (delay slot) r1 = cmdlist_params
	SETS.L	#VIDREG_BASE+0x8144,r0
	mov.l	@r0,r0
	lds.l	@r15+,pr
	rts
	nop

; draw_scene: emit one textured two-triangle strip per scene face,
; followed by an all-zero command.
;
; Each command is built in the 8-long ta_cmd buffer and sent with
; commit_ta_cmd.  A face record is 56 bytes; as read below it holds
;	+0  vertex index, +4  u, +8  v		(vertex 0)
;	+12 vertex index, +16 u, +20 v		(vertex 1)
;	+24 vertex index, +28 u, +32 v		(vertex 2)
;	+36 vertex index, +40 u, +44 v		(vertex 3)
;	+48 palette number, +52 texture number
; A vertex index selects a 12-byte x,y,z triple in xform_coords (only
; the low 16 bits of the index are used, because of mulu.w).  The
; texture number selects an 8-byte entry in textures: pointer, then
; size bits.
;
; Registers held across the whole routine (commit_ta_cmd leaves them
; alone): r3 = 12, r4 = xform_coords, r6 = 0, r7 = ta_cmd,
; r8 = faces remaining, r9 = current face.  r5 is loaded but not read
; again within this routine.  r0-r2 and r10 are scratch.
; NOTE(review): r8 is counted down with dt before it is tested, so the
; loop assumes n_scene_faces is at least 1 - confirm.
draw_scene:
	sts.l	pr,@-r15
	SETS.L	#scene_faces,r9
	SETS.L	#n_scene_faces,r8
	SETS.L	#ta_cmd,r7
	SETS.L	#0,r6
	SETS.L	#0f1,r5
	SETS.L	#xform_coords,r4
	SETS.L	#3*4,r3
	; Polygon header for this face.
1:	SETS.L	#TA_CMD_POLYGON|TA_CMD_POLYGON_TYPE_OPAQUE|TA_CMD_POLYGON_SUBLIST|TA_CMD_POLYGON_STRIPLENGTH_2|TA_CMD_POLYGON_TEXTURED,r0
	mov.l	r0,@r7			; cmd
	SETS.L	#TA_POLYMODE1_Z_GREATER|TA_POLYMODE1_CULL_CCW,r0
	mov.l	r0,@(4,r7)		; mode1
	SETS.L	#TA_TEXTUREMODE_CLUT8,r1
	mov.l	@(48,r9),r0		; palette number
	SHLL	#TA_TEXTUREMODE_CLUTBANK8_SHIFT,r0/r2
	or	r0,r1
	mov.l	@(52,r9),r0		; texture number
	SETS.L	#textures,r2
	SHLL	#3,r0			; 8 bytes per textures entry
	add	r0,r2
	mov.l	@r2,r0			; texture pointer
	mov.l	@(4,r2),r2		; size bits
	SETS.L	#cur_texture_mode,r10
	mov.l	@r10,r10
	or	r2,r10
	mov.l	r10,@(8,r7)		; mode2
	SHXR	#TA_TEXTUREMODE_ADDRESS_SHIFT,r0
	SETS.L	#TA_TEXTUREMODE_ADDRESS_MASK,r2
	and	r2,r0
	or	r0,r1
	mov.l	r1,@(12,r7)		; texture
	mov.l	r6,@(16,r7)		; alpha
	mov.l	r6,@(20,r7)		; red
	mov.l	r6,@(24,r7)		; green
	bsr	commit_ta_cmd
	mov.l	r6,@(28,r7)		; blue (delay slot)
	; Vertex 0.  cmd, colour and ocolour set here are reused
	; unchanged for vertices 1 and 2.
	SETS.L	#TA_CMD_VERTEX,r1
	mov.l	r1,@r7			; cmd
	mov.l	r6,@(28,r7)		; ocolour
	not	r6,r1			; r6 is 0, so this is all ones
	mov.l	r1,@(24,r7)		; colour
	mov.l	@r9,r1
	mulu.w	r1,r3
	sts	macl,r0			; index * 12
	add	r4,r0
	mov.l	@r0,r2
	mov.l	r2,@(4,r7)		; x
	mov.l	@(4,r0),r2
	mov.l	r2,@(8,r7)		; y
	mov.l	@(8,r0),r2
	mov.l	r2,@(12,r7)		; z
	mov.l	@(4,r9),r0
	mov.l	r0,@(16,r7)		; u
	mov.l	@(8,r9),r0
	bsr	commit_ta_cmd
	mov.l	r0,@(20,r7)		; v (delay slot)
	; Vertex 1.
	mov.l	@(12,r9),r0
	mulu.w	r0,r3
	sts	macl,r0
	add	r4,r0
	mov.l	@r0,r2
	mov.l	r2,@(4,r7)		; x
	mov.l	@(4,r0),r2
	mov.l	r2,@(8,r7)		; y
	mov.l	@(8,r0),r2
	mov.l	r2,@(12,r7)		; z
	mov.l	@(16,r9),r0
	mov.l	r0,@(16,r7)		; u
	mov.l	@(20,r9),r0
	bsr	commit_ta_cmd
	mov.l	r0,@(20,r7)		; v (delay slot)
	; Vertex 2.
	mov.l	@(24,r9),r0
	mulu.w	r0,r3
	sts	macl,r0
	add	r4,r0
	mov.l	@r0,r2
	mov.l	r2,@(4,r7)		; x
	mov.l	@(4,r0),r2
	mov.l	r2,@(8,r7)		; y
	mov.l	@(8,r0),r2
	mov.l	r2,@(12,r7)		; z
	mov.l	@(28,r9),r0
	mov.l	r0,@(16,r7)		; u
	mov.l	@(32,r9),r0
	bsr	commit_ta_cmd
	mov.l	r0,@(20,r7)		; v (delay slot)
	; Vertex 3, flagged as end of strip.
	mov.l	@(36,r9),r0
	mulu.w	r0,r3
	sts	macl,r0
	add	r4,r0
	mov.l	@r0,r2
	mov.l	r2,@(4,r7)		; x
	mov.l	@(4,r0),r2
	mov.l	r2,@(8,r7)		; y
	mov.l	@(8,r0),r2
	mov.l	r2,@(12,r7)		; z
	mov.l	@(40,r9),r0
	mov.l	r0,@(16,r7)		; u
	mov.l	@(44,r9),r0
	mov.l	r0,@(20,r7)		; v
	SETS.L	#TA_CMD_VERTEX|TA_CMD_VERTEX_EOS,r1
	bsr	commit_ta_cmd
	mov.l	r1,@r7			; cmd (delay slot)
	dt	r8
	bf/s	1b
	add	#56,r9			; (delay slot) next face record
	; Finish with a command of eight zero longs.
	; making this a loop saves only one instruction and adds time.
	mov.l	r6,@r7
	mov.l	r6,@(4,r7)
	mov.l	r6,@(8,r7)
	mov.l	r6,@(12,r7)
	mov.l	r6,@(16,r7)
	mov.l	r6,@(20,r7)
	mov.l	r6,@(24,r7)
	bsr	commit_ta_cmd
	mov.l	r6,@(28,r7)		; (delay slot)
	lds.l	@r15+,pr
	rts
	nop

; commit_ta_cmd: send the 8 longs at ta_cmd out through the store
; queue area at STOREQ_BASE.
; QACR0 is loaded with bits 28..26 of TA_CMD_BASE (shifted into bits
; 4..2), the 8 longs are copied to STOREQ_BASE, and a pref on
; STOREQ_BASE is issued in the delay slot of the return.
; Destroys r0, r1, r10-r14; nothing is saved.
commit_ta_cmd:
	; In tatest terms, this is ta_commit_list(), with the argument
	; always being ta_cmd.
	SETS.L	#QACR0,r1
	SETS.L	#STOREQ_BASE,r14
	SETS.L	#[[TA_CMD_BASE>>26]&7]<<2,r13
	SETS.L	#ta_cmd,r12
	SETS.L	#8,r11
	mov.l	r13,@r1
	mov	r14,r10			; keep the start address for the pref
1:	mov.l	@r12+,r0
	dt	r11
	mov.l	r0,@r14
	bf/s	1b
	add	#4,r14			; (delay slot)
	rts
	pref	@r10			; (delay slot)

; handle_maple: wait for the maple bus to go idle, then copy the two
; longs at offsets 8 and 12 of maple_resp into curistate.
; Spins while BUS_STATE_RUNNING is set in BUS_STATE.
; Uses r0-r3.
handle_maple:
	SETS.L	#BUS_STATE,r3
1:	mov.l	@r3,r0
	tst	#BUS_STATE_RUNNING,r0
	bf	1b
	SETS.L	#maple_resp,r0
	; We ocbi only one cache line, because the parts of the
	; response we care about fit in a single cache line.  The
	; hardware's alignment requirements for maple buffers match
	; cache line alignments, and we access only 8 bytes of it at
	; low offsets.
	;
	; We arguably should ocbi the line back just before we kick off
	; the maple operation rather than waiting until here.  Since
	; we never write to this cache line, the only difference I see
	; is whether it sits around in the cache in the interim.  This
	; might conceivably affect something, but even if it does I
	; have trouble seeing the difference being more than one cache
	; line fill penalty.
	ocbi	@r0
	mov.l	@(8,r0),r1
	mov.l	@(12,r0),r2
	SETS.L	#curistate,r0
	mov.l	r1,@r0
	mov.l	r2,@(4,r0)
	rts
	nop

; await_video: wait for the render-done event, then for the vertical
; blank event.
; In both cases the event bit is written back to the register it was
; read from once it has been seen; for the vblank bit that is also
; done once before the wait starts.
; Uses r0-r2.
await_video:
	; In tatest terms, this is everything in the main loop after
	; the call to ta_commit_end().
	; ta_wait_render()
	SETS.L	#TA_RENDER_EVENT,r1
	SETS.L	#TA_RENDER_BIT,r2
1:	mov.l	@r1,r0
	tst	r2,r0
	bt	1b
	mov.l	r2,@r1
	; wait_bovp()
	SETS.L	#VBLANK_REG,r1
	SETS.L	#VBLANK_VBIT,r2
	mov.l	r2,@r1
1:	mov.l	@r1,r0
	tst	r2,r0
	bt	1b
	rts
	mov.l	r2,@r1			; (delay slot)

; next_frame: display render_buf[curbuf], start a render into the other
; render buffer from cmdlists[curbuf] and tiledesc_cookies[curbuf],
; then toggle curbuf between 0 and 1.
; Uses r0-r6 and r10-r12; nothing is saved.
next_frame:
	; Switch to the previously-rendered screen
	SETS.L	#curbuf,r10
	SETS.L	#render_buf,r11
	mov.b	@r10,r0
	SHLL	#2,r0
	mov.l	@(r0,r11),r1		; render_buf[curbuf]
	SETS.L	#0x007fffff,r12
	SETS.L	#DISPLAY_VRAM,r3
	and	r12,r1
	mov.l	r1,@r3
	SETS.L	#SHORT_FRAME_OFFSET,r0
	add	r0,r1
	mov.l	r1,@(4,r3)		; same address, one scan line further on
	; Kick off rendering to the screen we just stopped displaying
	; In tatest terms, this is ta_begin_render.
	mov.b	@r10,r0			; curbuf
	SETS.L	#cmdlists,r1
	SHLL	#2,r0
	SETS.L	#tiledesc_cookies,r2
	mov.l	@(r0,r1),r1		; cmdlist
	mov.l	@(r0,r2),r2		; tiles
	xor	#4,r0			; offset of the other buffer's entry
	mov.l	@(r0,r11),r3		; scrn
	SETS.L	#VIDREG_BASE+0x8138,r4
	SETS.L	#0x12,r5
	SETS.L	#0,r6
	mov.l	@r4,r4
	SETS.L	#VRAM_BASE_32,r0
	or	r0,r4			; taend
	; Zero 0x12 longs starting at taend.
1:	mov.l	r6,@r4
	dt	r5
	bf/s	1b
	add	#4,r4			; (delay slot)
	add	#-0x12*4,r4		; back to taend
	; We could use set_params here, but between the number of
	; values to store and the need to break longs into two words,
	; it's less pain to do it this way.
	;
	; Do we have to do all these in exactly this order?  I suspect
	; not, but, absent documentation, it's hard to tell how much
	; deviation is OK.  We stick strictly to tatest's order.
	; r5 walks from register to register by relative adds.
	SETS.L	#VIDREG_BASE+0x802c,r5
	and	r12,r2
	mov.l	r2,@r5			; 0xa05f802c
	add	#0x8020-0x802c,r5
	mov	r1,r0
	and	r12,r0
	mov.l	r0,@r5			; 0xa05f8020
	add	#0x8060-0x8020,r5
	and	r12,r3
	mov.l	r3,@r5			; 0xa05f8060
	add	#0x808c-0x8060,r5
	sub	r1,r4			; taend - cmdlist
	SHLL	#1,r4
	SETS.L	#0x01000000,r0
	or	r4,r0
	mov.l	r0,@r5			; 0xa05f808c
	add	#0x8088-0x808c,r5
	SETS.L	#0x3e4cccc0,r0		; tatest says "zclip"
	mov.l	r0,@r5			; 0xa05f8088
	add	#0x8068-0x8088,r5
	SETS.L	#[FRAME_X-1]<<16,r0	; tatest calls it "clipw"
	mov.l	r0,@r5			; 0xa05f8068
	add	#0x806c-0x8068,r5
	SETS.L	#[FRAME_Y-1]<<16,r0	; tatest calls it "cliph"
	mov.l	r0,@r5			; 0xa05f806c
	add	#0x804c-0x806c,r5
	SETS.L	#[FRAME_X*2]>>3,r0	; tatest calls it "modulo"
	mov.l	r0,@r5			; 0xa05f804c
	add	#0x8048-0x804c,r5
	SETS.L	#TA_PIXFMT_RGB565|TA_PIXFMT_DITHER,r0 ; tatest calls it "pixfmt"
	mov.l	r0,@r5			; 0xa05f8048
	add	#0x8014-0x8048,r5
	SETS.L	#0xffffffff,r0		; tatest says "Launch!"
	mov.l	r0,@r5			; 0xa05f8014
	; curbuf = ! curbuf
	mov.b	@r10,r0
	tst	r0,r0
	bt/s	1f
	add	#1,r0			; (delay slot) 0 becomes 1 on the taken path
	mov	#0,r0			; curbuf was nonzero: becomes 0
1:	rts
	mov.b	r0,@r10			; (delay slot)

	SETCONST

; Rotate (fr0,fr1,fr2) by fpul fsca units around axis (fr4,fr5,fr6).
; fpul is in integer format (as required by fsca).
; The axis vector must be normalized already.
; Output in (fr0,fr1,fr2).
; Preserves fr4-fr6, fr13-fr15, fpul, all CPU registers.
; Destroys fr3, fr7-fr12.
; Let s = sin(fpul), c = cos(fpul); output in terms of input is
;
;	fr0 = (fr0 * ((fr4 * fr4 * (1-c)) + c)) +		A
;	      (fr1 * ((fr4 * fr5 * (1-c)) - (fr6 * s))) +	B
;	      (fr2 * ((fr4 * fr6 * (1-c)) + (fr5 * s)))		C
;
;	fr1 = (fr0 * ((fr5 * fr4 * (1-c)) + (fr6 * s))) +	D
;	      (fr1 * ((fr5 * fr5 * (1-c)) + c)) +		E
;	      (fr2 * ((fr5 * fr6 * (1-c)) - (fr4 * s)))		F
;
;	fr2 = (fr0 * ((fr6 * fr4 * (1-c)) - (fr5 * s))) +	G
;	      (fr1 * ((fr6 * fr5 * (1-c)) + (fr4 * s))) +	H
;	      (fr2 * ((fr6 * fr6 * (1-c)) + c))			I
;
; The letters name the nine products; the comments in the code refer to
; them.  The new fr0 and fr1 are accumulated in fr12 and fr11 and only
; moved into place at the very end, because every output needs all
; three original inputs.
rotate_around_axis:
	fsca	fpul,fr8	; fr8 = s, fr9 = c
	fldi1	fr3
	fsub	fr9,fr3		; fr3 = 1-c
	; Output fr0 = A + B + C, built in fr12.
	fmov	fr4,fr7		; fr4
	fmul	fr4,fr7		; fr4 * fr4
	fmul	fr3,fr7		; fr4 * fr4 * (1-c)
	fadd	fr9,fr7		; (fr4 * fr4 * (1-c)) + c
	fmul	fr0,fr7		; A
	fmov	fr4,fr10	; fr4
	fmul	fr5,fr10	; fr4 * fr5
	fmul	fr3,fr10	; fr4 * fr5 * (1-c)
	fmov	fr6,fr11	; fr6
	fmul	fr8,fr11	; fr6 * s
	fsub	fr11,fr10	; (fr4 * fr5 * (1-c)) - (fr6 * s)
	fmul	fr1,fr10	; B
	fadd	fr10,fr7	; A + B
	fmov	fr4,fr12	; fr4
	fmul	fr6,fr12	; fr4 * fr6
	fmul	fr3,fr12	; fr4 * fr6 * (1-c)
	fmov	fr5,fr11	; fr5
	fmul	fr8,fr11	; fr5 * s
	fadd	fr11,fr12	; (fr4 * fr6 * (1-c)) + (fr5 * s)
	fmul	fr2,fr12	; C
	fadd	fr7,fr12	; output fr0
	; Output fr1 = D + F + E, built in fr11.
	fmov	fr5,fr7		; fr5
	fmul	fr4,fr7		; fr5 * fr4
	fmul	fr3,fr7		; fr5 * fr4 * (1-c)
	fmov	fr6,fr10	; fr6
	fmul	fr8,fr10	; fr6 * s
	fadd	fr10,fr7	; (fr5 * fr4 * (1-c)) + (fr6 * s)
	fmul	fr0,fr7		; D
	fmov	fr5,fr11	; fr5
	fmul	fr6,fr11	; fr5 * fr6
	fmul	fr3,fr11	; fr5 * fr6 * (1-c)
	fmov	fr4,fr10	; fr4
	fmul	fr8,fr10	; fr4 * s
	; This is our point of maximum register use.
	; We have the following, all live, at this point:
	;	fr0,fr1,fr2 = input values
	;	fr3 = 1-c
	;	fr4,fr5,fr6,fpul = input values to be preserved
	;	fr7 = D
	;	fr8 = s
	;	fr9 = c
	;	fr10 = fr4 * s
	;	fr11 = fr5 * fr6 * (1-c)
	;	fr12 = output fr0
	fsub	fr10,fr11	; (fr5 * fr6 * (1-c)) - (fr4 * s)
	fmul	fr2,fr11	; F
	fadd	fr7,fr11	; D + F
	fmov	fr5,fr10	; fr5
	fmul	fr5,fr10	; fr5 * fr5
	fmul	fr3,fr10	; fr5 * fr5 * (1-c)
	fadd	fr9,fr10	; (fr5 * fr5 * (1-c)) + c
	fmul	fr1,fr10	; E
	fadd	fr10,fr11	; output fr1
	; Output fr2 = I + H + G, built in place in fr2.
	fmov	fr6,fr7		; fr6
	fmul	fr6,fr7		; fr6 * fr6
	fmul	fr3,fr7		; fr6 * fr6 * (1-c)
	fadd	fr9,fr7		; (fr6 * fr6 * (1-c)) + c [fr9 dead]
	fmul	fr7,fr2		; I [fr2 dead]
	fmov	fr6,fr7		; fr6
	fmul	fr5,fr7		; fr6 * fr5
	fmul	fr3,fr7		; fr6 * fr5 * (1-c)
	fmov	fr4,fr10	; fr4
	fmul	fr8,fr10	; fr4 * s
	fadd	fr10,fr7	; (fr6 * fr5 * (1-c)) + (fr4 * s)
	fmul	fr1,fr7		; H [fr1 dead]
	fadd	fr7,fr2		; H + I
	fmov	fr6,fr7		; fr6
	fmul	fr4,fr7		; fr6 * fr4
	fmul	fr3,fr7		; fr6 * fr4 * (1-c) [fr3 dead]
	fmul	fr5,fr8		; fr5 * s [fr8 dead]
	fsub	fr8,fr7		; (fr6 * fr4 * (1-c)) - (fr5 * s)
	fmul	fr0,fr7		; G [fr0 dead]
	fadd	fr7,fr2		; output fr2
	fmov	fr11,fr1	; output fr1
	rts
	fmov	fr12,fr0	; output fr0 (delay slot)

; Modifies (fr0,fr1,fr2) by subtracting off the component in the
;  direction of (fr8,fr9,fr10).
; (fr8,fr9,fr10) must be normalized already.
; Output in (fr0,fr1,fr2).
; Preserves fr4-fr6, fr8-fr15, fpul, all CPU registers.
; Destroys fr3, fr7.
; Output in terms of input is
;
;	Let dp = (fr0 * fr8) + (fr1 * fr9) + (fr2 * fr10)
;
;	fr0 = fr0 - (dp * fr8)
;	fr1 = fr1 - (dp * fr9)
;	fr2 = fr2 - (dp * fr10)
subtract_component:
	fldi0	fr3		; zero the fourth element so fr11 adds nothing
	fipr	fv8,fv0		; fr3 = dp
	fmov	fr3,fr7
	fmul	fr8,fr7		; dp * fr8
	fsub	fr7,fr0
	fmov	fr3,fr7
	fmul	fr9,fr7		; dp * fr9
	fsub	fr7,fr1
	fmov	fr3,fr7
	fmul	fr10,fr7	; dp * fr10
	rts
	fsub	fr7,fr2		; (delay slot)

; Normalize the vector in (fr0,fr1,fr2).
; Output in (fr0,fr1,fr2).
; Preserves fr4-fr15, fpul, all integer registers.
; Destroys fr3.
normalize:
	fldi0	fr3		; fourth element of fv0 must not contribute
	fipr	fv0,fv0		; fr3 = squared length
	fsrra	fr3		; fr3 = 1/sqrt(squared length)
	fmul	fr3,fr0
	fmul	fr3,fr1
	rts
	fmul	fr3,fr2		; (delay slot)

; computes (fr3,fr4,fr5) × (fr0,fr1,fr2) -> (fr0,fr1,fr2)
; uses fr6 as temporary; destroys fr3/fr4/fr5 inputs too
; ( (fr4*fr2)-(fr5*fr1) , (fr5*fr0)-(fr3*fr2) , (fr3*fr1)-(fr4*fr0) )
;       A    B    C           D    E    F           G    H    I
; A, C, D, F, G and I are the six products; B, E and H are the three
; differences, i.e. the outputs.  Each product overwrites one of its
; own inputs, so the order of the multiplies matters.
crossproduct:
	fmov	fr0,fr6
	fmul	fr5,fr6		; D = fr5*fr0, in fr6
	fmul	fr1,fr5		; C = fr5*fr1, in fr5; input fr5 now dead
	fmul	fr3,fr1		; G = fr3*fr1, in fr1; input fr1 now dead
	fmul	fr2,fr3		; F = fr3*fr2, in fr3; input fr3 now dead
	fmul	fr4,fr2		; A = fr4*fr2, in fr2; input fr2 now dead
	fmul	fr0,fr4		; I = fr4*fr0, in fr4; inputs fr4 and fr0 now dead
	fmov	fr2,fr0		; A
	fsub	fr5,fr0		; B = A - C, output fr0
	fmov	fr1,fr2		; G
	fsub	fr4,fr2		; H = G - I, output fr2
	fmov	fr6,fr1		; D
	rts
	fsub	fr3,fr1		; E = D - F, output fr1 (delay slot)

; printdec01: printdec, but saving and restoring r2, r3 and r4 around it.
; Input value in r1
; Destroys r0, r1
printdec01:
	mov.l	r4,@-r15
	mov.l	r3,@-r15
	sts.l	pr,@-r15
	bsr	printdec
	mov.l	r2,@-r15	; (delay slot)
	mov.l	@r15+,r2
	lds.l	@r15+,pr
	mov.l	@r15+,r3
	rts
	mov.l	@r15+,r4	; (delay slot)

; printdec: print r1 in decimal, as an unsigned value, through putchar.
; No leading zeros are printed; zero prints as a single "0".
; Input value in r1
; Destroys r0, r1, r2, r3, r4
printdec:
	sts.l	pr,@-r15
	tst	r1,r1
	bf	1f
	; Value is zero: tail-call putchar with '0'.
	lds.l	@r15+,pr
	bra	putchar
	mov	#'0,r1		; (delay slot)
1:	mov	r1,r2		; r2 = what is left to print
	SETS.L	#p10table,r3
	; Skip powers of ten that are larger than the value.
1:	mov.l	@r3+,r4
	cmp/hs	r4,r2
	bf	1b
	; One digit per remaining power of ten, by repeated subtraction.
2:	mov	#'0,r1
1:	cmp/hs	r4,r2
	bf	1f
	sub	r4,r2
	bra	1b
	add	#1,r1		; (delay slot) count into the digit character
1:	bsr	putchar
	nop
	mov.l	@r3+,r4
	tst	r4,r4		; the zero entry ends the table
	bf	2b
	lds.l	@r15+,pr
	rts
	nop

	; Descending powers of ten for printdec, ended by a zero entry.
	.align	4
p10table:
	.long	1000000000
	.long	100000000
	.long	10000000
	.long	1000000
	.long	100000
	.long	10000
	.long	1000
	.long	100
	.long	10
	.long	1
	.long	0
	.align	2

; printhex8: print r1 as 8 hex digits.
; printhexN: print the low r0 hex digits of r1 (r0 = digit count).
; Lower-case digits, most significant first, through putchar.
; Preserves r2, r3, r4.  Destroys r0, r1.
printhex8:
	mov	#8,r0
printhexN:
	mov.l	r4,@-r15
	mov	r0,r4		; r4 = digits to print
	add	#-8,r0
	neg	r0,r0
	SHLL	#2,r0		; (8 - N) * 4
	shld	r0,r1		; move the first wanted digit to the top nibble
	mov.l	r3,@-r15
	mov.l	r2,@-r15
	sts.l	pr,@-r15
	mova	9f,r0
	mov	r0,r3		; r3 = digit table
	mov	r1,r2		; r2 = value being shifted out
1:	mov	r2,r0
	SHLR	#28,r0/r1	; top nibble
	SHLL	#4,r2
	add	r3,r0
	bsr	putchar
	mov.b	@r0,r1		; (delay slot) digit character
	dt	r4
	bf	1b
	lds.l	@r15+,pr
	mov.l	@r15+,r2
	mov.l	@r15+,r3
	rts
	mov.l	@r15+,r4	; (delay slot)
	.align	4
9:	.ascii	"0123456789abcdef"
	.align	2

; putchar: send the character in r1 to the serial port and wait for
; the transmit FIFO to drain.
; Waits while the TX count field of SCFDR2 reads 16 (presumably FIFO
; full), writes the character to SCFTDR2, then waits until the TX count
; field reads zero.  Expects gbr to hold SCIF_BASE.
; Destroys r0.  r1 is left unchanged.
putchar:
1:	mov.w	@(SCFDR2-SCIF_BASE,gbr),r0
	SHXR	#SCFDR2_TX_SHIFT,r0
	and	#SCFDR2_TX_MASK,r0
	cmp/eq	#16,r0
	bt	1b
	mov	r1,r0
	mov.b	r0,@(SCFTDR2-SCIF_BASE,gbr)
1:	mov.w	@(SCFDR2-SCIF_BASE,gbr),r0
	SHXR	#SCFDR2_TX_SHIFT,r0
	tst	#SCFDR2_TX_MASK,r0
	bf	1b
	rts
	nop

; putstr: send the NUL-terminated string at r1 to the serial port.
; Before each byte it waits while the TX count field of SCFDR2 reads
; 16, as putchar does.  Expects gbr to hold SCIF_BASE.
; Destroys r0; r1 is left pointing just past the terminating NUL.
putstr:
1:	mov.w	@(SCFDR2-SCIF_BASE,gbr),r0
	SHXR	#SCFDR2_TX_SHIFT,r0
	and	#SCFDR2_TX_MASK,r0
	cmp/eq	#16,r0
	bt	1b
	mov.b	@r1+,r0
	tst	r0,r0
	bt	1f
	bra	1b
	mov.b	r0,@(SCFTDR2-SCIF_BASE,gbr)	; (delay slot) send it
1:
	; don't bother waiting for drain here; we do a putchar call,
	; which will drain everything, after all putstr calls and
	; before anything for which it matters.
	rts
	nop

; print_float: print the single-precision float whose bits are in r1,
; in plain decimal (no exponent), through putchar.
; Digits after the point are produced until the remaining fraction is
; exactly zero; no "." is printed if there is no fraction.
; NOTE(review): an infinity or NaN input looks like it would never
; leave the divide-by-10 loop - confirm callers never pass one.
print_float:
	; float in r1
	; uses r0, r1, r2, fr0, fr1, fr2, fpul
	sts.l	pr,@-r15
	; check for negative; if so, print - and negate
	lds	r1,fpul
	fsts	fpul,fr0
	fldi0	fr1
	fcmp/gt	fr0,fr1		; 0 > value?
	bf	1f
	bsr	putchar
	mov	#'-,r1		; (delay slot)
	fneg	fr0
1:
	; divide by 10 until it's less than 10, and keep count
	mov	#10,r0
	lds	r0,fpul
	float	fpul,fr1	; fr1 = 10.0 for the rest of the routine
	mov	#0,r2
1:	fcmp/gt	fr0,fr1		; 10 > value?
	bt	1f
	fdiv	fr1,fr0
	bra	1b
	add	#1,r2		; (delay slot)
1:
	; now fr0 < 10 and r2 is the number of divisions we did
	; print the first (possibly only) digit before the .
	ftrc	fr0,fpul
	sts	fpul,r1
	bsr	putchar
	add	#'0,r1		; (delay slot)
	float	fpul,fr2
	fsub	fr2,fr0		; drop the digit just printed
	; now, for r2 loops, print next digit
1:	cmp/pl	r2
	bf	1f
	fmul	fr1,fr0
	ftrc	fr0,fpul
	sts	fpul,r1
	float	fpul,fr2
	bsr	putchar
	add	#'0,r1		; (delay slot)
	fsub	fr2,fr0
	bra	1b
	add	#-1,r2		; (delay slot)
1:
	; print as many digits as necessary to reach 0
	; print a . before the first one, if there are any
	mov	#'.,r1
	SETS.L	#0f0,r0
	lds	r0,fpul
1:
	; Invariants at this point:
	; - fpul contains integer part to be subtracted from fr0
	; - r1 contains next character to print
	; - loop if fr0 != 0 at this point
	fldi0	fr2
	fcmp/eq	fr0,fr2
	bt	2f
	float	fpul,fr2
	fsub	fr2,fr0
	fmul	fr1,fr0
	bsr	putchar
	ftrc	fr0,fpul	; (delay slot) next digit, as an integer
	sts	fpul,r1
	bra	1b
	add	#'0,r1		; (delay slot)
2:
	; Done.
	lds.l	@r15+,pr
	rts
	nop

; nbgetchar: non-blocking read of one character from the serial port.
; Returns the character, zero-extended, in r0, or -1 if the RX count
; field of SCFDR2 reads zero.  After taking a character it reads SCLSR2
; and then writes 0 to it.  Expects gbr to hold SCIF_BASE.
; Destroys r0, r1.
nbgetchar:
	mov.w	@(SCFDR2-SCIF_BASE,gbr),r0
	SHXR	#SCFDR2_RX_SHIFT,r0/r1
	tst	#SCFDR2_RX_MASK,r0
	bt	1f
	mov.b	@(SCFRDR2-SCIF_BASE,gbr),r0
	extu.b	r0,r1
	mov.w	@(SCLSR2-SCIF_BASE,gbr),r0
	mov	#0,r0
	mov.w	r0,@(SCLSR2-SCIF_BASE,gbr)
	rts
	mov	r1,r0		; (delay slot)
1:	rts
	mov	#-1,r0		; (delay slot)

; panic: dump the registers to the serial port and bail out.
; Prints "panic" and a CR/LF, then 16 lines of 8 hex digits each: r0
; through r14 as they were on entry, followed by pr.  (r15 is not
; printed.)  It then loads r15 from throw_sp and jumps to throw_out;
; it does not return to its caller.
panic:
	sts.l	pr,@-r15
	mov.l	r14,@-r15
	mov.l	r13,@-r15
	mov.l	r12,@-r15
	mov.l	r11,@-r15
	mov.l	r10,@-r15
	mov.l	r9,@-r15
	mov.l	r8,@-r15
	mov.l	r7,@-r15
	mov.l	r6,@-r15
	mov.l	r5,@-r15
	mov.l	r4,@-r15
	mov.l	r3,@-r15
	mov.l	r2,@-r15
	mov.l	r1,@-r15
	mov.l	r0,@-r15
	SETS.L	#panic_msg,r1
	bsr	putstr
	nop
	SETS.L	#16,r2		; printhex8 and putstr leave r2 alone
1:	bsr	printhex8
	mov.l	@r15+,r1	; (delay slot) next saved register
	SETS.L	#panic_crlf,r1
	bsr	putstr
	nop
	dt	r2
	bf	1b
	SETS.L	#throw_sp,r0
	mov.l	@r0,r15
	SETS.L	#throw_out,r0
	jmp	@r0
	nop

	; panic_msg has no terminator of its own, so printing it runs
	; on into panic_crlf.
panic_msg:
	.ascii	"panic"
panic_crlf:
	.asciz	(13,10)
	.align	2

	SETCONST

	.include "crash-handler.s"