; Debugging flags.  Set these to 1 to turn on various debugging output.
; Each one gates a ".if" block in the routine named in its comment.

; set_params
debug_set_params = 0
; rendering cycle kickoff
debug_start_render = 0
; texture setup
debug_texture = 0
; TA command commits
debug_ta_commit = 0

; This is designed to be serial-line downloaded to cdcode.
;
; This matters mostly in the interfaces it means we expect.  In
; particular, we are not called with a bsr/jsr; we are entered with a
; jmp, and our return-to address, to the extent that we have one, is
; in r11, not pr.  We also expect registers set up the way cdcode sets
; them; in particular, we expect r15 to be set to point to a stack,
; 8c010000 (or a little below that if cdcode happens to have anything
; on the stack) and r14 set to the SCIF's base address.  If we return
; to cdcode, it expects those two, and r11 and r10, to be preserved.
; It doesn't mind if we trash r12/r13, but we preserve them too.
;
; Our memory map:
;
;	[8c000000,8c010000)	Stack (r15 set by cdcode)
;	[8c010000,8c01????)	cdcode
;	[8c020000,8c0?????)	Us
;
; Our entry point is 8c020000.  We don't set an entry point here with
; .entry because then send-s would jump to us directly, and I'd rather
; do that manually.

; Throughout this file, MC, used as a name in the comments, means
; Marcus Comstedt, and tatest is a C program of his which he
; distributes as an example of a 3D rendering program.  This file
; started out as tatest.s, a (manual) rewriting in assembly of tatest,
; but it's evolving in its own directions.

	.include "regs.s"
	.include "ta-cmds.s"
	.include "maple-bits.s"

VRAM_BASE_32 = 0xa5000000
VRAM_BASE_64 = 0xa4000000
VRAM_SIZE = 8 << 20
STOREQ_BASE = 0xe0000000
VIDREG_BASE = 0xa05f0000

X_SIZE = 640
Y_SIZE = 480

VBLANK_REG = VIDREG_BASE + 0x6900
VBLANK_VBIT = 0x08
DISPLAY_VRAM = VIDREG_BASE + 0x8050
SHORT_FRAME_OFFSET = X_SIZE*2 ; X_SIZE pixels at two bytes each

COT_FOVY = 0f1.73 ; cot(FOVy/2), field-of-view angle figure
ZNEAR = 0f1
ZFAR = 0f100

; Layout of some things in video RAM.  We double-buffer, so there are
; two of most of these.  _a and _b suffixes indicate the pairs.
;
; We copy tatest's layout in video RAM.  This means we put rendered
; scenes at [a5000000,a512c000) and TA tile buffers, tile descriptors,
; and command lists at [a5400000,a550df00) (these values are for
; X_SIZE=640 Y_SIZE=480), with textures at a4400000+ and a4410000+,
; the latter two using the same memory as a5200000+ and a5600000+.  I
; don't know how much of this is compelled by the hardware and how
; much is just how tatest happens to do it.
;
; NOTE(review): with the sizes defined below, the TA region is
; 2*0x80000 (command lists) + 2*0x4b00 (tile buffers) + 2*0x1c80 (tile
; descriptors) = 0x10cf00 bytes, which would end at a550cf00 rather
; than the a550df00 quoted above - confirm which is intended.

	; Texture space.  Each texture uses 256*256 bytes.  Since these
	; are static as far as the rendering is concerned, _a and _b
	; do not indicate double-buffering but rather just two
	; textures.
	. = VRAM_BASE_64 + 0x00400000
texture_a:
	.space	65536
texture_b:
	.space	65536

	; Space to render into.  Each field takes up X_SIZE*Y_SIZE
	; pixels at two bytes per pixel.  (If it's displayed
	; interlaced, this is handled with the display hardware; in
	; memory it's totally non-interlaced.)
render_buf_size = X_SIZE * Y_SIZE * 2
	. = VRAM_BASE_32
render_buf_a:
	.space	render_buf_size
render_buf_b:
	.space	render_buf_size

	; Tile descriptors.  There is one of these, at 6 longs, per
	; tile; there is also a 24-long header.  Each tile is 32x32
	; pixels.  So for a 640x480 screen, we need
	; 24+(6*(640/32)*(480/32)) longs of space.  (I don't know what
	; happens if the screen width or height is not a multiple of
	; 32.)  Each tile also uses 64 bytes of buffer space.
ta_buffers_size_cmd_list = 512 * 1024
ta_buffers_size_tile_buffer = 64 * [X_SIZE/32] * [Y_SIZE/32]
ta_buffers_size_tile_descriptor = 4 * [24 + [6 * [X_SIZE/32] * [Y_SIZE/32]]]
	. = VRAM_BASE_32 + 0x00400000
ta_buffers_cmd_list_a:
	.space	ta_buffers_size_cmd_list
ta_buffers_cmd_list_b:
	.space	ta_buffers_size_cmd_list
ta_buffers_tile_buffer_a:
	.space	ta_buffers_size_tile_buffer
ta_buffers_tile_buffer_b:
	.space	ta_buffers_size_tile_buffer
ta_buffers_tile_descriptor_a:
	.space	ta_buffers_size_tile_descriptor
ta_buffers_tile_descriptor_b:
	.space	ta_buffers_size_tile_descriptor

; End of layout of video RAM.

	; Entry stub: the load address.  All it does is jump to main,
	; which lives after the data below.
	. = 0x8c020000

	SETS.L	#main,r0
	jmp	@r0
	nop
	SETCONST

	; Our "data segment".  We don't really have segments the way
	; the term implies.  The data is here rather than at the end
	; so the symbols' values are known by the time the assembler
	; sees them later.  This is not critical, but does produce
	; slightly better code.
	;
	; Things here are ordered approximately by decreasing alignment
	; requirement.  Not essential, just avoids needless gaps.

	; The maple command and response buffers.  The hardware
	; requires they be aligned on 32-byte boundaries.
	.align	32
maple_cmd:
	.long	XDESC_LAST | [0 << XDESC_PORTSHIFT] | [1 << XDESC_LENSHIFT]
	.long	maple_resp & DMA_ADDRMASK
	MapleFrame CMD_GETCOND, 0, ADDR_MAIN, 0, 0, 1
	.long	@BSL[FUNC_CONTROLLER]
	.align	32
	; 1024 is the largest the hardware supports, so it's a safe
	; limit.  (The amount actually used is usually fairly small.)
maple_resp:
	.space	1024

	; The base matrix (composition of screenview, projection, and
	; translation).
	.align	8
base_matrix:
	.space	16*4

	; The current and previous controller input state.  The
	; patterns we initialize this to are what the controller sends
	; when it's not being touched.
	.align	4
curistate:
	.long	0x0000ffff, 0x80808080
previstate:
	.long	0x0000ffff, 0x80808080

	; Pointers to the texture memory.  We have two different
	; textures (each of which is used with three different
	; palettes).  Filled in at run time by init_textures.
	.align	4
textures:
	.space	2*4

	; Pointers to the two places screens get rendered into.
	.align	4
render_buf:
	.long	render_buf_a
	.long	render_buf_b

	; Cookies to pass to the hardware (void *tiles[2] in tatest).
	; Filled in at run time by init_tiledesc.
	.align	4
tiledesc_cookies:
	.space	2*4

	; Command lists (ta_buffers cmd_list arrays in tatest; we point
	; to them rather than using a struct to generate offsets)
	.align	4
cmdlists:
	.long	ta_buffers_cmd_list_a
	.long	ta_buffers_cmd_list_b

	; Tile buffers (the 64-bytes-per-tile work space)
	.align	4
tilebuffers:
	.long	ta_buffers_tile_buffer_a
	.long	ta_buffers_tile_buffer_b

	; Tile descriptors (this is the space in which the descriptors
	; are built)
	.align	4
tiledescs:
	.long	ta_buffers_tile_descriptor_a
	.long	ta_buffers_tile_descriptor_b

	; Current orientation, stored in the form of the world axes in
	; eye coordinates.  This is just rotations.
eye_x:
	.long	0f1, 0f0, 0f0
eye_y:
	.long	0f0, 0f1, 0f0
eye_z:
	.long	0f0, 0f0, 0f1

	; Scene corner coordinates.  Three floats (x, y, z) per
	; vertex; the trailing number is the vertex index used by the
	; face list.
	.align	4
vertex_coords:
	.long	-0f1, -0f1, -0f1 ; 0
	.long	-0f1, -0f1, 0f1 ; 1
	.long	-0f1, 0f1, 0f1 ; 2
	.long	-0f1, 0f1, -0f1 ; 3
	.long	0f1, 0f1, -0f1 ; 4
	.long	0f1, 0f1, 0f1 ; 5
	.long	0f1, -0f1, 0f1 ; 6
	.long	0f1, -0f1, -0f1 ; 7
	.long	-0f3, -0f3, -0f3 ; 8
	.long	0f3, -0f3, -0f3 ; 9
	.long	-0f3, -0f3, -0f2 ; 10
	.long	0f3, -0f3, -0f2 ; 11
	.long	-0f3, -0f2, -0f2 ; 12
	.long	0f3, -0f2, -0f2 ; 13
	.long	-0f3, -0f2, -0f3 ; 14
	.long	0f3, -0f2, -0f3 ; 15
n_vertex_coords = [. - vertex_coords] / [3*4]
	; Same shape as vertex_coords: room for one 3-float entry per vertex.
xform_coords:
	.space	n_vertex_coords*3*4

	; Coordinate numbers of the various faces' corners, with
	; palette numbers and texture numbers.
	;
	; Based on alignment restrictions, this should be below, with
	; the .align 1 stuff.  We keep it here to keep it next to the
	; vertex_coords list, accepting the possible loss of a few
	; bytes of space to .align.
.macro face v1,v2,v3,v4,pal,tex .byte $(v2),$(v1),$(v3),$(v4), $(pal), $(tex) .endm scene_faces: face 0, 1, 2, 3, 0, 0 face 0, 7, 6, 1, 1, 0 face 0, 3, 4, 7, 2, 0 face 5, 6, 7, 4, 0, 1 face 5, 4, 3, 2, 1, 1 face 5, 2, 1, 6, 2, 1 face 8,10,12,14, 0, 0 face 8, 9,11,10, 1, 0 face 8,14,15, 9, 2, 0 face 13,11, 9,15, 0, 1 face 13,15,14,12, 1, 1 face 13,12,10,11, 2, 1 n_scene_faces = [. - scene_faces] / 6 ; A command to be sent to the TA. There are two kinds of ; commands, one 32 bytes and one 64 bytes. We reserve space ; for the larger against future need (we don't currently use ; 64-byte commands). .align 4 ta_cmd: .space 64 ; These palettes are straight from tatest; I've just ; reformatted them from C to assembly. It doesn't say where, ; if anywhere, they came from. They're small enough I haven't ; bothered trying to compress them. .align 4 palette_0: .long 0xff000000,0xff3c3c3c,0xff413c3c,0xff493c3c,0xff4d3838,0xff553838,0xff593434,0xff613434 .long 0xff653030,0xff6d3030,0xff712c2c,0xff792c2c,0xff822828,0xff862828,0xff8e2424,0xff922424 .long 0xff9a2020,0xff9e2020,0xffa61c1c,0xffaa1c1c,0xffb21818,0xffb61818,0xffbe1414,0xffc71414 .long 0xffcb1010,0xffd31010,0xffd70c0c,0xffdf0c0c,0xffe30808,0xffeb0808,0xffef0404,0xfff70404 .long 0xffff0000,0xffff0400,0xffff0c00,0xffff1400,0xffff1c00,0xffff2400,0xffff2c00,0xffff3400 .long 0xffff3c00,0xffff4500,0xffff4d00,0xffff5500,0xffff5d00,0xffff6500,0xffff6d00,0xffff7500 .long 0xffff7d00,0xffff8600,0xffff8e00,0xffff9600,0xffff9e00,0xffffa600,0xffffae00,0xffffb600 .long 0xffffbe00,0xffffc700,0xffffcf00,0xffffd700,0xffffdf00,0xffffe700,0xffffef00,0xfffff700 .long 0xffffff00,0xffffff04,0xffffff0c,0xffffff14,0xffffff1c,0xffffff24,0xffffff2c,0xffffff34 .long 0xffffff3c,0xffffff45,0xffffff4d,0xffffff55,0xffffff5d,0xffffff65,0xffffff6d,0xffffff75 .long 0xffffff7d,0xffffff86,0xffffff8e,0xffffff96,0xffffff9e,0xffffffa6,0xffffffae,0xffffffb6 .long 0xffffffbe,0xffffffc7,0xffffffcf,0xffffffd7,0xffffffdf,0xffffffe7,0xffffffef,0xfffffff7 .long 
0xffffffff,0xffffffff,0xfffffbfb,0xfffffbf7,0xfffff7f3,0xfffff7ef,0xfffff3eb,0xfffff3e7 .long 0xffffefe3,0xffffefdf,0xffffebdb,0xffffebd7,0xffffe7d3,0xffffe7cf,0xffffe3cb,0xffffe3c7 .long 0xffffdfc3,0xffffdfbe,0xffffdbba,0xffffdbb6,0xffffd7b2,0xffffd7ae,0xffffd3aa,0xffffd3a6 .long 0xffffcfa2,0xffffcf9e,0xffffcb9a,0xffffcb96,0xffffc792,0xffffc78e,0xffffc38a,0xffffc386 .long 0xffffbe82,0xffffba7d,0xffffba79,0xffffb675,0xffffb671,0xffffb26d,0xffffb269,0xffffae65 .long 0xffffae61,0xffffaa5d,0xffffaa59,0xffffa655,0xffffa651,0xffffa24d,0xffffa249,0xffff9e45 .long 0xffff9e41,0xffff9a3c,0xffff9a38,0xffff9634,0xffff9630,0xffff922c,0xffff9228,0xffff8e24 .long 0xffff8e20,0xffff8a1c,0xffff8a18,0xffff8614,0xffff8610,0xffff820c,0xffff8208,0xffff7d04 .long 0xffff7900,0xffff7900,0xffff7500,0xffff7100,0xffff6d00,0xffff6900,0xffff6500,0xffff6100 .long 0xffff5d00,0xffff5900,0xffff5500,0xffff5100,0xffff4d00,0xffff4900,0xffff4500,0xffff4100 .long 0xffff3c00,0xffff3c00,0xffff3800,0xffff3400,0xffff3000,0xffff2c00,0xffff2800,0xffff2400 .long 0xffff2000,0xffff1c00,0xffff1800,0xffff1400,0xffff1000,0xffff0c00,0xffff0800,0xffff0400 .long 0xffff0000,0xffff0000,0xfffb0000,0xfff70000,0xfff70000,0xfff30000,0xffef0000,0xffeb0000 .long 0xffeb0000,0xffe70000,0xffe30000,0xffe30000,0xffdf0000,0xffdb0000,0xffd70000,0xffd70000 .long 0xffd30000,0xffcf0000,0xffcf0000,0xffcb0000,0xffc70000,0xffc30000,0xffc30000,0xffbe0000 .long 0xffba0000,0xffba0000,0xffb60000,0xffb20000,0xffae0000,0xffae0000,0xffaa0000,0xffa60000 .long 0xffa20000,0xffa20000,0xff9e0404,0xff9a0404,0xff960808,0xff920808,0xff8e0c0c,0xff8e0c0c .long 0xff8a1010,0xff861010,0xff821414,0xff7d1414,0xff791818,0xff791818,0xff751c1c,0xff711c1c .long 0xff6d2020,0xff692020,0xff652424,0xff652424,0xff612828,0xff5d2828,0xff592c2c,0xff552c2c .long 0xff513030,0xff513030,0xff4d3434,0xff493434,0xff453838,0xff413838,0xff3c3c3c,0xff3c3c3c palette_1: .long 0xff000000,0xff000000,0xff000004,0xff00000c,0xff000010,0xff000018,0xff000020,0xff000024 .long 
0xff00002c,0xff000030,0xff000038,0xff000041,0xff000045,0xff00004d,0xff000051,0xff000059 .long 0xff000061,0xff000065,0xff00006d,0xff000075,0xff000079,0xff000082,0xff000086,0xff00008e .long 0xff000096,0xff00009a,0xff0000a2,0xff0000a6,0xff0000ae,0xff0000b6,0xff0000ba,0xff0000c3 .long 0xff0000cb,0xff0004cb,0xff000ccb,0xff0010cf,0xff0018cf,0xff001cd3,0xff0024d3,0xff0028d3 .long 0xff0030d7,0xff0038d7,0xff003cdb,0xff0045db,0xff0049db,0xff0051df,0xff0055df,0xff005de3 .long 0xff0065e3,0xff0069e3,0xff0071e7,0xff0075e7,0xff007deb,0xff0082eb,0xff008aeb,0xff008eef .long 0xff0096ef,0xff009ef3,0xff00a2f3,0xff00aaf3,0xff00aef7,0xff00b6f7,0xff00bafb,0xff00c3fb .long 0xff00cbff,0xff04cbff,0xff0ccbff,0xff14cfff,0xff1ccfff,0xff24d3ff,0xff2cd3ff,0xff34d3ff .long 0xff3cd7ff,0xff45d7ff,0xff4ddbff,0xff55dbff,0xff5ddbff,0xff65dfff,0xff6ddfff,0xff75e3ff .long 0xff7de3ff,0xff86e3ff,0xff8ee7ff,0xff96e7ff,0xff9eebff,0xffa6ebff,0xffaeebff,0xffb6efff .long 0xffbeefff,0xffc7f3ff,0xffcff3ff,0xffd7f3ff,0xffdff7ff,0xffe7f7ff,0xffeffbff,0xfff7fbff .long 0xffffffff,0xfffbffff,0xfff7ffff,0xfff3ffff,0xffebffff,0xffe7ffff,0xffe3ffff,0xffdbffff .long 0xffd7ffff,0xffd3ffff,0xffcbffff,0xffc7ffff,0xffc3ffff,0xffbaffff,0xffb6ffff,0xffb2ffff .long 0xffaaffff,0xffa6ffff,0xffa2ffff,0xff9effff,0xff96ffff,0xff92ffff,0xff8effff,0xff86ffff .long 0xff82ffff,0xff7dffff,0xff75ffff,0xff71ffff,0xff6dffff,0xff65ffff,0xff61ffff,0xff5dffff .long 0xff55ffff,0xff51ffff,0xff4dffff,0xff49ffff,0xff41ffff,0xff3cffff,0xff38ffff,0xff30ffff .long 0xff2cffff,0xff28ffff,0xff20ffff,0xff1cffff,0xff18ffff,0xff10ffff,0xff0cffff,0xff08ffff .long 0xff00ffff,0xff00fbff,0xff00f7ff,0xff00f3ff,0xff00ebff,0xff00e7ff,0xff00e3ff,0xff00dbff .long 0xff00d7ff,0xff00d3ff,0xff00cbff,0xff00c7ff,0xff00c3ff,0xff00baff,0xff00b6ff,0xff00b2ff .long 0xff00aaff,0xff00a6ff,0xff00a2ff,0xff009eff,0xff0096ff,0xff0092ff,0xff008eff,0xff0086ff .long 0xff0082ff,0xff007dff,0xff0075ff,0xff0071ff,0xff006dff,0xff0065ff,0xff0061ff,0xff005dff .long 
0xff0055ff,0xff0051ff,0xff004dff,0xff0049ff,0xff0041ff,0xff003cff,0xff0038ff,0xff0030ff .long 0xff002cff,0xff0028ff,0xff0020ff,0xff001cff,0xff0018ff,0xff0010ff,0xff000cff,0xff0008ff .long 0xff0000ff,0xff0000fb,0xff0000f7,0xff0000f3,0xff0000ef,0xff0000eb,0xff0000e7,0xff0000e3 .long 0xff0000df,0xff0000db,0xff0000d7,0xff0000d3,0xff0000cf,0xff0000cb,0xff0000c7,0xff0000c3 .long 0xff0000be,0xff0000ba,0xff0000b6,0xff0000b2,0xff0000ae,0xff0000aa,0xff0000a6,0xff0000a2 .long 0xff00009e,0xff00009a,0xff000096,0xff000092,0xff00008e,0xff00008a,0xff000086,0xff000082 .long 0xff00007d,0xff000079,0xff000075,0xff000071,0xff00006d,0xff000069,0xff000065,0xff000061 .long 0xff00005d,0xff000059,0xff000055,0xff000051,0xff00004d,0xff000049,0xff000045,0xff000041 .long 0xff00003c,0xff000038,0xff000034,0xff000030,0xff00002c,0xff000028,0xff000024,0xff000020 .long 0xff00001c,0xff000018,0xff000014,0xff000010,0xff00000c,0xff000008,0xff000000,0xff000000 palette_2: .long 0xff000000,0xff9208e7,0xff9208e3,0xff9608e3,0xff9a04df,0xff9e04df,0xff9e04db,0xffa204db .long 0xffa600d7,0xffaa00d7,0xffaa00d3,0xffae00cf,0xffb200cf,0xffb600cb,0xffb600c7,0xffba00c7 .long 0xffbe00c3,0xffbe00be,0xffc300be,0xffc700ba,0xffc700b6,0xffcb00b6,0xffcf00b2,0xffcf00ae .long 0xffd300aa,0xffd700aa,0xffd700a6,0xffdb04a2,0xffdb049e,0xffdf049e,0xffdf049a,0xffe30896 .long 0xffe30892,0xffe70892,0xffe7088e,0xffeb0c8a,0xffeb0c86,0xffef0c82,0xffef1082,0xffef107d .long 0xfff31479,0xfff31475,0xfff31475,0xfff71871,0xfff7186d,0xfff71c69,0xfffb1c65,0xfffb2065 .long 0xfffb2061,0xfffb245d,0xffff2859,0xffff2859,0xffff2c55,0xffff2c51,0xffff304d,0xffff344d .long 0xffff3449,0xffff3845,0xffff3c45,0xffff3c41,0xffff413c,0xffff453c,0xffff4538,0xffff4934 .long 0xffff4d34,0xffff4d30,0xffff512c,0xffff552c,0xffff5928,0xffff5928,0xfffb5d24,0xfffb6120 .long 0xfffb6520,0xfffb651c,0xfff7691c,0xfff76d18,0xfff77118,0xfff37514,0xfff37514,0xfff37914 .long 0xffef7d10,0xffef8210,0xffef820c,0xffeb860c,0xffeb8a0c,0xffe78e08,0xffe79208,0xffe39208 .long 
0xffe39608,0xffdf9a04,0xffdf9e04,0xffdb9e04,0xffdba204,0xffd7a600,0xffd7aa00,0xffd3aa00 .long 0xffcfae00,0xffcfb200,0xffcbb600,0xffc7b600,0xffc7ba00,0xffc3be00,0xffbebe00,0xffbec300 .long 0xffbac700,0xffb6c700,0xffb6cb00,0xffb2cf00,0xffaecf00,0xffaad300,0xffaad700,0xffa6d700 .long 0xffa2db04,0xff9edb04,0xff9edf04,0xff9adf04,0xff96e308,0xff92e308,0xff92e708,0xff8ee708 .long 0xff8aeb0c,0xff86eb0c,0xff82ef0c,0xff82ef10,0xff7def10,0xff79f314,0xff75f314,0xff75f314 .long 0xff71f718,0xff6df718,0xff69f71c,0xff65fb1c,0xff65fb20,0xff61fb20,0xff5dfb24,0xff59ff28 .long 0xff59ff28,0xff55ff2c,0xff51ff2c,0xff4dff30,0xff4dff34,0xff49ff34,0xff45ff38,0xff45ff3c .long 0xff41ff3c,0xff3cff41,0xff3cff45,0xff38ff45,0xff34ff49,0xff34ff4d,0xff30ff4d,0xff2cff51 .long 0xff2cff55,0xff28ff59,0xff28ff59,0xff24fb5d,0xff20fb61,0xff20fb65,0xff1cfb65,0xff1cf769 .long 0xff18f76d,0xff18f771,0xff14f375,0xff14f375,0xff14f379,0xff10ef7d,0xff10ef82,0xff0cef82 .long 0xff0ceb86,0xff0ceb8a,0xff08e78e,0xff08e792,0xff08e392,0xff08e396,0xff04df9a,0xff04df9e .long 0xff04db9e,0xff04dba2,0xff00d7a6,0xff00d7aa,0xff00d3aa,0xff00cfae,0xff00cfb2,0xff00cbb6 .long 0xff00c7b6,0xff00c7ba,0xff00c3be,0xff00bebe,0xff00bec3,0xff00bac7,0xff00b6c7,0xff00b6cb .long 0xff00b2cf,0xff00aecf,0xff00aad3,0xff00aad7,0xff00a6d7,0xff04a2db,0xff049edb,0xff049edf .long 0xff049adf,0xff0896e3,0xff0892e3,0xff0892e7,0xff088ee7,0xff0c8aeb,0xff0c86eb,0xff0c82ef .long 0xff1082ef,0xff107def,0xff1479f3,0xff1475f3,0xff1475f3,0xff1871f7,0xff186df7,0xff1c69f7 .long 0xff1c65fb,0xff2065fb,0xff2061fb,0xff245dfb,0xff2859ff,0xff2859ff,0xff2c55ff,0xff2c51ff .long 0xff304dff,0xff344dff,0xff3449ff,0xff3845ff,0xff3c45ff,0xff3c41ff,0xff413cff,0xff453cff .long 0xff4538ff,0xff4934ff,0xff4d34ff,0xff4d30ff,0xff512cff,0xff552cff,0xff5928ff,0xff5928ff .long 0xff5d24fb,0xff6120fb,0xff6520fb,0xff651cfb,0xff691cf7,0xff6d18f7,0xff7118f7,0xff7514f3 .long 0xff7514f3,0xff7914f3,0xff7d10ef,0xff8210ef,0xff820cef,0xff860ceb,0xff8a0ceb,0xff8e08e7 ; Video initialization 
	; parameters.  Most of these I don't
	; understand; what documentation I have has been saved here as
	; comments.  The comment "magic" means "meaning unknown".
	;
	; These lists are taken pretty much directly from tatest, which
	; says of them "These values mainly from Dans 3dtest
	; program...".
	;
	; Since these are longword stores, the offset must always be
	; a multiple of 4; the terminator is any value which isn't.
	; (We use 1, but set_params accepts anything whose low two
	; bits are nonzero.)
	;
	; Each entry is 6 bytes: a 16-bit offset, which set_params
	; adds to VIDREG_BASE, followed by the 32-bit value to store
	; there.  Entries are therefore only 2-byte aligned, which is
	; why set_params fetches the value as two 16-bit halves.
	;
	.macro	param offset, value
	.word	$(offset)
	.long	$(value)
	.endm
	.macro	endparam
	.word	1
	.endm
	.align	2
three_d_params:
	param	0x80a8, 0x15d1c951	; magic
	param	0x80a0, 0x00000020	; magic
	param	0x8008, 0x00000000	; TA out of reset
	param	0x8048, 0x00000009	; "alpha config" - ?
	param	0x8068, [X_SIZE<<16]|0	; pixel clipping x
	param	0x806c, [Y_SIZE<<16]|0	; pixel clipping y
	param	0x8110, 0x00093f39	; magic
	param	0x8098, 0x00800408	; magic
	param	0x804c, [X_SIZE*2]/8	; "display align" - ?
	param	0x8078, 0f1.0
	param	0x8084, 0x00000000	; magic
	param	0x8030, 0x00000101	; magic
	param	0x80b0, 0x007f7f7f	; fog table colour
	param	0x80b4, 0x007f7f7f	; fog vertex colour
	param	0x80c0, 0x00000000	; colour clamp min
	param	0x80bc, 0xffffffff	; colour clamp max
	param	0x8080, 0x00000007	; magic
	param	0x8074, 0x00000001	; "cheap shadow" - ?
	param	0x807c, 0x0027df77	; magic
	param	0x8008, 0x00000001	; TA into reset
	param	0x8008, 0x00000000	; TA out of reset
	param	0x80e4, 0x00000000	; "stride width" - ?
	param	0x6884, 0x00000000	; disable all interrupt enables
	param	0x6930, 0x00000000
	param	0x6938, 0x00000000
	param	0x6900, 0xffffffff	; reset all pending interrupts
	param	0x6908, 0xffffffff
	param	0x6930, 0x002807ec	; re-enable some events (which?)
	param	0x6938, 0x0000000e
	param	0x80b8, 0x0000ff07	; fog density (meanings?)
	param	0x80b4, 0x007f7f7f	; fog vertex colour
	param	0x80b0, 0x007f7f7f	; fog table colour
	param	0x8108, 0x00000003	; 32bit palette (?)
	endparam
screen_params:
	param	0x80e8, 0x00160000	; screen control (?)
	param	0x8044, 0x00800000	; pixel mode ("vb+0x11" - ?)
	param	0x805c, 0x00000000	; size modulo and display lines ("vb+0x17" - ?)
	param	0x80d0, 0x00000100	; interlace flags (bit meanings?)
	param	0x80d8, 0x020c0359	; magic
	param	0x80cc, 0x001501fe	; magic
	param	0x80d4, 0x007e0345	; horizontal border (meaning? - see below)
	param	0x80dc, 0x00240204	; vertical position (meaning?)
	param	0x80e0, 0x07d6c63f	; sync control (meaning?)
	param	0x80ec, 0x000000a4	; horizontal position (meaning?)
	param	0x80f0, 0x00120012	; vertical border (meanings?)
	param	0x80c8, 0x03450000	; "set to same as border H in 80d4" - ?
	param	0x8068, [X_SIZE-1]<<16	; (X resolution - 1) << 16
	param	0x806c, [Y_SIZE-1]<<16	; (Y resolution - 1) << 16
	param	0x804c, 0x000000a0	; "display align" - ?
	param	0x8118, 0x00008040	; magic
	param	0x80f4, 0x00000401	; "anti-aliasing" - ?
	param	0x8048, 0x00000009	; "alpha config" - ?
	param	0x7814, 0x00000000	; "more interrupt control stuff" - ?
	param	0x7834, 0x00000000
	param	0x7854, 0x00000000
	param	0x7874, 0x00000000
	param	0x78bc, 0x4659404f
	param	0x8040, 0x00000000	; border colour
	endparam
	; "???" here means "not documented in tatest at all"
	; The "2" in these is the offset from the beginning of the
	; param to the place where we store the (longword) value.
	;
	; The three cmdlist_param_* symbols are byte offsets from
	; cmdlist_params to value fields that setup_cmd_list rewrites
	; (as two 16-bit halves) before handing the table to
	; set_params, so this table is modified at run time.
cmdlist_params:
	param	0x8008, 0x00000001	; TA into reset
	param	0x8008, 0x00000000	; TA out of reset
cmdlist_param_tilebuf_a = 2 + . - cmdlist_params
	param	0x8124, 0
	param	0x812c, 0		; ???
cmdlist_param_cmdlist = 2 + . - cmdlist_params
	param	0x8128, 0
	param	0x8130, 0		; ???
	param	0x813c, [[[Y_SIZE/32]-1]<<16] | [[X_SIZE/32]-1]
cmdlist_param_tilebuf_b = 2 + . - cmdlist_params
	param	0x8164, 0
	param	0x8140, 0x00100002	; ???
	param	0x8144, 0x80000000	; confirm settings
	endparam

	; Texture twiddling table.  Why "twiddle"?  That's the term
	; used in tatest's comments.  It appears to be interleaving
	; the bits of the numbers that form texture coordinates, so
	; that the texels conceptually at (x,y) and (x+1,y), where
	; x=ABCDEFG0 and y=abcdefgh (say), are stored at offsets
	; aAbBcCdDeEfFgGh0 (x) and aAbBcCdDeEfFgGh1 (x+1).
	;
	; Why do it?  Because, in the words of another tatest comment,
	; "palette based textures can not be non-twiddled".  Why
	; design hardware that way?  MC, in email, passed along an
	; explanation from someone who worked on the hardware, saying
	; that twiddled textures provide higher performance, so the
	; designers figured the only reason to use non-twiddled
	; textures was to use a rendered frame as a texture (for, eg,
	; reflections).  Since the renderer output is always
	; true-colour, that's all they implemented.  (The
	; "non-twiddled" bit got reused for a different meaning for
	; palette-based textures.)
	;
	; tatest generates a 1024-entry table.  We reserve (and set up)
	; that much space, but as of this writing use only 256 entries
	; of it.
	;
	; One possible note to beware of is that this may not apply to
	; the large dimension of non-square textures.  Done naïvely,
	; doing this for non-square textures could use excessive
	; amounts of memory; it would appear, for example, that an
	; 8x256 texture would take up almost as much memory space as a
	; 128x256 one (because of all the gaps between the address
	; bits).  But it may be smarter than that; when I mentioned
	; that in mail to MC, he said he had a fuzzy memory that the
	; high bits of non-square textures aren't twiddled, that, eg,
	; an 8x256 texture in memory consists of 32 consecutive 8x8
	; (twiddled) blocks.  But he also warned that memory could be
	; wrong, so test this before depending on it.
	;
	; Entries are 16 bits each (1024 entries in 1024*2 bytes).
	.align	2
twiddles:
	.space	1024*2

	; Current double-buffering buffer number.  Always 0 or 1.
curbuf:
	.space	1

	; When set, this causes printing of debugging info, but for
	; only one cycle; it's cleared when the info is printed.
debug:	.byte	0

	.align	2
main:
; The only things startup.s sets up that cdcode hasn't already done for
; us are (1) fpscr and (2) clearing bss.  We don't have bss because we
; aren't linked by a conventional linker.  FPSCR needs setup too.  So
; does the VBR.  Make sure FD, RB, and BL are clear in the SR.  We
; don't need to copy r10-r15, even for the sake of returning to
; cdcode, because only r0-r7 are banked.  We save r10-r14 on the stack
; so that we can use them; they matter only on return to cdcode, which
; happens only controlledly.
	mov.l	r14,@-r15
	mov.l	r13,@-r15
	mov.l	r12,@-r15
	mov.l	r11,@-r15
	mov.l	r10,@-r15
	ldc	r14,gbr			; gbr = r14 as handed to us by cdcode
	stc	sr,r1
	SETS.L	#~[SR_FD|SR_RB|SR_BL],r2
	and	r2,r1
	ldc	r1,sr
; Note that r0-r7 may have just changed if we switched banks.
	mov	#0,r1
	lds	r1,fpscr
	SETS.L	#intvec,r0
	ldc	r0,vbr
; Real code begins here.
	bsr	clear_vram
	nop
	bsr	init_maple
	nop
	bsr	init_powervr
	nop
	bsr	init_video
	nop
	bsr	init_palette
	nop
	bsr	init_twiddling
	nop
	bsr	init_textures
	nop
	bsr	init_tiledesc
	nop
	bsr	init_3dvalues
	nop
; Main loop: render a frame, then poll for a character.  A negative
; return from nbgetchar means "keep going"; 'd' sets the one-shot
; debug flag; anything else falls into done.
1:	bsr	one_frame
	nop
	bsr	nbgetchar
	nop
	cmp/pz	r0
	bf	1b
	cmp/eq	#'d,r0
	bt	setdebug
done:	bsr	putchar
	mov	#13,r1
	bsr	putchar
	mov	#10,r1
; Turn SR.BL (back) on before returning to cdcode.
	stc	sr,r1
	SETS.L	#SR_BL,r2
	or	r2,r1
	ldc	r1,sr
; Restore r10-r14 in the reverse of the order pushed above and return
; through the address cdcode left in r11.
	mov.l	@r15+,r10
	mov.l	@r15+,r11
	mov.l	@r15+,r12
	mov.l	@r15+,r13
	lds	r11,pr
	rts
	mov.l	@r15+,r14

; setdebug: set the debug byte to 1 and rejoin the main loop.
setdebug:
	SETS.L	#debug,r1
	SETS.L	#1,r0
	bra	1b
	mov.b	r0,@r1

; clear_vram: zero VRAM_SIZE bytes starting at VRAM_BASE_64 using the
; store queues.  Takes no arguments; uses r0-r5.
clear_vram:
	SETS.L	#QACR0,r1
	SETS.L	#QACR1,r2
	SETS.L	#[[VRAM_BASE_64>>26]&7]<<2,r3
	SETS.L	#STOREQ_BASE+[4*16],r4
	SETS.L	#0,r5
	mov.l	r3,@r1
	mov.l	r3,@r2
	; Zero the 16 longwords at STOREQ_BASE, working downwards; r4
	; ends up equal to STOREQ_BASE.
	SETS.L	#16,r0
1:	dt	r0
	bf/s	1b
	mov.l	r5,@-r4
	; One pref per 32 bytes of VRAM.
	SETS.L	#VRAM_SIZE/32,r1
	SETS.L	#[VRAM_BASE_64&0x03ffffc0]|0xe0000000,r2
1:	pref	@r2
	dt	r1
	bf/s	1b
	add	#32,r2
	; Touch each store queue once before returning, so that we
	; don't return while either queue is still being written out.
	; Bit 5 of the address selects the queue, so the second queue
	; starts at +32 (4*8); +64 would select the first one again.
	mov.l	r5,@r4
	add	#4*8,r4
	rts
	mov.l	r5,@r4

; set_params: write a table of register values.
;
; In:	r1 = address of a table built with the param/endparam macros:
;	     6-byte entries, a 16-bit offset then a 32-bit value,
;	     ended by an offset whose low two bits are not both zero.
; Each value is stored as a longword at VIDREG_BASE+offset.
; Uses r0, r2, r3, r4; r1 is left pointing just past the terminator.
set_params:
	.if	debug_set_params
	sts.l	pr,@-r15
	.endif
	; r1 points to params table
	SETS.L	#VIDREG_BASE,r2
1:	mov.w	@r1+,r0
	tst	#3,r0			; low two bits set => terminator
	bf/s	1f
	extu.w	r0,r0			; (delay slot) undo mov.w's sign extension
	; The value is only 2-byte aligned, so fetch it as two halves,
	; low half first, and glue them together.
	mov.w	@r1+,r3
	mov.w	@r1+,r4
	SHLL	#16,r4
	extu.w	r3,r3
	or	r3,r4
	add	r2,r0
	.if	debug_set_params
	; Print "*address=value" and a CR/LF for each store.
	mov.l	r0,@-r15
	mov.l	r1,@-r15
	sts.l	pr,@-r15
	bsr	putchar
	mov	#'*,r1
	bsr	printhex8
	mov.l	@(8,r15),r1
	bsr	putchar
	mov	#'=,r1
	bsr	printhex8
	mov	r4,r1
	bsr	putchar
	mov	#13,r1
	bsr	putchar
	mov	#10,r1
	lds.l	@r15+,pr
	mov.l	@r15+,r1
	mov.l	@r15+,r0
	.endif
	bra	1b
	mov.l	r4,@r0
1:
	.if	debug_set_params
	lds.l	@r15+,pr
	.endif
	rts
	nop

; init_maple: walk the table at 9: below, which holds (address, value)
; longword pairs, storing each value at its address; a zero address
; ends the table.  Uses r0-r2.
init_maple:
	mova	9f,r0
1:	mov.l	@r0+,r1
	tst	r1,r1
	bt	1f
	mov.l	@r0+,r2
	bra	1b
	mov.l	r2,@r1
1:	rts
	nop
	.align	4
9:	.long	BUS_RESET, BUS_RESET_VALUE
	.long	BUS_RESET2, BUS_RESET2_VALUE
; NOTE(review): the text of this copy of the file is damaged from the
; middle of the next line onward: the rest of this table, everything
; between it and init_video, and the start of init_video (including
; the .if that the .else/.endif below belong to and the push of pr
; that is popped at the end) are missing.  The surviving fragments are
; kept exactly as found; restore them from a good copy.
	.long	BUS_SPEED, SPEED_2MBPS|[50000<= -128]
	add	#PDTRA-PCTRA,r8
	.else
	SETS.L	#PDTRA,r8
	.endif
	; r9 = bits 9:8 of the word read through r8; the tests on bit 1
	; and bit 0 of r9 below choose between the display variants.
	mov.w	@r8,r0
	SHXR	#8,r0
	and	#3,r0
	mov	r0,r9
	; r2 walks through the video registers; each "add" moves it
	; from the previous register to the one named in the comment
	; on the following store.
	SETS.L	#VIDREG_BASE+0x8000,r8
	mov	r8,r2
	add	#8,r2
	SETS.L	#0,r6
	mov.l	r6,@r2			; 0xa05f8008, "TA out of reset"
	add	#0x40-8,r2
	mov.l	r6,@r2			; 0xa05f8040, border colour
	mov	#0x5,r3			; 5/6/5 2bpp, no scan doubling, display enabled
	SETS.L	#240,r7			; r7 = line count
	mov	r9,r0
	tst	#2,r0
	bf	1f
	SHLL	#1,r7			; bit 1 clear: twice as many lines
	swap.w	r3,r0			; |= 0x00800000, clock doubler
	or	#0x80,r0
	swap.w	r0,r3
1:	add	#0x44-0x40,r2
	mov.l	r3,@r2			; 0xa05f8044, display mode
	add	#0x50-0x44,r2
	mov.l	r6,@r2			; 0xa05f8050, vram base offset 1
	SETS.L	#SHORT_FRAME_OFFSET,r3	; pixels * bytes-per-pixel
	add	#0x54-0x50,r2
	mov.l	r3,@r2			; 0xa05f8054, vram base offset 2
	SETS.L	#1<<8,r3		; VO, negative H and V sync
	SETS.L	#[X_SIZE/2],r4		; longs of (16bpp) pixel data per scanline
	SETS.L	#1,r5
	mov	r9,r0
	tst	#2,r0
	bt	1f
	add	r4,r5
	SETS.L	#0x10,r0		; interlaced, NTSC colour
	or	r0,r3
	; r5 = (r5 << 20) | ((lines-1) << 10) | (longs per line - 1)
1:	SHLL	#10,r5
	add	r7,r5
	add	#-1,r5
	SHLL	#10,r5
	add	r4,r5
	add	#-1,r5
	add	#0x5c-0x54,r2
	mov.l	r5,@r2			; 0xa05f805c, display size and modulo
	add	#0xd0-0x5c,r2
	mov.l	r3,@r2			; 0xa05f80d0, video encapsulation
	SETS.L	#0x007e0345,r8		; doesn't make sense per doc
	add	#0xd4-0xd0,r2
	mov.l	r8,@r2			; 0xa05f80d4, H border range
	SETS.L	#[524<<16]|857,r8	; NTSC/VGA
	add	#0xd8-0xd4,r2
	mov.l	r8,@r2			; 0xa05f80d8, full video size
	; r3 = 36 - 9*(r9 & 2), i.e. 36 or 18, replicated into both
	; halfwords; r8 = that plus the line count.
	mov	r9,r0
	and	#2,r0
	mov	r0,r3
	SHLL	#3,r0
	or	r3,r0
	SETS.L	#36,r3
	sub	r0,r3
	mov	r3,r0
	SHLL	#16,r0
	or	r0,r3
	mov	r3,r8
	add	r7,r8
	add	#0xdc-0xd8,r2
	mov.l	r8,@r2			; 0xa05f80dc, V border range
	SETS.L	#22<<16,r8		; N=magic, pixel duplication disabled
	add	#0xe8-0xdc,r2
	mov.l	r8,@r2			; 0xa05f80e8, additional video settings
	SETS.L	#0xa4,r8
	add	#0xec-0xe8,r2
	mov.l	r8,@r2			; 0xa05f80ec, H position
	add	#0xf0-0xec,r2
	mov.l	r3,@r2			; 0xa05f80f0, V position
	SETS.L	#260,r4
	mov	r9,r0
	tst	#2,r0
	bf	1f
	SETS.L	#510,r4
1:	SETS.L	#0x21<<16,r3
	or	r3,r4
	add	#0xcc-0xf0,r2
	mov.l	r4,@r2			; 0xa05f80cc, raster event position
	; Select RGB/CVBS: 0 if bit 0 of r9 is clear, else 3, placed in
	; bits 9:8 of the register at 0xa0702c00.  This register is not
	; in the block r2 has been walking, so it is addressed through
	; r3; storing through r2 here would overwrite the raster event
	; position written just above.
	; NOTE(review): the bit position (9:8, i.e. 0x300) is taken
	; from other Dreamcast video setup code, not from anything in
	; this file - confirm on composite hardware.
	mov	r9,r0
	tst	#1,r0
	bt/s	1f
	mov	#0,r8
	mov	#3,r8
1:	SHLL	#8,r8
	SETS.L	#0xa0702c00,r3
	mov.l	r8,@r3			; 0xa0702c00, "Select RGB/CVBS" (??)
	lds.l	@r15+,pr
	rts
	nop
	SETCONST

; init_palette: copy the three 256-entry palettes palette_0,
; palette_1 and palette_2 to three consecutive 256-longword blocks
; starting at 0xa05f9000.  Uses r0-r7.
init_palette:
	SETS.L	#0xa05f9000,r1		; r1 = destination for palette_0
	SETS.L	#256*4,r7
	mov	r1,r3
	add	r7,r3			; r3 = destination for palette_1
	mov	r3,r5
	add	r7,r5			; r5 = destination for palette_2
	SETS.L	#palette_0,r2
	SETS.L	#palette_1,r4
	SETS.L	#palette_2,r6
	SETS.L	#256,r7
1:	mov.l	@r2+,r0
	mov.l	r0,@r1
	mov.l	@r4+,r0
	mov.l	r0,@r3
	mov.l	@r6+,r0
	mov.l	r0,@r5
	add	#4,r1
	add	#4,r3
	dt	r7
	bf/s	1b
	add	#4,r5
	rts
	nop

; init_twiddling: fill the 1024-entry twiddles table, from the last
; entry down to entry 0.  For index i the computed value has bit k of
; i moved to bit 2k; this is done in four steps, each of which moves
; the upper half of every group of bits up by the group's width
; (masks in r3-r6, shifts of 8, 4, 2 and 1).  Uses r0-r8.
;
; NOTE(review): entries are stored with mov.w, so only the low 16 bits
; are kept.  For i >= 256 the computed value has bits at positions 16
; and/or 18, which are lost; only the first 256 entries hold the full
; value.  The comment at the twiddles table says only 256 entries are
; currently used.
init_twiddling:
	SETS.L	#twiddles+[1024*2],r1
	SETS.L	#1024,r2
	SETS.L	#0x00300,r3
	SETS.L	#0x000f0,r4
	SETS.L	#0x00c0c,r5
	SETS.L	#0x22222,r6
1:	add	#-1,r2
	; r7 = (i & ~0x300) | ((i & 0x300) << 8)
	mov	r2,r0
	and	r3,r0
	SHLL	#8,r0
	mov	r2,r7
	not	r3,r8
	and	r8,r7
	or	r0,r7
	; r7 = (r7 & ~0xf0) | ((r7 & 0xf0) << 4)
	mov	r7,r0
	and	r4,r0
	SHLL	#4,r0
	not	r4,r8
	and	r8,r7
	or	r0,r7
	; r7 = (r7 & ~0xc0c) | ((r7 & 0xc0c) << 2)
	mov	r7,r0
	and	r5,r0
	SHLL	#2,r0
	not	r5,r8
	and	r8,r7
	or	r0,r7
	; r7 = (r7 & ~0x22222) | ((r7 & 0x22222) << 1)
	mov	r7,r0
	and	r6,r0
	SHLL	#1,r0
	not	r6,r8
	and	r8,r7
	or	r0,r7
	tst	r2,r2
	bf/s	1b
	mov.w	r7,@-r1
	rts
	nop

; The C code this is based upon (again, from tatest)
;
;   for(i=0; i<256; i++)
;     for(j=0; j<256; j+=2) {
;	/* Texture 0 = Mandelbrot */
;	tex[0][twiddletab[i]|(twiddletab[j]>>1)] =
;	  compute_texture(i, j, 0) | (compute_texture(i, j+1, 0)<<8);
;	/* Texture 1 = Julia */
;	tex[1][twiddletab[i]|(twiddletab[j]>>1)] =
;	  compute_texture(i, j, 1) | (compute_texture(i, j+1, 1)<<8);
;     }
;
; We change some names (eg, compute_texture_a and compute_texture_b
; rather than a third arg to compute_texture), but it's otherwise
; pretty similar.  We keep a lot of stuff on the stack rather than in
; registers; while we might have enough registers, this means I don't
; have to think about register allocation as much.  It also means the
; texture computation functions have a much freer hand with registers.
; init_textures: record the two texture addresses in the textures
; array and fill both textures.  For every y in 0..255 and every even
; x in 0..254 it computes the two texel values at (x,y) and (x+1,y)
; for each texture, packs them as (x,y) | (x+1,y)<<8, and stores that
; 16-bit pair at byte offset twiddles[x] | (twiddles[y]<<1) within the
; texture.  All loop state lives on the stack so that the
; compute_texture_* routines may use registers freely.
init_textures:
	sts.l	pr,@-r15
	SETS.L	#twiddles,r7
	mov.l	r7,@-r15		; push twiddles
	SETS.L	#textures,r9
	SETS.L	#texture_a,r8
	SETS.L	#texture_b,r6
	mov.l	r8,@r9			; textures[0] = texture_a
	mov.l	r6,@(4,r9)		; textures[1] = texture_b
	mov.l	r6,@-r15		; push tex1
	mov.l	r8,@-r15		; push tex0
	mov	#0,r0
	mov.l	r0,@-r15		; push y = 0
2:	mov	#0,r0
	mov.l	r0,@-r15		; push x = 0
	; stack = x y tex0 tex1 twiddles
1:	mov.l	@r15,r1			; x
	bsr	compute_texture_a
	mov.l	@(4,r15),r2		; y
	mov.l	r0,@-r15		; valA(x,y)
	mov.l	@(4,r15),r1		; x
	mov.l	@(8,r15),r2		; y
	bsr	compute_texture_a
	add	#1,r1			; (delay slot) x+1
	mov.l	@r15+,r1		; pop valA(x,y); r0 holds valA(x+1,y)
	SHLL	#8,r0
	or	r1,r0			; combined vals
	mov.l	r0,@-r15
	mov.l	@(4,r15),r1		; x
	bsr	compute_texture_b
	mov.l	@(8,r15),r2		; y
	mov.l	r0,@-r15		; valB(x,y)
	mov.l	@(8,r15),r1		; x
	mov.l	@(12,r15),r2		; y
	bsr	compute_texture_b
	add	#1,r1			; (delay slot) x+1
	mov.l	@r15+,r1		; pop valB(x,y); r0 holds valB(x+1,y)
	SHLL	#8,r0
	or	r1,r0			; combined vals
	mov.l	r0,@-r15
	; stack = valsB valsA x y tex0 tex1 twiddles
	mov.l	@(24,r15),r2		; twiddles
	mov.l	@(8,r15),r1		; x
	SHLL	#1,r1			; table entries are 2 bytes each
	add	r2,r1
	mov.w	@r1,r1			; twiddles[x]
	mov.l	@(12,r15),r3		; y
	SHLL	#1,r3
	add	r2,r3
	mov.w	@r3,r3			; twiddles[y]
	SHLL	#1,r3
	or	r1,r3
	; r3 now holds twiddled texture offset
	mov.l	@(16,r15),r2		; tex0
	add	r3,r2
	.if	debug_texture
	; Prints x, y, the tex0 address and valsA.
	; NOTE(review): r2 and r3 are live across these calls, so this
	; relies on printhex8/putchar/putchar2 preserving them - confirm.
	; putchar2 is not defined in the part of the file seen here;
	; every other debug path calls putchar - confirm it exists.
	bsr	printhex8
	mov.l	@(8,r15),r1
	bsr	putchar
	mov	#' ,r1
	bsr	printhex8
	mov.l	@(12,r15),r1
	bsr	putchar2
	mov	#' ,r1
	bsr	printhex8
	mov	r2,r1
	bsr	putchar
	mov	#' ,r1
	bsr	printhex8
	mov.l	@(4,r15),r1
	bsr	putchar2
	mov	#' ,r1
	.endif
	mov.l	@(4,r15),r0		; valA
	mov.w	r0,@r2
	mov.l	@(20,r15),r2		; tex1
	add	r3,r2
	.if	debug_texture
	; Prints the tex1 address and valsB, then CR/LF.
	bsr	printhex8
	mov	r2,r1
	bsr	putchar
	mov	#' ,r1
	bsr	printhex8
	mov.l	@r15,r1
	bsr	putchar
	mov	#13,r1
	bsr	putchar
	mov	#10,r1
	.endif
	mov.l	@r15,r0			; valB
	mov.w	r0,@r2
	add	#8,r15			; pop valA, valB
	SETS.L	#256,r1
	mov.l	@r15,r0			; x
	add	#2,r0
	cmp/hs	r1,r0			; T set once x >= 256
	bf/s	1b
	mov.l	r0,@r15			; (delay slot) store x back
	add	#4,r15			; pop x
	mov.l	@r15,r0			; y
	add	#1,r0
	cmp/hs	r1,r0			; r1 is still 256 here
	bf/s	2b
	mov.l	r0,@r15			; (delay slot) store y back
	add	#16,r15			; pop remaining
	lds.l	@r15+,pr
	rts
	nop

; The disabled code below is an attempt at cloning tatest's texture
; computation.
It doesn't work and I don't care why enough to bother ; debugging it when I can do something quicker and cheaper and just as ; good for smoke-test purposes (see the .else block). .if 0 ; Texture computation. Texture A is the Mandelbrot set; ; texture B is the Julia set - or, at least, quick-&-dirty ; approximations to them; as tatest says, "I'm not trying to ; get any points for correct mathematics here, only a cheap ; way to get some textures for my code :)". ; These functions use r1 and r2 for input (x=r1, y=r2) and r0 ; for output. CPU registers r3-r9 and all FPU registers are ; available for use, as is the stack (r15 is the SP). compute_texture_a: SETS.L #texsetup_a,r0 bra compute_texture_common nop compute_texture_b: SETS.L #texsetup_b,r0 compute_texture_common: SETS.L #128,r3 lds r3,fpul float fpul,fr0 lds r1,fpul float fpul,fr1 lds r2,fpul float fpul,fr2 SETS.L #16384,r3 lds r3,fpul float fpul,fr3 fsub fr0,fr1 fsub fr0,fr2 fmul fr3,fr1 fmul fr3,fr2 SETS.L #0f1.313747,r3 lds r3,fpul fsts fpul,fr5 fsub fr5,fr1 SETS.L #0f0.073227,r3 lds r3,fpul fsts fpul,fr6 sts pr,r3 jsr @r0 fsub fr6,fr2 lds r3,pr ; Inner loop from tatest's analogous function ; ; do { ; float tmp_r = z_re; ; z_re = z_re*z_re - z_im*z_im + c_re; ; z_im = 2*tmp_r*z_im + c_im; ; } while(++n<255 && z_re*z_re+z_im*z_im<=2.0); ; ; Register usage ; ; fr0 tmp_r ; fr1 c_re ; fr2 c_im ; fr3 z_re ; fr4 z_im ; fr5 2.0 ; fr6 z_re**2 ; fr7 z_im**2 (LHS of comparison, at end) ; fr8+ scratch ; r0 ~n (we count down and complement later) ; ; We don't use fr5 for the 2*, but we do for the <=2.0. 
; (Still inside the disabled ".if 0" texture code: the iteration loop
; of compute_texture_common.)
	fldi1	fr5
	fadd	fr5,fr5			; fr5 = 2.0
	SETS.L	#255,r0
1:	; tmp_r = z_re
	fmov	fr3,fr0
	; z_re = z_re*z_re - z_im*z_im + c_re
	fmov	fr3,fr6
	fmul	fr3,fr6
	fmov	fr4,fr7
	fmul	fr4,fr7
	fmov	fr6,fr3
	fsub	fr7,fr3
	fadd	fr1,fr3
	; z_im = 2*tmp_r*z_im + c_im
	fmul	fr0,fr4
	fadd	fr4,fr4
	dt	r0
	bt/s	1f
	fadd	fr2,fr4			; delay slot: runs on both paths
	; NOTE(review): fr6 and fr7 were squared before z_re/z_im were
	; updated above, so this tests the previous iterate's magnitude,
	; unlike the C loop quoted for this routine.
	fadd	fr6,fr7
	fcmp/gt	fr5,fr7			; T = (fr7 > 2.0)
	bf	1b
1:	not	r0,r0
	rts
	extu.b	r0,r0
	; texsetup_a: z = 0 (c stays as computed by the caller).
texsetup_a:
	fldi0	fr3
	rts
	fldi0	fr4
	; texsetup_b: z = computed point, c = (-fr5, -fr6).
texsetup_b:
	fmov	fr1,fr3
	fmov	fr2,fr4
	fmov	fr5,fr1
	fneg	fr1
	fmov	fr6,fr2
	rts
	fneg	fr2

.else

	; Texture A is diagonal stripes; texture B is concentric circles
	; centred on (0,80).

	; Both take x in r1 and y in r2 and return an 8-bit value in r0.
compute_texture_a:
	; return(255&(x+y))
	add	r2,r1
	rts
	extu.b	r1,r0			; delay slot
	; compute_texture_b also modifies r2, fr0, fr1 and fpul.
compute_texture_b:
	; return(255&(int)hypot(x,y-80))
	lds	r1,fpul
	float	fpul,fr0
	add	#-80,r2
	lds	r2,fpul
	float	fpul,fr1
	fmul	fr0,fr0
	fmul	fr1,fr1
	fadd	fr1,fr0
	fsqrt	fr0
	ftrc	fr0,fpul
	sts	fpul,r0
	rts
	extu.b	r0,r0			; delay slot

.endif

	SETCONST

; init_tiledesc
;
; Calls setup_tiledesc once per buffer, with r2 = tiledescs[i] and
; r3 = tilebuffers[i], and stores each returned r0 in
; tiledesc_cookies[i] (i = 0, 1).  Finally stores 0 to the byte
; `curbuf'.  The second pair of arguments and the cookie table address
; are parked on the stack because setup_tiledesc may use r0-r9.
init_tiledesc:
	sts.l	pr,@-r15
	SETS.L	#tiledesc_cookies,r4
	SETS.L	#tilebuffers,r5
	SETS.L	#tiledescs,r6
	mov.l	r4,@-r15		; &tiledesc_cookies
	mov.l	@(4,r5),r0
	mov.l	r0,@-r15		; tilebuffers[1]
	mov.l	@(4,r6),r0
	mov.l	r0,@-r15		; tiledescs[1]
	mov.l	@r6,r2			; tiledescs[0]
	bsr	setup_tiledesc
	mov.l	@r5,r3			; delay slot: tilebuffers[0]
	mov.l	@(8,r15),r4
	mov.l	r0,@r4			; tiledesc_cookies[0]
	mov.l	@r15+,r2		; tiledescs[1]
	bsr	setup_tiledesc
	mov.l	@r15+,r3		; delay slot: tilebuffers[1]
	mov.l	@r15+,r4
	mov.l	r0,@(4,r4)		; tiledesc_cookies[1]
	SETS.L	#curbuf,r1
	mov	#0,r0
	lds.l	@r15+,pr
	rts
	mov.b	r0,@r1			; delay slot: curbuf = 0

setup_tiledesc:
	; in tatest terms, this is ta_create_tile_descriptors.  ptr is
	; r2, buf is r3, w is X_SIZE/32, and h is Y_SIZE/32.  No
	; registers r0-r9 are important upon return; they all are
	; available to us.
	; vr = ptr
	mov	r2,r4			; vr is r4
	; bf = ((unsigned int)buf)&0x007fffff (buf is dead after this)
	SETS.L	#0x007fffff,r0
	and	r0,r3			; bf is r3 from here on
	; strbase = (((unsigned int)ptr)&0x007fffff)|0x80000000
	; ptr is _not_ dead here, but 0x007fffff is.
; (setup_tiledesc, continued: r2 = ptr, r3 = bf, r4 = vr,
; r0 = 0x007fffff at this point.)
	SETS.L	#0x80000000,r7		; strbase is r7
	and	r2,r0
	or	r0,r7
	; for (18 loops) *vr++ = 0
	mov	#18,r1
	mov	#0,r0
1:	mov.l	r0,@r4
	dt	r1
	bf/s	1b
	add	#4,r4			; delay slot
	; *vr++ = 0x10000000
	; *vr++ = 0x80000000 (five times)
	; (with the 18 zero longs above, these make up the 24-long header)
	SETS.L	#0x10000000,r1
	mov.l	r1,@r4
	SETS.L	#0x80000000,r1
	mov.l	r1,@(4,r4)
	mov.l	r1,@(8,r4)
	mov.l	r1,@(12,r4)
	mov.l	r1,@(16,r4)
	mov.l	r1,@(20,r4)
	add	#24,r4
	SETS.L	#X_SIZE/32,r8		; w is r8
	SETS.L	#Y_SIZE/32,r9		; h is r9
	; for (x=0;x
	; NOTE(review): text is missing from this copy of the file at
	; this point.  The rest of setup_tiledesc (its per-tile loop and
	; return) and whatever followed it are gone; all that survives
	; is the residue on the next line, which looks like the end of a
	; string literal followed by an alignment directive.  Restore
	; the missing text from a pristine copy rather than guessing.
" .align 2

; setup_cmd_list
;
; Selects the command list and tile buffer for the current buffer
; (index curbuf), masks both addresses with 0x007fffff, and patches
; them, as separate low and high 16-bit halves, into the cmdlist_params
; block at offsets cmdlist_param_tilebuf_a, cmdlist_param_tilebuf_b
; (both receive the tile buffer address) and cmdlist_param_cmdlist.
; It then calls set_params with r1 = cmdlist_params and finally reads
; the register at VIDREG_BASE+0x8144, discarding the value.
; Uses r0-r5; pr is saved and restored.
setup_cmd_list:
	; In tatest terms, this is ta_set_target, but with args
	; computed here based on curbuf rather than being passed in.
	sts.l	pr,@-r15
	SETS.L	#curbuf,r1
	mov.b	@r1,r1
	SHLL	#2,r1			; curbuf as an offset into tables of longs
	SETS.L	#cmdlists,r2
	SETS.L	#tilebuffers,r3
	add	r1,r2
	mov.l	@r2,r2			; cmdlists[curbuf]
	add	r1,r3
	mov.l	@r3,r3			; tilebuffers[curbuf]
	SETS.L	#0x007fffff,r4
	and	r4,r2
	and	r4,r3
	swap.w	r2,r4			; high half of cmdlist address, now in low 16 bits
	swap.w	r3,r5			; high half of tile buffer address
	SETS.L	#cmdlist_params,r0
	SETS.L	#cmdlist_param_tilebuf_a,r1
	mov.w	r3,@(r0,r1)		; low half
	add	#2,r1
	mov.w	r5,@(r0,r1)		; high half
	SETS.L	#cmdlist_param_tilebuf_b,r1
	mov.w	r3,@(r0,r1)
	add	#2,r1
	mov.w	r5,@(r0,r1)
	SETS.L	#cmdlist_param_cmdlist,r1
	mov.w	r2,@(r0,r1)
	add	#2,r1
	mov.w	r4,@(r0,r1)
	bsr	set_params
	mov	r0,r1			; delay slot: r1 = cmdlist_params
	SETS.L	#VIDREG_BASE+0x8144,r0
	mov.l	@r0,r0			; value read is not used
	lds.l	@r15+,pr
	rts
	nop

	SETCONST

	; This does not have an exact tatest analog.  It corresponds to
	; the six draw_face() calls and the ta_commit_end() after
	; them.
; draw_cube
;
; For each of n_scene_faces 6-byte entries starting at scene_faces,
; builds a textured opaque polygon header in ta_cmd and commits it,
; then commits four vertices, and after the last face commits one
; all-zero command.  Entry layout as used here: bytes 0-3 are indices
; into xform_coords (12 bytes per entry: x, y, z), byte 4 is shifted
; into the CLUT bank field, byte 5 indexes the `textures' table.
;
; Register roles for the whole routine (commit_ta_cmd leaves these
; alone; it uses r0, r1 and r10-r14):
;	r3	12, the size of one xform_coords entry
;	r4	xform_coords
;	r5	1.0 (bit pattern), used for u/v
;	r6	0
;	r7	ta_cmd
;	r8	faces remaining
;	r9	current scene_faces entry
draw_cube:
	sts.l	pr,@-r15
	SETS.L	#scene_faces,r9
	SETS.L	#n_scene_faces,r8
	SETS.L	#ta_cmd,r7
	SETS.L	#0,r6
	SETS.L	#0f1,r5
	SETS.L	#xform_coords,r4
	SETS.L	#3*4,r3
1:	SETS.L	#TA_CMD_POLYGON|TA_CMD_POLYGON_TYPE_OPAQUE|TA_CMD_POLYGON_SUBLIST|TA_CMD_POLYGON_STRIPLENGTH_2|TA_CMD_POLYGON_TEXTURED,r0
	mov.l	r0,@r7		; cmd
	SETS.L	#TA_POLYMODE1_Z_ALWAYS|TA_POLYMODE1_CULL_CCW,r0
	mov.l	r0,@(4,r7)	; mode1
	SETS.L	#TA_POLYMODE2_BLEND_DEFAULT|TA_POLYMODE2_FOG_DISABLED|TA_POLYMODE2_TEXTURE_CLAMP_U|TA_POLYMODE2_TEXTURE_CLAMP_V|TA_POLYMODE2_BILINEAR_FILTER|TA_POLYMODE2_MIPMAP_D_1_00|TA_POLYMODE2_TEXTURE_REPLACE|TA_POLYMODE2_U_SIZE_256|TA_POLYMODE2_V_SIZE_256,r0
	mov.l	r0,@(8,r7)	; mode2
	SETS.L	#TA_TEXTUREMODE_CLUT8,r1
	mov.b	@(4,r9),r0	; CLUT bank for this face
	SHLL	#TA_TEXTUREMODE_CLUTBANK8_SHIFT,r0,r2
	or	r0,r1
	mov.b	@(5,r9),r0	; texture index for this face
	SETS.L	#textures,r2
	SHLL	#2,r0
	mov.l	@(r0,r2),r0	; textures[index]
	SHXR	#TA_TEXTUREMODE_ADDRESS_SHIFT,r0
	SETS.L	#TA_TEXTUREMODE_ADDRESS_MASK,r2
	and	r2,r0
	or	r0,r1
	mov.l	r1,@(12,r7)	; texture
	mov.l	r6,@(16,r7)	; alpha
	mov.l	r6,@(20,r7)	; red
	mov.l	r6,@(24,r7)	; green
	bsr	commit_ta_cmd
	mov.l	r6,@(28,r7)	; blue
	; Vertex 0: u=0, v=0.  cmd, colour and ocolour are written once
	; here and left in ta_cmd for vertices 1 and 2.
	SETS.L	#TA_CMD_VERTEX,r1
	mov.l	r1,@r7		; cmd
	mov.l	r6,@(28,r7)	; ocolour
	not	r6,r1
	mov.l	r1,@(24,r7)	; colour
	mov.b	@r9,r1
	mulu.w	r1,r3
	sts	macl,r0		; index * 12
	add	r4,r0
	mov.l	@r0,r2
	mov.l	r2,@(4,r7)	; x
	mov.l	@(4,r0),r2
	mov.l	r2,@(8,r7)	; y
	mov.l	@(8,r0),r2
	mov.l	r2,@(12,r7)	; z
	mov.l	r6,@(16,r7)	; u
	bsr	commit_ta_cmd
	mov.l	r6,@(20,r7)	; v
	; Vertex 1: u=1, v=0.
	mov.b	@(1,r9),r0
	mulu.w	r0,r3
	sts	macl,r0
	add	r4,r0
	mov.l	@r0,r2
	mov.l	r2,@(4,r7)	; x
	mov.l	@(4,r0),r2
	mov.l	r2,@(8,r7)	; y
	mov.l	@(8,r0),r2
	mov.l	r2,@(12,r7)	; z
	mov.l	r5,@(16,r7)	; u
	bsr	commit_ta_cmd
	mov.l	r6,@(20,r7)	; v
	; Vertex 2: u=0, v=1.
	mov.b	@(2,r9),r0
	mulu.w	r0,r3
	sts	macl,r0
	add	r4,r0
	mov.l	@r0,r2
	mov.l	r2,@(4,r7)	; x
	mov.l	@(4,r0),r2
	mov.l	r2,@(8,r7)	; y
	mov.l	@(8,r0),r2
	mov.l	r2,@(12,r7)	; z
	mov.l	r6,@(16,r7)	; u
	bsr	commit_ta_cmd
	mov.l	r5,@(20,r7)	; v
	; Vertex 3: u=1, v=1, flagged as the end of the strip.
	mov.b	@(3,r9),r0
	mulu.w	r0,r3
	sts	macl,r0
	add	r4,r0
	mov.l	@r0,r2
	mov.l	r2,@(4,r7)	; x
	mov.l	@(4,r0),r2
	mov.l	r2,@(8,r7)	; y
	mov.l	@(8,r0),r2
	mov.l	r2,@(12,r7)	; z
	mov.l	r5,@(16,r7)	; u
	mov.l	r5,@(20,r7)	; v
	SETS.L	#TA_CMD_VERTEX|TA_CMD_VERTEX_EOS,r1
	bsr	commit_ta_cmd
	mov.l	r1,@r7		; cmd
	dt	r8
	bf/s	1b
	add	#6,r9		; delay slot: next scene_faces entry
	; Commit an all-zero command to finish the list.
	; making this a loop saves only one instruction and adds time.
	mov.l	r6,@r7
	mov.l	r6,@(4,r7)
	mov.l	r6,@(8,r7)
	mov.l	r6,@(12,r7)
	mov.l	r6,@(16,r7)
	mov.l	r6,@(20,r7)
	mov.l	r6,@(24,r7)
	bsr	commit_ta_cmd
	mov.l	r6,@(28,r7)
	lds.l	@r15+,pr
	rts
	nop

; commit_ta_cmd
;
; Copies the eight longs at ta_cmd to STOREQ_BASE and issues a pref on
; that address, after writing the QACR0 register with bits 26-28 of
; TA_CMD_BASE placed in its bits 2-4.
; Overwrites r0, r1 and r10-r14 (r12 ends up at ta_cmd+32, r14 at
; STOREQ_BASE+32).
; NOTE(review): this includes r10, r11 and r14, which the file header
; says cdcode expects preserved -- presumably they are saved and
; restored around the main loop; verify.
commit_ta_cmd:
	; In tatest terms, this is ta_commit_list(), with the argument
	; always being ta_cmd.
	.if debug_ta_commit
	sts.l	pr,@-r15
	bsr	putchar2
	mov	#'(,r1
	.endif
	SETS.L	#QACR0,r1
	SETS.L	#STOREQ_BASE,r14
	SETS.L	#[[TA_CMD_BASE>>26]&7]<<2,r13
	SETS.L	#ta_cmd,r12
	SETS.L	#8,r11
	.if debug_ta_commit
	; dump the eight command longs in hex, separated by spaces
	mov.l	r1,@-r15	; printhex8/putchar clobber r1
	bsr	printhex8
	mov.l	@r12+,r1
	bsr	putchar
	mov	#' ,r1
	bsr	printhex8
	mov.l	@r12+,r1
	bsr	putchar
	mov	#' ,r1
	bsr	printhex8
	mov.l	@r12+,r1
	bsr	putchar
	mov	#' ,r1
	bsr	printhex8
	mov.l	@r12+,r1
	bsr	putchar
	mov	#' ,r1
	bsr	printhex8
	mov.l	@r12+,r1
	bsr	putchar
	mov	#' ,r1
	bsr	printhex8
	mov.l	@r12+,r1
	bsr	putchar
	mov	#' ,r1
	bsr	printhex8
	mov.l	@r12+,r1
	bsr	putchar
	mov	#' ,r1
	bsr	printhex8
	mov.l	@r12+,r1
	add	#-8*4,r12	; rewind to ta_cmd
	mov.l	@r15+,r1
	.endif
	mov.l	r13,@r1		; QACR0
	mov	r14,r10		; remember the start address for the pref
1:	mov.l	@r12+,r0
	dt	r11
	mov.l	r0,@r14
	bf/s	1b
	add	#4,r14		; delay slot
	.if debug_ta_commit
	pref	@r10
	bsr	putchar2
	mov	#'),r1
	lds.l	@r15+,pr
	rts
	nop
	.else
	rts
	pref	@r10		; delay slot
	.endif

; handle_maple
;
; Spins while BUS_STATE has BUS_STATE_RUNNING set, then copies the
; longs at offsets 8 and 12 of maple_resp into curistate and
; curistate+4.  Uses r0-r3.
handle_maple:
	SETS.L	#BUS_STATE,r3
1:	mov.l	@r3,r0
	tst	#BUS_STATE_RUNNING,r0
	bf	1b
	SETS.L	#maple_resp,r0
	; We ocbi only one cache line, because the parts of the
	; response we care about fit in a single cache line.  The
	; hardware's alignment requirements for maple buffers match
	; cache line alignments, and we access only 8 bytes of it at
	; low offsets.
	ocbi	@r0
	mov.l	@(8,r0),r1
	mov.l	@(12,r0),r2
	SETS.L	#curistate,r0
	mov.l	r1,@r0
	mov.l	r2,@(4,r0)
	rts
	nop

await_video:
	; In tatest terms, this is everything in the main loop after
	; the call to ta_commit_end().
; (Body of await_video.  It has no return of its own: after the two
; waits, execution falls through into next_frame, whose rts returns to
; await_video's caller.)
	; ta_wait_render()
	; Spin until TA_RENDER_BIT is set in TA_RENDER_EVENT, then
	; write the bit back to the register.
	SETS.L	#TA_RENDER_EVENT,r1
	SETS.L	#TA_RENDER_BIT,r2
1:	mov.l	@r1,r0
	tst	r2,r0
	bt	1b
	mov.l	r2,@r1
	; wait_bovp()
	; Write VBLANK_VBIT to VBLANK_REG first, then spin until the
	; bit reads back set, then write it once more.
	SETS.L	#VBLANK_REG,r1
	SETS.L	#VBLANK_VBIT,r2
	mov.l	r2,@r1
1:	mov.l	@r1,r0
	tst	r2,r0
	bt	1b
	mov.l	r2,@r1
; next_frame
;
; Points the display at render_buf[curbuf], starts a render into
; render_buf[curbuf^1] using cmdlists[curbuf] and
; tiledesc_cookies[curbuf], then toggles curbuf.
; Uses r0-r6 and r10-r12.
; NOTE(review): r10 and r11 are among the registers the file header
; says cdcode expects preserved -- presumably saved by the caller;
; verify.
next_frame:
	.if debug_start_render
	sts.l	pr,@-r15	; the debug build makes calls, so pr must be saved
	.endif
	; Switch to the previously-rendered screen
	SETS.L	#curbuf,r10
	SETS.L	#render_buf,r11
	mov.b	@r10,r0
	SHLL	#2,r0
	mov.l	@(r0,r11),r1		; render_buf[curbuf]
	SETS.L	#0x007fffff,r12		; address mask, reused further on
	SETS.L	#DISPLAY_VRAM,r3
	and	r12,r1
	mov.l	r1,@r3
	SETS.L	#SHORT_FRAME_OFFSET,r0
	add	r0,r1
	mov.l	r1,@(4,r3)		; second address, one scanline further on
	; Kick off rendering to the screen we just stopped displaying
	; In tatest terms, this is ta_begin_render.
	mov.b	@r10,r0		; curbuf
	SETS.L	#cmdlists,r1
	SHLL	#2,r0
	SETS.L	#tiledesc_cookies,r2
	mov.l	@(r0,r1),r1	; cmdlist
	mov.l	@(r0,r2),r2	; tiles
	xor	#4,r0		; index of the other buffer
	mov.l	@(r0,r11),r3	; scrn
	SETS.L	#VIDREG_BASE+0x8138,r4
	SETS.L	#0x12,r5
	SETS.L	#0,r6
	mov.l	@r4,r4
	SETS.L	#VRAM_BASE_32,r0
	or	r0,r4		; taend
	; zero 0x12 longs starting at taend, then step r4 back to taend
1:	mov.l	r6,@r4
	dt	r5
	bf/s	1b
	add	#4,r4		; delay slot
	add	#-0x12*4,r4
	; We could use set_params here, but with the number of values
	; to store and the need to break longs into two words, it's
	; less pain to do it this way.  Do we have to do all these in
	; exactly this order?  I suspect not, but, absent
	; documentation, it's hard to tell how much deviation is OK.
	; We stick strictly to the order tatest uses.
; (next_frame, continued: the register writes that start the render.
; On entry to this part r1 = cmdlist, r2 = tiles, r3 = scrn,
; r4 = taend, r12 = 0x007fffff.  r5 walks from register to register
; by relative adds.  In the debug build each store is preceded by a
; call to the local helper 9, which prints r5 and r0; that is why the
; value about to be stored is copied into r0 first where it is not
; there already.)
	SETS.L	#VIDREG_BASE+0x802c,r5
	and	r12,r2
	.if debug_start_render
	bsr	9f
	mov	r2,r0
	.endif
	mov.l	r2,@r5	; 0xa05f802c	(tiles)
	add	#0x8020-0x802c,r5
	mov	r1,r0
	and	r12,r0
	.if debug_start_render
	bsr	9f
	nop
	.endif
	mov.l	r0,@r5	; 0xa05f8020	(cmdlist)
	add	#0x8060-0x8020,r5
	and	r12,r3
	.if debug_start_render
	bsr	9f
	mov	r3,r0
	.endif
	mov.l	r3,@r5	; 0xa05f8060	(scrn)
	add	#0x808c-0x8060,r5
	sub	r1,r4		; taend - cmdlist
	SHLL	#1,r4
	SETS.L	#0x01000000,r0
	or	r4,r0
	.if debug_start_render
	bsr	9f
	nop
	.endif
	mov.l	r0,@r5	; 0xa05f808c
	add	#0x8088-0x808c,r5
	SETS.L	#0x3e4cccc0,r0	; tatest says "zclip"
	.if debug_start_render
	bsr	9f
	nop
	.endif
	mov.l	r0,@r5	; 0xa05f8088
	add	#0x8068-0x8088,r5
	SETS.L	#[X_SIZE-1]<<16,r0	; tatest calls it "clipw"
	.if debug_start_render
	bsr	9f
	nop
	.endif
	mov.l	r0,@r5	; 0xa05f8068
	add	#0x806c-0x8068,r5
	SETS.L	#[Y_SIZE-1]<<16,r0	; tatest calls it "cliph"
	.if debug_start_render
	bsr	9f
	nop
	.endif
	mov.l	r0,@r5	; 0xa05f806c
	add	#0x804c-0x806c,r5
	SETS.L	#[X_SIZE*2]>>3,r0	; tatest calls it "modulo"
	.if debug_start_render
	bsr	9f
	nop
	.endif
	mov.l	r0,@r5	; 0xa05f804c
	add	#0x8048-0x804c,r5
	SETS.L	#TA_PIXFMT_RGB565|TA_PIXFMT_DITHER,r0	; tatest calls it "pixfmt"
	.if debug_start_render
	bsr	9f
	nop
	.endif
	mov.l	r0,@r5	; 0xa05f8048
	add	#0x8014-0x8048,r5
	SETS.L	#0xffffffff,r0	; tatest says "Launch!"
	.if debug_start_render
	bsr	9f
	nop
	.endif
	mov.l	r0,@r5	; 0xa05f8014
	; curbuf = !
	; curbuf	(this completes the comment "curbuf = ! curbuf")
	; (End of next_frame: r10 = &curbuf.  A zero byte becomes 1,
	; anything else becomes 0.)
	mov.b	@r10,r0
	tst	r0,r0
	bt/s	1f
	add	#1,r0		; delay slot: executed on both paths
	mov	#0,r0
1:
	.if debug_start_render
	lds.l	@r15+,pr
	.endif
	rts
	mov.b	r0,@r10		; delay slot: store the new curbuf

	.if debug_start_render
	; Debug helper for next_frame: prints "<r5>=<r0>" in hex
	; followed by CR LF.  r0 and r1 are saved and restored around
	; the output calls.
9:	; about to mov.l r0,@r5; print it
	; must preserve all input registers except pr
	mov.l	r0,@-r15
	mov.l	r1,@-r15
	sts.l	pr,@-r15
	bsr	printhex8
	mov	r5,r1
	bsr	putchar
	mov	#'=,r1
	bsr	printhex8
	mov.l	@(8,r15),r1	; the saved r0
	bsr	putchar
	mov	#13,r1
	bsr	putchar
	mov	#10,r1
	lds.l	@r15+,pr
	mov.l	@r15+,r1
	rts
	mov.l	@r15+,r0
	.endif

	SETCONST

; printhex8 / printhexN
;
; Print r1 in hexadecimal through putchar.  printhexN takes the digit
; count N in r0 and prints the low N nibbles of r1, most significant
; first; printhex8 is printhexN with N = 8.  r2-r4 and pr are saved
; and restored; r0 and r1 are not.
printhex8:
	mov	#8,r0
printhexN:
	mov.l	r4,@-r15
	mov	r0,r4		; digit counter
	add	#-8,r0
	neg	r0,r0
	SHLL	#2,r0		; (8-N)*4
	shld	r0,r1		; move the first digit to print into the top nibble
	mov.l	r3,@-r15
	mov.l	r2,@-r15
	sts.l	pr,@-r15
	mova	9f,r0
	mov	r0,r3		; digit table
	mov	r1,r2		; remaining value
1:	mov	r2,r0
	SHLR	#28,r0,r1	; top nibble
	SHLL	#4,r2
	add	r3,r0
	bsr	putchar
	mov.b	@r0,r1		; delay slot: fetch the digit character
	dt	r4
	bf	1b
	lds.l	@r15+,pr
	mov.l	@r15+,r2
	mov.l	@r15+,r3
	rts
	mov.l	@r15+,r4
	.align	4
9:	.ascii	"0123456789abcdef"
	.align	2

; putchar2
;
; Calls putchar with the character in r1, restores r1 and pr, and then
; falls through into putchar, so the character is sent twice --
; presumably the intent, given the name.
putchar2:
	sts.l	pr,@-r15
	bsr	putchar
	mov.l	r1,@-r15	; delay slot
	mov.l	@r15+,r1
	lds.l	@r15+,pr
; putchar
;
; Sends the character in r1.  Waits while the TX field of SCFDR2 reads
; 16, writes the byte to SCFTDR2, then waits until the TX field reads
; zero.  The SCIF registers are addressed relative to gbr, so gbr must
; hold SCIF_BASE.  Overwrites r0 only.
putchar:
1:	mov.w	@(SCFDR2-SCIF_BASE,gbr),r0
	SHXR	#SCFDR2_TX_SHIFT,r0
	and	#SCFDR2_TX_MASK,r0
	cmp/eq	#16,r0
	bt	1b
	mov	r1,r0
	mov.b	r0,@(SCFTDR2-SCIF_BASE,gbr)
1:	mov.w	@(SCFDR2-SCIF_BASE,gbr),r0
	SHXR	#SCFDR2_TX_SHIFT,r0
	tst	#SCFDR2_TX_MASK,r0
	bf	1b
	rts
	nop

; putstr
;
; Sends the NUL-terminated string at r1, waiting before each byte
; while the TX field of SCFDR2 reads 16.  On return r1 points just
; past the NUL.  Overwrites r0.
putstr:
1:	mov.w	@(SCFDR2-SCIF_BASE,gbr),r0
	SHXR	#SCFDR2_TX_SHIFT,r0
	and	#SCFDR2_TX_MASK,r0
	cmp/eq	#16,r0
	bt	1b
	mov.b	@r1+,r0
	tst	r0,r0
	bt	1f
	bra	1b
	mov.b	r0,@(SCFTDR2-SCIF_BASE,gbr)	; delay slot
1:	; don't bother waiting for drain here; we do a putchar call,
	; which will drain everything, after all putstr calls and
	; before anything for which it matters.
rts nop print_float: ; float in r1 ; uses r0, r1, r2, fr0, fr1, fr2, fpul sts.l pr,@-r15 ; check for negative; if so, print - and negate lds r1,fpul fsts fpul,fr0 fldi0 fr1 fcmp/gt fr0,fr1 bf 1f bsr putchar mov #'-,r1 fneg fr0 1: ; divide by 10 until it's less than 10, and keep count mov #10,r0 lds r0,fpul float fpul,fr1 mov #0,r2 1: fcmp/gt fr0,fr1 bt 1f fdiv fr1,fr0 bra 1b add #1,r2 1: ; now fr0 < 10 and r2 is the number of divisions we did ; print the first (possibly only) digit before the . ftrc fr0,fpul sts fpul,r1 bsr putchar add #'0,r1 float fpul,fr2 fsub fr2,fr0 ; now, for r2 loops, print next digit 1: cmp/pl r2 bf 1f fmul fr1,fr0 ftrc fr0,fpul sts fpul,r1 float fpul,fr2 bsr putchar add #'0,r1 fsub fr2,fr0 bra 1b add #-1,r2 1: ; print as many digits as necessary to reach 0 ; print a . before the first one, if there are any mov #'.,r1 SETS.L #0f0,r0 lds r0,fpul 1: ; At this point, we have ; - fpul contains integer part to be subtracted from fr0 ; - r1 contains next character to print ; - loop if fr0 != 0 at this point fldi0 fr2 fcmp/eq fr0,fr2 bt 2f float fpul,fr2 fsub fr2,fr0 fmul fr1,fr0 bsr putchar ftrc fr0,fpul sts fpul,r1 bra 1b add #'0,r1 2: ; Done. lds.l @r15+,pr rts nop nbgetchar: mov.w @(SCFDR2-SCIF_BASE,gbr),r0 SHXR #SCFDR2_RX_SHIFT,r0,r1 tst #SCFDR2_RX_MASK,r0 bt 1f mov.b @(SCFRDR2-SCIF_BASE,gbr),r0 extu.b r0,r1 mov.w @(SCLSR2-SCIF_BASE,gbr),r0 mov #0,r0 mov.w r0,@(SCLSR2-SCIF_BASE,gbr) rts mov r1,r0 1: rts mov #-1,r0 SETCONST ; Not sure we actually need to align the VBR; the only reason I ; have to suspect we might is that it's the kind of thing I've ; seen relatively often before - interrupt/trap vector tables ; often need to be aligned, not infrequently to a remarkably ; strict boundary. I see no indication in the manuals that ; the SH requires _any_ alignment, but it's easy to do and ; definitely won't hurt anything. (No explicit indication, ; that is. 
It is implicit in the execution of code at ; VBR+0x100, VBR+0x400, and VBR+0x600 that VBR must be even.) .align 0x10000 ; Exception handling consists of: ; - Save PC and SR in SPC and SSR ; - Set SR bit BL to 1 (block exceptions/interrupts) ; - Set SR bit MD to 1 (privileged mode) ; - Set SR bit RB to 1 (r0-r7 bank 1) ; - Write code to EXPEVT or INTEVT ; - Set PC to vector addr, resume execution intvec = . . = intvec + 0x100 SETS.L #0x100,r2 SETS.L #EXPEVT,r0 mov.l @r0,r3 SETS.L #INTEVT,r0 SETS.L #regdump,r1 jmp @r1 mov.l @r0,r4 SETCONST . = intvec + 0x400 SETS.L #0x400,r2 SETS.L #EXPEVT,r0 mov.l @r0,r3 SETS.L #INTEVT,r0 SETS.L #regdump,r1 jmp @r1 mov.l @r0,r4 SETCONST . = intvec + 0x600 SETS.L #0x600,r2 SETS.L #EXPEVT,r0 mov.l @r0,r3 SETS.L #INTEVT,r0 SETS.L #regdump,r1 jmp @r1 mov.l @r0,r4 SETCONST . = intvec + 0x1000 crash_msg_0: .asciz (13,10,10)"FATAL TRAP"(13,10)"R0 " crash_msg_1: .asciz " R1 " crash_msg_2: .asciz " R2 " crash_msg_3: .asciz " R3 " crash_msg_4: .asciz (13,10)"R4 " crash_msg_5: .asciz " R5 " crash_msg_6: .asciz " R6 " crash_msg_7: .asciz " R7 " crash_msg_8: .asciz (13,10)"R8 " crash_msg_9: .asciz " R9 " crash_msg_10: .asciz " R10 " crash_msg_11: .asciz " R11 " crash_msg_12: .asciz (13,10)"R12 " crash_msg_13: .asciz " R13 " crash_msg_14: .asciz " R14 " crash_msg_15: .asciz " R15 " crash_msg_gbr: .asciz (13,10)"GBR " crash_msg_sr: .asciz " SR " crash_msg_pc: .asciz " PC " crash_msg_mach: .asciz (13,10)"MACH" crash_msg_macl: .asciz " MACL" crash_msg_pr: .asciz " PR " crash_msg_vec: .asciz (13,10)"vector" crash_msg_expevt: .asciz " EXPEVT" crash_msg_intevt: .asciz " INTEVT" crash_msg_done: .asciz (13,10) crash_msg_equal: .asciz " = " .align 4 crash_msgs: .long crash_msg_0 .long crash_msg_1 .long crash_msg_2 .long crash_msg_3 .long crash_msg_4 .long crash_msg_5 .long crash_msg_6 .long crash_msg_7 .long crash_msg_8 .long crash_msg_9 .long crash_msg_10 .long crash_msg_11 .long crash_msg_12 .long crash_msg_13 .long crash_msg_14 .long crash_msg_15 .long 
crash_msg_gbr .long crash_msg_sr .long crash_msg_pc .long crash_msg_mach .long crash_msg_macl .long crash_msg_pr .long crash_msg_vec .long crash_msg_expevt .long crash_msg_intevt .long 0 .align 2 regdump: mov r15,r5 SETS.L #intstacktop,r15 mov.l r4,@-r15 mov.l r3,@-r15 mov.l r2,@-r15 sts.l pr,@-r15 sts.l macl,@-r15 sts.l mach,@-r15 stc.l spc,@-r15 stc.l ssr,@-r15 stc.l gbr,@-r15 mov.l r5,@-r15 mov.l r14,@-r15 mov.l r13,@-r15 mov.l r12,@-r15 mov.l r11,@-r15 mov.l r10,@-r15 mov.l r9,@-r15 mov.l r8,@-r15 stc.l r7_bank,@-r15 stc.l r6_bank,@-r15 stc.l r5_bank,@-r15 stc.l r4_bank,@-r15 stc.l r3_bank,@-r15 stc.l r2_bank,@-r15 stc.l r1_bank,@-r15 stc.l r0_bank,@-r15 SETS.L #SCIF_BASE,r14 SETS.L #crash_msgs,r9 SETS.L #putstr,r8 SETS.L #printhex8,r7 SETS.L #putchar,r6 1: mov.l @r9+,r1 tst r1,r1 bt 1f jsr @r8 nop SETS.L #crash_msg_equal,r1 jsr @r8 nop jsr @r7 mov.l @r15+,r1 bra 1b nop 1: SETS.L #crash_msg_done,r1 jsr @r8 nop jsr @r6 mov #0,r1 SETS.L #0xa0000000,r0 ; hard-reset vector jmp @r0 nop SETCONST .align 4 .space 0x1000 intstacktop = .