/* * There is a problem here. The sigset_t API defines no way to tell * what the maximum signal/bit number supported by a sigset_t is. At * least one implementation doesn't check bit number arguments, just * accessing outside of the sigset_t if the number is out of range, so * we can't do something like sigfillset and then counting the set * bits. * * We blindly assume that all implementations support at least 128 * bits, and that, for the os2em conversion, we can ignore bits above * 128. * * Goddammit. 5.2/amd64's implementation of nested functions is just * straight-up broken. Sometimes they don't even work; other times, * they mostly-work but produce grossly broken debugging info. So we * don't use them unless we have to (eg, throw-out functions). It's a * pity, because they really do make for a significantly more * expressive language. * * SPARC's floating point is IEEE 754. This represents numbers as * * (sign)(exponent)(mantissa) * * where the sign is one bit, 0 for +ve and 1 for -ve; exponent is 8 * bits for single and 11 for double, and mantissa does not include * the hidden bit and is the remainder of the bits: 23 for single and * 52 for double. SPARC v8 also includes quad, a 128-bit format for * which the exponent is 15 bits and the mantissa is 112, but what * we're emulating doesn't include them. The exponent is stored with * a bias added to it: 127 for single and 1023 for double. That is, * for example, 1.0 is represented as 0x3f800000 for single or * 0x3ff0000000000000 for double. * * Special cases in floating point: * * Biased exponent 0, mantissa 0: zero, +ve or -ve per sign * * Biased exponent ~0, mantissa 0: infinity, +ve or -ve per sign * * Biased exponent 0, mantissa !=0: denormalized * There are also NaNs, but one reference says IEEE 754 does not * specify what bit patterns are or are not NaNs(!). I find that hard * to believe. 
In any case, the SPARC document specifies that NaNs * are values with biased exponent ~0 and mantissa !=0, with the high * bit of the mantissa field set for a quiet NaN and clear for * a signaling NaN. * * A normalized number with biased exponent E and mantissa field M * represents 2^(E-B)*1.M, where B is the exponent bias for the format * in use. But if the biased exponent is 0, the number is * denormalized (zero can be thought of as a denormal), representing * instead 2^(1-B)*0.M - that is, the hidden bit is 0 instead of 1 and * the effective exponent is what it would be if the biased exponent * were actually 1. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * As of 1.4T, which is what we emulate, filesystem info comes from * statfs(), for which we need to include . As of 4.0.1 * and 5.2, we need to call statvfs(), from . Neither * has compatability with the other. Hence this dance. :-þ * * Similar remarks apply to getfsstat()/getvfsstat(). */ #include #if __NetBSD_Version__ >= 400000003 // 4.0.1 and later #define STATFS_VIA_STATVFS #define GETFSSTAT_VIA_GETVFSSTAT #elif __NetBSD_Version__ <= 104200000 // 1.4T and earlier #undef STATFS_VIA_STATVFS #undef GETFSSTAT_VIA_GETVFSSTAT #else #error "Figure out whether STATFS_VIS_STATVFS and/or GETFSSTAT_VIA_GETVFSSTAT should be defined" #endif #ifdef STATFS_VIA_STATVFS #include #else #include #endif #ifdef GETFSSTAT_VIA_GETVFSSTAT #include #else #include #endif #include "user.h" #include "types.h" #include "tracemgr.h" #include "realthing.h" #include "stdio-util.h" /* * The revs I use that don't have WNOREAP have WNOWAIT, with the same * functionality. It's not documented, but it's there, apparently for * compatability with other OSes but usable natively. 
*/
#if defined(WNOWAIT) && !defined(WNOREAP)
#define WNOREAP WNOWAIT
#endif

/*
 * I see no clean way to implement __getcwd as a wrapper around getcwd,
 * and __getcwd has no declaration visible outside libc.  So, this.
 *
 * Ugh.
 */
extern int __getcwd(char *, size_t);

// Number of emulated SPARC register windows.
#define NWINDOWS 8

extern const char *__progname;

#include "sysent.h"
#include "em-const.h"

// PAGE_SIZE must be a power of two
#define PAGE_SIZE 4096
#define USRSTACK 0xf0000000
#define MAXSSIZE (1U<<24)
#define MAXDSIZE 0x04000000
#define MAXFDS 4096
#define STACKGAPLEN 400

// Second arg must be a power of two
#define ROUND_UP(a,b) (((a)+(b)-1) & ~((b)-1))
#define ROUND_DOWN(a,b) ((a) & ~((b)-1))

// REPn(x) expands to n comma-separated copies of x; REPkb(x) expands
// to 2^k copies (one per possible value of a k-bit field).
#define REP4(x) x, x, x, x
#define REP8(x) REP4(x), REP4(x)
#define REP16(x) REP8(x), REP8(x)
#define REP32(x) REP16(x), REP16(x)
#define REP64(x) REP32(x), REP32(x)
#define REP128(x) REP64(x), REP64(x)
#define REP256(x) REP128(x), REP128(x)
#define REP2b(x) REP4(x)
#define REP3b(x) REP8(x)
#define REP4b(x) REP16(x)
#define REP5b(x) REP32(x)
#define REP6b(x) REP64(x)
#define REP7b(x) REP128(x)
#define REP8b(x) REP256(x)

// Field extractors for SPARC instruction words.
#define OPC(opc) (((opc)>>30)&3) /* format select bits */
#define OP2(opc) (((opc)>>22)&7) /* opcode bits, format 2 */
#define DREG(opc) (((opc)>>25)&31) /* dest reg bits, formats 2 and 3 */
#define A(opc) (((opc)>>29)&1) /* annul bit, format 2 */
#define COND(opc) (((opc)>>25)&15) /* condition code bits, format 2 */
#define IMM22(opc) ((opc)&0x003fffff) /* immediate data, format 2 */
#define DISP22(opc) signextend(IMM22(opc),22) /* displacement, format 2 */
#define OP3(opc) (((opc)>>19)&0x3f) /* opcode bits, format 3 */
#define SREG1(opc) (((opc)>>14)&31) /* source reg 1 bits, format 3 */
#define SREG2(opc) ((opc)&31) /* source reg 2 bits, format 3 */
#define I(opc) (((opc)>>13)&1) /* immediate bit, format 3 */
#define ASI(opc) (((opc)>>5)&0xff) /* alternative space bits, format 3 */
#define SIMM13(opc) signextend((opc)&0x1fff,13) /* immediate data, format 3 */
#define OPF(opc) (((opc)>>5)&0x1ff) /* FPU opcode bits, format 3 */

// Memory protection bits, used for both MEMSEGs and FDs.
#define P_R 0x01
#define P_W 0x02
#define P_X 0x04

// Boilerplate for defining and returning from emulated syscalls.
#define SYSCALL_IMPL(fn) void fn(SCARGS *args __attribute__((__unused__)), SCRV *rv __attribute__((__unused__)))
#define SYSCALL_SETERR(e) do { rv->err = (e); } while (0)
#define SYSCALL_ERR(e) do { SYSCALL_SETERR((e)); return; } while (0)
#define SYSCALL_SETRET(v) do { rv->rv = (v); rv->flags |= SCRV_RVSET; } while (0)
#define SYSCALL_RET(v) do { SYSCALL_SETRET((v)); return; } while (0)
#define SYSCALL_RET2(v1,v2) do { rv->rv = (v1); rv->rv2 = (v2); rv->flags |= SCRV_RVSET | SCRV_RV2SET; return; } while (0)

/*
 * The state of the initial-exec machinery:
 *
 * INITIAL means nothing has been done with it yet.
 * DELAY means it should be delayed to give the UI a crack at it.
 * WORKED means it worked.
 * FAILED means it failed.
 */
typedef enum {
	IES_INITIAL = 1,
	IES_DELAY,
	IES_WORKED,
	IES_FAILED,
} INIT_EXEC_STATE;

/*
 * Kinds of operation we may need to back out (or otherwise fixup)
 * during a vfork.  See the comments on sc___vfork14 and struct
 * vforkbackout for more.
 */
typedef enum {
	VFB_OPEN,
	VFB_CLOSE,
	VFB_DUP2,
	VFB_TRCMGR,
} VFBKIND;

/*
 * The stages of a vfork operation.  See the comment on sc___vfork14
 * for more.
 */
typedef enum {
	VFORK_NONE = 1,
	VFORK_START,
	VFORK_FAIL,
	VFORK_SUCCESS,
} VFORKSTAGE;

/*
 * The possible kinds of floating-point number.
 *
 * FPK_NORMAL is most numbers.
 * FPK_ZERO is zero (+ve or -ve).
 * FPK_DENORM is denormals.
 * FPK_INFTY is infinity (+ve or -ve).
 * FPK_NaN is a NaN (quiet or signaling).
*/
typedef enum {
	FPK_NORMAL = 1,
	FPK_ZERO,
	FPK_DENORM,
	FPK_INFTY,
	FPK_NaN,
} FPKIND;

// Forward typedefs for all the structs defined below.
typedef struct memseg MEMSEG;
typedef struct memsegops MEMSEGOPS;
typedef struct state STATE;
typedef struct fd FD;
typedef struct memseg_priv_malloc MEMSEG_PRIV_MALLOC;
typedef struct memseg_priv_mmap MEMSEG_PRIV_MMAP;
typedef struct memseg_priv_arena MEMSEG_PRIV_ARENA;
typedef struct malblock MALBLOCK;
typedef struct trc TRC;
typedef struct memacc MEMACC;
typedef struct mfblk MFBLK;
typedef struct scrv SCRV;
typedef struct sig SIG;
typedef struct nulterm_status NULTERM_STATUS;
typedef struct vforkbackout VFORKBACKOUT;
typedef struct emsigset EMSIGSET;
typedef struct statestack STATESTACK;
typedef struct elf_ctx ELF_CTX;
typedef struct psect_ops PSECT_OPS;
typedef struct memwatch MEMWATCH;
typedef struct sym SYM;
typedef struct stab STAB;
typedef struct symlist SYMLIST;
typedef struct regwin REGWIN;
typedef struct bpt BPT;
typedef struct bitval BITVAL;
typedef struct fpnum FPNUM;
typedef struct iov IOV;
typedef struct io_priv_rw IO_PRIV_RW;
typedef struct io_priv_rwv IO_PRIV_RWV;
typedef struct int128 INT128;
typedef struct vm VM;

/*
 * A VM space.  m is the list of MEMSEGs; dbrk is the current data
 * break.
 */
struct vm {
	MEMSEG *m;
	uint32_t dbrk;
} ;
#define INITVM() ((VM){.m=0,.dbrk=0})

/*
 * A 128-bit integer.  We use these occasionally when doing floating
 * point operations.
 */
struct int128 {
	uint64_t h;
	uint64_t l;
} ;

/*
 * Represents one segment of data for an emulated I/O operation.
 * Inspired, in both function and name, by struct iovec, as used by,
 * eg, readv/writev.
 */
struct iov {
	uint32_t base;
	uint32_t len;
} ;

/*
 * I/O private for sc_{,p}{read,write}().
 */
struct io_priv_rw {
	FD *fd;
	IOV iov;
	uint64_t off;
} ;

/*
 * I/O private for sc_{,p}{read,write}v().
 */
struct io_priv_rwv {
	FD *fd;
	uint64_t off;
	uint32_t niov;
	uint32_t (*iov)[2];
} ;

/*
 * A floating-point number, broken apart into its pieces.
 *
 * This is used for both single and double floats; the difference is
 * the sizes of the parts when (re)assembled.
 *
 * raw is the raw bitpattern (in the low 32 bits, for single); it is
 * defined only sometimes (in general, for FPNUMs obtained by cracking
 * binary representations).  sign is 0 for +ve or 1 for -ve.  bexp is
 * the biased exponent, ie, the value in the IEEE bitpattern.  exp is
 * bexp with the bias subtracted off.  mant is the mantissa; it has
 * had the hidden bit (0 for denormals and zeros, 1 for everything
 * else) restored, but is otherwise just the bitpattern from the IEEE
 * value.  The mantissa is in the low 24 (single) or 53 (double) bits
 * of mant.  kind is the general class of the number; see FPKIND,
 * above.
 *
 * Note that an FPNUM does not inherently know whether it's holding a
 * single or a double (though it's implicit in the difference between
 * exp and bexp).  Code using these is expected to know a priori what
 * precision number it's dealing with.
 */
struct fpnum {
	uint64_t raw;
	int sign;
	int bexp;
	int exp;
	uint64_t mant;
	FPKIND kind;
} ;

// EXPBIAS_x is the bias value
// MAXBEXP_x is the bexp value for infinities and NaNs
// MANTBITS_x is the number of mantissa bits, not counting hidden bit
// SIGNAN_x() on the mant field of a NaN is true iff it's signaling.
#define EXPBIAS_S 127
#define MAXBEXP_S 255
#define EXPBIAS_D 1023
#define MAXBEXP_D 2047
#define MANTBITS_S 23
#define MANTBITS_D 52
#define SIGNAN_D(m) (! ((m) & (1ULL << (MANTBITS_D-1))))
#define SIGNAN_S(m) (! ((m) & (1ULL << (MANTBITS_S-1))))

/*
 * A bit with a name.
 */
struct bitval {
	const char *name;
	uint32_t bit;
} ;

/*
 * A breakpoint.  This doesn't really need to be a struct, but it makes
 * the code easier to extend to add additional stuff to breakpoints,
 * like ignore counts or actions to take when hit.
 */
struct bpt {
	uint32_t addr;
} ;

/*
 * A symbol from a symbol table (see STAB).
 */
struct sym {
	char *name;
	uint32_t val;
} ;

/*
 * A symbol table, a collection of SYMs.
*
 * In order to support (relatively) fast lookups, we make this a
 * separate data structure, instead of just keeping SYMs in a linked
 * list.  We use a sorted array, with searches using binary search.
 * (But see also SYMLIST.)
 */
struct stab {
	SYM *syms;
	int nsyms;
	char *strs;
	int strslen;
	uint32_t textbeg;
	uint32_t textend;
} ;
#define STAB_INIT_EMPTY { .syms = 0, .nsyms = 0, .strs = 0 }

/*
 * A linked list of SYMs.  This is used while reading symbols in; the
 * resulting list is processed into a STAB for routine lookups.
 */
struct symlist {
	SYMLIST *link;
	SYM *sym;
} ;

/*
 * A memory watchpoint.
 */
struct memwatch {
	MEMWATCH *link;
	uint32_t base;
	uint32_t len;
	uint32_t end;
	unsigned int flags;
#define MWF_TRIPPED 0x00000001
} ;

/*
 * Common data for "read an ELF file" operations.  We want to do one of
 * these while doing another (for PT_INTERP dynamic-linker loading
 * when execing dynamically-linked executables), necessitating
 * something at least a bit like this.
 */
struct elf_ctx {
	// Path of file being read
	const char *path;
	// (Underlying) OS file descriptor open onto path.
	int fd;
	// The Elf32_Ehdr of the file, as read off disk.
	Elf32_Ehdr eh;
	// The Elf32_Phdrs of the file, as read off disk, and their count.
	Elf32_Phdr *ph;
	int phn;
	// The Elf32_Shdrs of the file, as read off disk, and their count.
	Elf32_Shdr *sh;
	int shn;
	// PT_INTERP content, a zero-length string if none (yet).
	char interp[em_MAXPATHLEN+1];
	// The value of e_entry, internalized.
	uint32_t entry;
	// VA of the beginning of the text segment.
	uint32_t taddr;
	// VA of the beginning of the data segment.
	uint32_t daddr;
	// Offset to relocate all loaded sections by.
	uint32_t loadbase;
	// End of data section.
	uint32_t dend;
	// Value for AT_PHDR Aux32Info struct.
	uint32_t dli_pha;
	// Value for AT_PHENT Aux32Info struct.
	uint32_t dli_phes;
	// Value for AT_PHNUM Aux32Info struct.
	uint32_t dli_phn;
	// Value for AT_BASE Aux32Info struct.
	uint32_t dli_interp;
	// Value for AT_ENTRY Aux32Info struct.
	uint32_t dli_entry;
	// PT_PHDR value, sometimes copied to dli_pha.
	uint32_t phdr;
} ;

/*
 * There are three program headers we care about.  When scanning
 * different files' headers, we want different operations.  This
 * collects them together.
 */
struct psect_ops {
	void (*pt_load)(ELF_CTX *, Elf32_Phdr *, void (*)(void));
	void (*pt_interp)(ELF_CTX *, Elf32_Phdr *, void (*)(void));
	void (*pt_phdr)(ELF_CTX *, Elf32_Phdr *, void (*)(void));
} ;
#define PSECT_OPS_INIT(name) {\
	&psect_pt_load_##name, \
	&psect_pt_interp_##name, \
	&psect_pt_phdr_##name, \
	}

/*
 * An emulated-OS sigset_t.  Indexed by emulated signal number - 1.
 */
struct emsigset {
	uint32_t bits[4];
} ;

/*
 * When reading NUL-terminated strings out of emulated-machine memory,
 * we need to take cleanup actions before returning.  This
 * encapsulates the state for one such string.  That is, there is
 * normally one of these for each such string.
 */
struct nulterm_status {
	char *tofree;
} ;

/*
 * One signal's worth of pseudo-kernel signal handling state.  This is
 * basically a mirror of emulated struct sigaction.
 */
struct sig {
	uint32_t handler;
	EMSIGSET mask;
	uint32_t flags;
} ;

/*
 * State needed to return from a syscall.  A syscall can return zero,
 * one, or two values, or it can return an error.  If err is nonzero,
 * it's returning an error; otherwise, rv and/or rv2 indicate the
 * return values, if any (SCRV_RVSET/SCRV_RV2SET indicate which are
 * meaningful).  SCRV_G2R and SCRV_G7R support the SYSCALL_G2RFLAG and
 * SYSCALL_G7RFLAG bits userland can OR into the syscall number (they
 * indicate that, in the no-error case, the syscall is to return to
 * %g2 or %g7 rather than to %pc).  SCRV_BYPASS indicates that the
 * syscall return code should bypass all the state fiddling that
 * normally happens and just return to the emulator main loop.
*/
struct scrv {
	uint32_t err;
	unsigned int flags;
#define SCRV_G2R 0x00000001
#define SCRV_G7R 0x00000002
#define SCRV_RVSET 0x00000004
#define SCRV_RV2SET 0x00000008
#define SCRV_BYPASS 0x00000010
	uint32_t rv;
	uint32_t rv2;
} ;

#if 0
struct mfblk {
	char *buf;
	int alloc;
	int len;
	char **strptr;
	int *allocptr;
} ;
#endif

/*
 * A block of accesses to memory.  Memory accesses tend to occur in
 * contiguous blocks; collapsing them has proved pragmatically useful.
 * A MEMACC represents such a block of accesses.
 */
struct memacc {
	uint32_t a1;
	uint32_t a2;
	uint8_t *vp;
	int a;
	int n;
	char rw;
} ;

/*
 * There is one of these per type of tracing.  flags is various flags:
 *
 * TRCF_NO_GENERIC_UI
 *	This type of tracing should not use the generic UI
 *	code; it is managed by special-case code.
 *
 * f is a FILE * for writing tracing of this type to, or nil if it's
 * turned off.  inx is the index of this TRC in the trace[] array.
 * dest is a human-readable string describing where output goes, for
 * reporting.
 */
struct trc {
	const char * const name;
	unsigned int flags;
#define TRCF_NO_GENERIC_UI 0x00000001
	FILE *f;
	int inx;
	char *dest;
	char *pl;
	int pla;
	int pln;
} ;

/*
 * Private data for a MEMSEG allocated with malloc, suitable for such
 * things as non-shared anonymous mmap and grown data segments.
 */
struct memseg_priv_malloc {
	void *tofree;
} ;

/*
 * Private data for a MEMSEG arising from an emulated mmap() call.  We
 * refcount these, since splits can lead to multiple MEMSEGs referring
 * to a single underlying mmap()ped area.
 */
struct memseg_priv_mmap {
	int refcnt;
	char *mapped;
	uint32_t size;
	uint32_t mapflags;
} ;

/*
 * Private data for a malloc-arena MEMSEG.  There normally is only one
 * such MEMSEG, but there can be multiple if something punches a hole
 * in it.  But there's only one arena.
 *
 * refcnt is the reference count.
 * mem is the underlying memory.
 * free is the list of free space.
 * live is the AVL tree of live blocks.
 * old is the DLL of freed but not yet reclaimed blocks.
 *
 * See the comment on MALBLOCK for more.
 */
struct memseg_priv_arena {
#define ARENA_SIZE (1U<<28)
#define ARENA_STACK_GAP (1U<<24)
	MEMSEG *seg;
	MALBLOCK *free;
	MALBLOCK *live;
	MALBLOCK *old;
} ;

/*
 * A block in the emulated malloc arena.
 *
 * Free blocks are kept in a doubly linked list.  Live blocks are kept
 * in an AVL tree.  Freed but not yet reused blocks are kept in a
 * doubly linked list; when we run out of unused space, the oldest
 * half of this list is reclaimed, with adjacent blocks merged to the
 * extent possible, to become the free list.
 *
 * In a DLL, l is the backward link and r is the forward link.  In an
 * AVL tree, l/r/u are the left/right/up pointers and bal is the
 * balance value, with negative indicating the left subtree is deeper.
 * kind indicates which of these a given block is.
 *
 * For free blocks, base/size/end describe the block; rz1 and rz2 are
 * meaningless.  For live blocks, base/size/end describe the emulated
 * program's view of the block; the redzone before the block is
 * [rz1..base) and the redzone after it is [end..rz2).  For old
 * blocks, rz1/base/size/end/rz2 retain the values they had when it
 * was live.
 */
struct malblock {
	char kind;
#define MBK_FREE 1
#define MBK_LIVE 2
#define MBK_OLD 3
	signed char bal;
	MALBLOCK *l;
	MALBLOCK *r;
	MALBLOCK *u;
	uint32_t rz1;
	uint32_t base;
	uint32_t size;
	uint32_t end;
	uint32_t rz2;
} ;
#define REDZONE 64 // in bytes
#define ALLOC_GRAIN 8

/*
 * An emulated file descriptor.
 */
struct fd {
	int fd;
	unsigned int prot; // P_R and/or P_W
	unsigned int flags;
#define FDF_CLEX 0x00000001
} ;

/*
 * A record of something done in a vfork child that we may need to back
 * out, or otherwise fix up, to compensate for some "kernel" state
 * being kept in userland and thus not auto-backed-out during vfork.
 */
struct vforkbackout {
	VFORKBACKOUT *link;
	VFBKIND kind;
	uint32_t emfd;
	FD fd;
	int level;
} ;

/*
 * This is the method vector for a MEMSEG (qv).
 *
 * done is used to destroy whatever is backing the MEMSEG.
It is
 * responsible for freeing any underlying resources, including the
 * private data pointer when applicable.
 *
 * curtail shrinks a MEMSEG at the end (highest addresses), given the
 * number of bytes to shrink it by.  That is, this lowers end and
 * size, leaving base unchanged.
 *
 * behead shrinks a MEMSEG at the beginning (lowest addresses), given
 * the number of bytes to shrink it by.  That is, this raises base and
 * lowers size, leaving end unchanged.
 *
 * split punches a hole in the middle of a MEMSEG.  The second arg is
 * the number of bytes to retain at the low end; the third, at the
 * high end.  This assumes the old MEMSEG is modified and a new MEMSEG
 * created; the new MEMSEG, which must not be linked into vm, is
 * returned.
 *
 * postexec handles any cleanup after an exec().  Most MEMSEGs go away
 * upon exec(), though some can be set up to stick around.  If this
 * returns nonzero, the MEMSEG is preserved upon exec(); if zero, it
 * is destroyed.
 *
 * merge is responsible for merging sufficiently similar MEMSEGs when
 * they abut.  When (*merge)(a,b) is called, a and b will (a) have the
 * same MEMSEGOPS, (b) have identical protection bits, and (c) a->end
 * will equal b->base.  The merge function should either (i) do
 * nothing and return zero or (ii) destroy b (including updating link
 * fields and freeing it), merging it into the enlarged a, and return
 * nonzero.
 *
 * check checks an attempted memory access.  It is passed the MEMSEG
 * pointer, the address of the first byte accessed relative to the
 * MEMSEG's base, the number of bytes, and the kind of access
 * contemplated (P_R, P_W, or P_X).  It can assume that the access
 * lies entirely within the MEMSEG and that the MEMSEG's protection
 * has already been checked.
 *
 * desc prints a text description of the MEMSEG.  It is passed the
 * MEMSEG pointer and a FILE * to print the text to.  It should not
 * include a newline in its output.
 */
struct memsegops {
	const char *name;
	void (*done)(MEMSEG *);
	void (*curtail)(MEMSEG *, uint32_t);
	void (*behead)(MEMSEG *, uint32_t);
	MEMSEG *(*split)(MEMSEG *, uint32_t, uint32_t);
	int (*postexec)(MEMSEG *);
	int (*merge)(MEMSEG *, MEMSEG *);
	void (*check)(MEMSEG *, uint32_t, uint32_t, unsigned int);
	void (*desc)(MEMSEG *, FILE *);
} ;
#define MEMSEGOPS_INIT(name) {\
	#name, \
	&memseg_done_##name, \
	&memseg_curtail_##name, \
	&memseg_behead_##name, \
	&memseg_split_##name, \
	&memseg_postexec_##name, \
	&memseg_merge_##name, \
	&memseg_check_##name, \
	&memseg_desc_##name, \
	}

/*
 * A MEMSEG is a piece of emulated virtual space.  An important
 * invariant is end = base + size.  Another is that MEMSEGs do not
 * overlap - at most one MEMSEG maps any particular virtual address.
 * A third is any given MEMSEG always maps an integral number of
 * PAGE_SIZE pages; base, size, and end must all be multiples of
 * PAGE_SIZE.
 *
 * MEMSEGs are implemented in an OO style; this struct contains common
 * data and an ops vector, with any type-specific private data being
 * behind the priv pointer.
 */
struct memseg {
	MEMSEG *link;
	uint32_t base;
	uint32_t size;
	uint32_t end;
	unsigned char prot; // zero or more of P_[RWX]
	uint8_t *data;
	void *priv;
	MEMSEGOPS *ops;
} ;

/*
 * One window's worth of windowed registers.  This is the unit of stack
 * spill and fill.
 */
struct regwin {
	uint32_t l[8];
	uint32_t i[8];
} ;

/*
 * Emulated machine state.  This includes (by intent, at least)
 * everything that needs to be kept separate between parent and child
 * in a vfork(), but it gets used most heavily for the sort of thing
 * that would normally be thought of as hardware state, mostly meaning
 * machine registers.
 *
 * There is no particular reason we have to match the hardware's CWP
 * and WIM semantics.  Our CWP works like the hardware's, but we don't
 * actually keep a WIM.  This is because, the way we manage it, the
 * conceptual WIM always has exactly one bit set, so we can save
 * bother by just recording the bit number of that bit.
 *
 * For reference, here is what the hardware doc says.
 *
 *	The CWP is incremented by a RESTORE (or RETT) instruction and
 *	decremented by a SAVE instruction or a trap.  ...  Each window
 *	shares its ins and outs with the two adjacent windows.  The
 *	outs of the CWP+1 window are addressable as the ins of the
 *	current window, and the outs in the current window are the ins
 *	of the CWP-1 window.  ...  CWP arithmetic is performed modulo
 *	NWINDOWS ...  If [a] SAVE, RESTORE, or RETT instruction would
 *	cause the CWP to point to an "invalid" register set, that is,
 *	one whose corresponding WIM bit equals 1 [], a window_overflow
 *	or window_underflow trap is caused.
 *
 * Four registers have architecture-fixed functionality.  %g0 is fixed
 * at zero.  CALL writes its own address into %o7.  Traps write PC and
 * nPC into %l1 and %l2 of the trap window.
 *
 * Traps, including window overflow and underflow traps, do the effect
 * of a SAVE but without checking for window overflow.
 *
 * If a window overflow or underflow trap is taken, CWP is not changed
 * by the trapping instruction, but is changed by the usual trap
 * handling.  The ADD effect of save/restore also does not happen on
 * window underflow/overflow.  But, after the spill/fill happens, the
 * save/restore will normally be re-executed, and its effects will
 * happen then.
 *
 * Of course, the stuff about traps doesn't matter to us, because we
 * are strictly a userland emulator.  Anything that traps gets
 * emulated.
 *
 * We do maintain a "we use the FPU" bit.  But we don't maintain the
 * "FPU disabled" bit the hardware does; instead, we initialize the
 * emulated FPU registers along with all the others in clean_regs()
 * and just set SF_FPU when code touches them.  If you like, you can
 * think of our "FP disabled" implementation as being partly in
 * clean_regs() and partly in the SF_FPU settings.
 */
struct state {
	// The condition code bits.
	unsigned int cc;
#define CC_N 8
#define CC_Z 4
#define CC_V 2
#define CC_C 1
	// The FPU condition code bits.
	unsigned int fcc;
#define FCC_UN 3 // unordered
#define FCC_GT 2 // >
#define FCC_LT 1 // <
#define FCC_EQ 0 // =
	// Flags.
	// SF_ANNUL means "next instruction is annulled".
	// SF_FPU means "touched FPU" (eg, must save/restore state)
	// SF_SIGRESTART means "pending syscall restart on EINTR"
	// SF_EMU_MAGIC means "emulator-detect magic is enabled"
	unsigned int flags;
#define SF_ANNUL 0x00000001
#define SF_FPU 0x00000002
#define SF_SIGRESTART 0x00000004
#define SF_EMU_MAGIC 0x00000008
	// pc, npc, and y emulate the hardware registers of the same names.
	uint32_t pc;
	uint32_t npc;
	uint32_t y;
	// xa is the address of the instruction currently being executed.
	uint32_t xa;
	// The general-purpose hardware registers.
	uint32_t regs[32];
	REGWIN rw[NWINDOWS];
	unsigned int cwp;
	unsigned int iwp;
#define R_G0 0
#define R_G1 1
#define R_G2 2
#define R_G3 3
#define R_G4 4
#define R_G5 5
#define R_G6 6
#define R_G7 7
#define R_O0 8
#define R_O1 9
#define R_O2 10
#define R_O3 11
#define R_O4 12
#define R_O5 13
#define R_O6 14
#define R_O7 15
#define R_L0 16
#define R_L1 17
#define R_L2 18
#define R_L3 19
#define R_L4 20
#define R_L5 21
#define R_L6 22
#define R_L7 23
#define R_I0 24
#define R_I1 25
#define R_I2 26
#define R_I3 27
#define R_I4 28
#define R_I5 29
#define R_I6 30
#define R_I7 31
#define R_SP R_O6
#define R_FP R_I6
	// The FPU registers.
	uint32_t fregs[32];
	// Count of instructions executed.
	unsigned long long int instrs;
	// Signal mask (in the sigprocmask sense).
	// Indexed by emulated signal number.
	uint64_t sigmask;
	// Signal handling settings.
	// Indexed by emulated signal number.
	SIG sigh[em__NSIG];
	// True iff cannot interact with the user.
int noninteractive;
	// sigpend[i] true iff signal i is awaiting delivery.
	// Indexed by emulated signal number.
	volatile sig_atomic_t sigpend[em__NSIG];
	// ignsigs is a mask of signals which are set ignored and thus _we_
	// ignore.  Indexed by emulated signal number.
	uint64_t ignsigs;
	// True iff we're currently executing on the signal stack.
	// (We don't currently implement signal stacks.)
	int onsigstack;
	// Is delivery on the signal stack enabled?
	int sigstack_enabled;
	// The signal stack base and size.
	uint32_t sigstack_base;
	uint32_t sigstack_size;
	// Last path successfully exec()ed.  For debugging.
	char *lastexec;
} ;

/*
 * Saved machine states are kept in a stack during vfork()s.  The stack
 * rarely gets very deep - there's only one saved state unless a
 * vforked child itself vforks - but the generality is cheap.
 */
struct statestack {
	STATESTACK *link;
	STATE state;
} ;

// Command-line args.
static const char *exe = 0;
static char **cl_args = 0;
static int cl_nargs = 0;
static char **cl_envp = 0;
static int cl_nenvp = 0;

// Live machine state.
static STATE s;

/*
 * Breakpoint state variables.
 *
 * bpts points to the array of breakpoints, which are kept sorted by
 * address (to speed checking whether a pc value is in the list).
 *
 * abpts is the number of BPTs bpts points to (the number malloc()ed).
 *
 * nbpts is the number of BPTs in bpts which are valid (always in the
 * range [0..abpts]).
 *
 * bpt_suppress is a count of emulator cycles during which we should
 * ignore all breakpoints.  This gets reset each time we enter the UI.
 *
 * We do breakpoints transparently.  Real debuggers typically replace
 * instructions with trap instructions; we could do that, but if some
 * code reads the instruction stream it could notice that.  Instead,
 * we check the pc value against the breakpoint list every emulator
 * cycle.  (If the time penalty of checking the whole list each cycle
 * gets too large, we may need to use a smarter data structure to
 * store breakpoints.)
 */
static BPT *bpts;
static int abpts;
static int nbpts;
static int bpt_suppress;

// The VM environment.
static VM vm;

// File descriptors.
static FD **fds;
static int nfds;

// The PID of this process.
static int mypid;

// Call this to throw out on error.
static void (*err_jmp)(void) = 0;

// Types of tracing.
static TRC trace[] = {
	{ "instr" },
#define TRC_INSTR 0
	{ "chg" },
#define TRC_CHG 1
	{ "mem" },
#define TRC_MEM 2
	{ "syscall" },
#define TRC_SYSCALL 3
	{ "stack" },
#define TRC_STACK 4
	{ "vfork" },
#define TRC_VFORK 5
	{ "signal" },
#define TRC_SIGNAL 6
	{ "exec" },
#define TRC_EXEC 7
	{ "vm" },
#define TRC_VM 8
	{ "window" },
#define TRC_WINDOW 9
	{ "err" },
#define TRC_ERR 10
	{ "fp" },
#define TRC_FP 11
	{ "proc" },
#define TRC_PROC 12
	{ "arena" },
#define TRC_ARENA 13
	{ "io", TRCF_NO_GENERIC_UI },
#define TRC_IO 14
	{ "magic" },
#define TRC_MAGIC 15
	{ 0 } };
#define TRC__N 16

// Size of I/O data to be dumped.
static int io_trace_size;

// Elastic array holding memory accesses.
static MEMACC *memacc;
static int amemacc;
static int nmemacc;

/*
 * If this is set, memory accesses aren't recorded even when they
 * normally would be (ie, when TRC_MEM tracing is on).  This is used
 * when, for example, printing syscall arguments in the syscall entry
 * code.
 */
int nomemacc;

// Active memory watchpoints.
static MEMWATCH *memwatches;

/*
 * Address of the signal-handling trampoline.  Conceptually, this
 * should be part of STATE, but it's always at the same place, so
 * there's no point.
 */
static uint32_t sigtramp;

// All signals we have values for.  Indexed by emulated signal number.
#define SIG_ALLMASK ((uint64_t)(\ (1ULL << em_SIGHUP) | \ (1ULL << em_SIGINT) | \ (1ULL << em_SIGQUIT) | \ (1ULL << em_SIGILL) | \ (1ULL << em_SIGTRAP) | \ (1ULL << em_SIGABRT) | \ (1ULL << em_SIGEMT) | \ (1ULL << em_SIGFPE) | \ (1ULL << em_SIGKILL) | \ (1ULL << em_SIGBUS) | \ (1ULL << em_SIGSEGV) | \ (1ULL << em_SIGSYS) | \ (1ULL << em_SIGPIPE) | \ (1ULL << em_SIGALRM) | \ (1ULL << em_SIGTERM) | \ (1ULL << em_SIGURG) | \ (1ULL << em_SIGSTOP) | \ (1ULL << em_SIGTSTP) | \ (1ULL << em_SIGCONT) | \ (1ULL << em_SIGCHLD) | \ (1ULL << em_SIGTTIN) | \ (1ULL << em_SIGTTOU) | \ (1ULL << em_SIGIO) | \ (1ULL << em_SIGXCPU) | \ (1ULL << em_SIGXFSZ) | \ (1ULL << em_SIGVTALRM) | \ (1ULL << em_SIGPROF) | \ (1ULL << em_SIGWINCH) | \ (1ULL << em_SIGINFO) | \ (1ULL << em_SIGUSR1) | \ (1ULL << em_SIGUSR2) | \ (1ULL << em_SIGPWR) )) // All signals userland can block. Indexed by emulated signal number. #define SIG_CANBLOCK (SIG_ALLMASK & ~(uint64_t)((1ULL << em_SIGKILL) | (1ULL << em_SIGSTOP))) /* * Default signal actions, that is, the actions taken when the handler * is set to SIG_DFL. The SIGDEF_* values must all be nonzero, so * that holes in sigdef[] can be detected by noticing zero values. * There are four possible default actions: kill the process (eg, * SIGTERM), kill the process with a coredump (eg, SIGABRT), ignore * the signal (sg, SIGCONT), and stop the process (eg, SIGTTIN). * * Because _we_ can't catch SIGKILL and SIGSTOP, their entries here * never matter; they're here for completeness more than correctness. * * Indexed by emulated signal number. 
*/ #define SIGDEF_HOLE 0 // used to detect holes #define SIGDEF_KILL 1 #define SIGDEF_CORE 2 #define SIGDEF_IGNORE 3 #define SIGDEF_STOP 4 static const unsigned int sigdef[] = { [em_SIGHUP] = SIGDEF_KILL, [em_SIGINT] = SIGDEF_KILL, [em_SIGQUIT] = SIGDEF_CORE, [em_SIGILL] = SIGDEF_CORE, [em_SIGTRAP] = SIGDEF_CORE, [em_SIGABRT] = SIGDEF_CORE, [em_SIGEMT] = SIGDEF_CORE, [em_SIGFPE] = SIGDEF_CORE, [em_SIGKILL] = SIGDEF_KILL, // never matters [em_SIGBUS] = SIGDEF_CORE, [em_SIGSEGV] = SIGDEF_CORE, [em_SIGSYS] = SIGDEF_CORE, [em_SIGPIPE] = SIGDEF_KILL, [em_SIGALRM] = SIGDEF_KILL, [em_SIGTERM] = SIGDEF_KILL, [em_SIGURG] = SIGDEF_IGNORE, [em_SIGSTOP] = SIGDEF_STOP, // never matters [em_SIGTSTP] = SIGDEF_STOP, [em_SIGCONT] = SIGDEF_IGNORE, [em_SIGCHLD] = SIGDEF_IGNORE, [em_SIGTTIN] = SIGDEF_STOP, [em_SIGTTOU] = SIGDEF_STOP, [em_SIGIO] = SIGDEF_IGNORE, [em_SIGXCPU] = SIGDEF_KILL, [em_SIGXFSZ] = SIGDEF_KILL, [em_SIGVTALRM] = SIGDEF_KILL, [em_SIGPROF] = SIGDEF_KILL, [em_SIGWINCH] = SIGDEF_IGNORE, [em_SIGINFO] = SIGDEF_IGNORE, [em_SIGUSR1] = SIGDEF_KILL, [em_SIGUSR2] = SIGDEF_KILL, [em_SIGPWR] = SIGDEF_IGNORE }; /* * Normally zero. Immediately after a restartable syscall which * returns EINTR, set true; signal delivery notices this and arranges * to restart the syscall. */ static int syscall_restartable; /* * When set nonzero, anysigpend indicates there is probably a signal * pending delivery. Basically, it means "it's worth checking". */ static volatile sig_atomic_t anysigpend; /* * Indicates run() should check for rare events. There are various * things that happen rarely and asynchronously but that require run() * to handle them. Rather than have a bunch of variables that run() * checks each time around its loop, we have this, which reduces the * overhead on most loops to one check. */ static volatile sig_atomic_t alert_run; /* * A bunch of 0x00 octets. 
This is useful mostly for padding syscall * output buffers; for internal things, we can just bzero(), but for * userland, it's easier to copyout() from here than to write a * bzeroout(). */ static const char nulbuf[PAGE_SIZE] = { 0 }; // Indicates run should do just-post-execve() actions, like TRC_STACK. static int postexec; /* * When set, we do a debugger-assist loop after forking. With vfork, * this happens when exec breaks the vfork link (we do it right after * vforkbreak()). */ static int forkwait; /* * When set, panic() does a debugger-assist infinite loop. */ static int panicloop; /* * State of the initial-exec machinery. See the comment on * INIT_EXEC_STATE, above. */ static INIT_EXEC_STATE initial_exec_state = IES_INITIAL; /* * vfork() requires some careful attention to control flow, to ensure * we don't return from the stack frame in which _we_ vfork until the * vfork sharing-and-wait assocation is broken. We also have to be * careful to deal with the effects of our underlying OS's memory * sharing on vfork - there are a number of things that a real OS * keeps separate between parent and child, but which we keep in our * VM and thus need to fix up after vfork(). Most of these have been * moved into STATE and thus are dealt with the same way (emulated) * machine registers are, by restoring pre-vfork state once the parent * resumes, but some require special attention. * * vfork_stage simply says where we are in the control flow, which * bounces around a bit more than we might wish (the underlying * vfork() has to happen in run(), but that's something like three * call frames above the point where we discover the emulated machine * is vforking). It is VFORK_NONE during ordinary execution. When * the emulator vforks, the syscall implementation sets it to * VFORK_START. run() then vforks in the emulator and sets * vfork_stage to VFORK_FAIL (if the vfork failed) or VFORK_SUCCESS * (if it worked) and arranges for sc___vfork14 to be re-entered. 
* This last is not strictly necessary; run() could set up the * emulator state directly - but doing it this way lets us leverage * the existing syscall-return code, rather than having to duplicate * it in run() or factor it out. * * vfork_states is used to restore machine state (and some emulated-OS * state) in the parent, as sketched above. It's a stack, not just a * single saved state, to deal with the case where a vforked child * vforks again while it's still borrowing the parent's resources. * * vfork_dropvm is used to deal with execve(). Normally, execve just * drops the old VM, replacing it with the new. But it's not that * simple for execve in a vforked child, because that would leave us, * in the parent, with the child's VM. So when the implementation of * emulated execve breaks the underlying-OS vfork association, it * makes sure the parent's VM is in the global vm variable. After * vforkbreak()ing, it then drops the parent VM and switches to the * new one - but the parent is then stuck with having both VM spaces * set up. The execve() code sticks the child's VM in vfork_dropvm * before vforkbreak()ing; when the parent resumes post-vfork, it * discards any VM it finds in vfork_dropvm. * * vfork_value is used to communicate the errno (if vfork_stage is * VFORK_FAIL) or return PID (if vfork_stage is VFORK_SUCCESS) to * sc___vfork14 (see the above discussion of vfork_stage). * * during_vfork is zero during normal operation; it is nonzero if the * emulator is currently emulating a vforked child. To handle vfork * from a vfork child, it is a counter, not just a boolean, * incremented when vfork() succeeds and decremented when a vfork * child execs. * * vfb and vfbtm are lists of things done by a vforked child that need * fixups in the parent. For example, if a vforked child closes a * file descriptor, in a real OS the file descriptor in the parent is * unaffected. 
For us, the underlying descriptor is unaffected, but * we have state in VM as well (see fds, above), so we need to fix * things up a bit. vfb holds most of these records. But, in order * to get tracing back in order as soon as possible, we have to do the * TRCMGR entries first. Rather than scan the list twice, doing the * TRCMGR entries the first time and the rest the second, we keep two * lists, vfb for most entries and vfbtm for TRCMGR entries. */ static VFORKSTAGE vfork_stage; static STATESTACK *vfork_states; static VM vfork_dropvm; static uint32_t vfork_value; static int during_vfork; static VFORKBACKOUT *vfb; static VFORKBACKOUT *vfbtm; /* * vm_changed is set to indicate that the VM mapping has changed and, * if TRC_VM, should be reported. */ static int vm_changed; /* * elf_stab holds the symbol table entries from the ELF file currently * being executed, so that, for example, a disasebled call instruction * can report the symbol, if any, corresponding to the target. It * doesn't work when executing out of a .so, but it's enough to help. */ static STAB elf_stab = STAB_INIT_EMPTY; /* * Byteswap values from ELF files. These are needed because loading * values from ELF files is done with read() (or moral equivalent), * not mem_get_*(), so byte-sex differences between the emulated SPARC * and the emulator CPU, if any, become visible. We are perhaps * fortunate that the machine we're emulating uses network byte order, * letting us (ab)use the ntoh*() macros; if we were emulating a * little-endian machine, we'd need to write some kind of letoh*() * operations. */ #define ELF_HALF_TO_NATIVE(x) ntohs((x)) #define ELF_WORD_TO_NATIVE(x) ntohl((x)) #define ELF_ADDR_TO_NATIVE(x) ntohl((x)) #define ELF_OFFSET_TO_NATIVE(x) ntohl((x)) /* * Names of machine registers. The extra four at the end are mostly * for the benefit of print_regs(). 
For the main 32 registers, this * is indexed by the R_* values defined in the STATE struct, above; * the FPU registers are indexed starting at PRINT_REGS_Fbase. The * extra values (y, etc) have PRINT_REGS_* definitions for their * indices. The other PRINT_REGS* values are for the convenience of * print_regs() and related code. */ static const char * const regnames[] = { "%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7", "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7", "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7", "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7", "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7", "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15", "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23", "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31", "y", "pc", "npc", "cc" }; #define PRINT_REGS_Fbase 32 #define PRINT_REGS_Y 64 #define PRINT_REGS_PC 65 #define PRINT_REGS_NPC 66 #define PRINT_REGS_CC 67 #define PRINT_REGS__N (sizeof(regnames)/sizeof(regnames[0])) // Printed forms of integer condition code conditions. static const char * const icc[] = { "n", "e", "le", "l", "leu", "lu/cs", "neg", "vs", "a", "ne", "g", "ge", "gu", "geu/cc", "pos", "vc" }; // Printed forms of FPU condition code conditions. static const char * const fcc[] = { "n", "ne", "lg", "ul", "l", "ug", "g", "u", "a", "e", "ue", "ge", "uge", "le", "ule", "o" }; // Printed forms of coprocessor condition code conditions. static const char * const ccc[] = { "n", "123", "12", "13", "1", "23", "2", "3", "a", "0", "03", "02", "023", "01", "013", "012" }; /* * The signal trampoline code. This gets copied out to each executed * process's stack; signal delivery sets %pc to point to the beginning * of it. The comments are from 1.4T's sys/arch/sparc/sparc/locore.s, * where this comes from. 
 */
static const uint32_t sigcode[] = {
	0x9de3bf18,	// save %sp, -CCFSZ-136, %sp
	0xa4100002,	// mov %g2, %l2
	0xa6100003,	// mov %g3, %l3
	0xa8100004,	// mov %g4, %l4
	0xaa100005,	// mov %g5, %l5
	0xac100006,	// mov %g6, %l6
	0xae100007,	// mov %g7, %l7
	0xe007a064,	// ld [%fp+64+16+SC_PSR_OFFSET], %l0
	0x23000004,	// sethi %hi(PSR_EF), %l1
	0xa08c0011,	// andcc %l0, %l1, %l0
	0x02800013,	// be 1f
	0xa3400000,	// rd %y, %l1
	0xc12ba060,	// st %fsr, [%sp+CCFSZ+0]
	0xc13ba068,	// std %f0, [%sp+CCFSZ+8]
	0xc53ba070,	// std %f2, [%sp+CCFSZ+16]
	0xc93ba078,	// std %f4, [%sp+CCFSZ+24]
	0xcd3ba080,	// std %f6, [%sp+CCFSZ+32]
	0xd13ba088,	// std %f8, [%sp+CCFSZ+40]
	0xd53ba090,	// std %f10, [%sp+CCFSZ+48]
	0xd93ba098,	// std %f12, [%sp+CCFSZ+56]
	0xdd3ba0a0,	// std %f14, [%sp+CCFSZ+64]
	0xe13ba0a8,	// std %f16, [%sp+CCFSZ+72]
	0xe53ba0b0,	// std %f18, [%sp+CCFSZ+80]
	0xe93ba0b8,	// std %f20, [%sp+CCFSZ+88]
	0xed3ba0c0,	// std %f22, [%sp+CCFSZ+96]
	0xf13ba0c8,	// std %f24, [%sp+CCFSZ+104]
	0xf53ba0d0,	// std %f26, [%sp+CCFSZ+112]
	0xf93ba0d8,	// std %f28, [%sp+CCFSZ+120]
	0xfd3ba0e0,	// std %f30, [%sp+CCFSZ+128]
	0xd01fa040,	// 1: ldd [%fp+64], %o0
	0xd607a04c,	// ld [%fp+76], %o3
	0x9fc04000,	// call %g1
	0x9407a050,	// add %fp, 64+16, %o2
	0x80940000,	// tst %l0
	0x02800013,	// be 1f
	0x81844000,	// wr %l1, %g0, %y
	0xc10ba060,	// ld [%sp+CCFSZ+0], %fsr
	0xc11ba068,	// ldd [%sp+CCFSZ+8], %f0
	0xc51ba070,	// ldd [%sp+CCFSZ+16], %f2
	0xc91ba078,	// ldd [%sp+CCFSZ+24], %f4
	0xcd1ba080,	// ldd [%sp+CCFSZ+32], %f6
	0xd11ba088,	// ldd [%sp+CCFSZ+40], %f8
	0xd51ba090,	// ldd [%sp+CCFSZ+48], %f10
	0xd91ba098,	// ldd [%sp+CCFSZ+56], %f12
	0xdd1ba0a0,	// ldd [%sp+CCFSZ+64], %f14
	0xe11ba0a8,	// ldd [%sp+CCFSZ+72], %f16
	0xe51ba0b0,	// ldd [%sp+CCFSZ+80], %f18
	0xe91ba0b8,	// ldd [%sp+CCFSZ+88], %f20
	0xed1ba0c0,	// ldd [%sp+CCFSZ+96], %f22
	0xf11ba0c8,	// ldd [%sp+CCFSZ+104], %f24
	0xf51ba0d0,	// ldd [%sp+CCFSZ+112], %f26
	0xf91ba0d8,	// ldd [%sp+CCFSZ+120], %f28
	0xfd1ba0e0,	// ldd [%sp+CCFSZ+128], %f30
	0x84100012,	// mov %l2, %g2 (rd field of 0x84100012 is 2)
	0x86100013,	// mov %l3, %g3
	0x88100014,	// mov %l4, %g4
	0x8a100015,	// mov %l5, %g5
	0x8c100016,	// mov %l6, %g6
	0x8e100017,	// mov %l7, %g7
	0x83e82127,	// restore %g0, SYS___sigreturn14, %g1
	0x9003a050,	// add %sp, 64+16, %o0 (rd field of 0x9003a050 is 8)
	0x91d02000,	// t ST_SYSCALL
	0x82102001,	// mov SYS_EXIT, %g1
	0x91d02000,	// t ST_SYSCALL
	};
// Size, in bytes, of the trampoline copied out to the process's stack.
#define SZSIGCODE sizeof(sigcode)
/*
 * conds[] is here to automate condition-code testing.  It is indexed
 * by the condition value from a branch instruction; the resulting
 * value is, conceptually, an array of 16 bits indexed by the four-bit
 * number formed by concatenating the condition-code bits.  The
 * resulting bit says whether the condition passes or not.
 *
 * The CMASK_* definitions here assume that the treatment of the 16-bit
 * value as an array of 16 bits maps the LSB to the [0] element, the
 * MSB to the [15] element.
 */
/* The conds[] initialization assumes these */
#if (CC_N != 8) || (CC_Z != 4) || (CC_V != 2) || (CC_C != 1)
#error "conds[] assumptions invalid"
#endif
/*
 * CMASK_FROM_CC(f), for f a single-bit flag value (1, 2, 4, or 8),
 * produces a 16-bit mask whose bit i is set iff (i & f) is nonzero:
 * 0xffff/((1<<f)+1) is the alternating pattern with bit f clear
 * (eg, 0x5555 for f=1, 0x3333 for f=2), which xor 0xffff inverts.
 */
#define CMASK_FROM_CC(x) ((0xffff/((1<<(x))+1))^0xffff)
#define CMASK_N CMASK_FROM_CC(CC_N)
#define CMASK_Z CMASK_FROM_CC(CC_Z)
#define CMASK_V CMASK_FROM_CC(CC_V)
#define CMASK_C CMASK_FROM_CC(CC_C)
static const uint16_t conds[16] = {
	0,						// never
	CMASK_Z,					// eq
	CMASK_Z | (CMASK_N ^ CMASK_V),			// le
	CMASK_N ^ CMASK_V,				// lt
	CMASK_C | CMASK_Z,				// leu
	CMASK_C,					// ltu, cs
	CMASK_N,					// neg
	CMASK_V,					// vs
	0xffff,						// always
	0xffff ^ CMASK_Z,				// ne
	0xffff ^ (CMASK_Z | (CMASK_N ^ CMASK_V)),	// gt
	0xffff ^ CMASK_N ^ CMASK_V,			// ge
	0xffff ^ (CMASK_C | CMASK_Z),			// gtu
	0xffff ^ CMASK_C,				// geu, cc
	0xffff ^ CMASK_N,				// pos
	0xffff ^ CMASK_V };				// vc
#undef CMASK_N
#undef CMASK_Z
#undef CMASK_V
#undef CMASK_C
#undef CMASK_FROM_CC
/*
 * fconds[] is just like conds[], except that it's for floating-point
 * conditional branches rather than integer conditional branches.
 * This means there are only two cc bits, not four, and thus each
 * entry needs only four bits, not sixteen.
*/ /* * The fconds[] initialization assumes these. Actually, strictly, it * can deal with FCC_* being any permutation of the numbers 0,1,2,3, * but that is difficult to express compactly in cpp. */ #if (FCC_EQ != 0) || (FCC_LT != 1) || (FCC_GT != 2) || (FCC_UN != 3) #error "fconds[] assumptions invalid" #endif #define FCM_EQ (1< tests we can pass plain char to. #define Cisspace(x) isspace((unsigned char)(x)) #define Cisdigit(x) isdigit((unsigned char)(x)) /* * This is called upon the emulator bugchecking. This actually getting * called indicates there is a bug somewhere. * * We fflush multiple times because we use stdio wrapper streams and we * have no reason to think fflush(0) will flush the inner streams * after flushing the outer streams. */ void (panic)(const char *fn, int lno, const char *fmt, ...) { va_list ap; fprintf(stderr,"%d: panic (\"%s\", line %d): ",mypid,fn,lno); va_start(ap,fmt); vfprintf(stderr,fmt,ap); va_end(ap); fprintf(stderr,"\n"); fflush(0); fflush(0); fflush(0); if (panicloop) { volatile int v; fprintf(stderr,"%s: panic PID %d\n",__progname,(int)getpid()); fflush(0); v = 1; while (v) poll(0,0,100); } signal(SIGSEGV,SIG_DFL); signal(SIGBUS,SIG_DFL); (void)*(volatile char *)0; abort(); exit(1); } static void gdbloop(void) { volatile int go; go = 0; while (! go) poll(0,0,100); } // Forward. See the comment on the definition, below. static void full_tracing(void) __attribute__((__used__)); /* * This is called to return to the top-level loop. The test is to * handle the case where it's called before the top-level loop is * entered; I'm not sure this can happen, but it's a cheap check. */ static void top(void) __attribute__((__noreturn__)); static void top(void) { if (! err_jmp) exit(1); (*err_jmp)(); panic("err_jmp returned"); } /* * Most tracing is generated with trc(TRC_*,...) calls. But sometimes * it's more convenient to write to a FILE * (as, for example, when * calling a print-something function that takes a FILE * for the * destination). 
This returns the FILE * output for the given tracing * kind should be sent to, or nil if that tracing is turned off. * * The returned FILE * should never be closed by the caller. */ static FILE *trc_f(int which) { if ((which < 0) || (which >= TRC__N)) abort(); return(trace[which].f); } /* * Usually, tracing should just call trc(). But, sometimes, tracing * does something complicated or expensive enough that it should be * skipped if tracing is off. This performs that test. */ static int trc_if(int which) { if ((which < 0) || (which >= TRC__N)) abort(); return(!!trace[which].f); } /* * Generate trace output. Conceptually, this is semantically * equivalent to calling trc_f() and, if the returned value is * non-nil, fprintf()ing to it - but this is more convenient in msot * cases. */ static void trc(int, const char *, ...) __attribute__((__format__(__printf__,2,3))); static void trc(int which, const char *fmt, ...) { FILE *f; va_list ap; f = trc_f(which); if (f == 0) return; va_start(ap,fmt); vfprintf(f,fmt,ap); va_end(ap); } /* * Record a memory access. This is not called unless TRC_MEM tracing * is turned on. This handles collapsing successive adjacent accesses * into a single MEMACC - though it does so only when the later access * is after the earlier; some memory accesses are done upwards instead * of downwards specifically so that they will collapse nicely here. * (Arguably we should arrange to handle accesses immediately below, * as well as immediately above, existing MEMACCs, though the * realloc() interface makes that a bit annoying.) 
*/ static void mem_rw(char rw, uint32_t a, uint8_t v) { MEMACC *m; if (nomemacc) return; if (nmemacc >= amemacc) { int i; i = amemacc; memacc = realloc(memacc,(amemacc=nmemacc+16)*sizeof(*memacc)); for (;ivp = malloc((m->a=8)*sizeof(uint8_t)); } } if ( (nmemacc > 0) && (a == (m=&memacc[nmemacc-1])->a2) && (rw == m->rw) ) { if (m->n >= m->a) m->vp = realloc(m->vp,(m->a=m->n+8)*sizeof(uint8_t)); m->vp[m->n++] = v; m->a2 ++; return; } m = &memacc[nmemacc++]; m->a1 = a; m->a2 = a + 1; m->n = 1; m->vp[0] = v; m->rw = rw; } /* * Find the MEMSEG that maps a given address. This also centralizes * alignment checks. * * If op is nil, this returns false for unmapped addresses; if not, it * complains and throws out (and thus cannot return nil). * * XXX Arguably we should do better than linear search, even with the * optimization that accesses will tend to stay nearby and thus moving * the accessed MEMSEG to the head of the list will cut down on * searches. Maybe an array indexed by address/PAGE_SIZE? But, so * far, there are typically few enough MEMSEGs I'm not sure it's worth * the bother. */ static MEMSEG *memseg_find(uint32_t addr, uint32_t align, const char *op) { MEMSEG *ms; MEMSEG **msp; if (addr & align) { printf("%d: %s %08lx: not aligned\n",mypid,op,(ULI)addr); trc(TRC_ERR,"%s %08lx: not aligned\n",op,(ULI)addr); top(); } msp = &vm.m; while ((ms = *msp)) { if ((addr >= ms->base) && (addr < ms->end)) { *msp = ms->link; ms->link = vm.m; vm.m = ms; return(ms); } else { msp = &ms->link; } } if (! op) return(0); printf("%d: %s %08lx: not mapped\n",mypid,op,(ULI)addr); trc(TRC_ERR,"%s %08lx: not mapped\n",op,(ULI)addr); top(); } /* * Return a uint8_t pointer to the memory at emulated virtual address * addr. align is the alignment mask (0 for no alignment, 1 for * 2-byte, 3 for 4-byte, 7 for 8-byte, etc). op is a text name for * the operation, for error messages. prot is the type of access * contemplated. 
* * This is like memseg_find except that it does protection checks and * the return value is a pointer to the memory rather than the * relevant MEMSEG pointer. */ static uint8_t *mem_find(uint32_t addr, uint32_t align, const char *op, unsigned int prot) { MEMSEG *ms; ms = memseg_find(addr,align,op); if (! (ms->prot & prot)) { printf("%d: %s %08lx: not accessible\n",mypid,op,(ULI)addr); trc(TRC_ERR,"%s %08lx: not accessible\n",op,(ULI)addr); top(); } (*ms->ops->check)(ms,addr-ms->base,align+1,prot); return(ms->data+(addr-ms->base)); } /* * Set and get memory values. Addresses must be aligned correctly for * the data type in question, and these handle the way the emulated * machine is big-endian regardless of the emulating CPU's endianness. */ // Get an 8-byte data value. uint64_t mem_get_8(uint32_t addr) { uint8_t *p; p = mem_find(addr,7,"get_8",P_R); if (trc_if(TRC_MEM)) { mem_rw('r',addr,p[0]); mem_rw('r',addr+1,p[1]); mem_rw('r',addr+2,p[2]); mem_rw('r',addr+3,p[3]); mem_rw('r',addr+4,p[4]); mem_rw('r',addr+5,p[5]); mem_rw('r',addr+6,p[6]); mem_rw('r',addr+7,p[7]); } return( (p[0] * 0x0100000000000000ULL) | (p[1] * 0x0001000000000000ULL) | (p[2] * 0x0000010000000000ULL) | (p[3] * 0x0000000100000000ULL) | (p[4] * 0x0000000001000000ULL) | (p[5] * 0x0000000000010000ULL) | (p[6] * 0x0000000000000100ULL) | p[7] ); } // Get a 4-byte data value. uint32_t mem_get_4(uint32_t addr) { uint8_t *p; p = mem_find(addr,3,"get_4",P_R); if (trc_if(TRC_MEM)) { mem_rw('r',addr,p[0]); mem_rw('r',addr+1,p[1]); mem_rw('r',addr+2,p[2]); mem_rw('r',addr+3,p[3]); } return((p[0]*0x01000000)|(p[1]*0x00010000)|(p[2]*0x00000100)|p[3]); } // Get a 4-byte code value. // Just like mem_get_4 except that it's P_X instead of P_R. 
uint32_t mem_exe_4(uint32_t addr) { uint8_t *p; p = mem_find(addr,3,"exe_4",P_X); if (trc_if(TRC_MEM)) { mem_rw('x',addr,p[0]); mem_rw('x',addr+1,p[1]); mem_rw('x',addr+2,p[2]); mem_rw('x',addr+3,p[3]); } return((p[0]*0x01000000)|(p[1]*0x00010000)|(p[2]*0x00000100)|p[3]); } // Set a 4-byte value. void mem_set_4(uint32_t addr, uint32_t v) { uint8_t *p; p = mem_find(addr,3,"set_4",P_W); p[0] = v >> 24; p[1] = v >> 16; p[2] = v >> 8; p[3] = v; if (trc_if(TRC_MEM)) { mem_rw('w',addr,p[0]); mem_rw('w',addr+1,p[1]); mem_rw('w',addr+2,p[2]); mem_rw('w',addr+3,p[3]); } } // Set an 8-byte value. void mem_set_8(uint32_t addr, uint64_t v) { uint8_t *p; p = mem_find(addr,7,"set_8",P_W); p[0] = v >> 56; p[1] = v >> 48; p[2] = v >> 40; p[3] = v >> 32; p[4] = v >> 24; p[5] = v >> 16; p[6] = v >> 8; p[7] = v; if (trc_if(TRC_MEM)) { mem_rw('w',addr,p[0]); mem_rw('w',addr+1,p[1]); mem_rw('w',addr+2,p[2]); mem_rw('w',addr+3,p[3]); mem_rw('w',addr+4,p[4]); mem_rw('w',addr+5,p[5]); mem_rw('w',addr+6,p[6]); mem_rw('w',addr+7,p[7]); } } // Get a 2-byte data value. uint16_t mem_get_2(uint32_t addr) { uint8_t *p; p = mem_find(addr,1,"get_2",P_R); if (trc_if(TRC_MEM)) { mem_rw('r',addr,p[0]); mem_rw('r',addr+1,p[1]); } return((p[0]*0x0100)|p[1]); } // Set a 2-byte data value. void mem_set_2(uint32_t addr, uint16_t v) { uint8_t *p; p = mem_find(addr,1,"set_2",P_W); p[0] = v >> 8; p[1] = v; if (trc_if(TRC_MEM)) { mem_rw('w',addr,p[0]); mem_rw('w',addr+1,p[1]); } } // Get a 1-byte data value. uint8_t mem_get_1(uint32_t addr) { uint8_t *p; p = mem_find(addr,0,"get_1",P_R); if (trc_if(TRC_MEM)) mem_rw('r',addr,*p); return(*p); } // Set a 1-byte data value. void mem_set_1(uint32_t addr, uint8_t v) { uint8_t *p; p = mem_find(addr,0,"set_1",P_W); if (trc_if(TRC_MEM)) mem_rw('w',addr,v); *p = v; } /* * Convert an underlying OS errno to an emulated-OS errno. * * We do not assume the underlying OS supports all the errnos the * emulator does. 
*/ static uint32_t os2em_errno(int err) { switch (errno) { #ifdef EPERM case EPERM: return(em_EPERM); break; #endif #ifdef ENOENT case ENOENT: return(em_ENOENT); break; #endif #ifdef ESRCH case ESRCH: return(em_ESRCH); break; #endif #ifdef EINTR case EINTR: return(em_EINTR); break; #endif #ifdef EIO case EIO: return(em_EIO); break; #endif #ifdef ENXIO case ENXIO: return(em_ENXIO); break; #endif #ifdef E2BIG case E2BIG: return(em_E2BIG); break; #endif #ifdef ENOEXEC case ENOEXEC: return(em_ENOEXEC); break; #endif #ifdef EBADF case EBADF: return(em_EBADF); break; #endif #ifdef ECHILD case ECHILD: return(em_ECHILD); break; #endif #ifdef EDEADLK case EDEADLK: return(em_EDEADLK); break; #endif #ifdef ENOMEM case ENOMEM: return(em_ENOMEM); break; #endif #ifdef EACCES case EACCES: return(em_EACCES); break; #endif #ifdef EFAULT case EFAULT: return(em_EFAULT); break; #endif #ifdef ENOTBLK case ENOTBLK: return(em_ENOTBLK); break; #endif #ifdef EBUSY case EBUSY: return(em_EBUSY); break; #endif #ifdef EEXIST case EEXIST: return(em_EEXIST); break; #endif #ifdef EXDEV case EXDEV: return(em_EXDEV); break; #endif #ifdef ENODEV case ENODEV: return(em_ENODEV); break; #endif #ifdef ENOTDIR case ENOTDIR: return(em_ENOTDIR); break; #endif #ifdef EISDIR case EISDIR: return(em_EISDIR); break; #endif #ifdef EINVAL case EINVAL: return(em_EINVAL); break; #endif #ifdef ENFILE case ENFILE: return(em_ENFILE); break; #endif #ifdef EMFILE case EMFILE: return(em_EMFILE); break; #endif #ifdef ENOTTY case ENOTTY: return(em_ENOTTY); break; #endif #ifdef ETXTBSY case ETXTBSY: return(em_ETXTBSY); break; #endif #ifdef EFBIG case EFBIG: return(em_EFBIG); break; #endif #ifdef ENOSPC case ENOSPC: return(em_ENOSPC); break; #endif #ifdef ESPIPE case ESPIPE: return(em_ESPIPE); break; #endif #ifdef EROFS case EROFS: return(em_EROFS); break; #endif #ifdef EMLINK case EMLINK: return(em_EMLINK); break; #endif #ifdef EPIPE case EPIPE: return(em_EPIPE); break; #endif #ifdef EDOM case EDOM: return(em_EDOM); break; 
#endif #ifdef ERANGE case ERANGE: return(em_ERANGE); break; #endif #ifdef EAGAIN case EAGAIN: return(em_EAGAIN); break; #endif #if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EAGAIN != EWOULDBLOCK)) case EWOUDBLOCK: return(em_EWOULDBLOCK); break; #endif #ifdef EINPROGRESS case EINPROGRESS: return(em_EINPROGRESS); break; #endif #ifdef EALREADY case EALREADY: return(em_EALREADY); break; #endif #ifdef ENOTSOCK case ENOTSOCK: return(em_ENOTSOCK); break; #endif #ifdef EDESTADDRREQ case EDESTADDRREQ: return(em_EDESTADDRREQ); break; #endif #ifdef EMSGSIZE case EMSGSIZE: return(em_EMSGSIZE); break; #endif #ifdef EPROTOTYPE case EPROTOTYPE: return(em_EPROTOTYPE); break; #endif #ifdef ENOPROTOOPT case ENOPROTOOPT: return(em_ENOPROTOOPT); break; #endif #ifdef EPROTONOSUPPORT case EPROTONOSUPPORT: return(em_EPROTONOSUPPORT); break; #endif #ifdef ESOCKTNOSUPPORT case ESOCKTNOSUPPORT: return(em_ESOCKTNOSUPPORT); break; #endif #ifdef EOPNOTSUPP case EOPNOTSUPP: return(em_EOPNOTSUPP); break; #endif #ifdef EPFNOSUPPORT case EPFNOSUPPORT: return(em_EPFNOSUPPORT); break; #endif #ifdef EAFNOSUPPORT case EAFNOSUPPORT: return(em_EAFNOSUPPORT); break; #endif #ifdef EADDRINUSE case EADDRINUSE: return(em_EADDRINUSE); break; #endif #ifdef EADDRNOTAVAIL case EADDRNOTAVAIL: return(em_EADDRNOTAVAIL); break; #endif #ifdef ENETDOWN case ENETDOWN: return(em_ENETDOWN); break; #endif #ifdef ENETUNREACH case ENETUNREACH: return(em_ENETUNREACH); break; #endif #ifdef ENETRESET case ENETRESET: return(em_ENETRESET); break; #endif #ifdef ECONNABORTED case ECONNABORTED: return(em_ECONNABORTED); break; #endif #ifdef ECONNRESET case ECONNRESET: return(em_ECONNRESET); break; #endif #ifdef ENOBUFS case ENOBUFS: return(em_ENOBUFS); break; #endif #ifdef EISCONN case EISCONN: return(em_EISCONN); break; #endif #ifdef ENOTCONN case ENOTCONN: return(em_ENOTCONN); break; #endif #ifdef ESHUTDOWN case ESHUTDOWN: return(em_ESHUTDOWN); break; #endif #ifdef ETOOMANYREFS case ETOOMANYREFS: return(em_ETOOMANYREFS); 
break; #endif #ifdef ETIMEDOUT case ETIMEDOUT: return(em_ETIMEDOUT); break; #endif #ifdef ECONNREFUSED case ECONNREFUSED: return(em_ECONNREFUSED); break; #endif #ifdef ELOOP case ELOOP: return(em_ELOOP); break; #endif #ifdef ENAMETOOLONG case ENAMETOOLONG: return(em_ENAMETOOLONG); break; #endif #ifdef EHOSTDOWN case EHOSTDOWN: return(em_EHOSTDOWN); break; #endif #ifdef EHOSTUNREACH case EHOSTUNREACH: return(em_EHOSTUNREACH); break; #endif #ifdef ENOTEMPTY case ENOTEMPTY: return(em_ENOTEMPTY); break; #endif #ifdef EPROCLIM case EPROCLIM: return(em_EPROCLIM); break; #endif #ifdef EUSERS case EUSERS: return(em_EUSERS); break; #endif #ifdef EDQUOT case EDQUOT: return(em_EDQUOT); break; #endif #ifdef ESTALE case ESTALE: return(em_ESTALE); break; #endif #ifdef EREMOTE case EREMOTE: return(em_EREMOTE); break; #endif #ifdef EBADRPC case EBADRPC: return(em_EBADRPC); break; #endif #ifdef ERPCMISMATCH case ERPCMISMATCH: return(em_ERPCMISMATCH); break; #endif #ifdef EPROGUNAVAIL case EPROGUNAVAIL: return(em_EPROGUNAVAIL); break; #endif #ifdef EPROGMISMATCH case EPROGMISMATCH: return(em_EPROGMISMATCH); break; #endif #ifdef EPROCUNAVAIL case EPROCUNAVAIL: return(em_EPROCUNAVAIL); break; #endif #ifdef ENOLCK case ENOLCK: return(em_ENOLCK); break; #endif #ifdef ENOSYS case ENOSYS: return(em_ENOSYS); break; #endif #ifdef EFTYPE case EFTYPE: return(em_EFTYPE); break; #endif #ifdef EAUTH case EAUTH: return(em_EAUTH); break; #endif #ifdef ENEEDAUTH case ENEEDAUTH: return(em_ENEEDAUTH); break; #endif #ifdef EIDRM case EIDRM: return(em_EIDRM); break; #endif #ifdef ENOMSG case ENOMSG: return(em_ENOMSG); break; #endif #ifdef EOVERFLOW case EOVERFLOW: return(em_EOVERFLOW); break; #endif #ifdef ENOTPLAIN case ENOTPLAIN: return(em_ENOTPLAIN); break; #endif } printf("Unmappable errno %d\n",err); top(); } /* * Convert an emulated-OS signal number to an underlying-OS signal * number. 
*/ static int em2os_signal(uint32_t sig) { switch (sig) { case em_SIGHUP: return(SIGHUP); break; case em_SIGINT: return(SIGINT); break; case em_SIGQUIT: return(SIGQUIT); break; case em_SIGILL: return(SIGILL); break; case em_SIGTRAP: return(SIGTRAP); break; case em_SIGABRT: return(SIGABRT); break; case em_SIGEMT: return(SIGEMT); break; case em_SIGFPE: return(SIGFPE); break; case em_SIGKILL: return(SIGKILL); break; case em_SIGBUS: return(SIGBUS); break; case em_SIGSEGV: return(SIGSEGV); break; case em_SIGSYS: return(SIGSYS); break; case em_SIGPIPE: return(SIGPIPE); break; case em_SIGALRM: return(SIGALRM); break; case em_SIGTERM: return(SIGTERM); break; case em_SIGURG: return(SIGURG); break; case em_SIGSTOP: return(SIGSTOP); break; case em_SIGTSTP: return(SIGTSTP); break; case em_SIGCONT: return(SIGCONT); break; case em_SIGCHLD: return(SIGCHLD); break; case em_SIGTTIN: return(SIGTTIN); break; case em_SIGTTOU: return(SIGTTOU); break; case em_SIGIO: return(SIGIO); break; case em_SIGXCPU: return(SIGXCPU); break; case em_SIGXFSZ: return(SIGXFSZ); break; case em_SIGVTALRM: return(SIGVTALRM); break; case em_SIGPROF: return(SIGPROF); break; case em_SIGWINCH: return(SIGWINCH); break; case em_SIGINFO: return(SIGINFO); break; case em_SIGUSR1: return(SIGUSR1); break; case em_SIGUSR2: return(SIGUSR2); break; case em_SIGPWR: return(SIGPWR); break; } return(0); } /* * Convert an underlying-OS signal number to an emulated-OS signal * number. 
 */
static int os2em_signal(uint32_t sig)
{
    /* Underlying signals with no emulated equivalent convert to 0. */
    switch (sig) {
    case SIGHUP: return(em_SIGHUP); break;
    case SIGINT: return(em_SIGINT); break;
    case SIGQUIT: return(em_SIGQUIT); break;
    case SIGILL: return(em_SIGILL); break;
    case SIGTRAP: return(em_SIGTRAP); break;
    case SIGABRT: return(em_SIGABRT); break;
    case SIGEMT: return(em_SIGEMT); break;
    case SIGFPE: return(em_SIGFPE); break;
    case SIGKILL: return(em_SIGKILL); break;
    case SIGBUS: return(em_SIGBUS); break;
    case SIGSEGV: return(em_SIGSEGV); break;
    case SIGSYS: return(em_SIGSYS); break;
    case SIGPIPE: return(em_SIGPIPE); break;
    case SIGALRM: return(em_SIGALRM); break;
    case SIGTERM: return(em_SIGTERM); break;
    case SIGURG: return(em_SIGURG); break;
    case SIGSTOP: return(em_SIGSTOP); break;
    case SIGTSTP: return(em_SIGTSTP); break;
    case SIGCONT: return(em_SIGCONT); break;
    case SIGCHLD: return(em_SIGCHLD); break;
    case SIGTTIN: return(em_SIGTTIN); break;
    case SIGTTOU: return(em_SIGTTOU); break;
    case SIGIO: return(em_SIGIO); break;
    case SIGXCPU: return(em_SIGXCPU); break;
    case SIGXFSZ: return(em_SIGXFSZ); break;
    case SIGVTALRM: return(em_SIGVTALRM); break;
    case SIGPROF: return(em_SIGPROF); break;
    case SIGWINCH: return(em_SIGWINCH); break;
    case SIGINFO: return(em_SIGINFO); break;
    case SIGUSR1: return(em_SIGUSR1); break;
    case SIGUSR2: return(em_SIGUSR2); break;
    case SIGPWR: return(em_SIGPWR); break;
    }
    return(0);
}
/*
 * Convert an emulated-OS struct termios to an underlying-OS struct
 * termios.
 *
 * em is the emulated address of the emulated struct; its layout
 * (32-bit flag words at offsets 0/4/8/12, c_cc bytes at 16+em_V*,
 * speeds at 36/40) is read via the mem_get_* accessors.
 */
static void em2os_termios(uint32_t em, struct termios *os)
{
    uint32_t v;
    int i;

    /* c_iflag: translate bit-by-bit. */
    v = mem_get_4(em);
    os->c_iflag =
        ((v & em_IGNBRK) ? IGNBRK : 0) |
        ((v & em_BRKINT) ? BRKINT : 0) |
        ((v & em_IGNPAR) ? IGNPAR : 0) |
        ((v & em_PARMRK) ? PARMRK : 0) |
        ((v & em_INPCK) ? INPCK : 0) |
        ((v & em_ISTRIP) ? ISTRIP : 0) |
        ((v & em_INLCR) ? INLCR : 0) |
        ((v & em_IGNCR) ? IGNCR : 0) |
        ((v & em_ICRNL) ? ICRNL : 0) |
        ((v & em_IXON) ? IXON : 0) |
        ((v & em_IXOFF) ? IXOFF : 0) |
        ((v & em_IXANY) ? IXANY : 0) |
        ((v & em_IMAXBEL) ? IMAXBEL : 0);
    /* c_oflag */
    v = mem_get_4(em+4);
    os->c_oflag =
        ((v & em_OPOST) ? OPOST : 0) |
        ((v & em_ONLCR) ? ONLCR : 0) |
        ((v & em_OXTABS) ? OXTABS : 0) |
        ((v & em_ONOEOT) ? ONOEOT : 0) |
        ((v & em_OCRNL) ? OCRNL : 0) |
        ((v & em_ONOCR) ? ONOCR : 0) |
        ((v & em_ONLRET) ? ONLRET : 0);
    /* c_cflag: CSIZE is a multi-bit field, hence the == tests. */
    v = mem_get_4(em+8);
    os->c_cflag =
        ((v & em_CIGNORE) ? CIGNORE : 0) |
        (((v & em_CSIZE) == em_CS5) ? CS5 : 0) |
        (((v & em_CSIZE) == em_CS6) ? CS6 : 0) |
        (((v & em_CSIZE) == em_CS7) ? CS7 : 0) |
        (((v & em_CSIZE) == em_CS8) ? CS8 : 0) |
        ((v & em_CSTOPB) ? CSTOPB : 0) |
        ((v & em_CREAD) ? CREAD : 0) |
        ((v & em_PARENB) ? PARENB : 0) |
        ((v & em_PARODD) ? PARODD : 0) |
        ((v & em_HUPCL) ? HUPCL : 0) |
        ((v & em_CLOCAL) ? CLOCAL : 0) |
        ((v & em_CRTSCTS) ? CRTSCTS : 0) |
        ((v & em_CDTRCTS) ? CDTRCTS : 0) |
        ((v & em_MDMBUF) ? MDMBUF : 0);
    /* c_lflag */
    v = mem_get_4(em+12);
    os->c_lflag =
        ((v & em_ECHOKE) ? ECHOKE : 0) |
        ((v & em_ECHOE) ? ECHOE : 0) |
        ((v & em_ECHOK) ? ECHOK : 0) |
        ((v & em_ECHO) ? ECHO : 0) |
        ((v & em_ECHONL) ? ECHONL : 0) |
        ((v & em_ECHOPRT) ? ECHOPRT : 0) |
        ((v & em_ECHOCTL) ? ECHOCTL : 0) |
        ((v & em_ISIG) ? ISIG : 0) |
        ((v & em_ICANON) ? ICANON : 0) |
        ((v & em_ALTWERASE) ? ALTWERASE : 0) |
        ((v & em_IEXTEN) ? IEXTEN : 0) |
        ((v & em_EXTPROC) ? EXTPROC : 0) |
        ((v & em_TOSTOP) ? TOSTOP : 0) |
        ((v & em_FLUSHO) ? FLUSHO : 0) |
        ((v & em_NOKERNINFO) ? NOKERNINFO : 0) |
        ((v & em_PENDIN) ? PENDIN : 0) |
        ((v & em_NOFLSH) ? NOFLSH : 0);
    /* c_cc: disable every slot, then fill in the ones we emulate. */
    for (i=(sizeof(os->c_cc)/sizeof(os->c_cc[0]))-1;i>=0;i--) os->c_cc[i] = _POSIX_VDISABLE;
    os->c_cc[VEOF] = mem_get_1(em+16+em_VEOF);
    os->c_cc[VEOL] = mem_get_1(em+16+em_VEOL);
    os->c_cc[VEOL2] = mem_get_1(em+16+em_VEOL2);
    os->c_cc[VERASE] = mem_get_1(em+16+em_VERASE);
    os->c_cc[VWERASE] = mem_get_1(em+16+em_VWERASE);
    os->c_cc[VKILL] = mem_get_1(em+16+em_VKILL);
    os->c_cc[VREPRINT] = mem_get_1(em+16+em_VREPRINT);
    os->c_cc[VINTR] = mem_get_1(em+16+em_VINTR);
    os->c_cc[VQUIT] = mem_get_1(em+16+em_VQUIT);
    os->c_cc[VSUSP] = mem_get_1(em+16+em_VSUSP);
    os->c_cc[VDSUSP] = mem_get_1(em+16+em_VDSUSP);
    os->c_cc[VSTART] = mem_get_1(em+16+em_VSTART);
    os->c_cc[VSTOP] = mem_get_1(em+16+em_VSTOP);
    os->c_cc[VLNEXT] = mem_get_1(em+16+em_VLNEXT);
    os->c_cc[VDISCARD] = mem_get_1(em+16+em_VDISCARD);
    os->c_cc[VMIN] = mem_get_1(em+16+em_VMIN);
    os->c_cc[VTIME] = mem_get_1(em+16+em_VTIME);
    os->c_cc[VSTATUS] = mem_get_1(em+16+em_VSTATUS);
    os->c_ispeed = mem_get_4(em+36);
    os->c_ospeed = mem_get_4(em+40);
}
/*
 * Convert an underlying-OS struct termios to an emulated-OS struct
 * termios.
 *
 * Inverse of em2os_termios(); writes the emulated layout described
 * there via the mem_set_* accessors.
 */
static void os2em_termios(struct termios *os, uint32_t em)
{
    mem_set_4(em, // c_iflag
        ((os->c_iflag & IGNBRK) ? em_IGNBRK : 0) |
        ((os->c_iflag & BRKINT) ? em_BRKINT : 0) |
        ((os->c_iflag & IGNPAR) ? em_IGNPAR : 0) |
        ((os->c_iflag & PARMRK) ? em_PARMRK : 0) |
        ((os->c_iflag & INPCK) ? em_INPCK : 0) |
        ((os->c_iflag & ISTRIP) ? em_ISTRIP : 0) |
        ((os->c_iflag & INLCR) ? em_INLCR : 0) |
        ((os->c_iflag & IGNCR) ? em_IGNCR : 0) |
        ((os->c_iflag & ICRNL) ? em_ICRNL : 0) |
        ((os->c_iflag & IXON) ? em_IXON : 0) |
        ((os->c_iflag & IXOFF) ? em_IXOFF : 0) |
        ((os->c_iflag & IXANY) ? em_IXANY : 0) |
        ((os->c_iflag & IMAXBEL) ? em_IMAXBEL : 0) );
    mem_set_4(em+4, // c_oflag
        ((os->c_oflag & OPOST) ? em_OPOST : 0) |
        ((os->c_oflag & ONLCR) ? em_ONLCR : 0) |
        ((os->c_oflag & OXTABS) ? em_OXTABS : 0) |
        ((os->c_oflag & ONOEOT) ? em_ONOEOT : 0) |
        ((os->c_oflag & OCRNL) ? em_OCRNL : 0) |
        ((os->c_oflag & ONOCR) ? em_ONOCR : 0) |
        ((os->c_oflag & ONLRET) ? em_ONLRET : 0) );
    mem_set_4(em+8, // c_cflag
        ((os->c_cflag & CIGNORE) ? em_CIGNORE : 0) |
        (((os->c_cflag & CSIZE) == CS5) ? em_CS5 : 0) |
        (((os->c_cflag & CSIZE) == CS6) ? em_CS6 : 0) |
        (((os->c_cflag & CSIZE) == CS7) ? em_CS7 : 0) |
        (((os->c_cflag & CSIZE) == CS8) ? em_CS8 : 0) |
        ((os->c_cflag & CSTOPB) ? em_CSTOPB : 0) |
        ((os->c_cflag & CREAD) ? em_CREAD : 0) |
        ((os->c_cflag & PARENB) ? em_PARENB : 0) |
        ((os->c_cflag & PARODD) ? em_PARODD : 0) |
        ((os->c_cflag & HUPCL) ? em_HUPCL : 0) |
        ((os->c_cflag & CLOCAL) ? em_CLOCAL : 0) |
        ((os->c_cflag & CRTSCTS) ? em_CRTSCTS : 0) |
        ((os->c_cflag & CDTRCTS) ? em_CDTRCTS : 0) |
        ((os->c_cflag & MDMBUF) ? em_MDMBUF : 0) );
    mem_set_4(em+12, // c_lflag
        ((os->c_lflag & ECHOKE) ? em_ECHOKE : 0) |
        ((os->c_lflag & ECHOE) ? em_ECHOE : 0) |
        ((os->c_lflag & ECHOK) ? em_ECHOK : 0) |
        ((os->c_lflag & ECHO) ? em_ECHO : 0) |
        ((os->c_lflag & ECHONL) ? em_ECHONL : 0) |
        ((os->c_lflag & ECHOPRT) ? em_ECHOPRT : 0) |
        ((os->c_lflag & ECHOCTL) ? em_ECHOCTL : 0) |
        ((os->c_lflag & ISIG) ? em_ISIG : 0) |
        ((os->c_lflag & ICANON) ? em_ICANON : 0) |
        ((os->c_lflag & ALTWERASE) ? em_ALTWERASE : 0) |
        ((os->c_lflag & IEXTEN) ? em_IEXTEN : 0) |
        ((os->c_lflag & EXTPROC) ? em_EXTPROC : 0) |
        ((os->c_lflag & TOSTOP) ? em_TOSTOP : 0) |
        ((os->c_lflag & FLUSHO) ? em_FLUSHO : 0) |
        ((os->c_lflag & NOKERNINFO) ? em_NOKERNINFO : 0) |
        ((os->c_lflag & PENDIN) ? em_PENDIN : 0) |
        ((os->c_lflag & NOFLSH) ? em_NOFLSH : 0) );
    mem_set_1(em+16+em_VEOF,os->c_cc[VEOF]);
    mem_set_1(em+16+em_VEOL,os->c_cc[VEOL]);
    mem_set_1(em+16+em_VEOL2,os->c_cc[VEOL2]);
    mem_set_1(em+16+em_VERASE,os->c_cc[VERASE]);
    mem_set_1(em+16+em_VWERASE,os->c_cc[VWERASE]);
    mem_set_1(em+16+em_VKILL,os->c_cc[VKILL]);
    mem_set_1(em+16+em_VREPRINT,os->c_cc[VREPRINT]);
    mem_set_1(em+16+em_VINTR,os->c_cc[VINTR]);
    mem_set_1(em+16+em_VQUIT,os->c_cc[VQUIT]);
    mem_set_1(em+16+em_VSUSP,os->c_cc[VSUSP]);
    mem_set_1(em+16+em_VDSUSP,os->c_cc[VDSUSP]);
    mem_set_1(em+16+em_VSTART,os->c_cc[VSTART]);
    mem_set_1(em+16+em_VSTOP,os->c_cc[VSTOP]);
    mem_set_1(em+16+em_VLNEXT,os->c_cc[VLNEXT]);
    mem_set_1(em+16+em_VDISCARD,os->c_cc[VDISCARD]);
    mem_set_1(em+16+em_VMIN,os->c_cc[VMIN]);
    mem_set_1(em+16+em_VTIME,os->c_cc[VTIME]);
    mem_set_1(em+16+em_VSTATUS,os->c_cc[VSTATUS]);
    mem_set_4(em+36,os->c_ispeed);
    mem_set_4(em+40,os->c_ospeed);
}
/*
 * Convert emulated-OS MSG_* flags (recvmsg/sendmsg and friends) to
 * underlying-OS MSG_* flags.  Unrecognized flag bits are reported and
 * drop to the emulator's toplevel via top().
 */
static int em2os_MSG_flags(uint32_t emf)
{
    int os;

    os = 0;
    if (emf & em_MSG_OOB) os |= MSG_OOB;
    if (emf & em_MSG_PEEK) os |= MSG_PEEK;
    if (emf & em_MSG_DONTROUTE) os |= MSG_DONTROUTE;
    if (emf & em_MSG_EOR) os |= MSG_EOR;
    if (emf & em_MSG_TRUNC) os |= MSG_TRUNC;
    if (emf & em_MSG_CTRUNC) os |= MSG_CTRUNC;
    if (emf & em_MSG_WAITALL) os |= MSG_WAITALL;
    if (emf & em_MSG_DONTWAIT) os |= MSG_DONTWAIT;
    if (emf & em_MSG_BCAST) os |= MSG_BCAST;
    if (emf & em_MSG_MCAST) os |= MSG_MCAST;
    if (emf & em_MSG_NOSIGNAL) os |= MSG_NOSIGNAL;
    if (emf & ~(em_MSG_OOB | em_MSG_PEEK | em_MSG_DONTROUTE | em_MSG_EOR | em_MSG_TRUNC | em_MSG_CTRUNC | em_MSG_WAITALL | em_MSG_DONTWAIT | em_MSG_BCAST | em_MSG_MCAST | em_MSG_NOSIGNAL)) {
        printf("Unrecognized emulated MSG_* flag(s): %08lx\n",(ULI)emf);
        top();
    }
    return(os);
}
/*
 * strerror(), except it takes an emulated-OS errno (and the returned
 * strings include the short names as well as the messages).
*/ static const char *em_strerror(uint32_t e) { switch (e) { case em_EPERM: return("EPERM, Operation not permitted"); break; case em_ENOENT: return("ENOENT, No such file or directory"); break; case em_ESRCH: return("ESRCH, No such process"); break; case em_EINTR: return("EINTR, Interrupted system call"); break; case em_EIO: return("EIO, Input/output error"); break; case em_ENXIO: return("ENXIO, Device not configured"); break; case em_E2BIG: return("E2BIG, Argument list too long"); break; case em_ENOEXEC: return("ENOEXEC, Exec format error"); break; case em_EBADF: return("EBADF, Bad file descriptor"); break; case em_ECHILD: return("ECHILD, No child processes"); break; case em_EDEADLK: return("EDEADLK, Resource deadlock avoided"); break; case em_ENOMEM: return("ENOMEM, Cannot allocate memory"); break; case em_EACCES: return("EACCES, Permission denied"); break; case em_EFAULT: return("EFAULT, Bad address"); break; case em_ENOTBLK: return("ENOTBLK, Block device required"); break; case em_EBUSY: return("EBUSY, Device busy"); break; case em_EEXIST: return("EEXIST, File exists"); break; case em_EXDEV: return("EXDEV, Cross-device link"); break; case em_ENODEV: return("ENODEV, Operation not supported by device"); break; case em_ENOTDIR: return("ENOTDIR, Not a directory"); break; case em_EISDIR: return("EISDIR, Is a directory"); break; case em_EINVAL: return("EINVAL, Invalid argument"); break; case em_ENFILE: return("ENFILE, Too many open files in system"); break; case em_EMFILE: return("EMFILE, Too many open files"); break; case em_ENOTTY: return("ENOTTY, Inappropriate ioctl for device"); break; case em_ETXTBSY: return("ETXTBSY, Text file busy"); break; case em_EFBIG: return("EFBIG, File too large"); break; case em_ENOSPC: return("ENOSPC, No space left on device"); break; case em_ESPIPE: return("ESPIPE, Illegal seek"); break; case em_EROFS: return("EROFS, Read-only file system"); break; case em_EMLINK: return("EMLINK, Too many links"); break; case em_EPIPE: return("EPIPE, 
Broken pipe"); break; case em_EDOM: return("EDOM, Numerical argument out of domain"); break; case em_ERANGE: return("ERANGE, Result too large"); break; case em_EAGAIN: return("EAGAIN/EWOULDBLOCK, Resource temporarily unavailable"); break; case em_EINPROGRESS: return("EINPROGRESS, Operation now in progress"); break; case em_EALREADY: return("EALREADY, Operation already in progress"); break; case em_ENOTSOCK: return("ENOTSOCK, Socket operation on non-socket"); break; case em_EDESTADDRREQ: return("EDESTADDRREQ, Destination address required"); break; case em_EMSGSIZE: return("EMSGSIZE, Message too long"); break; case em_EPROTOTYPE: return("EPROTOTYPE, Protocol wrong type for socket"); break; case em_ENOPROTOOPT: return("ENOPROTOOPT, Protocol not available"); break; case em_EPROTONOSUPPORT: return("EPROTONOSUPPORT, Protocol not supported"); break; case em_ESOCKTNOSUPPORT: return("ESOCKTNOSUPPORT, Socket type not supported"); break; case em_EOPNOTSUPP: return("EOPNOTSUPP, Operation not supported"); break; case em_EPFNOSUPPORT: return("EPFNOSUPPORT, Protocol family not supported"); break; case em_EAFNOSUPPORT: return("EAFNOSUPPORT, Address family not supported by protocol family"); break; case em_EADDRINUSE: return("EADDRINUSE, Address already in use"); break; case em_EADDRNOTAVAIL: return("EADDRNOTAVAIL, Can't assign requested address"); break; case em_ENETDOWN: return("ENETDOWN, Network is down"); break; case em_ENETUNREACH: return("ENETUNREACH, Network is unreachable"); break; case em_ENETRESET: return("ENETRESET, Network dropped connection on reset"); break; case em_ECONNABORTED: return("ECONNABORTED, Software caused connection abort"); break; case em_ECONNRESET: return("ECONNRESET, Connection reset by peer"); break; case em_ENOBUFS: return("ENOBUFS, No buffer space available"); break; case em_EISCONN: return("EISCONN, Socket is already connected"); break; case em_ENOTCONN: return("ENOTCONN, Socket is not connected"); break; case em_ESHUTDOWN: return("ESHUTDOWN, Can't 
send after socket shutdown"); break; case em_ETOOMANYREFS: return("ETOOMANYREFS, Too many references: can't splice"); break; case em_ETIMEDOUT: return("ETIMEDOUT, Connection timed out"); break; case em_ECONNREFUSED: return("ECONNREFUSED, Connection refused"); break; case em_ELOOP: return("ELOOP, Too many levels of symbolic links"); break; case em_ENAMETOOLONG: return("ENAMETOOLONG, File name too long"); break; case em_EHOSTDOWN: return("EHOSTDOWN, Host is down"); break; case em_EHOSTUNREACH: return("EHOSTUNREACH, No route to host"); break; case em_ENOTEMPTY: return("ENOTEMPTY, Directory not empty"); break; case em_EPROCLIM: return("EPROCLIM, Too many processes"); break; case em_EUSERS: return("EUSERS, Too many users"); break; case em_EDQUOT: return("EDQUOT, Disc quota exceeded"); break; case em_ESTALE: return("ESTALE, Stale NFS file handle"); break; case em_EREMOTE: return("EREMOTE, Too many levels of remote in path"); break; case em_EBADRPC: return("EBADRPC, RPC struct is bad"); break; case em_ERPCMISMATCH: return("ERPCMISMATCH, RPC version wrong"); break; case em_EPROGUNAVAIL: return("EPROGUNAVAIL, RPC prog. 
not avail"); break; case em_EPROGMISMATCH: return("EPROGMISMATCH, Program version wrong"); break; case em_EPROCUNAVAIL: return("EPROCUNAVAIL, Bad procedure for program"); break; case em_ENOLCK: return("ENOLCK, No locks available"); break; case em_ENOSYS: return("ENOSYS, Function not implemented"); break; case em_EFTYPE: return("EFTYPE, Inappropriate file type or format"); break; case em_EAUTH: return("EAUTH, Authentication error"); break; case em_ENEEDAUTH: return("ENEEDAUTH, Need authenticator"); break; case em_EIDRM: return("EIDRM, Identifier removed"); break; case em_ENOMSG: return("ENOMSG, No message of desired type"); break; case em_EOVERFLOW: return("EOVERFLOW, Value too large to be stored in data type"); break; case em_ENOTPLAIN: return("ENOTPLAIN, Not a plain file"); break; } return("unknown"); } /* * Given a VFBKIND, return a short string for human consumption * describing it. */ static const char *vfb_kind_str(VFBKIND k) { switch (k) { case VFB_OPEN: return("OPEN"); break; case VFB_CLOSE: return("CLOSE"); break; case VFB_DUP2: return("DUP2"); break; case VFB_TRCMGR: return("TRCMGR"); break; } return("unknown"); } /* * Given a VFORKSTAGE, return a short string for human consumption * describing it. */ static const char *vfork_stage_str(VFORKSTAGE s) { switch (s) { case VFORK_NONE: return("NONE"); break; case VFORK_START: return("START"); break; case VFORK_FAIL: return("FAIL"); break; case VFORK_SUCCESS: return("SUCCESS"); break; } return("unknown"); } /* * Given an emulated-OS signal number, return its short string name. 
*/ static const char *em_signame(uint32_t s, const char *unk) { switch (s) { case em_SIGHUP: return("SIGHUP"); break; case em_SIGINT: return("SIGINT"); break; case em_SIGQUIT: return("SIGQUIT"); break; case em_SIGILL: return("SIGILL"); break; case em_SIGTRAP: return("SIGTRAP"); break; case em_SIGABRT: return("SIGABRT"); break; case em_SIGEMT: return("SIGEMT"); break; case em_SIGFPE: return("SIGFPE"); break; case em_SIGKILL: return("SIGKILL"); break; case em_SIGBUS: return("SIGBUS"); break; case em_SIGSEGV: return("SIGSEGV"); break; case em_SIGSYS: return("SIGSYS"); break; case em_SIGPIPE: return("SIGPIPE"); break; case em_SIGALRM: return("SIGALRM"); break; case em_SIGTERM: return("SIGTERM"); break; case em_SIGURG: return("SIGURG"); break; case em_SIGSTOP: return("SIGSTOP"); break; case em_SIGTSTP: return("SIGTSTP"); break; case em_SIGCONT: return("SIGCONT"); break; case em_SIGCHLD: return("SIGCHLD"); break; case em_SIGTTIN: return("SIGTTIN"); break; case em_SIGTTOU: return("SIGTTOU"); break; case em_SIGIO: return("SIGIO"); break; case em_SIGXCPU: return("SIGXCPU"); break; case em_SIGXFSZ: return("SIGXFSZ"); break; case em_SIGVTALRM: return("SIGVTALRM"); break; case em_SIGPROF: return("SIGPROF"); break; case em_SIGWINCH: return("SIGWINCH"); break; case em_SIGINFO: return("SIGINFO"); break; case em_SIGUSR1: return("SIGUSR1"); break; case em_SIGUSR2: return("SIGUSR2"); break; case em_SIGPWR: return("SIGPWR"); break; } return(unk); } /* * Handle the command line. 
 */
static void handleargs(int ac, char **av)
{
    /*
     * Results land in file-scope state: option flags (forkwait,
     * panicloop, ...), the executable path (exe), NAME=VALUE
     * environment settings (cl_envp/cl_nenvp), and the remaining
     * arguments (cl_args/cl_nargs).  Exits nonzero on usage errors.
     */
    int skip;
    int errs;

    skip = 0;
    errs = 0;
    for (ac--,av++;ac;ac--,av++) {
        /* skip counts option arguments consumed by WANTARG(). */
        if (skip > 0) {
            skip --;
            continue;
        }
        /* First non-option argument ends option parsing. */
        if (**av != '-') {
            exe = *av;
            break;
        }
        /* Never entered directly; WANTARG() jumps here when an
         * option is missing its required argument. */
        if (0) {
            needarg:;
            fprintf(stderr,"%s: %s needs a following argument\n",__progname,*av);
            errs = 1;
            continue;
        }
#define WANTARG() do { if (++skip >= ac) goto needarg; } while (0)
        if (!strcmp(*av,"-forkwait") || !strcmp(*av,"-fork-wait")) {
            forkwait = 1;
            continue;
        }
        if (!strcmp(*av,"-panicloop") || !strcmp(*av,"-panic-loop")) {
            panicloop = 1;
            continue;
        }
        if (!strcmp(*av,"-debugwait") || !strcmp(*av,"-debug-wait")) {
            gdbloop();
            continue;
        }
        if (!strcmp(*av,"-delayexec") || !strcmp(*av,"-delay-exec")) {
            initial_exec_state = IES_DELAY;
            continue;
        }
        if (!strcmp(*av,"-real")) {
            WANTARG();
            realthing_setup(av[skip]);
            continue;
        }
#undef WANTARG
        fprintf(stderr,"%s: unrecognized option `%s'\n",__progname,*av);
        errs = 1;
    }
    /* Leading NAME=VALUE arguments are environment settings for the
     * emulated program.  (index() is the legacy BSD spelling of
     * strchr(), consistent with the rest of this file.) */
    while (av[0]) {
        char *equal;
        equal = index(av[0],'=');
        if (! equal) break;
        cl_nenvp ++;
        cl_envp = realloc(cl_envp,cl_nenvp*sizeof(*cl_envp));
        cl_envp[cl_nenvp-1] = av[0];
        ac --;
        av ++;
    }
    exe = *av;
    if (! exe) {
        fprintf(stderr,"%s: need executable filename\n",__progname);
        errs = 1;
    }
    ac --;
    av ++;
    cl_nargs = ac;
    cl_args = av;
    if (errs) exit(1);
}
/*
 * Find the live block whose base address is given, or nil if no such
 * live block exists.
 */
static MALBLOCK *arena_find_live(MEMSEG_PRIV_ARENA *a, uint32_t addr)
{
    MALBLOCK *b;

    /* Binary search down the AVL tree of live blocks, keyed on base. */
    b = a->live;
    while (1) {
        if (! b) return(0);
        if (addr == b->base) return(b);
        if (addr < b->base) b = b->l; else b = b->r;
    }
}
/*
 * Find the live block into which addr falls, or nil if no such live
 * block exists.  Redzones are not part of blocks for the purposes of
 * this function.
 */
static MALBLOCK *arena_find_containing(MEMSEG_PRIV_ARENA *a, uint32_t addr)
{
    MALBLOCK *b;

    b = a->live;
    while (1) {
        if (! b) return(0);
        if ((addr >= b->base) && (addr < b->end)) return(b);
        if (addr < b->base) b = b->l; else b = b->r;
    }
}
/* Free an AVL tree of MALBLOCKs (l/r are the child links). */
static void free_malblocks_avl(MALBLOCK *b)
{
    if (! b) return;
    free_malblocks_avl(b->l);
    free_malblocks_avl(b->r);
    free(b);
}
/* Free a singly-linked list of MALBLOCKs (chained through r). */
static void free_malblocks_list(MALBLOCK *l)
{
    MALBLOCK *b;

    while ((b = l)) {
        l = b->r;
        free(b);
    }
}
static MEMSEGOPS memseg_ops_malloc; // forward
/*
 * Create and return a new malloc()-backed MEMSEG, given base, size,
 * and protection.  This does not deal with maintaining the
 * no-overlapping invariant; the caller must have already dealt with
 * that.  This does link the new MEMSEG into vm, though.
 *
 * Note that this does not promise anything about the contents of the
 * new memory.
 *
 * XXX Maybe collapse with any adjacent malloc MEMSEGs with identical
 * protection?
 *
 * NOTE(review): the malloc() results are not checked; on allocation
 * failure the stores below crash.  Confirm whether panic() should be
 * called here instead.
 */
static MEMSEG *memseg_new_malloc(uint32_t base, uint32_t size, unsigned char prot)
{
    MEMSEG *n;
    MEMSEG_PRIV_MALLOC *p;

    n = malloc(sizeof(MEMSEG));
    p = malloc(sizeof(MEMSEG_PRIV_MALLOC));
    n->base = base;
    n->size = size;
    n->end = base + size;
    n->prot = prot;
    p->tofree = malloc(size);
    n->data = p->tofree;
    n->ops = &memseg_ops_malloc;
    n->priv = p;
    n->link = vm.m;
    vm.m = n;
    return(n);
}
/*
 * The done method for malloc memsegs.  Free the underlying memory and
 * the private pointer.
 */
static void memseg_done_malloc(MEMSEG *ms)
{
    free(((MEMSEG_PRIV_MALLOC *)ms->priv)->tofree);
    free(ms->priv);
}
/*
 * The curtail method for malloc MEMSEGs.  Just adjust size and end;
 * don't worry about freeing partial memory blocks - the code
 * simplicity wins over the memory resource saving.
 */
static void memseg_curtail_malloc(MEMSEG *ms, uint32_t by)
{
    if (by >= ms->size) panic("impossible malloc curtail");
    ms->size -= by;
    ms->end -= by;
}
/*
 * The behead method for malloc MEMSEGs.  Just adjust base, size, and
 * data; as for curtail, above, it's not worth trying to economize on
 * memory.
 */
static void memseg_behead_malloc(MEMSEG *ms, uint32_t by)
{
    if (by >= ms->size) panic("impossible malloc behead");
    ms->size -= by;
    ms->base += by;
    ms->data += by;
}
/*
 * The split method for malloc MEMSEGs.  Just allocate a new memory
 * MEMSEG, copy, and adjust the old one.
 *
 * XXX Arguably should copy the smaller piece.
 *
 * XXX Arguably should refcount the backing memory and have both
 * MEMSEGs refer to it.
 */
static MEMSEG *memseg_split_malloc(MEMSEG *ms, uint32_t part1, uint32_t part2)
{
    MEMSEG *n;

    if ((part1 > ms->size) || (part2 > ms->size) || (part1+part2 > ms->size)) panic("impossible malloc split");
    /* New segment holds the last part2 bytes; old keeps the first part1. */
    n = memseg_new_malloc(ms->end-part2,part2,ms->prot);
    bcopy(ms->data+(ms->size-part2),n->data,part2);
    ms->size = part1;
    ms->end = ms->base + part1;
    return(n);
}
/*
 * The postexec method for malloc MEMSEGs.  This is boring; malloc
 * MEMSEGs never survive exec()s.
 */
static int memseg_postexec_malloc(MEMSEG *ms __attribute__((__unused__)))
{
    return(0);
}
/*
 * The merge method for malloc MEMSEGs.  Declines (returns 0) when
 * either segment is 1MB or larger; otherwise copies both into a fresh
 * buffer, absorbs b into a, and returns 1.
 *
 * NOTE(review): b's MEMSEG struct itself is not free()d here, unlike
 * the done+free pattern in memseg_clear_conflict; confirm whether the
 * caller frees b or whether this leaks the struct.
 */
static int memseg_merge_malloc(MEMSEG *a, MEMSEG *b)
{
    MEMSEG_PRIV_MALLOC *ap;
    char *newdata;

    if ( (a->ops != &memseg_ops_malloc) ||
         (b->ops != &memseg_ops_malloc) ||
         (a->end != b->base) ||
         (a->prot != b->prot) ) panic("impossible %s",__func__);
    if ((a->size >= (1U<<20)) || (b->size >= (1U<<20))) return(0);
    ap = a->priv;
    newdata = malloc(a->size+b->size);
    bcopy(a->data,newdata,a->size);
    bcopy(b->data,newdata+a->size,b->size);
    free(ap->tofree);
    ap->tofree = newdata;
    a->data = newdata;
    a->size += b->size;
    a->end = a->base + a->size;
    a->link = b->link;
    memseg_done_malloc(b);
    return(1);
}
/*
 * The check method for malloc MEMSEGs.  (Any access within the
 * segment is fine; nothing to check.)
 */
static void memseg_check_malloc(MEMSEG *ms, uint32_t addr, uint32_t len, unsigned int prot)
{
    (void)ms;
    (void)addr;
    (void)len;
    (void)prot;
}
/*
 * The desc method for malloc MEMSEGs.
 */
static void memseg_desc_malloc(MEMSEG *ms, FILE *to)
{
    (void)ms;
    fprintf(to,"malloc");
}
/*
 * The MEMSEGOPS for malloc MEMSEGs.
 */
static MEMSEGOPS memseg_ops_malloc = MEMSEGOPS_INIT(malloc);

static MEMSEGOPS memseg_ops_mmap; // forward
/*
 * Create and return a new mmap()-backed MEMSEG, given the usual base,
 * size, and protection, and mmap-specific values for the mmap flags
 * and the void * returned by the underlying-OS mmap().
 */
static MEMSEG *memseg_new_mmap(uint32_t base, uint32_t size, unsigned char prot, uint32_t mapflags, void *mapped)
{
    MEMSEG *n;
    MEMSEG_PRIV_MMAP *p;

    n = malloc(sizeof(MEMSEG));
    p = malloc(sizeof(MEMSEG_PRIV_MMAP));
    n->base = base;
    n->size = size;
    n->end = base + size;
    n->prot = prot;
    /* The private struct is refcounted so split segments can share
     * one underlying mapping; see memseg_split_mmap. */
    p->refcnt = 1;
    p->mapped = mapped;
    p->size = size;
    p->mapflags = mapflags;
    n->data = mapped;
    n->ops = &memseg_ops_mmap;
    n->priv = p;
    n->link = vm.m;
    vm.m = n;
    return(n);
}
/*
 * The done method for mmap()-backed MEMSEGs.  Drop a reference, and,
 * if it was the last reference, munmap() the underlying memory and
 * free the private data struct.
 */
static void memseg_done_mmap(MEMSEG *ms)
{
    MEMSEG_PRIV_MMAP *p;

    p = ms->priv;
    p->refcnt --;
    if (p->refcnt < 1) {
        munmap(p->mapped,p->size);
        free(p);
    }
}
/*
 * The curtail method for mmap()-backed MEMSEGs.  Just leave the
 * underlying mmap()ped memory alone and adjust the descriptive
 * values.
 */
static void memseg_curtail_mmap(MEMSEG *ms, uint32_t by)
{
    if (by >= ms->size) panic("impossible mmap curtail");
    ms->size -= by;
    ms->end -= by;
}
/*
 * The behead method for mmap()-backed MEMSEGs.  Just leave the
 * underlying mmap()ped memory alone and adjust the descriptive
 * values.
 */
static void memseg_behead_mmap(MEMSEG *ms, uint32_t by)
{
    if (by >= ms->size) panic("impossible mmap behead");
    ms->size -= by;
    ms->base += by;
    ms->data += by;
}
/*
 * The split method for mmap()-backed MEMSEGs.  This is why
 * MEMSEG_PRIV_MMAPs have refcounts: so that we can, here, generate
 * two MEMSEGs backed by the same underlying mmap().
 */
static MEMSEG *memseg_split_mmap(MEMSEG *ms, uint32_t part1, uint32_t part2)
{
    MEMSEG *n;
    MEMSEG_PRIV_MMAP *p;

    if ((part1 > ms->size) || (part2 > ms->size) || (part1+part2 > ms->size)) panic("impossible mmap split");
    p = ms->priv;
    /* New segment covers the last part2 bytes, sharing the mapping. */
    n = malloc(sizeof(MEMSEG));
    n->base = ms->base + ms->size - part2;
    n->size = part2;
    n->end = ms->end;
    n->prot = ms->prot;
    n->data = ms->data + (ms->size - part2);
    n->ops = ms->ops;
    p->refcnt ++;
    n->priv = p;
    ms->size = part1;
    ms->end = ms->base + part1;
    n->link = vm.m;
    vm.m = n;
    return(n);
}
/*
 * The postexec method for mmap()-backed MEMSEGs.  Generally, these go
 * away on exec() just as malloc MEMSEGs do, but if mapped with
 * MAP_INHERIT they stay around.
 */
static int memseg_postexec_mmap(MEMSEG *ms)
{
    return((((MEMSEG_PRIV_MMAP *)ms->priv)->mapflags & em_MAP_INHERIT) ? 1 : 0);
}
/*
 * The merge method for mmap MEMSEGs.  (mmap segments are never
 * merged; always declines.)
 */
static int memseg_merge_mmap(MEMSEG *a __attribute__((__unused__)), MEMSEG *b __attribute__((__unused__)))
{
    return(0);
}
/*
 * The check method for mmap MEMSEGs.  (Nothing to check.)
 */
static void memseg_check_mmap(MEMSEG *ms, uint32_t addr, uint32_t len, unsigned int prot)
{
    (void)ms;
    (void)addr;
    (void)len;
    (void)prot;
}
/*
 * The desc method for mmap MEMSEGs.
 */
static void memseg_desc_mmap(MEMSEG *ms, FILE *to)
{
    MEMSEG_PRIV_MMAP *p;
    uint32_t f;
    uint32_t m;
    /* Flag-decoding table; the last two entries decode the
     * FILE/ANON field, which is why mask and value can differ. */
    static const struct {
        uint32_t mask;
        uint32_t value;
        const char *text;
    } bits[]
#define BIT(name) { em_MAP_##name, em_MAP_##name, #name }
        = { BIT(SHARED),
            BIT(PRIVATE),
            BIT(COPY),
            BIT(FIXED),
            BIT(RENAME),
            BIT(NORESERVE),
            BIT(INHERIT),
            BIT(NOEXTEND),
            BIT(HASSEMAPHORE),
            { em_MAP_FILE|em_MAP_ANON, em_MAP_FILE, "FILE" },
            { em_MAP_FILE|em_MAP_ANON, em_MAP_ANON, "ANON" } };
    int i;
    const char *pref;

    p = ms->priv;
    fprintf(to,"mmap: %p (%d), %lu@%p, ",(void *)p,p->refcnt,(ULI)p->size,(void *)p->mapped);
    f = p->mapflags;
    /* m accumulates mask bits already decoded, so FILE/ANON prints
     * at most once; f retains any bits we don't recognize. */
    m = 0;
    pref = "";
    for (i=0;i<(sizeof(bits)/sizeof(bits[0]));i++) {
        if (bits[i].mask & m) continue;
        if ((f & bits[i].mask) == bits[i].value) {
            m |= bits[i].mask;
            f &= ~bits[i].mask;
            fprintf(to,"%s%s",pref,bits[i].text);
            pref = "|";
        }
    }
    if (f) fprintf(to,"|%#lx",(ULI)f);
#undef BIT
}
/*
 * The MEMSEGOPS for mmap MEMSEGs.
 */
static MEMSEGOPS memseg_ops_mmap = MEMSEGOPS_INIT(mmap);

static MEMSEGOPS memseg_ops_arena; // forward
/*
 * Create and return the malloc-arena MEMSEG. This does not set up any
 * MALBLOCKs; that must be handled elsewhere.
 */
static MEMSEG *memseg_new_arena(void)
{
    MEMSEG *n;
    MEMSEG_PRIV_ARENA *p;
    void *mmrv;

    mmrv = mmap(0,ARENA_SIZE,PROT_READ|PROT_WRITE,MAP_ANON|MAP_PRIVATE,-1,0);
    if (mmrv == MAP_FAILED) {
        printf("can't create malloc arena: mmap: %s\n",strerror(errno));
        top();
    }
    n = malloc(sizeof(MEMSEG));
    p = malloc(sizeof(MEMSEG_PRIV_ARENA));
    /* The arena sits ARENA_STACK_GAP below the stack region. */
    n->base = USRSTACK - MAXSSIZE - ARENA_STACK_GAP - ARENA_SIZE;
    n->size = ARENA_SIZE;
    n->end = USRSTACK - MAXSSIZE;
    n->prot = P_R | P_W | P_X;
    p->seg = n;
    p->free = 0;
    p->live = 0;
    p->old = 0;
    n->data = mmrv;
    n->ops = &memseg_ops_arena;
    n->priv = p;
    n->link = vm.m;
    vm.m = n;
    return(n);
}
/*
 * The done method for the malloc-arena MEMSEG.
 */
static void memseg_done_arena(MEMSEG *ms)
{
    MEMSEG_PRIV_ARENA *a;

    a = ms->priv;
    free_malblocks_avl(a->live);
    free_malblocks_list(a->free);
    free_malblocks_list(a->old);
    munmap(ms->data,ARENA_SIZE);
    free(a);
}
/*
 * The curtail method for the malloc-arena MEMSEG.  (Never legal.)
 */
static void memseg_curtail_arena(MEMSEG *ms, uint32_t by)
{
    (void)ms;
    (void)by;
    panic("curtailing malloc-arena memseg");
}
/*
 * The behead method for the malloc-arena MEMSEG.  (Never legal.)
 */
static void memseg_behead_arena(MEMSEG *ms, uint32_t by)
{
    (void)ms;
    (void)by;
    panic("beheading malloc-arena memseg");
}
/*
 * The split method for the malloc-arena MEMSEG.  (Never legal.)
 */
static MEMSEG *memseg_split_arena(MEMSEG *ms, uint32_t part1, uint32_t part2)
{
    (void)ms;
    (void)part1;
    (void)part2;
    panic("splitting malloc-arena memseg");
}
/*
 * The postexec method for the malloc-arena MEMSEG.  (Does not
 * survive exec().)
 */
static int memseg_postexec_arena(MEMSEG *ms)
{
    (void)ms;
    return(0);
}
/*
 * The merge method for the malloc-arena MEMSEG.  (Never legal.)
 */
static int memseg_merge_arena(MEMSEG *a, MEMSEG *b)
{
    (void)a;
    (void)b;
    panic("merging malloc-arena memsegs");
}
/*
 * The check method for the malloc-arena MEMSEG.  This function is the
 * reason the malloc-arena MEMSEG exists - and the reason MEMSEGs have
 * check methods.
 */
static void memseg_check_arena(MEMSEG *ms, uint32_t addr, uint32_t len, unsigned int prot)
{
    MALBLOCK *b;

    (void)prot;
    /* addr arrives segment-relative; make it absolute for lookup. */
    addr += ms->base;
    b = arena_find_containing(ms->priv,addr);
    /* The whole access must fall inside one live block. */
    if (b && (addr+len <= b->end)) return;
    printf("bad reference to malloc arena region [%08lx..%08lx)\n",(ULI)addr,(ULI)(addr+len));
    top();
}
/*
 * The desc method for the malloc-arena MEMSEG.
 */
static void memseg_desc_arena(MEMSEG *ms, FILE *to)
{
    (void)ms;
    fprintf(to,"arena");
}
/*
 * The MEMSEGOPS for the malloc-arena MEMSEGs.
 */
static MEMSEGOPS memseg_ops_arena = MEMSEGOPS_INIT(arena);
/*
 * Test whether a whole range of VM exists.
 *
 * This tests whether there is a mapping for each page overlapping the
 * range [addr,addr+size).
*/ static int range_exists(uint32_t addr, uint32_t size) { uint32_t a; uint32_t end; MEMSEG *ms; if (size & 0x80000000) return(0); end = addr + size; if (addr > end) return(0); a = ROUND_DOWN(addr,PAGE_SIZE); while (a < end) { ms = memseg_find(a,0,0); if (! ms) return(0); a = ms->end; } return(1); } /* * Go through vm and remove anything that overlaps with the area * described by newbase and newsize, except that ignore, if non-nil, * is a MEMSEG that is to be left untouched even if it does overlap. * This is used to punch a hole in the VM space, if necessary, to * accommodate a new MEMSEG. (This is why ignore exists, so that this * can be called after the new MEMSEG is created and linked in.) */ static void memseg_clear_conflict(uint32_t newbase, uint32_t newsize, MEMSEG *ignore) { uint32_t newend; MEMSEG *ms; MEMSEG **msp; newend = newbase + newsize; if (newsize < 1) panic("empty memseg"); if ((newbase & (PAGE_SIZE-1)) || (newsize & (PAGE_SIZE-1))) panic("misaligned memseg"); if (newend < newbase) panic("va wraparound"); msp = &vm.m; while ((ms = *msp)) { if (ms != ignore) { /* * Each existing memseg may relate to the new memseg in one of * 13 ways (nnn=new, ooo=old, ***=both): * * (a) ...ooo...nnn... * (b) ...ooonnn... * (c) ...ooo***nnn... * (d) ...ooo***... * (e) ...ooo***ooo... * (f) ...***nnn... * (g) ...******... * (h) ...******ooo... * (i) ...nnn***nnn... * (j) ...nnn***... * (k) ...nnn***ooo... * (l) ...nnnooo... * (m) ...nnn...ooo... 
*/ if ((ms->base >= newbase) && (ms->end <= newend)) { // Cases f, g, i, j: destroy old entirely *msp = ms->link; (*ms->ops->done)(ms); free(ms); continue; } else if ((ms->end <= newbase) || (ms->base >= newend)) { // Cases a, b, l, m: do nothing } else if (ms->end <= newend) { // Cases c, d: curtail old (*ms->ops->curtail)(ms,ms->end-newbase); } else if (ms->base >= newbase) { // Cases h, k: behead old (*ms->ops->behead)(ms,newend-ms->base); } else { // Case e: split old (*ms->ops->split)(ms,newbase-ms->base,ms->end-newend); } } msp = &ms->link; } } /* * Just like memseg_clear_conflict, except it never changes anything; * it just returns true if memseg_clear_conflict would have done * anything. */ static int memseg_check_conflict(uint32_t newbase, uint32_t newsize, MEMSEG *ignore) { uint32_t newend; MEMSEG *ms; newend = newbase + newsize; if (newsize < 1) panic("empty memseg"); if ((newbase & (PAGE_SIZE-1)) || (newsize & (PAGE_SIZE-1))) panic("misaligned memseg"); if (newend < newbase) panic("va wraparound"); for (ms=vm.m;ms;ms=ms->link) { if (ms != ignore) { /* * Each existing memseg may relate to the new memseg in one of * 13 ways (nnn=new, ooo=old, ***=both): * * (a) ...ooo...nnn... * (b) ...ooonnn... * (c) ...ooo***nnn... * (d) ...ooo***... * (e) ...ooo***ooo... * (f) ...***nnn... * (g) ...******... * (h) ...******ooo... * (i) ...nnn***nnn... * (j) ...nnn***... * (k) ...nnn***ooo... * (l) ...nnnooo... * (m) ...nnn...ooo... */ if ((ms->base >= newbase) && (ms->end <= newend)) return(1); else if ((ms->end <= newbase) || (ms->base >= newend)) continue; } } return(0); } /* * Destroy a memory space. This is used upon a successful exec() to * dispose of the old VM, and, during vfork, by the parent to dispose * of its copy of the child's VM. */ static void vm_destroy(VM vm) { MEMSEG *ms; while ((ms = vm.m)) { vm.m = ms->link; (*ms->ops->done)(ms); free(ms); } } /* * Used after a successful exec() to replace the old VM with the new. 
* The old VM is in vm; the new VM is passed in. First we go through * the old VM and throw out everything that shouldn't remain (which in * most cases means all of it, but mmap() with MAP_INHERIT can create * segments which survive). Then we drop the new VM on top of it. * * In case of a conflict between a surviving piece of the old space and * a piece of the new space, the new space wins. I'm not sure what * real NetBSD/sparc 1.4T does in this case; it matters only if mmap() * was given an address it must map at which overlaps part of the new * text, data, or stack. (System-selected mmap locations can't * overlap any of those.) */ static void vm_postexec(VM newvm) { MEMSEG *list; MEMSEG **tail; MEMSEG *s; list = vm.m; tail = &vm.m; while (vm.m) { s = vm.m; vm.m = s->link; if ((*s->ops->postexec)(s)) { *tail = s; tail = &s->link; } else { (*s->ops->done)(s); free(s); } } *tail = 0; while (newvm.m) { s = newvm.m; newvm.m = s->link; memseg_clear_conflict(s->base,s->size,0); s->link = vm.m; vm.m = s; } vm.dbrk = newvm.dbrk; vm_changed = 1; } /* * Create and return a new malloc MEMSEG for a given base address, * size, and protection. This is suitable for use when setting up the * address space for exec() and is currently just memseg_new_malloc() * plus memseg_clear_conflict(). * * The returned MEMSEG will always be a malloc MEMSEG. * * This does not promise anything about the contents of the new memory. */ static MEMSEG *memseg_mem(uint32_t va, uint32_t size, unsigned int prot) { MEMSEG *n; n = memseg_new_malloc(va,size,prot); memseg_clear_conflict(n->base,n->size,n); return(n); } /* * Sort a list of MEMSEGs in order of increasing addresses. We sort by * base address, but the no-overlap invariant means that the order * induced by this is equivalent to the one induced by sorting by end * address instead (or (base+end)/2, or pretty much anything else even * vaguely sensible). 
* * This is used when we're looking for holes in the address space, such * as for an mmap() with no address specified. */ static MEMSEG *sort_vm_list(MEMSEG *list) { MEMSEG *a; MEMSEG *b; MEMSEG *t; MEMSEG **lp; if (!list || !list->link) return(list); a = 0; b = 0; while (list) { t = list; list = t->link; t->link = b; b = a; a = t; } a = sort_vm_list(a); b = sort_vm_list(b); lp = &list; while (a || b) { if (a && (!b || (a->base < b->base))) { t = a; a = a->link; } else { t = b; b = b->link; } *lp = t; lp = &t->link; } *lp = 0; return(list); } /* * Sort the current VM a la sort_vm_list, above. */ static void sort_vm(void) { vm.m = sort_vm_list(vm.m); } /* * Look for a hole at least size bytes large in the VM space, whose * base address is at least base and whose end is no higher than max. * Return the base of the located space. If no such space can be * found, we panic (arguably we should complain and top()). */ static uint32_t find_space(uint32_t base, uint32_t size, uint32_t max) { MEMSEG *ms; uint32_t lastend; uint32_t rv; if (size & 0x80000000) panic("impossible find_space (size)"); if ((uint32_t)(base+size) < base) panic("impossible find_space (wrap)"); if (base+size > max) panic("impossible find_space (max)"); do <"found"> { sort_vm(); lastend = PAGE_SIZE; for (ms=vm.m;ms;ms=ms->link) { rv = (lastend < base) ? base : lastend; if ((ms->base >= base+size) && (ms->base-rv >= size)) break <"found">; lastend = ms->end; if (lastend+size > max) break; } if (lastend+size <= max) { rv = (lastend < base) ? base : lastend; break <"found">; } panic("can't find space: base %08lx size %08lx max %08lx",(ULI)base,(ULI)size,(ULI)max); } while( 0); return(rv); } /* * This is used during exec() to read something out of the file being * loaded into the address space. 
fd is our (underlying-OS) fd onto * the executable file, buf is where to read the data into, len is the * amount to read, off is the offset into the file (suitable for * passing to pread(2)), and path and what are the path to the file * and an indication of what's being read, the latter two for error * messages, if generated. * * On success this returns 0. On failure, it returns -1, with, if * TRC_EXEC tracing is turned on, an indication of what went wrong. */ static int read_exe(int fd, void *buf, int len, off_t off, const char *path, const char *what) { int rv; rv = pread(fd,buf,len,off); if (rv < 0) { if (what) trc(TRC_EXEC,"%s: %s: read: %s\n",path,what,strerror(errno)); return(-1); } if (rv == 0) { if (what) trc(TRC_EXEC,"%s: %s: read EOF\n",path,what); return(-1); } if (rv != len) { if (what) trc(TRC_EXEC,"%s: %s: read wanted %d, got %d\n",path,what,len,rv); return(-1); } return(0); } /* * Print the condition code bits to a FILE *. This is used when, for * example, printing (emulated) machine registers. */ /* The output generated doesn't really make sense otherwise... */ #if (CC_N != 8) || (CC_Z != 4) || (CC_V != 2) || (CC_C != 1) #error "print_cc assumptions invalid" #endif static void print_cc(FILE *to, unsigned int cc) { fprintf(to,"%c%c%c%c", (cc & CC_N) ? 'N' : '.', (cc & CC_Z) ? 'Z' : '.', (cc & CC_V) ? 'V' : '.', (cc & CC_C) ? 'C' : '.' ); } /* * Print the FPU condition code bits to a FILE *. This is used when, * for example, printing (emulated) machine registers. */ static void print_fcc(FILE *to, unsigned int fcc) { const char *s; switch (fcc) { case FCC_EQ: s = "EQ"; break; case FCC_LT: s = "LT"; break; case FCC_GT: s = "GT"; break; case FCC_UN: s = "UN"; break; default: abort(); break; } fprintf(to,"%s",s); } /* * Print some or all emulated machine registers. to is the FILE * to * print them to and test is a test routine indicating which registers * to print; it is passed an index into the regnames[] array. 
*/ static void print_regs(FILE *to, int (*test)(int)) { int nix; int ixv[PRINT_REGS__N]; int nr; int r; int n; int i; nix = 0; for (i=0;i= nr) fprintf(to," "); fprintf(to,"%-3s = ",regnames[i]); switch (i) { case PRINT_REGS_Y: fprintf(to,"%08lx",(ULI)s.y); break; case PRINT_REGS_PC: fprintf(to,"%08lx",(ULI)s.pc); break; case PRINT_REGS_NPC: fprintf(to,"%08lx",(ULI)s.npc); break; case PRINT_REGS_CC: print_cc(to,s.cc); break; default: if (i < PRINT_REGS_Fbase) { fprintf(to,"%08lx",(ULI)s.regs[i]); } else { fprintf(to,"%08lx",(ULI)s.fregs[i-PRINT_REGS_Fbase]); } break; } } fprintf(to,"\n"); } } /* * Set up a clean set of processor registers, including all windows. * * This is called at startup and when emulating execve(). */ static void clean_regs(void) { int i; int j; s.cc = 0; s.y = 0; for (i=32-1;i>=0;i--) s.regs[i] = 0; for (i=32-1;i>=0;i--) s.fregs[i] = 0; for (i=NWINDOWS-1;i>=0;i--) { for (j=8-1;j>=0;j--) { s.rw[i].l[j] = 0; s.rw[i].i[j] = 0; } } s.cwp = 0; s.iwp = 1; s.flags &= ~SF_FPU; } /* * Initialize emulator state on startup. */ static void setup(void) { clean_regs(); s.cc = 0; s.flags = 0; s.pc = 0; s.npc = s.pc + 4; vm = INITVM(); vfork_dropvm = INITVM(); s.instrs = 0; s.noninteractive = 0; s.lastexec = 0; mypid = getpid(); trcmgr_newpid(mypid); bpts = 0; abpts = 0; nbpts = 0; bpt_suppress = 0; vm_changed = 0; memwatches = 0; } /* * Copy data from the emulator into emulated memory. (The name was * inspired by the copyout() kernel routine, which performs the * analogous operation.) osbuf is the buffer to copy from, embuf is * the address in emulated VM to copy to, n is the number of bytes to * copy, what is what's being copied (for errors), and prefail is used * to handle cleanup in error cases: it is called if a memory * protection fault occurs, after the fault is detected and before the * error is printed and top() is called. prefail is designed to * permit things such as freeing malloc()ed temporaries. 
* * XXX prefail should go away in favour of a more general way of * wrapping err_jmp. */ static void copyout(const void *osbuf, uint32_t embuf, uint32_t n, const char *what, void (*prefail)(void *), void *pfarg) { uint32_t part; MEMSEG *ms; int left; uint32_t bp; int i; left = n; bp = embuf; while (left > 0) { ms = memseg_find(bp,0,what); if (! (ms->prot & P_W)) { if (prefail) (*prefail)(pfarg); printf("%d: %s: %08lx: not accessible\n",mypid,what,(ULI)bp); trc(TRC_ERR,"%s: %08lx: not accessible\n",what,(ULI)bp); top(); } part = ms->end - bp; if (part > left) part = left; (*ms->ops->check)(ms,bp-ms->base,part,P_W); bcopy((bp-embuf)+(const char *)osbuf,ms->data+(bp-ms->base),part); if (trc_if(TRC_MEM)) for (i=0;idata[i+bp-ms->base]); bp += part; left -= part; } } /* * Copy data from emulated memory into the emulator. (The name was * inspired by the copyin() kernel routine, which performs the * analogous operation.) osbuf is the buffer to copy into, embuf is * the address in emulated VM to copy from, n is the number of bytes * to copy, what is what's being copied (for errors), and prefail is * used to handle cleanup in error cases: it is called if a memory * protection fault occurs, after the fault is detected and before the * error is printed and top() is called. prefail is designed to * permit things such as freeing malloc()ed temporaries. * * XXX prefail should go away in favour of a more general way of * wrapping err_jmp. */ static void copyin(void *osbuf, uint32_t embuf, uint32_t n, const char *what, void (*prefail)(void *), void *pfarg) { uint32_t part; MEMSEG *ms; int left; uint32_t bp; int i; left = n; bp = embuf; while (left > 0) { ms = memseg_find(bp,0,what); if (! 
(ms->prot & P_R)) { if (prefail) (*prefail)(pfarg); printf("%d: %s: %08lx: not accessible\n",mypid,what,(ULI)bp); trc(TRC_ERR,"%s: %08lx: not accessible\n",what,(ULI)bp); top(); } part = ms->end - bp; if (part > left) part = left; (*ms->ops->check)(ms,bp-ms->base,part,P_R); bcopy(ms->data+(bp-ms->base),(bp-embuf)+(char *)osbuf,part); if (trc_if(TRC_MEM)) for (i=0;idata[i+bp-ms->base]); bp += part; left -= part; } } /* * Open an ELF file and check that it's suitable for our use (eg, that * it's for 32-bit SPARC). This is used by exec(), factored out * because it's used both for the file being exec()ed and for the * dynamic loader used by dynamically linked executables. */ static uint32_t elf_start_load(ELF_CTX *elf, int exptype, const char *exptypestr, int load_p_s) { __label__ enoexec_; void enoexec(void) { goto enoexec_; } elf->fd = open(elf->path,O_RDONLY,0); if (elf->fd < 0) return(os2em_errno(errno)); if (0) { enoexec_:; close(elf->fd); return(em_ENOEXEC); } if (read_exe(elf->fd,&elf->eh,sizeof(elf->eh),0,elf->path,"bad ELF file (can't read header)") < 0) enoexec(); if ( (elf->eh.e_ident[EI_MAG0] != ELFMAG0) || (elf->eh.e_ident[EI_MAG1] != ELFMAG1) || (elf->eh.e_ident[EI_MAG2] != ELFMAG2) || (elf->eh.e_ident[EI_MAG3] != ELFMAG3) ) { trc(TRC_EXEC,"%s: bad ELF file (bad magic number)\n",elf->path); enoexec(); } if (elf->eh.e_ident[EI_CLASS] != ELFCLASS32) { trc(TRC_EXEC,"%s: bad ELF file (class isn't 32-bit)\n",elf->path); enoexec(); } if (ELF_HALF_TO_NATIVE(elf->eh.e_machine) != EM_SPARC) { trc(TRC_EXEC,"%s: bad ELF file (machine isn't SPARC)\n",elf->path); enoexec(); } if (ELF_HALF_TO_NATIVE(elf->eh.e_type) != exptype) { trc(TRC_EXEC,"%s: bad ELF file (type isn't %s)\n",elf->path,exptypestr); enoexec(); } if (ELF_HALF_TO_NATIVE(elf->eh.e_phentsize) != sizeof(Elf32_Phdr)) { trc(TRC_EXEC,"%s: bad ELF file (phentsize isn't sizeof(Elf32_Phdr))\n",elf->path); enoexec(); } elf->entry = ELF_ADDR_TO_NATIVE(elf->eh.e_entry); if (load_p_s) { if 
(ELF_HALF_TO_NATIVE(elf->eh.e_shentsize) != sizeof(Elf32_Shdr)) { trc(TRC_EXEC,"%s: bad ELF file (shentsize isn't sizeof(Elf32_Shdr))\n",elf->path); enoexec(); } elf->phn = ELF_HALF_TO_NATIVE(elf->eh.e_phnum); elf->ph = malloc(elf->phn*sizeof(Elf32_Phdr)); if (elf->ph == 0) { trc(TRC_EXEC,"%s: bad ELF file (can't malloc %d for phdrs)\n",elf->path,(int)(elf->phn*sizeof(Elf32_Phdr))); enoexec(); } elf->shn = ELF_HALF_TO_NATIVE(elf->eh.e_shnum); elf->sh = malloc(elf->shn*sizeof(Elf32_Shdr)); if (elf->sh == 0) { trc(TRC_EXEC,"%s: bad ELF file (can't malloc %d for shdrs)\n",elf->path,(int)(elf->shn*sizeof(Elf32_Shdr))); enoexec(); } if (read_exe(elf->fd,elf->ph,elf->phn*sizeof(Elf32_Phdr),ELF_OFFSET_TO_NATIVE(elf->eh.e_phoff),elf->path,"bad ELF file (can't read phdrs)") < 0) enoexec(); if (read_exe(elf->fd,elf->sh,elf->shn*sizeof(Elf32_Shdr),ELF_OFFSET_TO_NATIVE(elf->eh.e_shoff),elf->path,"bad ELF file (can't read shdrs)") < 0) enoexec(); } return(0); } /* * Read the program headers from an ELF file and iterate over them, * calling the appropriate methods from ops for the entries of * interest. enoexec is called if something is found which should * provoke an ENOEXEC failure from exec(). 
*/ static void map_psect(ELF_CTX *elf, const PSECT_OPS *ops, void (*enoexec)(void)) { int nph; Elf32_Phdr *ph; int i; void noexec(void) { free(ph); (*enoexec)(); } nph = ELF_HALF_TO_NATIVE(elf->eh.e_phnum); ph = malloc(nph*sizeof(Elf32_Phdr)); if (read_exe(elf->fd,ph,nph*sizeof(Elf32_Phdr),ELF_ADDR_TO_NATIVE(elf->eh.e_phoff),elf->path,"bad ELF file (can't read program headers)") < 0) noexec(); for (i=nph-1;i>=0;i--) { unsigned int t; t = ELF_WORD_TO_NATIVE(ph[i].p_type); switch (t) { case PT_INTERP: (*ops->pt_interp)(elf,&ph[i],&noexec); break; case PT_LOAD: (*ops->pt_load)(elf,&ph[i],&noexec); break; case PT_PHDR: (*ops->pt_phdr)(elf,&ph[i],&noexec); break; } } } /* * The PT_INTERP handler for the main executable: read and record the * "interpreter" (really, dynamic linker) pathname in the ELF_CTX for * later. */ static void psect_pt_interp_main(ELF_CTX *elf, Elf32_Phdr *ph, void (*err)(void)) { uint32_t fsz; fsz = ELF_WORD_TO_NATIVE(ph->p_filesz); if (fsz > em_MAXPATHLEN) { trc(TRC_EXEC,"%s: bad ELF file (PT_INTERP section length %lu > max %d)\n",elf->path,(ULI)fsz,em_MAXPATHLEN); (*err)(); } if (read_exe(elf->fd,&elf->interp[0],fsz,ELF_WORD_TO_NATIVE(ph->p_offset),elf->path,"PT_INTERP") < 0) err(); elf->interp[fsz] = '\0'; trc(TRC_EXEC,"%s saved interp = %s\n",__func__,&elf->interp[0]); } /* * The PT_INTERP handler when reading the "interpreter" specified by * the main executable. Since we don't handle cascaded * "interpreter"s, this just always errors. */ static void psect_pt_interp_interp(ELF_CTX *elf, Elf32_Phdr *ph __attribute__((__unused__)), void (*err)(void)) { trc(TRC_EXEC,"%s: bad PT_INTERP file (has its own PT_INTERP)\n",elf->path); (*err)(); } /* * The PT_LOAD handler. This is used for both the main executable and * the "interpreter", since it turns out they both need the same thing * here - hence the _common naming. 
*/ static void psect_pt_load_common(ELF_CTX *elf, Elf32_Phdr *ph, void (*err)(void)) { uint32_t align; uint32_t va; uint32_t fa; uint32_t diff; uint32_t fo; uint32_t fsz; uint32_t msz; uint32_t psz; uint32_t flags; MEMSEG *ms; uint32_t filesz; trc(TRC_EXEC,"%s entry, loadbase %08lx\n",__func__,(ULI)elf->loadbase); flags = ELF_WORD_TO_NATIVE(ph->p_flags); fa = ELF_ADDR_TO_NATIVE(ph->p_vaddr); align = ELF_WORD_TO_NATIVE(ph->p_align); trc(TRC_EXEC,"flags %08lx fa %08lx align %08lx\n", (ULI)flags, (ULI)fa, (ULI)align); if (!align || (align & (align-1))) { trc(TRC_EXEC,"%s: p_align (%#lx) isn't a power of two\n",elf->path,(ULI)align); (*err)(); } filesz = ELF_WORD_TO_NATIVE(ph->p_filesz); if (align > 1) elf->loadbase = ROUND_UP(elf->loadbase,align); va = fa; if (align > 1) va = ROUND_DOWN(va,align); diff = fa - va; trc(TRC_EXEC,"fa %08lx va %08lx diff %08lx\n", (ULI)fa, (ULI)va, (ULI)diff); fo = ELF_OFFSET_TO_NATIVE(ph->p_offset) - diff; fsz = filesz + diff; msz = ELF_WORD_TO_NATIVE(ph->p_memsz) + diff; psz = ROUND_UP(msz,PAGE_SIZE); trc(TRC_EXEC,"fo %08lx fsz %08lx msz %08lx psz %08lx\n", (ULI)fo, (ULI)fsz, (ULI)msz, (ULI)psz); va += elf->loadbase; trc(TRC_EXEC,"calling memseg_mem, va %08lx psz %08lx\n", (ULI)va, (ULI)psz); ms = memseg_mem(va,psz,((flags&PF_R)?P_R:0)|((flags&PF_W)?P_W:0)|((flags&PF_X)?P_X:0)); if (read_exe(elf->fd,ms->data,fsz,fo,elf->path,"bad ELF file (can't read program segment)") < 0) (*err)(); if (fsz < psz) bzero(ms->data+fsz,psz-fsz); if (va+psz > elf->dend) elf->dend = va + psz; if ((elf->entry >= fa) && (elf->entry <= fa+filesz)) { elf->taddr = fa; if (elf->daddr == ~(uint32_t)0) elf->daddr = elf->taddr; elf->dli_interp = fa + elf->loadbase; } else { elf->daddr = fa; } } // Use psect_pt_load_common for PT_LOAD for both _main and _interp. #define psect_pt_load_main psect_pt_load_common #define psect_pt_load_interp psect_pt_load_common /* * The PT_PHDR handler for the main executable. Just record the value * for possible later use. 
*/ static void psect_pt_phdr_main(ELF_CTX *elf, Elf32_Phdr *ph, void (*err)(void) __attribute__((__unused__))) { elf->phdr = ELF_WORD_TO_NATIVE(ph->p_vaddr); trc(TRC_EXEC,"%s saved phdr = %08lx\n",__func__,(ULI)elf->phdr); } /* * The PT_PHDR handler for the dynamic linker. We do nothing here, * since it's only the main executable that PT_PHDR matters for. */ static void psect_pt_phdr_interp(ELF_CTX *elf __attribute__((__unused__)), Elf32_Phdr *ph __attribute__((__unused__)), void (*err)(void) __attribute__((__unused__))) { } /* * The PSECT_OPS structs for the main executable (main) and the * PT_INTERP "interpreter" (interp). */ static const PSECT_OPS psect_ops_main = PSECT_OPS_INIT(main); static const PSECT_OPS psect_ops_interp = PSECT_OPS_INIT(interp); /* * A print_regs test function for printing all registers. */ static int print_regs_all(int rno __attribute__((__unused__))) { return(1); } /* * Empty a STAB, preparatory to freeing it or relaoding it or the like. */ static void stab_empty(STAB *s) { free(s->syms); free(s->strs); s->syms = 0; s->nsyms = 0; s->strs = 0; } /* * Sort a vector of SYMs by value. On return, v[i].val will be <= * v[i+1].val for any i from 0 through n-2. l and h are bounds (not * necessarily tight) on the min and max val values in v. */ static void sort_syms(SYM *v, int n, uint32_t min, uint32_t max) { int a; int b; SYM t; uint32_t mid; while (1) { if (n < 2) return; if (max-min < 1) return; mid = (min + max) / 2; a = 0; b = n - 1; while (1) { while ((a < b) && (v[a].val <= mid)) a ++; while ((b > a) && (v[b].val > mid)) b --; if (a == b) break; t = v[a]; v[a] = v[b]; v[b] = t; } if (v[b].val <= mid) b ++; if (max-min == 1) return; if (n-b < b) { sort_syms(v+b,n-b,mid,max); n = b; max = mid; } else { sort_syms(v,b,min,mid); v += b; n -= b; min = mid; } } } /* * Nested function pulled out of elf_reload_symbols(). */ static void reload_syms_nosyms(void (*)(void), ELF_CTX *, const char *, ...) 
__attribute__((__format__(__printf__,3,4),__noreturn__)); static void reload_syms_nosyms(void (*throw)(void), ELF_CTX *c, const char *fmt, ...) { char *s; va_list ap; if (trc_if(TRC_EXEC)) { va_start(ap,fmt); asprintf(&s,fmt,ap); va_end(ap); trc(TRC_EXEC,"%s: no symbols loaded: %s\n",c->path,s); free(s); } (*throw)(); abort(); } /* * Load the text symbols from an ELF file; record the limits of the * text segment as well, so we can tell when to do symbol lookup. */ static void elf_reload_symbols(ELF_CTX *c) { __label__ nosyms_; int i; int j; int shx; int link; Elf32_Sym sym; char *symv; unsigned int u; unsigned int v; void nosyms_throw(void) { goto nosyms_; } #define NOSYMS(...) reload_syms_nosyms(&nosyms_throw,c,__VA_ARGS__) if (0) { nosyms_:; return; } stab_empty(&elf_stab); shx = -1; for (i=c->shn-1;i>=0;i--) { if (ELF_WORD_TO_NATIVE(c->sh[i].sh_type) == SHT_SYMTAB) { if (shx >= 0) NOSYMS("multiple SYMTAB sections"); shx = i; } } if (shx < 0) NOSYMS("no SYMTAB section found"); link = ELF_WORD_TO_NATIVE(c->sh[shx].sh_link); if (! link) NOSYMS("SYMTAB section has no link"); if (link >= c->shn) NOSYMS("SYMTAB section link out of range"); i = ELF_WORD_TO_NATIVE(c->sh[link].sh_size); elf_stab.strslen = i; elf_stab.strs = malloc(i+1); if (! elf_stab.strs) NOSYMS("can't malloc(%d) for symbol strings",i); if (read_exe(c->fd,elf_stab.strs,i,ELF_WORD_TO_NATIVE(c->sh[link].sh_offset),c->path,0) < 0) NOSYMS("can't read symbol strings"); elf_stab.strs[i] = '\0'; i = ELF_WORD_TO_NATIVE(c->sh[shx].sh_size); if (i % sizeof(Elf32_Sym)) { trc(TRC_EXEC,"%s: warning: symtab size %d isn't a multiple of symbol size %d\n",c->path,i,(int)sizeof(Elf32_Sym)); } i /= sizeof(Elf32_Sym); elf_stab.nsyms = i; symv = malloc(i*sizeof(Elf32_Sym)); if (! symv) NOSYMS("can't malloc(%d) for file symbol table",i*(int)sizeof(Elf32_Sym)); elf_stab.syms = malloc(i*sizeof(SYM)); if (! 
elf_stab.syms) NOSYMS("can't malloc(%d) for in-core symbol table",i*(int)sizeof(SYM)); if (read_exe(c->fd,symv,i*sizeof(Elf32_Sym),ELF_WORD_TO_NATIVE(c->sh[shx].sh_offset),c->path,0) < 0) NOSYMS("can't read symbols"); elf_stab.textbeg = c->taddr; elf_stab.textend = c->daddr; j = 0; for (i=0;i= elf_stab.textbeg) && (v < elf_stab.textend) ) { elf_stab.syms[j].name = elf_stab.strs + u; elf_stab.syms[j].val = v; j ++; } } elf_stab.nsyms = j; sort_syms(&elf_stab.syms[0],elf_stab.nsyms,elf_stab.textbeg,elf_stab.textend); trc(TRC_EXEC,"symbols loaded, count=%d\n",elf_stab.nsyms); free(symv); #undef NOSYMS } /* * Lookup a function name by address. */ static SYM *lookup_fxn(uint32_t addr) { int l; int h; int m; l = -1; h = elf_stab.nsyms; while (h-l > 1) { m = (h + l) / 2; if (elf_stab.syms[m].val <= addr) l = m; if (elf_stab.syms[m].val >= addr) h = m; } return((l==h)?&elf_stab.syms[m]:0); } /* * Failure function for places where we want to free two things. */ static void free2(void *vv) { free(((void **)vv)[0]); free(((void **)vv)[1]); } /* * Try to exec xpath as an ELF executable. argvstrs and envpstrs, with * narg and nenv as their respective counts, are the argument and * environment vectors, already read out of emulated memory. * * Returns an emulated-system errno on failure, zero on success. */ static int try_exec_elf(const char *xpath, const char **argvstrs, int narg, const char **envpstrs, int nenv) { __label__ enoexec_; int i; int l; MEMSEG *ms; uint32_t argv; uint32_t envp; uint32_t *argvv; uint32_t *envpv; uint32_t ps_strings; int stacklen; uint32_t sfp; ELF_CTX ctx; uint32_t ee; void *freev[2]; void enoexec(void) { goto enoexec_; } #define CTX_DONE() do { close(ctx.fd); free(ctx.ph); free(ctx.sh); } while (0) trc(TRC_EXEC,"%s %s\n",__func__,xpath); trc(TRC_EXEC,"narg %d\n",narg); for (i=0;idata,MAXSSIZE); /* * For dynamically-linked executables, we need Aux32Info structs as * well. 
The stack has to be laid out with sp pointing to argc, * followed by argc+1 pointers to arg strings (and a trailing nil), * then envp pointers (terminated by a trailing nil), then Aux32Info * structs (terminated by one with a_type set to AT_NULL). After * that comes whatever else - which here means the argv and envp * strings, the stack gap, the signal-delivery trampoline, and * ps_strings. * * For the sake of comparisons, we want our stack layout to exactly * match the kernel's. In aid of this, we do things in a slightly * strange order, so as to exactly match the kernel's computations. */ vm.dbrk = ctx.dend; stacklen = 0; for (i=nenv-1;i>=0;i--) stacklen += strlen(envpstrs[i]) + 1; for (i=narg-1;i>=0;i--) stacklen += strlen(argvstrs[i]) + 1; stacklen = ROUND_UP(stacklen,8); stacklen = ((narg+nenv+2)*4) + // argv/envp pointers (8 * 2 * 4) + // Aux32Info structs 4 + // not sure stacklen + // argv/envp strings STACKGAPLEN + // stack gap SZSIGCODE + // signal trampoline 16; // ps_strings stacklen = ROUND_UP(stacklen,8); sfp = USRSTACK - stacklen; s.regs[R_SP] = sfp; // argc mem_set_4(sfp,narg); // argv/envp strings argvv = malloc((narg+1)*sizeof(uint32_t)); argvv[narg] = 0; envpv = malloc((nenv+1)*sizeof(uint32_t)); envpv[nenv] = 0; freev[0] = argvv; freev[1] = envpv; sfp = s.regs[R_SP] + (1 + narg + nenv + 2 + 16) * 4; for (i=0;i= nl) { trc(TRC_EXEC,"%s: no shell name present on #! line\n",path); enoexec(); } for (sh1=sh0;(sh1=0;i--) free(newargv[i]); free(newargv); return(e); } /* * Try to exec a program. We first try to handle it as an ELF * executable; if that fails, we then try it as a #! script. If that * fails too, we return failure. * * Returns 0 on success or an (emulated-OS) errno on failure. 
*/ static int do_execve(const char *path, const char **argvstrs, const char **envpstrs) { int e; int narg; int nenv; struct stat stb; for (narg=0;argvstrs[narg];narg++) ; for (nenv=0;envpstrs[nenv];nenv++) ; if (stat(path,&stb) < 0) return(os2em_errno(errno)); // To fully support running as non-root this needs to change. if (! (stb.st_mode & 0111)) return(em_EACCES); if ((stb.st_mode & S_IFMT) != S_IFREG) return(em_EACCES); e = try_exec_elf(path,argvstrs,narg,envpstrs,nenv); if (e) e = try_exec_script(path,argvstrs,narg,envpstrs,nenv); return(e); } /* * Do the initial exec in accordance with the command-line args. */ static void initial_exec(void) { const char **argv; int i; const char **envp; int e; argv = malloc((cl_nargs+1)*sizeof(const char *)); argv[cl_nargs] = 0; for (i=cl_nargs-1;i>=0;i--) argv[i] = cl_args[i]; envp = malloc((cl_nenvp+1)*sizeof(const char *)); envp[cl_nenvp] = 0; for (i=cl_nenvp-1;i>=0;i--) envp[i] = cl_envp[i]; e = do_execve(exe,argv,envp); postexec = 1; if (e) { printf("Initial exec failed %d (%s)\n",e,em_strerror(e)); free(argv); initial_exec_state = IES_FAILED; top(); } free(argv); initial_exec_state = IES_WORKED; } /* * This is called when an operation that potentially needs vfork fixup * is done from a vforked child process. This adds a VFORKBACKOUT * record, so the parent can do the fixup. */ static void add_vfork_backout(VFBKIND k, uint32_t emfd, FD fd) { VFORKBACKOUT *b; if (during_vfork < 1) return; trc(TRC_VFORK,"adding vfork backout: kind=%d(%s) level=%d emfd=%lu fd fd=%d prot=%u flags=%u\n",(int)k,vfb_kind_str(k),during_vfork,(ULI)emfd,fd.fd,fd.prot,fd.flags); b = malloc(sizeof(VFORKBACKOUT)); b->kind = k; b->emfd = emfd; b->fd = fd; b->level = during_vfork; if (k == VFB_TRCMGR) { b->link = vfbtm; vfbtm = b; } else { b->link = vfb; vfb = b; } } /* * Used in a vfork()ed child after exec, to clean up backout records * made between vforking and execing. during_vfork is cleared * elsewhere. 
*/ static void flush_vfork_backout(void) { VFORKBACKOUT *b; while ((b = vfb)) { vfb = b->link; free(b); } while ((b = vfbtm)) { vfbtm = b->link; free(b); } } /* * Do post-vfork cleanup in the parent process. Because * add_vfork_backout pushes things onto vfb, doing things this way * does them last-done first-restored, which is exactly what we want. * * This does just one list; see vfork_cleanup(), below. */ static void vfork_cleanup_list(VFORKBACKOUT **rootp) { VFORKBACKOUT *b; FD *fd; VFORKBACKOUT *root; root = *rootp; while (root && (root->level > during_vfork)) { b = root; root = b->link; switch (b->kind) { default: panic("impossible kind %d in %s",(int)b->kind,__func__); break; case VFB_OPEN: /* * The file was opened during the vfork. The OS's file * descriptor went away with the switch back to the parent, * but the FD is still around. Fix that. */ trc(TRC_VFORK,"backing out OPEN: emfd=%lu\n",(ULI)b->emfd); if (b->emfd >= nfds) panic("impossible VFB_OPEN backout 1"); fd = fds[b->emfd]; if (! fd) panic("impossible VFB_OPEN backout 2"); fds[b->emfd] = 0; free(fd); break; case VFB_CLOSE: /* * The file was closed during the vfork. The switch back to * the parent has resurrected the file descriptor, but the * FD is still nonexistent. Fix that. */ trc(TRC_VFORK,"backing out CLOSE: emfd=%lu\n",(ULI)b->emfd); if (b->emfd >= nfds) panic("impossible VFB_CLOSE backout 1"); if (fds[b->emfd]) panic("impossible VFB_CLOSE backout 2"); fd = malloc(sizeof(FD)); *fd = b->fd; fds[b->emfd] = fd; break; case VFB_DUP2: /* * A dup2() was done during the vfork. The file descriptors * have been restored, but the FD needs updating - its fd is * correct, but its prot needs restoring. */ trc(TRC_VFORK,"backing out DUP2: emfd=%lu\n",(ULI)b->emfd); if (b->emfd >= nfds) panic("impossible VFB_DUP2 backout 1"); fd = fds[b->emfd]; if (! fd) panic("impossible VFB_DUP2 backout 2"); fd->prot = b->fd.prot; break; case VFB_TRCMGR: /* * Clean up the tracing manager connection fd. 
Unusually, we * have to log _after_ doing the work, because, until this * trcmgr_set_fd happens, logging is broken. */ trcmgr_set_fd(b->fd.fd); trc(TRC_VFORK,"cleaning up tracing: fd=%d\n",b->fd.fd); break; } free(b); } *rootp = root; } /* * Do post-vfork cleanup in the parent process. This calls * vfork_cleanup_list to clean up TRCMGR entries first, then for all * other entries. */ static void vfork_cleanup(void) { vfork_cleanup_list(&vfbtm); vfork_cleanup_list(&vfb); } /* * Open a new (emulated) file descriptor. osfd is the underlying OS * file desciprtor. minfd is the point in the open file table that * the search for an available fd should start. rw is P_R and/or P_W, * indicating how the descriptor should be opened. * * This makes the add_vfork_backout() call for the new fd. */ static int new_fd(int osfd, int minfd, unsigned int rw) { int d; FD *fd; int i; for (d=minfd;(d MAXFDS) { printf("Out of fds\n"); top(); } if (d >= nfds) { i = nfds; nfds = d + 8; fds = realloc(fds,nfds*sizeof(*fds)); for (;ifd = osfd; fd->prot = rw & (P_R | P_W); fd->flags = 0; add_vfork_backout(VFB_OPEN,d,*fd); return(d); } /* * Set up the initial file descriptors during startup. */ static void init_fds(void) { int i; int d; struct stat stb; nfds = 0; fds = 0; during_vfork = 0; vfb = 0; vfbtm = 0; for (i=2;i>=0;i--) { if (fstat(i,&stb) >= 0) { d = dup(i); new_fd(d,i,(i==0)?P_R:P_W); } } } static int siglogfd; // This needs to be reentrant and signal-safe.... static void sig_log(const char *fmt, ...) 
{ va_list ap; const char *fp; char obuf[256]; int ox; int v_int; uint32_t v_u32; void gen(char c) { if (ox < 255) obuf[ox++] = c; } void gen_u_dec(unsigned long long int v) { if (v >= 10) gen_u_dec(v/10); gen("0123456789"[v%10]); } if (siglogfd < 0) return; ox = 0; va_start(ap,fmt); for (fp=fmt;*fp;fp++) { if (*fp == '~') { switch (*++fp) { case '\0': panic("sig_log: format ends with ~"); break; default: panic("sig_log: unrecognized format ~%c",*fp); break; case 'd': v_int = va_arg(ap,int); if (v_int < 0) { gen('-'); gen_u_dec((unsigned int)-v_int); } else { gen_u_dec((unsigned int)v_int); } break; case '3': v_u32 = va_arg(ap,uint32_t); gen_u_dec(v_u32); break; } } else { gen(*fp); } } va_end(ap); write(siglogfd,&obuf[0],ox); } /* * This is the signal handler routine for all signals we catch. We * install this handler for all signals we can; it just records them * in s.sigpend[] and then sets anysigpend and alert_run so they'll be * noticed at the next emulated instruction boundary. */ static void catch_signal(int sig) { uint32_t emsig; sig_log("catch_signal: sig = ~d\n",sig); emsig = os2em_signal(sig); if (emsig == 0) { sig_log("catch_signal: no em sig\n"); return; } if (emsig >= em__NSIG) panic("handling impossible emsig %lu",(ULI)emsig); sig_log("setting sigpend[~3], anysigpend, and alert_run\n",emsig); s.sigpend[emsig] = 1; anysigpend = 1; alert_run = 1; } /* * Set our catcher for a given signal. This is used both during * initial signal setup and when (un)ignoring when the emulated * program changes their settings. */ static void set_our_catcher(int ossig, void (*catcher)(int)) { struct sigaction sa; sa.sa_handler = catcher; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; sigaction(ossig,&sa,0); } /* * Set up signals on initial startup. The only thing of note here is * that we skip trying to install handlers for not just SIGKILL and * SIGSTOP (which we're not allowed to do anything with) but also * SIGSEGV. 
 * This is because the sort of memory fault that leads to
 * SIGSEGV on real hardware is, here, instead noticed by the memory
 * access code.
 */
static void init_signals(void)
{
 int i;
 int ossig;

 // Start with nothing blocked, nothing pending, nothing ignored.
 s.sigmask = 0;
 anysigpend = 0;
 s.ignsigs = 0;
 for (i=em__NSIG-1;i>=1;i--)
  { // Default emulated disposition for every signal.
    s.sigh[i].handler = em_SIG_DFL;
    s.sigh[i].flags = 0;
    s.sigpend[i] = 0;
    switch (i)
     { case em_SIGKILL:
       case em_SIGSTOP:
       case em_SIGSEGV:
	  // Can't (KILL, STOP) or deliberately don't (SEGV - see the
	  // block comment above) install our catcher for these.
	  continue;
	  break;
     }
    ossig = em2os_signal(i);
    if (sigdef[i] == SIGDEF_IGNORE)
     { // Default-ignored signals are recorded in the ignsigs bitmask
       // and left at the OS default rather than caught.
       s.ignsigs |= 1ULL << i;
       set_our_catcher(ossig,SIG_DFL);
     }
    else
     { set_our_catcher(ossig,&catch_signal);
     }
  }
 // No emulated signal stack initially.
 s.onsigstack = 0;
 s.sigstack_enabled = 0;
 s.sigstack_base = 0;
 s.sigstack_size = 0;
 // Fd for the signal-safe logger; on open() failure it's -1 and
 // sig_log becomes a no-op.
 siglogfd = open("/sparc.sig.log",O_WRONLY|O_APPEND,0);
}

/*
 * Sign-extend a value.  v is the value and bits is the number of
 * significant bits in it, including the sign bit.  For example,
 * signextend(9,4) will return 0xfffffff9, while signextend(9,5) will
 * return 0x00000009.
 *
 * This assumes two's-complement representation; this is correct for
 * the emulated machine, and this is for emulator use.
 */
static uint32_t signextend(uint32_t v, int bits)
{
 if ((v >> (bits-1)) & 1)
  { // Sign bit set: force all bits above the field to 1.
    v |= (~(uint32_t)0) << bits;
  }
 else
  { // Sign bit clear: force all bits above the field to 0.
    v &= ~((~(uint32_t)0) << bits);
  }
 return(v);
}

/*
 * Report an unimplemented opcode.  Just print the details and throw
 * out.
 */
static void unimp(uint32_t xa, uint32_t inst)
{
 FILE *f;

 // Write to stdout, tee'd into the instruction trace if that's on.
 if (trc_if(TRC_INSTR))
  { f = fwrap_tee(stdout,trc_f(TRC_INSTR),(FILE *)0);
  }
 else
  { f = fwrap_tee(stdout,(FILE *)0);
  }
 fprintf(f,"Unimplemented: at %08lx inst=%08lx\n",(ULI)xa,(ULI)inst);
 // Print the instruction's fields decoded both ways, since we can't
 // tell which layout the unimplemented word was meant as.
 fprintf(f," OPC=%d OP2=%d DREG=%d A=%d COND=%d IMM22=%d, DISP22=%d\n",
	(int)OPC(inst), (int)OP2(inst), (int)DREG(inst), (int)A(inst),
	(int)COND(inst), (int)IMM22(inst), (int)DISP22(inst));
 fprintf(f," OP3=%d SREG1=%d SREG2=%d I=%d ASI=%d SIMM13=%d OPF=%d\n",
	(int)OP3(inst), (int)SREG1(inst), (int)SREG2(inst), (int)I(inst),
	(int)ASI(inst), (int)SIMM13(inst), OPF(inst));
 fclose(f);
 top();
}

/*
 * Return the next window number after v in the `save' direction.  That
 * is, if CWP were v, after a save it would be cwp_s(v).
 */
static unsigned int cwp_s(unsigned int v)
{
 // Decrement mod NWINDOWS (the ?: handles the wrap at 0).
 return((v?:NWINDOWS)-1);
}

/*
 * Return the next window number after v in the `restore' direction.
 * That is, if CWP were v, after a restore it would be cwp_r(v).
 */
static unsigned int cwp_r(unsigned int v)
{
 // Increment mod NWINDOWS.
 return((v==NWINDOWS-1)?0:(v+1));
}

/*
 * Save the current window's registers into s.rw[].
 *
 * Note the %o registers: per the window-overlap diagram below, the
 * current window's %o's live in the %i slots of the next window in
 * the save direction.
 */
static void save_cwindow(void)
{
 bcopy(&s.regs[R_L0],&s.rw[s.cwp].l[0],8*sizeof(uint32_t));
 bcopy(&s.regs[R_I0],&s.rw[s.cwp].i[0],8*sizeof(uint32_t));
 bcopy(&s.regs[R_O0],&s.rw[cwp_s(s.cwp)].i[0],8*sizeof(uint32_t));
}

/*
 * Load the current window's registers from s.rw[].
 *
 * Exact inverse of save_cwindow(), including the %o-from-next-window
 * overlap.
 */
static void load_cwindow(void)
{
 bcopy(&s.rw[s.cwp].l[0],&s.regs[R_L0],8*sizeof(uint32_t));
 bcopy(&s.rw[s.cwp].i[0],&s.regs[R_I0],8*sizeof(uint32_t));
 bcopy(&s.rw[cwp_s(s.cwp)].i[0],&s.regs[R_O0],8*sizeof(uint32_t));
}

/*
 * Spill a window's worth of registers to the stack.
 *
 * This code doesn't need to test whether any of the registers affected
 * are in s.regs[] instead of s.rw[].  At first sight it appears to,
 * but it's obviously unnecessary when you note that it's never called
 * except after a save_cwindow() and before load_cwindow() (or
 * equivalent) - so that all register values are in s.rw[].
(Some of
 * them are also in s.regs[], but that doesn't matter; the ones in
 * s.regs[] will be replaced before we use them for anything.)
 *
 * Conceptually, we have
 *
 *	                  |         |
 *	                  +-  ...  -+
 *	                  |         |
 *	                  +---------+
 *	                  |         | %i0-%i7 \
 *	                  +-  CWP  -+          \
 *	                  |         | %l0-%l7   > current window
 *	                  +---------+          /
 *	       / %i0-%i7  |         | %o0-%o7 /
 *	      /           +- CWP-1 -+
 *	invalid window <  %l0-%l7 |         |
 *	      \           +---------+
 *	       \ %o0-%o7  |         | %i0-%i7 \
 *	                  +- CWP-2 -+          \
 *	                  |         | %l0-%l7   > being spilled
 *	                  +---------+          /
 *	       / %i0-%i7  |         | %o0-%o7 /
 *	      /           +- CWP-3 -+
 *	has %i6 used   <  %l0-%l7 |         |
 *	      \           +---------+
 *	       \ %o0-%o7  |         |
 *	                  +-  ...  -+
 *	                  |         |
 *
 * If NWINDOWS is at least 4, none of these overlap.  If NWINDOWS is 3,
 * the "has %i6 used" window is the current window, but that doesn't
 * hurt anything.  If NWINDOWS is only 2, the being-spilled window is
 * the current window and the has-%i6-used window is the invalid
 * window, but everything still has the correct values in s.rw[].
 *
 * NWINDOWS does need to be at least 2, though.  (It has to be anyway
 * in order that the current window and the invalid window be
 * distinct.)
 */
#if NWINDOWS < 2
#error "Must have at least two register windows!"
#endif
static void spill_window(int w)
{
 uint32_t sp;
 int i;

 // Window w's %sp is its %o6, which lives as %i6 of the next window
 // in the save direction.
 sp = s.rw[cwp_s(w)].i[6]; // %o6 of window w
 if (sp & 3)
  { // A misaligned %sp would fault on real hardware; give up loudly.
    fprintf(stderr,"window spill: %%sp low two bits are %d%d\n",(int)((sp>>1)&1),(int)(sp&1));
    top();
  }
 trc(TRC_WINDOW,"saving window %d to %08lx\n",w,(ULI)sp);
 // Standard SPARC frame layout: %l0-%l7 at sp+0, %i0-%i7 at sp+32.
 for (i=0;i<8;i++) mem_set_4(sp+(i*4),s.rw[w].l[i]);
 for (i=0;i<8;i++) mem_set_4(sp+32+(i*4),s.rw[w].i[i]);
}
/*
 * Fill a window's worth of registers from the stack.  Inverse of
 * spill_window(); same frame layout.
 */
static void fill_window(int w)
{
 uint32_t sp;
 int i;

 sp = s.rw[cwp_s(w)].i[6]; // %o6 of window w
 if (sp & 3)
  { fprintf(stderr,"window fill: %%sp low two bits are %d%d\n",(int)((sp>>1)&1),(int)(sp&1));
    top();
  }
 trc(TRC_WINDOW,"restoring window %d from %08lx\n",w,(ULI)sp);
 for (i=0;i<8;i++) s.rw[w].l[i] = mem_get_4(sp+(i*4));
 for (i=0;i<8;i++) s.rw[w].i[i] = mem_get_4(sp+32+(i*4));
}
/*
 * Do a window save.  On real hardware, the hardware just traps to the
 * kernel upon attempting to save (or restore) into an invalid window.
 * We are a userland-only emulator, so we do the stuff the kernel
 * does: we implement window spills on save and fills on restore when
 * attempting to shift into the invalid window.  ("The" invalid
 * window, not "an" invalid window - unlike the hardware, which has a
 * bitmask, we have exactly one invalid window at any given time.)
 */
static void window_save(void)
{
 int i;
 int j;

 save_cwindow();
 i = cwp_s(s.cwp);
 if (i == s.iwp)
  { // Saving into the invalid window: spill the window beyond it,
    // advance the invalid pointer, and hand the new window fresh
    // (zeroed) locals/outs with the old outs as its ins.
    j = cwp_s(i);
    spill_window(j);
    s.iwp = j;
    s.cwp = i;
    bcopy(&s.regs[R_O0],&s.regs[R_I0],8*sizeof(uint32_t));
    bzero(&s.regs[R_L0],8*sizeof(uint32_t));
    bzero(&s.regs[R_O0],8*sizeof(uint32_t));
  }
 else
  { s.cwp = i;
    load_cwindow();
  }
}
/*
 * Flush all valid windows, except the current one, to memory.
 *
 * The kernel implements this by doing NWINDOWS-1 saves, then
 * NWINDOWS-1 restores.  We just spill_window() each window between
 * cwp and iwp - in the restore direction; the ones between cwp and
 * iwp in the save direction aren't valid.  We don't write cwp; we
 * reset iwp to cwp_r(cwp) to match the state the kernel leaves things
 * in.
 *
 * We could do what the kernel does and do multiple window_save()s and
 * window_restore()s.  But we'd have to keep copying between %sp and
 * %fp, and that also pays the price of save_cwindow/load_cwindow
 * repeatedly.
 *
 * sc___sigreturn14 assumes this does a save_cwindow().
 */
static void window_flush(void)
{
 int i;

 save_cwindow();
 for (i=cwp_s(s.iwp);i!=s.cwp;i=cwp_s(i)) spill_window(i);
 s.iwp = cwp_r(s.cwp);
 // Don't need to load_cwindow(); s.rw[] haven't changed.
}
/*
 * Do a window restore.  See window_save for further comments.
 */
static void window_restore(void)
{
 int i;

 save_cwindow();
 i = cwp_r(s.cwp);
 if (i == s.iwp)
  { // Restoring into the invalid window: fill it from the stack and
    // advance the invalid pointer.
    s.iwp = cwp_r(i);
    fill_window(i);
  }
 s.cwp = i;
 load_cwindow();
}
/*
 * Implement addcc: compute a+b and return the result, affecting the
 * condition codes correspondingly.
*/ static uint32_t addcc(uint32_t a, uint32_t b) { uint64_t v; v = (uint64_t)a + (uint64_t)b; s.cc = ((v & 0x80000000) ? CC_N : 0) | (((uint32_t)v == 0) ? CC_Z : 0) | ((0x80000000&(a^v)&~(a^b)) ? CC_V : 0) | ((v & 0x100000000ULL) ? CC_C : 0); return(v); } /* * Implement subcc: compute a-b and return the result, affecting the * condition codes correspondingly. */ static uint32_t subcc(uint32_t a, uint32_t b) { uint64_t v; v = (uint64_t)a - (uint64_t)b; s.cc = ((v & 0x80000000) ? CC_N : 0) | (((uint32_t)v == 0) ? CC_Z : 0) | (((a^b)&(v^a)&0x80000000) ? CC_V : 0) | ((v & 0x100000000ULL) ? CC_C : 0); return(v); } /* * Implement addxcc: compute a+b+c and return the result, affecting the * condition codes correspondingly. c must be 0 or 1. */ static uint32_t addxcc(uint32_t a, uint32_t b, uint32_t c) { uint64_t v; v = (uint64_t)a + (uint64_t)b + (uint64_t)c; s.cc = ((v & 0x80000000) ? CC_N : 0) | (((uint32_t)v == 0) ? CC_Z : 0) | ((0x80000000&(a^v)&~(a^b)) ? CC_V : 0) | ((v & 0x100000000ULL) ? CC_C : 0); return(v); } /* * Implement subxcc: compute a-b-c and return the result, affecting the * condition codes correspondingly. c must be 0 or 1. */ static uint32_t subxcc(uint32_t a, uint32_t b, uint32_t c) { uint64_t v; v = (uint64_t)a - (uint64_t)b - (uint64_t)c; s.cc = ((v & 0x80000000) ? CC_N : 0) | (((uint32_t)v == 0) ? CC_Z : 0) | (((a^b)&(v^a)&0x80000000) ? CC_V : 0) | ((v & 0x100000000ULL) ? CC_C : 0); return(v); } /* * Implement sra: compute v>>n, where the shift is arithmetic, and * return the result. */ static uint32_t sra(uint32_t v, uint32_t n) { n &= 31; if (v & 0x80000000) { v = ~((~v) >> n); } else { v >>= n; } return(v); } /* * Implement andcc: compute a&b and return the result, affecting the * condition codes correspondingly. */ static uint32_t andcc(uint32_t a, uint32_t b) { uint32_t v; v = a & b; s.cc = ((v & 0x80000000) ? CC_N : 0) | ((v == 0) ? 
CC_Z : 0); return(v); } /* * Implement orcc: compute a|b and return the result, affecting the * condition codes correspondingly. */ static uint32_t orcc(uint32_t a, uint32_t b) { uint32_t v; v = a | b; s.cc = ((v & 0x80000000) ? CC_N : 0) | ((v == 0) ? CC_Z : 0); return(v); } /* * Implement xorcc: compute a^b and return the result, affecting the * condition codes correspondingly. */ static uint32_t xorcc(uint32_t a, uint32_t b) { uint32_t v; v = a ^ b; s.cc = ((v & 0x80000000) ? CC_N : 0) | ((v == 0) ? CC_Z : 0); return(v); } /* * Implement andncc: compute a&~b and return the result, affecting the * condition codes correspondingly. */ static uint32_t andncc(uint32_t a, uint32_t b) { uint32_t v; v = a & ~b; s.cc = ((v & 0x80000000) ? CC_N : 0) | ((v == 0) ? CC_Z : 0); return(v); } /* * Implement xnorcc: compute a^~b and return the result, affecting the * condition codes correspondingly. */ static uint32_t xnorcc(uint32_t a, uint32_t b) { uint32_t v; v = a ^ ~b; s.cc = ((v & 0x80000000) ? CC_N : 0) | ((v == 0) ? CC_Z : 0); return(v); } /* * Implement (integer) conditional branch instructions. Except for * branch-always (8) and branch-never (0), which are special-cased, * just look up the condition in conds[] and pick out the bit * corresponding to the current state of the condition codes, using * that to either branch or not. We special-case 0 and 8 because the * handling of the annul bit is backwards for those two. */ // The ">> (s.cc & 15)" part below depends on this.... #if (CC_N | CC_Z | CC_V | CC_C) != 15 #error "cbranch assumptions wrong" #endif static void cbranch(int cond, int annul, uint32_t to) { switch (cond) { case 8: s.npc = to; /* fall through */ case 0: if (annul) s.flags |= SF_ANNUL; break; default: if ((conds[cond&15] >> (s.cc & 15)) & 1) { s.npc = to; } else { if (annul) s.flags |= SF_ANNUL; } break; } } /* * Implement floating-point conditional branch instructions. 
Except * for branch-always (8) and branch-never (0), which are * special-cased, just look up the condition in fconds[] and pick out * the bit corresponding to the current state of the FPU condition * codes, using that to either branch or not. We special-case 0 and 8 * because the handling of the annul bit is backwards for those two. */ // The ">> (s.fcc & 3)" below depends on this.... #if (FCC_EQ < 0) || (FCC_EQ > 3) ||\ (FCC_LT < 0) || (FCC_LT > 3) ||\ (FCC_GT < 0) || (FCC_GT > 3) ||\ (FCC_UN < 0) || (FCC_UN > 3) #error "fcbranch assumptions invalid" #endif static void fcbranch(int cond, int annul, uint32_t to) { switch (cond) { case 8: s.npc = to; /* fall through */ case 0: if (annul) s.flags |= SF_ANNUL; break; default: if ((fconds[cond&15] >> (s.fcc & 3)) & 1) { s.npc = to; } else { if (annul) s.flags |= SF_ANNUL; } break; } } /* * Load a NUL-terminated string out of emulated memory (at ptr) and * return a malloc()ed copy of it in emulator memory. This also sets * up the NULTERM_STATUS for later cleanup when the string is no * longer needed. * * We frob nomemacc to ensure we read the memory exactly once as far as * TRC_MEM is concerned. * * XXX Possible bug lurking: what if mem_get_1 errors and throws out? * If nothing else, we'll leave nomemacc incremented. */ static const char *nulterm_scarg(uint32_t ptr, NULTERM_STATUS *nts) { int l; unsigned char *s; int i; l = 0; while (mem_get_1(ptr+l)) l ++; s = malloc(l+1); nomemacc ++; for (i=0;i<=l;i++) s[i] = mem_get_1(ptr+i); nomemacc --; nts->tofree = s; return(s); } /* * Do any appropriate cleanup for a NUL-terminated string copied out of * emulator memory once it's no longer needed. */ static void nulterm_done(NULTERM_STATUS *nts) { free(nts->tofree); } /* * Fetch a 32-bit syscall argument. Narrower arguments are padded; * wider arguments are treated as multiple 32-bit arguments. 
This
 * fetches out of register shadows or memory depending on the argument
 * number and how many arguments are in registers, as described by the
 * SCARGS.
 */
uint32_t scarg(SCARGS *args, int n)
{
 if (n < 0) panic("impossible scarg");
 if (n < args->nreg) return(args->regs[n]);
 // Beyond the register arguments: read from the caller's stack.
 // The 23-word offset presumably skips the register-save area and
 // hidden-parameter slot of the SPARC frame — TODO confirm against
 // the syscall stub ABI.
 if (! args->sp) panic("scarg overrun");
 return(mem_get_4(args->sp+((23+n-args->nreg)*4)));
}
/*
 * Copy from/fromlen, in emulator memory, to to/tolen, in emulated
 * memory.  If fromlen is longer, this truncates; if tolen is longer,
 * this NUL-pads.  what is a text description of what's being copied,
 * in case an error message is generated.  prefail is called on any
 * failure, before throwing out, as described in the comment on
 * copyout().
 */
static void copy_or_nulpad(const void *from, int fromlen, uint32_t to, int tolen, const char *what, void (*prefail)(void *), void *pfarg)
{
 int o;
 int n;

 if (fromlen < tolen)
  { copyout(from,to,fromlen,what,prefail,pfarg);
    // Pad the remainder with NULs, a nulbuf-sized chunk at a time.
    o = fromlen;
    while (o < tolen)
     { n = sizeof(nulbuf);
       if (n > tolen-o) n = tolen - o;
       copyout(&nulbuf[0],to+o,n,what,prefail,pfarg);
       o += n;
     }
  }
 else
  { copyout(from,to,tolen,what,prefail,pfarg);
  }
}
/*
 * Build a SPARC PSR value from our state.  Only some bits are
 * emulated: the condition codes and the "FPU used" bit.  The other
 * bits of the PSR are not emulated.  This is used, eg, when building
 * a sigcontext structure for signal delivery.
 */
static uint32_t build_psr(void)
{
 uint32_t v;

 // When our internal cc bit layout matches the PSR's icc field, just
 // use it directly; otherwise translate bit by bit.
#if (CC_N == em_PSR_CC_N) &&\
    (CC_Z == em_PSR_CC_Z) &&\
    (CC_V == em_PSR_CC_V) &&\
    (CC_C == em_PSR_CC_C)
 v = s.cc;
#else
 v = ((s.cc & CC_N) ? em_PSR_CC_N : 0) |
	((s.cc & CC_Z) ? em_PSR_CC_Z : 0) |
	((s.cc & CC_V) ? em_PSR_CC_V : 0) |
	((s.cc & CC_C) ? em_PSR_CC_C : 0);
#endif
 v <<= em_PSR_CC_S;
 if (s.flags & SF_FPU) v |= em_PSR_EF;
 return(v);
}
/*
 * Build a SPARC FSR value from our state.  Only some bits are
 * emulated, notably the condition codes.
*/ static uint32_t build_fsr(void) { return( (em_FSR_RD_NEAREST << em_FSR_RD_S) | (0 << em_FSR_TEM_S) | // no em_FSR_NS (0 << em_FSR_VER_S) | (0 << em_FSR_FTT_S) | // no em_FSR_QNE #if (em_FSR_FCC_EQ == FCC_EQ) &&\ (em_FSR_FCC_LT == FCC_LT) &&\ (em_FSR_FCC_GT == FCC_GT) &&\ (em_FSR_FCC_UN == FCC_UN) (s.fcc << em_FSR_FCC_S) | #else ( (s.fcc == FCC_EQ) ? (em_FSR_FCC_EQ << em_FSR_FCC_S) : (s.fcc == FCC_LT) ? (em_FSR_FCC_LT << em_FSR_FCC_S) : (s.fcc == FCC_GT) ? (em_FSR_FCC_GT << em_FSR_FCC_S) : (s.fcc == FCC_UN) ? (em_FSR_FCC_UN << em_FSR_FCC_S) ) | #endif (0 << em_FSR_AEXC_S) | (0 << em_FSR_CEXC_S) ); } /* * Given a SPARC FSR value, install it in the emulator state. Attempts * to set anything we don't emulate throw out. */ static void set_fsr(uint32_t v) { int throw; throw = 0; if (v & em_FSR_MBZ) { printf("MBZ bits aren't all zero\n"); throw = 1; } if (((v >> em_FSR_RD_S) & em_FSR_RD_M) != em_FSR_RD_NEAREST) { printf("rounding direction other than nearest not supported\n"); throw = 1; } if (((v >> em_FSR_TEM_S) & em_FSR_TEM_M) != 0) { printf("floating trap enables not supported\n"); throw = 1; } if (throw) { printf("can't set %%fsr\n"); top(); } #if (em_FSR_FCC_EQ == FCC_EQ) &&\ (em_FSR_FCC_LT == FCC_LT) &&\ (em_FSR_FCC_GT == FCC_GT) &&\ (em_FSR_FCC_UN == FCC_UN) s.fcc = (v >> em_FSR_FCC_S) & em_FSR_FCC_M; #else switch ((v >> em_FSR_FCC_S) & em_FSR_FCC_M) { case em_FSR_FCC_EQ: s.fcc = FCC_EQ; break; case em_FSR_FCC_LT: s.fcc = FCC_LT; break; case em_FSR_FCC_GT: s.fcc = FCC_GT; break; case em_FSR_FCC_UN: s.fcc = FCC_UN; break; default: abort(); break; } #endif } /* * Convert a SPARC PSR value's condition codes to the cc representation * we use. The other bits of the PSR are ignored. This is used * during signal return. */ static unsigned int psr_to_cc(uint32_t psr) { #if (CC_N == em_PSR_CC_N) &&\ (CC_Z == em_PSR_CC_Z) &&\ (CC_V == em_PSR_CC_V) &&\ (CC_C == em_PSR_CC_C) return((psr>>em_PSR_CC_S)&em_PSR_CC_M); #else return( ((psr & (em_PSR_CC_N << em_PSR_CC_S)) ? 
CC_N : 0) | ((psr & (em_PSR_CC_Z << em_PSR_CC_S)) ? CC_Z : 0) | ((psr & (em_PSR_CC_V << em_PSR_CC_S)) ? CC_V : 0) | ((psr & (em_PSR_CC_C << em_PSR_CC_S)) ? CC_C : 0) ); #endif } /* * Deliver a signal. The signal number is am emulated-OS signal * number. Our caller must have made sure the signal is not blocked * or ignored; we handle SIG_DFL actions and user-provided handlers. */ static void deliver_signal(uint32_t sig, uint32_t *context) { int onstack; SIG *sh; uint32_t fp; int how; if ((sig < 1) || (sig >= em__NSIG)) panic("delivery of impossible signal %lu",(ULI)sig); sh = &s.sigh[sig]; onstack = 0; // Signal stack support not yet implemented trc(TRC_SIGNAL,"delivering signal %lu, handler %lx (context %p)\n",(ULI)sig,(ULI)sh->handler,(void *)context); if (sh->handler == em_SIG_DFL) { if ((sig < 1) || (sig >= (sizeof(sigdef)/sizeof(sigdef[0])))) { trc(TRC_SIGNAL,"SIG_DFL handler for out-of-range signal %ld\n",(LI)(int32_t)sig); how = SIGDEF_KILL; } else { how = sigdef[sig]; if (how == SIGDEF_HOLE) { trc(TRC_SIGNAL,"SIG_DFL handler for unknown signal %ld\n",(LI)(int32_t)sig); how = SIGDEF_KILL; } } switch (how) { case SIGDEF_KILL: trc(TRC_SIGNAL,"signal %ld (%s) SIG_DFL: killing process\n",(LI)(int32_t)sig,em_signame(sig,"unknown")); exit(0); break; case SIGDEF_CORE: trc(TRC_SIGNAL,"signal %ld (%s) SIG_DFL: core dump\n",(LI)(int32_t)sig,em_signame(sig,"unknown")); exit(0); break; case SIGDEF_IGNORE: trc(TRC_SIGNAL,"signal %ld (%s) SIG_DFL: ignore\n",(LI)(int32_t)sig,em_signame(sig,"unknown")); return; break; case SIGDEF_STOP: trc(TRC_SIGNAL,"signal %ld (%s) SIG_DFL: stop\n",(LI)(int32_t)sig,em_signame(sig,"unknown")); exit(0); break; default: panic("SIG_DFL finds unknown sigdef %d",how); break; } } window_flush(); spill_window(s.cwp); fp = onstack ? s.sigstack_base + s.sigstack_size : s.regs[R_SP]; fp -= 64; fp &= ~(uint32_t)7; // Return the stacked stuff's address if desired. 
if (context) *context = fp; trc(TRC_SIGNAL,"setting context = %08lx\n",(ULI)fp); // Build the signal frame. mem_set_4(fp,sig); // sf.sf_signo; mem_set_4(fp+4,0); // sf.sf_code mem_set_4(fp+8,0); // sf.sf_scp mem_set_4(fp+12,0); // sf.sf_addr // Build the context to be used by sigreturn. mem_set_4(fp+16,0); // sf.sf_sc.sc_onstack (on-stack not implemented) mem_set_4(fp+20,s.sigmask>>1); // sf.sf_sc.__sc_mask13 mem_set_4(fp+24,s.regs[R_SP]); // sf.sf_sc.sc_sp mem_set_4(fp+28,s.pc); // sf.sf_sc.sc_pc mem_set_4(fp+32,s.npc); // sf.sf_sc.sc_npc mem_set_4(fp+36,build_psr()); // sf.sf_sc.sc_psr mem_set_4(fp+40,s.regs[R_G1]); // sf.sf_sc.sc_g1 mem_set_4(fp+44,s.regs[R_O0]); // sf.sf_sc.sc_o0 mem_set_4(fp+48,s.sigmask>>1); // sf.sf_sc.sc_mask, first word mem_set_4(fp+52,s.sigmask>>33); // sf.sf_sc.sc_mask, second word mem_set_4(fp+56,0); // sf.sf_sc.sc_mask, third word mem_set_4(fp+60,0); // sf.sf_sc.sc_mask, fourth word s.regs[R_G1] = sh->handler; s.pc = sigtramp; s.npc = sigtramp + 4; s.regs[R_SP] = fp - 64; s.sigmask = ( s.sigmask | ( ( sh->mask.bits[0] | (((uint64_t)sh->mask.bits[1]) << 32) ) << 1 ) ) & SIG_CANBLOCK; // any other flags? if (sh->flags & em_SA_RESETHAND) { sh->handler = em_SIG_DFL; bzero(&sh->mask.bits[0],sizeof(sh->mask.bits)); sh->flags = 0; } // if (onstack) record that we're on the signal stack } /* * Deliver pending signals, if any. If any pending signals remain * undelivered, leave anysigpend set true; otherwise, false. The * return value is a count of signals delivered. * * We deliver all signals with code set to 0. Signal delivery with * code set to anything else occurs only for signals reflecting * hardware traps, which we currently never generate. If we make * segfaults and illegal instructions and the like into signals, this * will need to change. * * If we have just interrupted a restartable syscall, then we need to * do extra stuff. 
The machine state has been backed up to restart * the syscall; if any of the delivered signals are not marked * SA_RESTART, we need to re-advance it and arrange for an EINTR * return. */ static int deliver_signals(uint32_t *firstcontext) { int sig; int any; int n; uint32_t fc; uint32_t *fcp; int allrestart; any = 0; n = 0; anysigpend = 0; fc = 0; fcp = &fc; allrestart = 1; for (sig=em__NSIG-1;sig>=1;sig--) { sig_log("deliver_signals sig ~d pend ~d\n",sig,(int)s.sigpend[sig]); if (s.sigpend[sig]) { if (s.sigh[sig].handler == em_SIG_IGN) { trc(TRC_SIGNAL,"deliver_signals sees %d (%s) pending: ignored\n",sig,em_signame(sig,"unknown")); s.sigpend[sig] = 0; } else if (! ((s.sigmask >> sig) & 1U)) { trc(TRC_SIGNAL,"deliver_signals sees %d (%s) pending: deliverable\n",sig,em_signame(sig,"unknown")); s.sigpend[sig] = 0; if (! (s.sigh[sig].flags & em_SA_RESTART)) allrestart = 0; deliver_signal(sig,fcp); if (fcp && fc) fcp = 0; n ++; } else { trc(TRC_SIGNAL,"deliver_signals sees %d (%s) pending: blocked\n",sig,em_signame(sig,"unknown")); any = 1; } } } if (firstcontext) *firstcontext = fc; if ((s.flags & SF_SIGRESTART) && !allrestart) { s.flags &= ~SF_SIGRESTART; s.pc = s.npc; s.npc += 4; // Must match the error path in dosyscall() trc(TRC_SYSCALL,"returning EINTR after all\n"); mem_set_4(fc+36,mem_get_4(fc+36)|(em_PSR_CC_C<0;i--) fprintf(f," .."); } for (;len>0;len--,ptr++) { switch (ptr & 15) { case 0: fprintf(f,"%08lx:",(ULI)ptr); break; case 8: fprintf(f," "); break; } fprintf(f," %02x",(*getb)(ptr)); if ((ptr & 15) == 15) fprintf(f,"\n"); } if (ptr & 15) fprintf(f,"\n"); } /* * Trace I/O data, pulling the data from emulated memory. * * XXX Possible bug lurking: what if mem_get_1 errors and throws out? * If nothing else, we'll leave nomemacc incremented. 
*/ static void trace_io_data_em(const char *what, uint32_t ptr, int len) { uint8_t getb(uint32_t a) { return(mem_get_1(a)); } trace_io_data(what,ptr,(len>io_trace_size)?io_trace_size:len,&getb); } /* * Trace I/O data, pulling the data from emulator memory. */ static void trace_io_data_os(const char *what, uint32_t ptr, const void *data, int len) { uint8_t getb(uint32_t a) { return(((const unsigned char *)data)[a-ptr]); } trace_io_data(what,ptr,(len>io_trace_size)?io_trace_size:len,&getb); } /* * Given a uint32_t, return the number of set bits in it. */ static unsigned int bitcount32(uint32_t v) { v = (v & 0x55555555) + ((v >> 1) & 0x55555555); v = (v & 0x33333333) + ((v >> 2) & 0x33333333); v = (v & 0x0f0f0f0f) + ((v >> 4) & 0x0f0f0f0f); v = (v & 0x00ff00ff) + ((v >> 8) & 0x00ff00ff); return((v&0x0000ffff)+((v>>16)&0x0000ffff)); } /* * Emulate an integer sysctl() value. valp and lenp are the * emulated-memory locations where we should write the value and * length. val is the value. rv is the SCRV for the syscall. */ static int em_sc_int(uint32_t valp, uint32_t lenp, uint32_t val, SCRV *rv) { uint32_t len; int i; if (valp == 0) { mem_set_4(lenp,4); trace_io_data_em("length",lenp,4); SYSCALL_SETRET(4); return(1); } len = mem_get_4(lenp); trace_io_data_em("input length",lenp,4); if (len < 4) { for (i=0;i>24); val <<= 8; } trace_io_data_em("data",valp,len); SYSCALL_SETERR(em_ENOMEM); } else { for (i=0;i<4;i++) { mem_set_1(valp+i,val>>24); val <<= 8; } trace_io_data_em("data",valp,4); mem_set_4(lenp,4); trace_io_data_em("output length",lenp,4); SYSCALL_SETRET(4); } return(1); } /* * Emulate a string sysctl() value. valp and lenp are the * emulated-memory locations where we should write the value and * length. str and strlen are the value string. rv is the SCRV for * the syscall. 
*/ static int em_sc_string(uint32_t valp, uint32_t lenp, const void *str, int strlen, SCRV *rv) { uint32_t len; if (valp == 0) { mem_set_4(lenp,strlen); trace_io_data_em("length",lenp,4); SYSCALL_SETRET(strlen); return(1); } len = mem_get_4(lenp); trace_io_data_em("input length",lenp,4); if (len < strlen) { copyout(str,valp,len,"sysctl string",0,0); trace_io_data_em("data",valp,len); SYSCALL_SETERR(em_ENOMEM); } else { copyout(str,valp,strlen,"sysctl string",0,0); trace_io_data_em("data",valp,strlen); mem_set_4(lenp,strlen); trace_io_data_em("output length",lenp,4); SYSCALL_SETRET(strlen); } return(1); } /* * Handle a hw.* sysctl. mib/miblen are the MIB (already copied out of * emulated memory), valp and lenp are the places (addresses in * emulated memory) to write the value and length, and rv is the SCRV * for the syscall. */ static int em_sysctl_hw(uint32_t *mib, int miblen, uint32_t valp, uint32_t lenp, SCRV *rv) { if (miblen != 1) return(0); switch (mib[0]) { case em_HW_MACHINE: return(em_sc_string(valp,lenp,"sparc",5,rv)); break; case em_HW_PAGESIZE: return(em_sc_int(valp,lenp,PAGE_SIZE,rv)); break; case em_HW_MACHINE_ARCH: return(em_sc_string(valp,lenp,"sparc",5,rv)); break; } return(0); } /* * Implementation of vm.loadavg sysctl. 
*/ static int em_sc_vm_loadavg(uint32_t valp, uint32_t lenp, SCRV *rv) { struct loadavg osla; int osmib[2]; size_t osvalsize; uint32_t len; uint32_t vals[4]; int i; osmib[0] = CTL_VM; osmib[1] = VM_LOADAVG; osvalsize = sizeof(osla); if (sysctl(&osmib[0],2,&osla,&osvalsize,0,0) < 0) { SYSCALL_SETERR(os2em_errno(errno)); return(1); } if (valp == 0) { mem_set_4(lenp,16); trace_io_data_em("length",lenp,4); SYSCALL_SETRET(16); return(1); } vals[0] = osla.ldavg[0]; vals[1] = osla.ldavg[1]; vals[2] = osla.ldavg[2]; vals[3] = osla.fscale; len = mem_get_4(lenp); trace_io_data_em("input length",lenp,4); if (len < 16) { for (i=0;i>2]>>(8*(3-(i&3)))); } trace_io_data_em("data",valp,len); SYSCALL_SETERR(em_ENOMEM); } else { mem_set_4(valp,vals[0]); mem_set_4(valp+4,vals[1]); mem_set_4(valp+16,vals[2]); mem_set_4(valp+24,vals[3]); trace_io_data_em("data",valp,16); mem_set_4(lenp,16); trace_io_data_em("output length",lenp,4); SYSCALL_SETRET(16); } return(1); } /* * Handle a vm.* sysctl. mib/miblen are the MIB (already copied out of * emulated memory), valp and lenp are the places (addresses in * emulated memory) to write the value and length, and rv is the SCRV * for the syscall. */ static int em_sysctl_vm(uint32_t *mib, int miblen, uint32_t valp, uint32_t lenp, SCRV *rv) { if (miblen != 1) return(0); switch (mib[0]) { case em_VM_LOADAVG: return(em_sc_vm_loadavg(valp,lenp,rv)); break; } return(0); } /* * Handle a kern.* sysctl. mib/miblen are the MIB (already copied out * of emulated memory), valp and lenp are the places (addresses in * emulated memory) to write the value and length, and rv is the SCRV * for the syscall. * * Arguably KERN_NGROUPS should return the 1.4T value, but if the * underlying OS doesn't support that many.... 
 */
static int em_sysctl_kern(uint32_t *mib, int miblen, uint32_t valp, uint32_t lenp, SCRV *rv)
{
 if (miblen != 1) return(0);
 switch (mib[0])
  { case em_KERN_OSTYPE:
       // Lengths include the terminating NUL ("NetBSD" = 6+1).
       return(em_sc_string(valp,lenp,"NetBSD",7,rv));
       break;
    case em_KERN_OSRELEASE:
       return(em_sc_string(valp,lenp,"1.4T",5,rv));
       break;
    case em_KERN_VERSION:
       return(em_sc_string(valp,lenp,"NetBSD 1.4T (GENERIC) #0: Mon Aug 13 23:49:34 EDT 2018\n mouse@Sparkle.Rodents-Montreal.ORG:/home/mouse/kbuild/GENERIC\n",122,rv));
       break;
    case em_KERN_HOSTNAME:
       // Append "-SPARC" to make it a little easier to tell whether
       // I'm typing to the emulated host or the underlying host.
       { static char *hn;
	 static int hnl = 0;
	 int l;
	 char *dot;
	 // Grow the (static, cached) buffer until the hostname plus
	 // "-SPARC" and the NUL fit.
	 // NOTE(review): malloc() results are not checked here.
	 if (hnl < 1)
	  { hnl = 8;
	    hn = malloc(hnl);
	  }
	 while (1)
	  { gethostname(hn,hnl-1);
	    hn[hnl-1] = '\0';
	    l = strlen(hn);
	    if (l < hnl-6-1) break;
	    hnl <<= 1;
	    free(hn);
	    hn = malloc(hnl);
	  }
	 // Insert "-SPARC" before the first dot, or append it if the
	 // name has no dot.
	 dot = index(hn,'.');
	 if (dot)
	  { bcopy(dot,dot+6,l+1-(dot-hn));
	    bcopy("-SPARC",dot,6);
	  }
	 else
	  { bcopy("-SPARC",hn+l,6+1);
	  }
	 return(em_sc_string(valp,lenp,hn,l+6+1,rv));
       }
       break;
    case em_KERN_NGROUPS:
       { static int ngroups = -1;
	 // Cache the underlying OS's value on first use.
	 if (ngroups < 0)
	  { int osmib[2];
	    size_t osvalsize;
	    osmib[0] = CTL_KERN;
	    osmib[1] = KERN_NGROUPS;
	    osvalsize = sizeof(ngroups);
	    if (sysctl(&osmib[0],2,&ngroups,&osvalsize,0,0) < 0)
	     { SYSCALL_SETERR(os2em_errno(errno));
	       return(1);
	     }
	  }
	 return(em_sc_int(valp,lenp,ngroups,rv));
       }
       break;
    case em_KERN_IOV_MAX:
       { static int iovmax = -1;
	 // Cache the underlying OS's value on first use.
	 if (iovmax < 0)
	  { int osmib[2];
	    size_t osvalsize;
	    osmib[0] = CTL_KERN;
	    osmib[1] = KERN_IOV_MAX;
	    osvalsize = sizeof(iovmax);
	    if (sysctl(&osmib[0],2,&iovmax,&osvalsize,0,0) < 0)
	     { SYSCALL_SETERR(os2em_errno(errno));
	       return(1);
	     }
	  }
	 return(em_sc_int(valp,lenp,iovmax,rv));
       }
       break;
  }
 return(0);
}
/*
 * Store an underlying-OS struct rusage (at ru) into an emulated struct
 * rusage (at buf).
 */
static void store_rusage(uint32_t buf, const struct rusage *ru)
{
 // Emulated layout: 64-bit tv_sec, 32-bit tv_usec, 32 bits padding,
 // for each of utime/stime; then 32-bit counters.
 mem_set_8(buf,ru->ru_utime.tv_sec);
 mem_set_4(buf+8,ru->ru_utime.tv_usec);
 mem_set_4(buf+12,0); // struct padding
 mem_set_8(buf+16,ru->ru_stime.tv_sec);
 mem_set_4(buf+24,ru->ru_stime.tv_usec);
 mem_set_4(buf+28,0); // struct padding
 mem_set_4(buf+32,ru->ru_maxrss);
 mem_set_4(buf+36,ru->ru_ixrss);
 mem_set_4(buf+40,ru->ru_idrss);
 mem_set_4(buf+44,ru->ru_isrss);
 mem_set_4(buf+48,ru->ru_minflt);
 mem_set_4(buf+52,ru->ru_majflt);
 mem_set_4(buf+56,ru->ru_nswap);
 mem_set_4(buf+60,ru->ru_inblock);
 mem_set_4(buf+64,ru->ru_oublock);
 mem_set_4(buf+68,ru->ru_msgsnd);
 mem_set_4(buf+72,ru->ru_msgrcv);
 mem_set_4(buf+76,ru->ru_nsignals);
 mem_set_4(buf+80,ru->ru_nvcsw);
 mem_set_4(buf+84,ru->ru_nivcsw);
}
/*
 * Store an underlying-OS struct stat (at stb) into an emulated struct
 * stat (at stp).
 */
static void store_stat(uint32_t stp, const struct stat *stb)
{
 mem_set_4(stp,stb->st_dev);
 mem_set_4(stp+4,stb->st_ino);
 mem_set_4(stp+8,stb->st_mode);
 mem_set_4(stp+12,stb->st_nlink);
 mem_set_4(stp+16,stb->st_uid);
 mem_set_4(stp+20,stb->st_gid);
 mem_set_4(stp+24,stb->st_rdev);
 mem_set_4(stp+28,0); // padding
 mem_set_8(stp+32,stb->st_atimespec.tv_sec);
 mem_set_4(stp+40,stb->st_atimespec.tv_nsec);
 mem_set_4(stp+44,0); // padding
 mem_set_8(stp+48,stb->st_mtimespec.tv_sec);
 mem_set_4(stp+56,stb->st_mtimespec.tv_nsec);
 mem_set_4(stp+60,0); // padding
 mem_set_8(stp+64,stb->st_ctimespec.tv_sec);
 mem_set_4(stp+72,stb->st_ctimespec.tv_nsec);
 mem_set_4(stp+76,0); // padding
 mem_set_8(stp+80,stb->st_size);
 mem_set_8(stp+88,stb->st_blocks);
 mem_set_4(stp+96,stb->st_blksize);
 mem_set_4(stp+100,0); // XXX should be st_flags
 mem_set_4(stp+104,stb->st_gen);
 mem_set_4(stp+108,0); // padding
 mem_set_8(stp+112,0); // st_qspare[0]
 mem_set_8(stp+120,0); // st_qspare[1]
}
#if defined(STATFS_VIA_STATVFS) || defined(GETFSSTAT_VIA_GETVFSSTAT)
/*
 * Store an underlying-OS struct statvfs (at sf) into an emulated
 * struct statfs (at buf).
what and prefail are passed on to * copy_or_nulpad for the strings. */ static void store_statvfs_as_statfs(uint32_t buf, const struct statvfs *sf, const char *what, void (*prefail)(void *), void *pfarg) { uint32_t emflags; emflags = ((sf->f_flag & MNT_RDONLY) ? em_MNT_RDONLY : 0) | ((sf->f_flag & MNT_SYNCHRONOUS) ? em_MNT_SYNCHRONOUS : 0) | ((sf->f_flag & MNT_NOEXEC) ? em_MNT_NOEXEC : 0) | ((sf->f_flag & MNT_NOSUID) ? em_MNT_NOSUID : 0) | ((sf->f_flag & MNT_NODEV) ? em_MNT_NODEV : 0) | ((sf->f_flag & MNT_UNION) ? em_MNT_UNION : 0) | ((sf->f_flag & MNT_ASYNC) ? em_MNT_ASYNC : 0) | ((sf->f_flag & MNT_EXRDONLY) ? em_MNT_EXRDONLY : 0) | ((sf->f_flag & MNT_EXPORTED) ? em_MNT_EXPORTED : 0) | ((sf->f_flag & MNT_DEFEXPORTED) ? em_MNT_DEFEXPORTED : 0) | ((sf->f_flag & MNT_EXPORTANON) ? em_MNT_EXPORTANON : 0) | ((sf->f_flag & MNT_EXKERB) ? em_MNT_EXKERB : 0) | ((sf->f_flag & MNT_LOCAL) ? em_MNT_LOCAL : 0) | ((sf->f_flag & MNT_QUOTA) ? em_MNT_QUOTA : 0) | ((sf->f_flag & MNT_ROOTFS) ? em_MNT_ROOTFS : 0) | ((sf->f_flag & MNT_NOCOREDUMP) ? em_MNT_NOCOREDUMP : 0) | ((sf->f_flag & MNT_NOATIME) ? em_MNT_NOATIME : 0) | ((sf->f_flag & MNT_EXNORESPORT) ? em_MNT_EXNORESPORT : 0) | ((sf->f_flag & MNT_EXPUBLIC) ? em_MNT_EXPUBLIC : 0) | ((sf->f_flag & MNT_SYMPERM) ? em_MNT_SYMPERM : 0) | ((sf->f_flag & MNT_NODEVMTIME) ? em_MNT_NODEVMTIME : 0) | ((sf->f_flag & MNT_SOFTDEP) ? 
em_MNT_SOFTDEP : 0); mem_set_2(buf,0); mem_set_2(buf+2,emflags&0xffff); mem_set_4(buf+4,sf->f_bsize); mem_set_4(buf+8,sf->f_iosize); mem_set_4(buf+12,sf->f_blocks); mem_set_4(buf+16,sf->f_bfree); mem_set_4(buf+20,sf->f_bavail); mem_set_4(buf+24,sf->f_files); mem_set_4(buf+28,sf->f_ffree); mem_set_4(buf+32,sf->f_fsidx.__fsid_val[0]); mem_set_4(buf+36,sf->f_fsidx.__fsid_val[1]); mem_set_4(buf+40,sf->f_owner); mem_set_4(buf+44,emflags); mem_set_4(buf+48,sf->f_syncwrites); mem_set_4(buf+52,sf->f_asyncwrites); mem_set_4(buf+56,0); copy_or_nulpad(&sf->f_fstypename[0],sizeof(sf->f_fstypename),buf+60,em_MFSNAMELEN,what,prefail,pfarg); copy_or_nulpad(&sf->f_mntonname[0],sizeof(sf->f_mntonname),buf+76,em_MNAMELEN,what,prefail,pfarg); copy_or_nulpad(&sf->f_mntfromname[0],sizeof(sf->f_mntfromname),buf+166,em_MNAMELEN,what,prefail,pfarg); } #endif #if !defined(STATFS_VIA_STATVFS) || !defined(GETFSSTAT_VIA_GETVFSSTAT) /* * Store an underlying-OS struct statfs (at sf) into an emulated struct * statfs (at buf). what and prefail are passed on to copy_or_nulpad * for the strings. */ static void store_statfs(uint32_t buf, const struct statfs *sf, const char *what, void (*prefail)(void)) { uint32_t emflags; emflags = ((sf->f_flags & MNT_RDONLY) ? em_MNT_RDONLY : 0) | ((sf->f_flags & MNT_SYNCHRONOUS) ? em_MNT_SYNCHRONOUS : 0) | ((sf->f_flags & MNT_NOEXEC) ? em_MNT_NOEXEC : 0) | ((sf->f_flags & MNT_NOSUID) ? em_MNT_NOSUID : 0) | ((sf->f_flags & MNT_NODEV) ? em_MNT_NODEV : 0) | ((sf->f_flags & MNT_UNION) ? em_MNT_UNION : 0) | ((sf->f_flags & MNT_ASYNC) ? em_MNT_ASYNC : 0) | ((sf->f_flags & MNT_EXRDONLY) ? em_MNT_EXRDONLY : 0) | ((sf->f_flags & MNT_EXPORTED) ? em_MNT_EXPORTED : 0) | ((sf->f_flags & MNT_DEFEXPORTED) ? em_MNT_DEFEXPORTED : 0) | ((sf->f_flags & MNT_EXPORTANON) ? em_MNT_EXPORTANON : 0) | ((sf->f_flags & MNT_EXKERB) ? em_MNT_EXKERB : 0) | ((sf->f_flags & MNT_LOCAL) ? em_MNT_LOCAL : 0) | ((sf->f_flags & MNT_QUOTA) ? em_MNT_QUOTA : 0) | ((sf->f_flags & MNT_ROOTFS) ? 
em_MNT_ROOTFS : 0) | ((sf->f_flags & MNT_NOCOREDUMP) ? em_MNT_NOCOREDUMP : 0) | ((sf->f_flags & MNT_NOATIME) ? em_MNT_NOATIME : 0) | ((sf->f_flags & MNT_EXNORESPORT) ? em_MNT_EXNORESPORT : 0) | ((sf->f_flags & MNT_EXPUBLIC) ? em_MNT_EXPUBLIC : 0) | ((sf->f_flags & MNT_SYMPERM) ? em_MNT_SYMPERM : 0) | ((sf->f_flags & MNT_NODEVMTIME) ? em_MNT_NODEVMTIME : 0) | ((sf->f_flags & MNT_SOFTDEP) ? em_MNT_SOFTDEP : 0); mem_set_2(buf,0); mem_set_2(buf+2,emflags&0xffff); mem_set_4(buf+4,sf->f_bsize); mem_set_4(buf+8,sf->f_iosize); mem_set_4(buf+12,sf->f_blocks); mem_set_4(buf+16,sf->f_bfree); mem_set_4(buf+20,sf->f_bavail); mem_set_4(buf+24,sf->f_files); mem_set_4(buf+28,sf->f_ffree); mem_set_4(buf+32,sf->f_fsid.val[0]); mem_set_4(buf+36,sf->f_fsid.val[1]); mem_set_4(buf+40,sf->f_owner); mem_set_4(buf+44,emflags); mem_set_4(buf+48,sf->f_syncwrites); mem_set_4(buf+52,sf->f_asyncwrites); mem_set_4(buf+56,0); copy_or_nulpad(&sf->f_fstypename[0],sizeof(sf->f_fstypename),buf+60,em_MFSNAMELEN,what,prefail); copy_or_nulpad(&sf->f_mntonname[0],sizeof(sf->f_mntonname),buf+76,em_MNAMELEN,what,prefail); copy_or_nulpad(&sf->f_mntfromname[0],sizeof(sf->f_mntfromname),buf+166,em_MNAMELEN,what,prefail); } #endif /* * Store an underlying-OS struct rlimit (at rl) into an emulated struct * rlimit (at buf). */ static void store_rlimit(uint32_t buf, const struct rlimit *rl) { uint64_t v; v = (rl->rlim_cur == RLIM_INFINITY) ? em_RLIM_INFINITY : rl->rlim_cur; mem_set_8(buf,v); v = (rl->rlim_max == RLIM_INFINITY) ? em_RLIM_INFINITY : rl->rlim_max; mem_set_8(buf+8,v); } /* * Load an underlying-OS struct rlimit (at rl) from an emulated struct * rlimit (at buf). */ static void load_rlimit(uint32_t buf, struct rlimit *rl) { uint64_t v; v = mem_get_8(buf); rl->rlim_cur = (v == em_RLIM_INFINITY) ? RLIM_INFINITY : v; v = mem_get_8(buf+8); rl->rlim_max = (v == em_RLIM_INFINITY) ? RLIM_INFINITY : v; } /* * Handle common code for file descriptor syscall arguments. 
arg is
 * the argument value, as returned by (eg) scarg().  prot is either 0,
 * meaning that no protection check should be done, or P_R or P_W,
 * meaning that the descriptor has to be readable or writable.  call
 * is the text name of the syscall, for potential error messages.
 *
 * Returns the FD, or nil (the caller then fails with EBADF) after
 * tracing why.
 */
static FD *descriptor_arg(uint32_t arg, unsigned int prot, const char *call)
{
 FD *fd;

 // Descriptor number out of range for the emulated fd table.
 if (arg >= nfds)
  { trc(TRC_SYSCALL,"%s fd %lu out of range -> EBADF\n",call,(ULI)arg);
    return(0);
  }
 fd = fds[arg];
 // In range but not open.
 if (! fd)
  { trc(TRC_SYSCALL,"%s fd %lu not open -> EBADF\n",call,(ULI)arg);
    return(0);
  }
 // Optional access check (prot is 0, P_R, or P_W; anything else panics).
 if (prot && !(fd->prot & prot))
  { const char *pkind;
    switch (prot)
     { case P_R:
	  pkind = "readable";
	  break;
       case P_W:
	  pkind = "writable";
	  break;
       default:
	  panic("bad prot to descriptor_arg");
	  break;
     }
    trc(TRC_SYSCALL,"%s fd %lu not %s -> EBADF\n",call,(ULI)arg,pkind);
    return(0);
  }
 return(fd);
}
/*
 * Do a forkwait-style loop.
 *
 * We could leave the body of the loop empty, but that would burn CPU
 * unnecessarily.  1/10 second per iteration is long enough that we're
 * not hogging CPU but short enough that humans don't get impatient.
 *
 * v is never changed here; the loop is exited by something external
 * (eg, a debugger attaching and storing 0 into v) - hence the
 * volatile sig_atomic_t.
 */
static void do_forkwait(void)
{
 volatile sig_atomic_t v;

 v = 1;
 while (v) poll(0,0,100);
}
/*
 * Do post-exec() signal-handling stuff.  This mostly means resetting
 * caught signals to SIG_DFL (SIG_IGN and SIG_DFL dispositions survive
 * exec and are left alone).
 */
static void sig_postexec(void)
{
 int i;

 for (i=em__NSIG-1;i>=1;i--)
  { switch (s.sigh[i].handler)
     { case em_SIG_IGN:
       case em_SIG_DFL:
	  break;
       default:
	  // Caught signal: back to default, clear mask/flags.
	  trc(TRC_SIGNAL,"postexec resetting signal %d (%s) to SIG_DFL\n",i,em_signame(i,"unknown"));
	  s.sigh[i].handler = em_SIG_DFL;
	  bzero(&s.sigh[i].mask,sizeof(s.sigh[i].mask));
	  s.sigh[i].flags = 0;
	  // If the default disposition is to ignore, stop catching it
	  // ourselves and drop any pending instance.
	  if (sigdef[i] == SIGDEF_IGNORE)
	   { s.ignsigs |= 1ULL << i;
	     set_our_catcher(em2os_signal(i),SIG_DFL);
	     s.sigpend[i] = 0;
	   }
	  break;
     }
    // NOTE(review): these sigstack resets are inside the for loop, so
    // they run once per signal; they look as though they were meant to
    // run once, after the loop (misplaced brace?).  Harmless as-is -
    // confirm intent before moving.
    s.sigstack_enabled = 0;
    s.sigstack_base = 0;
    s.sigstack_size = 0;
  }
}
/*
 * Print an emulated-OS signal mask.
Note that NetBSD/sparc 1.4T * signal masks are shifted by one bit, so that the low bit * corresponds to signal 1, not (nonexistent) signal 0. */ static void print_em_sig_mask(FILE *f, const EMSIGSET *mask) { const char *pref; int j; pref = ""; for (j=0;jbits[j>>5] >> (j & 31)) & 1) { const char *n; n = em_signame(j+1,0); if (n) fprintf(f,"%s%s",pref,n); else fprintf(f,"%s?%d",pref,j+1); pref = "|"; } } if (! pref[0]) fprintf(f,"0"); } /* * Fetch a struct sockaddr out of emulator memory, converting it to an * emulator-OS struct sockaddr. * * Arguments are the pointer and length values, usually syscall * arguments (eg, the second and third arguments to connect(2)). * * Return value is a struct containing: * * err * If this is zero, it worked, and the rest of the fields * are valid. If this is nonzero, it is an emulator * errno, and the rest of the fields are garbage. * emlen * The sockaddr length field from the emulated struct. * emfam * The sockaddr family field from the emulated struct. * sa * Pointer (in emulator memory) to malloc()ed space * holding the emulator version of the struct sockaddr. * salen * Size of the emulator version of the struct sockaddr. 
*/ typedef struct { int err; uint8_t emlen; uint8_t emfam; void *sa; int salen; } GETSA; static GETSA get_sockaddr(uint32_t addr, uint32_t len) { GETSA r; int i; if ((len < 2) || (len > 255)) return((GETSA){.err=em_EADDRNOTAVAIL}); r.emlen = mem_get_1(addr); r.emfam = mem_get_1(addr+1); switch (r.emfam) { case em_AF_INET: { struct sockaddr_in *p; if (r.emlen != 16) return((GETSA){.err=em_EINVAL}); p = malloc(sizeof(*p)); bzero(p,sizeof(*p)); // XXX API botch p->sin_len = sizeof(*p); p->sin_family = AF_INET; p->sin_port = htons(mem_get_2(addr+2)); for (i=0;i<4;i++) ((unsigned char *)&p->sin_addr)[i] = mem_get_1(addr+4+i); r.sa = p; r.salen = sizeof(*p); } break; case em_AF_INET6: { struct sockaddr_in6 *p; if (r.emlen != 28) return((GETSA){.err=em_EINVAL}); p = malloc(sizeof(*p)); bzero(p,sizeof(*p)); // XXX API botch still needed for v6? p->sin6_len = sizeof(*p); p->sin6_family = AF_INET; p->sin6_port = htons(mem_get_2(addr+2)); // XXX Does sin6_flowinfo need byteswapping? p->sin6_flowinfo = mem_get_4(addr+4); for (i=0;i<16;i++) ((unsigned char *)&p->sin6_addr)[i] = mem_get_1(addr+8+i); // XXX Does sin6_scope_id need byteswapping? p->sin6_scope_id = mem_get_4(addr+24); r.sa = p; r.salen = sizeof(*p); } break; case em_AF_LOCAL: { struct sockaddr_un *p; int i; p = malloc(256+offsetof(struct sockaddr_un,sun_path)); p->sun_len = r.emlen - 2 + offsetof(struct sockaddr_un,sun_path); p->sun_family = AF_LOCAL; for (i=2;isun_path[i-2] = mem_get_1(addr+i); r.sa = p; r.salen = r.emlen - 2 + offsetof(struct sockaddr_un,sun_path); } break; default: return((GETSA){.err=em_EADDRNOTAVAIL}); break; } r.err = 0; return(r); } /* * Take an emulator-OS struct sockaddr and convert it to an emulated-OS * struct sockaddr, storing it into emulated memory. * * Arguments are the sockaddr pointer and length values from the * emulator OS (eg, as returned by getsockname) and the sockaddr * pointer and length pointer values in the emulated machine (eg, as * passed to emulated getsockname). 
The fifth argument, if non-nil, * is a pointer through which the value stored through emlenp is * written, so our caller can get that length without having to pull * it out of emulated memory (which would involve brittle assumptions * about the order of our writes in cases of overlap). * * Return value is normally zero; if it is nonzero, it is an emulator * errno describing the error. */ static uint32_t put_sockaddr(const void *ossa, int oslen, uint32_t emsa, uint32_t emlenp, uint32_t *plenp) { int i; uint32_t emlen; if (emlenp == 0) return(0); emlen = mem_get_4(emlenp); if (emlen == 0) return(0); switch (((const struct sockaddr *)ossa)->sa_family) { case AF_LOCAL: { const struct sockaddr_un *p; int pl; p = ossa; pl = ((oslen < p->sun_len) ? oslen : p->sun_len) - offsetof(struct sockaddr_un,sun_path); if (pl < 0) { printf("%s: oslen = %d < offsetof(struct sockaddr_un,sun_path) = %d\n",__func__,oslen,(int)offsetof(struct sockaddr_un,sun_path)); top(); } mem_set_1(emsa,pl+2); if (emlen > 1) { mem_set_1(emsa+1,em_AF_LOCAL); if (pl > emlen-2) pl = emlen - 2; for (i=0;isun_path[i]); emlen = pl + 2; } mem_set_4(emlenp,emlen); if (plenp) *plenp = emlen; } break; case AF_INET: { const struct sockaddr_in *p; uint8_t b[16]; if (oslen < sizeof(struct sockaddr_in)) { printf("%s: oslen = %d < sizeof(struct sockaddr_in) = %d\n",__func__,oslen,(int)sizeof(struct sockaddr_in)); top(); } p = ossa; b[0] = 16; b[1] = em_AF_INET; b[2] = ((const uint8_t *)&p->sin_port)[0]; b[3] = ((const uint8_t *)&p->sin_port)[1]; b[4] = ((const uint8_t *)&p->sin_addr)[0]; b[5] = ((const uint8_t *)&p->sin_addr)[1]; b[6] = ((const uint8_t *)&p->sin_addr)[2]; b[7] = ((const uint8_t *)&p->sin_addr)[3]; bzero(&b[8],8); i = (emlen < 16) ? 
emlen : 16; copyout(&b[0],emsa,i,"struct sockaddr_in",0,0); mem_set_4(emlenp,i); if (plenp) *plenp = i; } break; case AF_INET6: { const struct sockaddr_in6 *p; uint8_t b[28]; if (oslen < sizeof(struct sockaddr_in6)) { printf("%s: oslen = %d < sizeof(struct sockaddr_in6) = %d\n",__func__,oslen,(int)sizeof(struct sockaddr_in6)); top(); } p = ossa; b[0] = 28; b[1] = em_AF_INET6; b[2] = ((const uint8_t *)&p->sin6_port)[0]; b[3] = ((const uint8_t *)&p->sin6_port)[1]; // XXX Does sin6_flowinfo need byteswapping? b[4] = (p->sin6_flowinfo >> 24) & 0xff; b[5] = (p->sin6_flowinfo >> 16) & 0xff; b[6] = (p->sin6_flowinfo >> 8) & 0xff; b[7] = p->sin6_flowinfo & 0xff; bcopy(&p->sin6_addr,&b[8],16); // XXX Does sin6_scope_id need byteswapping? b[24] = (p->sin6_scope_id >> 24) & 0xff; b[25] = (p->sin6_scope_id >> 16) & 0xff; b[26] = (p->sin6_scope_id >> 8) & 0xff; b[27] = p->sin6_scope_id & 0xff; i = (emlen < 28) ? emlen : 28; copyout(&b[0],emsa,i,"struct sockaddr_in6",0,0); mem_set_4(emlenp,i); if (plenp) *plenp = i; } break; default: return(em_EAFNOSUPPORT); break; } return(0); } /* * Dump out the current memory map in a human-readable form. The vm * is printed in linked-list order, so it usually should be sorted * before calling this. * * XXX We should print some indication of each MEMSEG's type and maybe * how it arose. */ static void dump_vm(FILE *to) { MEMSEG *ms; fprintf(to,"base size end prot (brk at %08lx)\n",(ULI)vm.dbrk); for (ms=vm.m;ms;ms=ms->link) { fprintf(to,"%08lx %08lx %08lx %c%c%c ", (ULI)ms->base, (ULI)ms->size, (ULI)ms->end, (ms->prot&P_R)?'R':'-', (ms->prot&P_W)?'W':'-', (ms->prot&P_X)?'X':'-'); (*ms->ops->desc)(ms,to); fprintf(to,"\n"); } } /* * A malloc implementation for the emulated program that makes it * impossible for the program to corrupt the arena, because the * bookkeeping data isn't even in its address space - and with true * redzones, not just data the corruption of which is only probably * detected. 
 *
 * The emu_*() routines are called when the emulated machine executes a
 * trap instruction with the magic trap number appropriate to the
 * routine in question.  They expect to find their arguments in the %o
 * registers (ie, the trap occurs in a leaf-routine context).
 */
/*
 * Find - creating if necessary - the emulated malloc arena.
 *
 * On creation, the whole arena becomes a single MBK_FREE MALBLOCK on
 * the arena's free list.
 */
static MEMSEG *malloc_arena(void)
{
 MEMSEG *m;
 MEMSEG_PRIV_ARENA *a;
 MALBLOCK *b;

 // Already have one?  The arena is identified by its ops vector.
 for (m=vm.m;m;m=m->link) if (m->ops == &memseg_ops_arena) return(m);
 trc(TRC_ARENA,"creating malloc arena\n");
 m = memseg_new_arena();
 if (memseg_check_conflict(m->base,m->size,m))
  { printf("malloc arena conflicts with existing space somehow\n");
    dump_vm(stdout);
    top();
  }
 a = m->priv;
 // Seed the free list with one block covering the whole arena.
 b = malloc(sizeof(MALBLOCK));
 b->kind = MBK_FREE;
 b->l = 0;
 b->r = 0;
 b->base = m->base;
 b->size = m->size;
 b->end = m->end;
 a->free = b;
 return(m);
}
/*
 * Rebalance an AVL (sub)tree after an insertion or deletion.  *pp is
 * the root of the (sub)tree and pptr is the correct thing to put in
 * the u field of an element that replaces *pp.  The tree must be
 * self-consistent; that is, the only permissible violation of the AVL
 * invariants is that pp[0]->bal may be 2 or -2.
 *
 * Return value is true if the result is unbalanced (by 1, necessarily)
 * or 0 if it's balanced.
 *
 * See rebalance.txt if you're not familiar with AVL tree rebalancing.
 * The case numbers in comments below refer the cases listed there.
 */
static int arena_rebalance(MALBLOCK **pp, MALBLOCK *pptr)
{
 MALBLOCK *p;
 MALBLOCK *f;
 MALBLOCK *b;
 MALBLOCK *c;

 p = *pp;
 if (pptr != p->u) panic("pptr wrong");
 switch (p->bal)
  { case 0:
       return(0);
       break;
    case -1: case 1:
       return(1);
       break;
    case -2:
       // Left-heavy by two.
       if (p->l->bal <= 0)
	{ // case 1: single right rotation.
	  p->bal = -1 - p->l->bal;
	  p->l->bal ++;
	  *pp = p->l;
	  p->l->u = pptr;
	  f = p->l->r;
	  p->l->r = p;
	  p->u = p->l;
	  p->l = f;
	  if (f) f->u = p;
	  if (p->bal) return(1);
	}
       else
	{ // case 2: left-right double rotation.
	  f = p->l->r;
	  b = f->l;
	  c = f->r;
	  *pp = f;
	  f->u = pptr;
	  f->l = p->l;
	  f->l->u = f;
	  f->r = p;
	  p->u = f;
	  f->l->r = b;
	  if (b) b->u = f->l;
	  p->l = c;
	  if (c) c->u = p;
	  f->l->bal = (f->bal > 0) ? -1 : 0;
	  f->r->bal = (f->bal < 0) ? 1 : 0;
	  f->bal = 0;
	}
       break;
    case 2:
       // Right-heavy by two (mirror image of the above).
       if (p->r->bal >= 0)
	{ // case 3: single left rotation.
	  p->bal = 1 - p->r->bal;
	  p->r->bal --;
	  *pp = p->r;
	  p->r->u = pptr;
	  f = p->r->l;
	  p->r->l = p;
	  p->u = p->r;
	  p->r = f;
	  if (f) f->u = p;
	  if (p->bal) return(1);
	}
       else
	{ // case 4: right-left double rotation.
	  f = p->r->l;
	  b = f->r;
	  c = f->l;
	  *pp = f;
	  f->u = pptr;
	  f->r = p->r;
	  f->r->u = f;
	  f->l = p;
	  p->u = f;
	  f->r->l = b;
	  if (b) b->u = f->r;
	  p->r = c;
	  if (c) c->u = p;
	  f->r->bal = (f->bal < 0) ? 1 : 0;
	  f->l->bal = (f->bal > 0) ? -1 : 0;
	  f->bal = 0;
	}
       break;
    default:
       panic("impossible rebalance");
       break;
  }
 return(0);
}
/*
 * Insert b into the tree rooted at *pp.  u is the correct thing to put
 * into the u pointer of a MALBLOCK stored into *pp.  Return value is
 * true if the tree deepened, false if the new MALBLOCK was absorbed
 * without deepening.  This code knows that u cannot be nil unless *pp
 * also is (though the converse is not true).
 *
 * The tree is ordered by [rz1,rz2) extent; overlap means the arena
 * bookkeeping is corrupt.
 */
static int arena_insert(MALBLOCK **pp, MALBLOCK *b, MALBLOCK *u)
{
 MALBLOCK *p;

 p = *pp;
 if (! p)
  { *pp = b;
    b->u = u;
    return(1);
  }
 if (b->rz2 <= p->rz1)
  { // Entirely to the left.
    if (arena_insert(&p->l,b,p))
     { p->bal --;
       return(arena_rebalance(pp,u));
     }
  }
 else if (b->rz1 >= p->rz2)
  { // Entirely to the right.
    if (arena_insert(&p->r,b,p))
     { p->bal ++;
       return(arena_rebalance(pp,u));
     }
  }
 else
  { printf("corrupt arena: new block [%08lx..%08lx) overlaps existing block [%08lx..%08lx)\n",(ULI)b->rz1,(ULI)b->rz2,(ULI)p->rz1,(ULI)p->rz2);
    top();
  }
 return(0);
}
/*
 * Add a new MBK_LIVE MALBLOCK to the arena.
 */
static void arena_add_live(MEMSEG_PRIV_ARENA *a, MALBLOCK *b)
{
 b->bal = 0;
 b->l = 0;
 b->r = 0;
 arena_insert(&a->live,b,0);
}
/*
 * Remove an MBK_LIVE MALBLOCK from the arena.
 *
 * Standard AVL deletion: splice b out (using its in-order successor
 * when it has two children), then walk back up adjusting balance
 * factors and rebalancing.  The <"delrebal"> labeled break/while is a
 * local compiler extension (labeled loops).
 *
 * NOTE(review): locals l and r are assigned but never used.
 */
static void arena_remove_live(MEMSEG_PRIV_ARENA *a, MALBLOCK *b)
{
 MALBLOCK *p;
 MALBLOCK *l;
 MALBLOCK *r;
 MALBLOCK **pp;
 int dr;
 MALBLOCK *s;

 p = b->u;
 l = b->l;
 r = b->r;
 // pp = the pointer that points to b; dr = how p's balance changes
 // when b's subtree shallows (+1 if b is p's left child, -1 if right).
 pp = p ? (p->l == b) ? &p->l : &p->r : &a->live;
 dr = p ? (p->l == b) ? 1 : -1 : 0;
 if (! b->r)
  { if (! b->l)
     { *pp = 0;
     }
    else
     { b->l->u = p;
       *pp = b->l;
     }
  }
 else if (! b->l)
  { b->r->u = p;
    *pp = b->r;
  }
 else if (! b->r->l)
  { // Right child has no left child: it replaces b directly.
    b->r->l = b->l;
    b->l->u = b->r;
    b->r->u = p;
    *pp = b->r;
    p = b->r;
    p->bal = b->bal;
    dr = -1;
  }
 else
  { // General case: splice in b's in-order successor s.
    s = b->r;
    while (s->l) s = s->l;
    s->u->l = s->r;
    if (s->r) s->r->u = s->u;
    s->l = b->l;
    b->l->u = s;
    s->r = b->r;
    b->r->u = s;
    s->bal = b->bal;
    b = s->u;
    s->u = p;
    *pp = s;
    p = b;
    dr = 1;
  }
 if (p)
  { p->bal += dr;
    // Propagate the depth change up the tree, rebalancing as needed.
    while <"delrebal"> (1)
     { switch (p->bal)
	{ case 0:
	     // This subtree shallowed; keep propagating.
	     if (p->u)
	      { p->u->bal += (p == p->u->l) ? 1 : -1;
		p = p->u;
		continue;
	      }
	     break <"delrebal">;
	  case -1: case 1:
	     // Depth unchanged overall; done.
	     break <"delrebal">;
	  case -2: case 2:
	     s = p->u;
	     if (s)
	      { dr = s->bal;
		s->bal += (p == s->l) ? 1 : -1;
		if (arena_rebalance((p==s->l)?&s->l:&s->r,s))
		 { s->bal = dr;
		   break <"delrebal">;
		 }
		p = s;
		continue;
	      }
	     arena_rebalance(&a->live,0);
	     break <"delrebal">;
	  default:
	     panic("impossible delete balance");
	     break;
	}
     }
  }
}
/*
 * The guts of emulated malloc(), factored out because realloc() also
 * wants to call it.  Assumes size has already been checked against
 * ARENA_SIZE.
If size is zero, returns an allocation that is nothing
 * but redzones.
 *
 * Returns the new MALBLOCK, or nil if no free block is big enough.
 * The <"found"> labeled do/break is a local compiler extension.
 */
static MALBLOCK *emu_malloc_internal(MEMSEG_PRIV_ARENA *a, uint32_t size)
{
 uint32_t want;
 MALBLOCK *f;
 MALBLOCK *b;

 // Every allocation carries a redzone on each side.
 want = size + (2 * REDZONE);
 // First-fit search of the free list; the labeled break skips the
 // failure return below.
 do <"found">
  { for (f=a->free;f;f=f->r) if (f->size >= want) break <"found">;
    trc(TRC_ARENA,"no free block is large enough, failing\n");
    return(0);
  } while (0);
 if (f->size < want + REDZONE + ALLOC_GRAIN + REDZONE)
  { // Remainder would be too small to be useful: take the whole free
    // block, unlinking it from the (doubly-linked) free list and
    // reusing its MALBLOCK.
    trc(TRC_ARENA,"using whole free block: %08lx at %08lx\n",(ULI)f->size,(ULI)f->base);
    if (f->r) f->r->l = f->l;
    if (f->l) f->l->r = f->r; else a->free = f->r;
    b = f;
  }
 else
  { // Split: carve the allocation off the front of the free block.
    trc(TRC_ARENA,"found free block: %08lx at %08lx\n",(ULI)f->size,(ULI)f->base);
    b = malloc(sizeof(MALBLOCK));
  }
 b->kind = MBK_LIVE;
 b->rz1 = f->base;
 b->base = b->rz1 + REDZONE;
 b->size = size;
 b->end = b->base + size;
 // rz2 is rounded up so the block consumes a whole ALLOC_GRAIN unit.
 b->rz2 = (b->end + REDZONE + ALLOC_GRAIN - 1) & ~(uint32_t)(ALLOC_GRAIN-1);
 if (f != b)
  { f->size -= b->rz2 - b->rz1;
    f->base = b->rz2;
  }
 arena_add_live(a,b);
 trc(TRC_ARENA,"returning %08lx (internal %p)\n",(ULI)b->base,(void *)b);
 return(b);
}
/*
 * The guts of emulated free(), factored out because realloc() also
 * wants to call it.  Freed blocks go onto the "old" list rather than
 * back to the free list, so stale pointers can be recognized.
 */
static void emu_free_internal(MEMSEG_PRIV_ARENA *a, MALBLOCK *b)
{
 arena_remove_live(a,b);
 b->kind = MBK_OLD;
 b->l = 0;
 b->r = a->old;
 if (b->r) b->r->l = b;
 a->old = b;
}
/*
 * Emulated malloc().  Input size is in %o0, return value replaces it.
 */
#if ALLOC_GRAIN & (ALLOC_GRAIN-1)
#error "emu_malloc code assumes ALLOC_GRAIN is a power of two"
#endif
static void emu_malloc(void)
{
 uint32_t size;
 MALBLOCK *b;

 size = s.regs[R_O0];
 trc(TRC_ARENA,"malloc(%lu)\n",(ULI)size);
 // Reject sizes that could not fit even in an empty arena (also
 // protects the want computation in emu_malloc_internal from
 // overflowing).
 if (size > ARENA_SIZE-(2*REDZONE))
  { trc(TRC_ARENA,"huge allocation, failing\n");
    s.regs[R_O0] = 0;
    return;
  }
 b = emu_malloc_internal(malloc_arena()->priv,size);
 s.regs[R_O0] = b ? b->base : 0;
}
/*
 * Emulated free().  Input block pointer is in %o0.
*/ static void emu_free(void) { MEMSEG *arena; MEMSEG_PRIV_ARENA *a; uint32_t eb; MALBLOCK *b; eb = s.regs[R_O0]; trc(TRC_ARENA,"free(%08lx)\n",(ULI)eb); if (! eb) return; arena = malloc_arena(); if ((eb < arena->base) || (eb >= arena->end)) { trc(TRC_ARENA,"outside arena\n"); printf("wild free(%08lx)\n",(ULI)eb); return; } a = arena->priv; b = arena_find_live(a,eb); if (! b) { trc(TRC_ARENA,"no block found\n"); printf("unfound free(%08lx)\n",(ULI)eb); return; } trc(TRC_ARENA,"found, internal %p\n",(void *)b); emu_free_internal(a,b); } /* * Emulated realloc(). Input block pointer is in %o0, new size in %o1. */ static void emu_realloc(void) { MEMSEG *arena; MEMSEG_PRIV_ARENA *a; uint32_t blk; uint32_t siz; MALBLOCK *b; MALBLOCK *n; uint32_t c; blk = s.regs[R_O0]; siz = s.regs[R_O1]; trc(TRC_ARENA,"realloc(%08lx,%08lx)\n",(ULI)blk,(ULI)siz); arena = malloc_arena(); a = arena->priv; if (blk && ((blk < arena->base) || (blk >= arena->end))) { trc(TRC_ARENA,"outside arena\n"); printf("wild realloc(%08lx)\n",(ULI)blk); return; } if (blk) { b = arena_find_live(a,blk); if (! b) { trc(TRC_ARENA,"no block found\n"); printf("unfound realloc(%08lx)\n",(ULI)blk); s.regs[R_O0] = 0; return; } } else { b = 0; } n = emu_malloc_internal(a,siz); if (! b) { s.regs[R_O0] = 0; return; } if (b) { c = (siz < b->size) ? siz : b->size; if (c > 0) bcopy(&arena->data[b->base-arena->base],&arena->data[n->base-arena->base],c); emu_free_internal(a,b); } s.regs[R_O0] = n->base; } /* * Emulated calloc(). Input numbers are in %o0 and %o1. 
 */
static void emu_calloc(void)
{
 MEMSEG *arena;
 uint32_t sz1;
 uint32_t sz2;
 uint64_t size;
 MALBLOCK *b;

 sz1 = s.regs[R_O0];
 sz2 = s.regs[R_O1];
 trc(TRC_ARENA,"calloc(%08lx,%08lx)\n",(ULI)sz1,(ULI)sz2);
 // 64-bit product so the multiply cannot overflow; the size check
 // below then rejects anything that can't fit.
 size = sz1 * (uint64_t)sz2;
 if (size > ARENA_SIZE-(2*REDZONE))
  { trc(TRC_ARENA,"huge allocation, failing\n");
    s.regs[R_O0] = 0;
    return;
  }
 arena = malloc_arena();
 b = emu_malloc_internal(arena->priv,size);
 // calloc zeroes the allocation; malloc doesn't.
 if (b) bzero(&arena->data[b->base-arena->base],size);
 s.regs[R_O0] = b ? b->base : 0;
}
/*
 * Native exec: used to run a program in the underlying OS.  Most
 * useful for programs like nc or copytolog for which the
 * functionality is what matters and which OS they run under is more
 * or less irrelevant.  (Of course, doing this reduces the ability to
 * test the emulator.  That's inevitable.)
 *
 * Unlike execve(), this returns an errno on failure.  (On success, of
 * course, it doesn't return at all.)
 *
 * This is simpler than sc_execve because we don't have the same vfork
 * headaches; if we're in a vforked child, cleanup is automatic upon
 * exec().
 *
 * We do, though, need to do a file-descriptor dance, to put
 * underlying-OS file descriptors in the places the emulated program
 * thinks they are, in case we are surrounded by redirections.  There
 * are three kinds of file descriptors:
 *	(A) File descriptors that are open in the emulated world and
 *	    are not marked close-on-exec.  For these, we need to
 *	    arrange for the underlying fd to be at the emulated
 *	    descriptor number if the exec succeeds.
 *	(B) File descriptors that are open in the emulated world and
 *	    are marked close-on-exec.  For these, we need to arrange
 *	    for the underlying descriptor to be closed if the exec
 *	    succeeds.
 *	(C) File descriptors that are open in the emulator but that do
 *	    not exist in the emulated world.  For these, we need to
 *	    arrange for the underlying descriptor to be closed if the
 *	    exec succeeds.
 * In all cases, everything should remain untouched if the exec fails.
 * We run with emulator-world close-on-exec clear on everything, so we
 * don't, for example, need to save the current state of close-on-exec
 * on emulator-world descriptors.
 *
 * We have to be careful to not, for example, call trc() while we
 * potentially have the trace-manager communication descriptor moved
 * somewhere else.
 */
static void native_exec(void)
{
#if 0
 /*
  * NOTE(review): this whole arm is compiled out; the live
  * implementation is the #else arm, which just fails with em_ENOSYS.
  * Before this can be enabled, note at least: "reallco" for realloc;
  * "nsave >= save" should presumably be "nsave >= asave";
  * "save_move"/"e2o_move" vs "save_moved"/"e2o_moved" naming
  * mismatches; and several loop headers appear damaged
  * ("for (i=0;i=0;i--)").  Left byte-for-byte as found.
  */
 typedef struct dmove DMOVE;
 struct dmove {
  int from;
  int to;
 } ;
 uint32_t eav;
 uint32_t eep;
 int nargv;
 int nenvp;
 char *path;
 char **argv;
 char **envp;
 NULTERM_STATUS nts_path;
 NULTERM_STATUS *nts_av;
 NULTERM_STATUS *nts_ep;
 int i;
 path = nulterm_scarg(s.regs[R_O0],&nts_path);
 eav = s.regs[R_O1];
 eep = s.regs[R_O2];
 for (nargv=0;mem_get_4(eav+(nargv<<2));nargv++) ;
 for (nenvp=0;mem_get_4(eep+(nenvp<<2));nenvp++) ;
 trc(TRC_EXEC,"%s: path %s nargv %d nenvp %d\n",__func__,path,nargv,nenvp);
 argv = malloc((nargv+1)*sizeof(char *));
 envp = malloc((nenvp+1)*sizeof(char *));
 nts_av = malloc(nargv*sizeof(NULTERM_STATUS));
 nts_ep = malloc(nenvp*sizeof(NULTERM_STATUS));
 argv[nargv] = 0;
 for (i=0;i=0;i--) {
  if (fds[i]) {
   if (i > emaxfd) emaxfd = i;
   efds[i] = (fds[i]->flags & FDF_CLEX) ? 2 : 1;
  } else {
   efds[i] = 0;
  }
 }
 /*
  * Scan all emulator-world fds, recording which are actually open.
  */
 omaxfd = fcntl(0,F_MAXFD,0);
 ofds = malloc(omaxfd+1);
 for (i=omaxfd;i>=0;i--) {
  ofds[i] = ! ((fcntl(i,F_GETFD,0) == -1) && (errno == EBADF));
 }
 /*
  * Find emulated-world fds which we want to keep open, but which
  * aren't already at the correct emulator-world descriptor.  These
  * are descriptors we need to move.  Also figure out which of them
  * have something else there in the emulator already; these need
  * saving elsewhere.
  */
 ne2o = 0;
 nsave = 0;
 e2o_move = 0;
 ae2o = 0;
 save_move = 0;
 asave = 0;
 spare = -1;
 for (i=nfds-1;i>=0;i--) {
  if ((efds[i] == 1) && (fds[i]->fd != i)) {
   if (ne2o >= ae2o) e2o_move = realloc(e2o_move,(ae2o=ne2o+8)*sizeof(DMOVE));
   e2o_move[ne2o] = (DMOVE) { .from = fds[i]->fd, .to = i };
   ne2o ++;
   if ((i <= omaxfd) && ofds[i]) {
    if (nsave >= save) save_moved = reallco(save_moved,(asave=nsave+8)*sizeof(DMOVE));
    do spare ++; while (((spare < nfds) && efds[spare]) || ((spare <= omaxfd) && ofds[spare]));
    save_moved[nsave] = (DMOVE) { .from = i, .to = spare };
    nsave ++;
   }
  }
 }
 /*
  * Do all stashing of descriptors we need to.
  */
 for (i=nsave-1;i>=0;i--) {
  if (dup2(save_moved[i].from,save_moved[i].to) < 0) {
   /*
    * Aak, we can't save it!  Record errno, close any stashed
    * copies we've already made, and fail.  We don't need to dup2
    * back because all saved copies are to unused descriptors; the
    * original descriptors are all still open.  We also don't need
    * to fiddle CLEX because we set CLEX on the copy, not the
    * original.
    */
   e = errno;
   for (i++;i=0;i--) {
    if (dup2(e2o_moved[i].from,e2o_moved[i].to) < 0) {
     /*
      * Aak!  This should never happen.  The only way I can see it
      * happening is if we're moving to a high descriptor that calls
      * for expanding the fd array and that expansion failed.  In
      * any case, save errno, back out what we've done, and fail.
      * That's unlikely enough I'm willing to crash the emulator in
      * this case.
      */
     fprintf(stderr,"Impossible dup2() failure rearranging descriptors: %s\n",strerror(errno));
     exit(1);
    }
   }
#else
 // Native exec is not currently supported; fail the magic trap.
 s.regs[R_O0] = em_ENOSYS;
#endif
}
/*
 * Do I/O: construct a struct iovec array for the emulated buffer(s),
 * breaking it up at MEMSEG boundaries as necessary, and do the I/O.
 *
 * The way this tests accessibility is not quite right.  I think you
 * can, for example, read() into a buffer of which only the beginning
 * is accessible, provided the data read doesn't actually spill over
 * into the inaccessible part.
This errors if any part of the * provided buffer is inaccessible. But so far this seems to be good * enough in practice. */ static int io_rw(int niov, IOV (*getiov)(int, void *), int prot, int (*doio)(struct iovec *, int, void *), void *priv, const char *call) { static int iov_a = 0; static struct iovec *iov_v = 0; int iov_n; int i; IOV iov; MEMSEG *ms; uint32_t part; iov_n = 0; for (i=0;i 0) { ms = memseg_find(iov.base,0,call); if (! (ms->prot & prot)) { printf("%d: %s: %08lx: not accessible\n",mypid,call,(ULI)iov.base); trc(TRC_ERR,"%s: %08lx: not accessible\n",call,(ULI)iov.base); top(); } part = ms->end - iov.base; if (part > iov.len) part = iov.len; (*ms->ops->check)(ms,iov.base-ms->base,part,prot); if (iov_n >= iov_a) iov_v = realloc(iov_v,(iov_a=iov_n+8)*sizeof(*iov_v)); iov_v[iov_n++] = (struct iovec) { .iov_base = ms->data + (iov.base - ms->base), .iov_len = part }; iov.base += part; iov.len -= part; } } return((*doio)(iov_v,iov_n,priv)); } /* * Internal to sc_{,p}{read,write}: get the IOV. */ static IOV getiov_rw(int n, void *pv) { if (n) panic("impossible"); return(((IO_PRIV_RW *)pv)->iov); } /* * Internal to sc_{,p}{read,write}v: get an IOV. */ static IOV getiov_rwv(int n, void *pv) { return((IOV){.base=((IO_PRIV_RWV *)pv)->iov[n][0],.len=((IO_PRIV_RWV *)pv)->iov[n][1]}); } /* * Internal to sc_read and sc_readv: do the read. */ static int doio_read(struct iovec *iov, int niov, void *pv) { return(readv(((IO_PRIV_RW *)pv)->fd->fd,iov,niov)); } /* * Internal to sc_pread and sc_preadv: do the read. */ static int doio_pread(struct iovec *iov, int niov, void *pv) { return(preadv(((IO_PRIV_RW *)pv)->fd->fd,iov,niov,((IO_PRIV_RW *)pv)->off)); } /* * Internal to sc_write and sc_writev: do the write. */ static int doio_write(struct iovec *iov, int niov, void *pv) { return(writev(((IO_PRIV_RW *)pv)->fd->fd,iov,niov)); } /* * Internal to sc_pwrite and sc_pwritev: do the write. 
*/ static int doio_pwrite(struct iovec *iov, int niov, void *pv) { return(pwritev(((IO_PRIV_RW *)pv)->fd->fd,iov,niov,((IO_PRIV_RW *)pv)->off)); } /* * Implement exit(2). */ static SYSCALL_IMPL(sc_exit) { uint32_t ec; ec = scarg(args,0); trc(TRC_SYSCALL,"exit %lu\n",(ULI)ec); exit(ec); } /* * Implement fork(2). * * The return semantics of fork()-the-syscall are undocumented. * UTSLing reveals a somewhat schizoid mismatch. Comments in the libc * wrapper claim the child returns while the parent returns * . But, looking at the kernel source, it looks to me as * though the child returns <0,1>, a mismatch which has probably gone * unnoticed because the libc wrapper immediately throws away the * first return value in the child. This is also true of __vfork14, * and probably any other fork()ish syscalls. Why this, rather than * just returning what is documented as the return value in %o0, I * have no idea. * * We go with what the kernel actually does. */ static SYSCALL_IMPL(sc_fork) { pid_t kid; pid_t parent; fflush(0); kid = fork(); if (kid < 0) SYSCALL_ERR(os2em_errno(errno)); if (kid == 0) { parent = mypid; s.noninteractive = 1; if (forkwait) do_forkwait(); mypid = getpid(); trcmgr_newpid(mypid); trc(TRC_PROC,"fork child, parent %lu\n",(ULI)parent); } else { trc(TRC_PROC,"fork parent, child %lu\n",(ULI)kid); } SYSCALL_RET2(kid?:0,!kid); } /* * Implement read(2). */ static SYSCALL_IMPL(sc_read) { uint32_t d; IO_PRIV_RW priv; int n; syscall_restartable = 1; d = scarg(args,0); priv.iov.base = scarg(args,1); priv.iov.len = scarg(args,2); trc(TRC_SYSCALL,"read %ld, %08lx, %lu\n",(LI)(int32_t)d,(ULI)priv.iov.base,(ULI)priv.iov.len); priv.fd = descriptor_arg(d,P_R,"read"); if (! 
priv.fd) SYSCALL_ERR(em_EBADF); if (priv.iov.len < 1) { trc(TRC_SYSCALL,"read -> 0\n"); SYSCALL_RET(0); } n = io_rw(1,&getiov_rw,P_W,&doio_read,&priv,"read"); if (n < 0) { n = os2em_errno(errno); trc(TRC_SYSCALL,"read -> error %d (%s)\n",n,em_strerror(n)); SYSCALL_ERR(n); } trc(TRC_SYSCALL,"read -> %d\n",n); trace_io_data_em("data",priv.iov.base,n); SYSCALL_RET(n); } /* * Implement write(2). */ static SYSCALL_IMPL(sc_write) { uint32_t d; IO_PRIV_RW priv; int n; syscall_restartable = 1; d = scarg(args,0); priv.iov.base = scarg(args,1); priv.iov.len = scarg(args,2); trc(TRC_SYSCALL,"write %ld, %08lx, %lu\n",(LI)(int32_t)d,(ULI)priv.iov.base,(ULI)priv.iov.len); priv.fd = descriptor_arg(d,P_W,"write"); if (! priv.fd) SYSCALL_ERR(em_EBADF); if (priv.iov.len < 1) { trc(TRC_SYSCALL,"write -> 0\n"); SYSCALL_RET(0); } n = io_rw(1,&getiov_rw,P_R,&doio_write,&priv,"write"); if (n < 0) { n = os2em_errno(errno); trc(TRC_SYSCALL,"write err %d (%s)\n",n,em_strerror(n)); SYSCALL_ERR(n); } trc(TRC_SYSCALL,"write -> %d\n",n); trace_io_data_em("data",priv.iov.base,n); SYSCALL_RET(n); } /* * Implement open(2). * * This is an interesting case, because, when O_CREAT is not set, the * third arg is not actually used. We load it only when we have to, * for the sake of unset-value tracking, and pass 0 to the underlying * open() if O_CREAT is not used. * * There is an ugly hack here. If we just do this naïvely, opening * /dev/stdout opens the emulator's stdout, not the emulated * program's. To make /dev/stdout, /dev/fd/, etc, work right, we * kludge it here: if the thing being open()ed stats as a character * special device, and its major matches /dev/stdout's, then we * convert the open into a dup, somewhat a la the kernel's handling of * such opens. (If there is no /dev/stdout, then the whole thing is * suppressed; we assume that any system with fd-duping opens is set * up with /dev/stdout.) * * We could do string comparison sensing instead, special-casing * "/dev/stdout", "/dev/fd/4", etc. 
I think I prefer this, though * it's a close call. */ static SYSCALL_IMPL(sc_open) { const char *path; uint32_t how; uint32_t perm; int oshow; int osfd; int e; int fdp; NULTERM_STATUS nts; uint32_t d; static int stdio_major = -1; struct stat stb; FD *ofd; if (stdio_major == -1) { if ( (stat("/dev/stdout",&stb) < 0) || ((stb.st_mode & S_IFMT) != S_IFCHR) ) { stdio_major = -2; } else { stdio_major = major(stb.st_rdev); } } path = nulterm_scarg(scarg(args,0),&nts); how = scarg(args,1); switch (how & em_O_ACCMODE) { case em_O_RDONLY: oshow = O_RDONLY; fdp = P_R; break; case em_O_WRONLY: oshow = O_WRONLY; fdp = P_W; break; case em_O_RDWR: oshow = O_RDWR; fdp = P_R|P_W; break; case em_O_NOACCESS: oshow = O_NOACCESS; fdp = 0; break; } #define F(x) do { if (how & em_##x) oshow |= x; } while (0) F(O_NONBLOCK); F(O_APPEND); F(O_SHLOCK); F(O_EXLOCK); F(O_ASYNC); F(O_SYNC); F(O_CREAT); F(O_TRUNC); F(O_EXCL); F(O_DSYNC); F(O_RSYNC); F(O_ALT_IO); F(O_NOCTTY); F(O_DIRECTORY); F(O_PLAIN); #undef F perm = (how & em_O_CREAT) ? scarg(args,2) : 0; if ( (stat(path,&stb) >= 0) && ((stb.st_mode & S_IFMT) == S_IFCHR) && (major(stb.st_rdev) == stdio_major) ) { ofd = descriptor_arg(minor(stb.st_rdev),0,"dup-open"); if (! ofd) SYSCALL_ERR(em_EBADF); if (fdp & ~ofd->prot) SYSCALL_ERR(em_EACCES); osfd = dup(ofd->fd); if (osfd < 0) SYSCALL_ERR(os2em_errno(errno)); } else { osfd = open(path,oshow,perm); if (osfd < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } } d = new_fd(osfd,0,fdp); nulterm_done(&nts); SYSCALL_RET(d); } /* * Implement close(2). */ static SYSCALL_IMPL(sc_close) { uint32_t d; FD *fd; int e; d = scarg(args,0); fd = descriptor_arg(d,0,"close"); if (! fd) SYSCALL_ERR(em_EBADF); add_vfork_backout(VFB_CLOSE,d,*fd); e = close(fd->fd); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); fds[d] = 0; free(fd); SYSCALL_RET(0); } /* * Implement wait4(2). * * The most complicated part here is converting the status value. 
 */
static SYSCALL_IMPL(sc_wait4)
{
 uint32_t wpid;
 uint32_t statusp;
 uint32_t options;
 uint32_t rusagep;
 int e;
 int st;
 struct rusage ru;

 trc(TRC_MAGIC,"wait4(%08lx,%08lx,%08lx,%08lx)\n",(ULI)scarg(args,0),(ULI)scarg(args,1),(ULI)scarg(args,2),(ULI)scarg(args,3));
 // Magic handshake: wait4("Emul","ator","Magi","c:-)") marks the
 // process emulator-aware and fails with EINPROGRESS.
 if ( (scarg(args,0) == 0x456d756c) && // ASCII Emul
      (scarg(args,1) == 0x61746f72) && // ASCII ator
      (scarg(args,2) == 0x4d616769) && // ASCII Magi
      (scarg(args,3) == 0x633a2d29) )  // ASCII c:-)
  { s.flags |= SF_EMU_MAGIC;
    trc(TRC_MAGIC,"Magic syscall set EMU_MAGIC\n");
    SYSCALL_ERR(em_EINPROGRESS);
  }
 wpid = scarg(args,0);
 statusp = scarg(args,1);
 options = scarg(args,2);
 rusagep = scarg(args,3);
 // The contorted negation converts a negative emulated pid without
 // tripping signed-overflow issues on the 32->native int conversion.
 // NOTE(review): rusagep is fetched and ru is filled in by wait4(),
 // but ru is never copied out to rusagep - emulated rusage appears
 // unimplemented; confirm before relying on it.
 e = wait4(
	(wpid & 0x80000000) ? -(int)(int32_t)(uint32_t)-wpid : (int)wpid,
	&st,
	((options & em_WUNTRACED) ? WUNTRACED : 0) |
	((options & em_WNOHANG) ? WNOHANG : 0) |
	((options & em_WALTSIG) ? WALTSIG : 0) |
	((options & em_WNOREAP) ? WNOREAP : 0),
	&ru );
 if (e < 0) SYSCALL_ERR(os2em_errno(errno));
 if (e == 0) SYSCALL_RET(0);
 if (statusp)
  { // Convert the native status to the emulated encoding.  The em_WIF*
    // macros are applied to the native value; presumably the layouts
    // coincide - confirm if a mismatch is ever suspected.
    if (em_WIFEXITED(st))
     { mem_set_4(statusp,em_W_EXITCODE(WEXITSTATUS(st),0));
     }
    else if (em_WIFSIGNALED(st))
     { mem_set_4(statusp,em_W_DEADSIG(WTERMSIG(st),WCOREDUMP(st)));
     }
    else if (em_WIFSTOPPED(st))
     { mem_set_4(statusp,em_W_STOPCODE(WSTOPSIG(st)));
     }
    else
     { printf("Undecipherable wait4 status %#x\n",st);
       top();
     }
  }
 SYSCALL_RET(e);
}
/*
 * Implement link(2).
 */
static SYSCALL_IMPL(sc_link)
{
 const char *p1;
 const char *p2;
 NULTERM_STATUS nts1;
 NULTERM_STATUS nts2;
 int e;

 p1 = nulterm_scarg(scarg(args,0),&nts1);
 p2 = nulterm_scarg(scarg(args,1),&nts2);
 if (link(p1,p2) < 0)
  { e = errno;
    nulterm_done(&nts1);
    nulterm_done(&nts2);
    SYSCALL_ERR(os2em_errno(e));
  }
 nulterm_done(&nts1);
 nulterm_done(&nts2);
 SYSCALL_RET(0);
}
/*
 * Implement unlink(2).
*/ static SYSCALL_IMPL(sc_unlink) { const char *path; NULTERM_STATUS nts; int e; path = nulterm_scarg(scarg(args,0),&nts); if (unlink(path) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement chdir(2). */ static SYSCALL_IMPL(sc_chdir) { const char *path; NULTERM_STATUS nts; int e; path = nulterm_scarg(scarg(args,0),&nts); if (chdir(path) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement fchdir(2). */ static SYSCALL_IMPL(sc_fchdir) { uint32_t d; FD *fd; d = scarg(args,0); fd = descriptor_arg(d,0,"fchdir"); if (! fd) SYSCALL_ERR(em_EBADF); if (fchdir(fd->fd) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement chmod(2). */ static SYSCALL_IMPL(sc_chmod) { const char *path; NULTERM_STATUS nts; uint32_t mode; int e; path = nulterm_scarg(scarg(args,0),&nts); mode = scarg(args,1); if (chmod(path,mode) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement chown(2). */ static SYSCALL_IMPL(sc_chown) { const char *path; NULTERM_STATUS nts; uint32_t eu; uint32_t eg; int e; path = nulterm_scarg(scarg(args,0),&nts); eu = scarg(args,1); eg = scarg(args,2); if (chown(path,(eu==-(uint32_t)1)?-1:eu,(eg==-(uint32_t)1)?-1:eg) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement break(2). This is the actual syscall underlying brk() and * sbrk(), with the differences being dealt with by libc. 
*/ static SYSCALL_IMPL(sc_break) { uint32_t newbrk; MEMSEG *ms; newbrk = scarg(args,0); newbrk = ROUND_UP(newbrk,PAGE_SIZE); if (newbrk > MAXDSIZE) { printf("break: %08lx exceeds data size limit\n",(ULI)newbrk); top(); } if (vm.dbrk == newbrk) { trc(TRC_SYSCALL,"break unchanged at %08lx\n",(ULI)vm.dbrk); } else { trc(TRC_SYSCALL,"break %08lx -> %08lx\n",(ULI)vm.dbrk,(ULI)newbrk); } if (newbrk > vm.dbrk) { ms = memseg_new_malloc(vm.dbrk,newbrk-vm.dbrk,P_R|P_W); bzero(ms->data,ms->size); memseg_clear_conflict(ms->base,ms->size,ms); } else if (newbrk < vm.dbrk) { memseg_clear_conflict(newbrk,vm.dbrk-newbrk,0); } vm.dbrk = newbrk; vm_changed = 1; SYSCALL_RET(0); } /* * Implement getfsstat(2). * * There is a complication here. For some underlying OS versions, we * have to use getvfsstat() instead of getfsstat(); see the comment * near the head of this file, where STATFS_VIA_STATVFS and * GETFSSTAT_VIA_GETVFSSTAT are potentially set. */ static SYSCALL_IMPL(sc_getfsstat) { #ifdef GETFSSTAT_VIA_GETVFSSTAT struct statvfs *osbuf; #define osWAIT ST_WAIT #define osNOWAIT ST_NOWAIT #define osCALL getvfsstat #define osSTORE store_statvfs_as_statfs #else struct statfs *osbuf; #define osWAIT MNT_WAIT #define osNOWAIT MNT_NOWAIT #define osCALL getfsstat #define osSTORE store_statfs #endif uint32_t embuf; uint32_t n; uint32_t emflags; int osflags; int e; int i; embuf = scarg(args,0); n = scarg(args,1); emflags = scarg(args,2); osflags = ((emflags & em_MNT_WAIT) ? osWAIT : 0) | ((emflags & em_MNT_NOWAIT) ? osNOWAIT : 0); n /= 256; if (embuf == 0) { e = osCALL(0,n,osflags); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(e); } else { osbuf = malloc(n*sizeof(*osbuf)); if (! 
osbuf) { printf("Out of memory allocating getfsstat() buffer\n"); top(); } e = osCALL(osbuf,n*sizeof(*osbuf),osflags); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); for (i=0;ifd,osbuf,len,osflags,(void *)osfrom,&osfromlen); } else { n = recvfrom(fd->fd,osbuf,len,osflags,0,0); osfrom = 0; } if (n < 0) { n = os2em_errno(errno); free(osbuf); free(osfrom); trc(TRC_SYSCALL,"recvfrom -> error %d (%s)\n",n,em_strerror(n)); SYSCALL_ERR(n); } freev[0] = osbuf; freev[1] = osfrom; copyout(osbuf,buf,n,"recvfrom",&free2,&freev[0]); trace_io_data_os("data",buf,osbuf,n); if (fromlen) { /* * Ideally, I would like to trace the from address and its length * separately, reporting the data ctually written in each case, * even if the address buffer and the length overlap. But that * requires more code restructuring than I want to get into now, * so I'm punting and just reporting the final values of each. */ put_sockaddr(osfrom,osfromlen,from,fromlen,&plen); trace_io_data_em("from address",from,plen); trace_io_data_em("from length",fromlen,4); } free(osbuf); free(osfrom); trc(TRC_SYSCALL,"recvfrom -> %d\n",n); SYSCALL_RET(n); } /* * Implement access(2). */ static SYSCALL_IMPL(sc_access) { const char *path; uint32_t how; int oshow; int e; NULTERM_STATUS nts; path = nulterm_scarg(scarg(args,0),&nts); how = scarg(args,1); trc(TRC_SYSCALL,"access %s, %08lx (%c%c%c)",path,(ULI)how,(how&em_R_OK)?'R':'-',(how&em_W_OK)?'W':'-',(how&em_X_OK)?'X':'-'); oshow = ((how & em_R_OK) ? R_OK : 0) | ((how & em_W_OK) ? W_OK : 0) | ((how & em_X_OK) ? X_OK : 0); e = access(path,oshow); if (e < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement fchflags(2). */ static SYSCALL_IMPL(sc_fchflags) { uint32_t d; FD *fd; uint32_t emflags; unsigned long int osflags; d = scarg(args,0); emflags = scarg(args,1); fd = descriptor_arg(d,0,"fchmod"); if (! 
fd) SYSCALL_ERR(em_EBADF); osflags = 0; if (emflags & em_UF_NODUMP) osflags |= UF_NODUMP; if (emflags & em_UF_IMMUTABLE) osflags |= UF_IMMUTABLE; if (emflags & em_UF_APPEND) osflags |= UF_APPEND; if (emflags & em_UF_OPAQUE) osflags |= UF_OPAQUE; if (emflags & em_SF_ARCHIVED) osflags |= SF_ARCHIVED; if (emflags & em_SF_IMMUTABLE) osflags |= SF_IMMUTABLE; if (emflags & em_SF_APPEND) osflags |= SF_APPEND; if (emflags & ~(em_UF_NODUMP | em_UF_IMMUTABLE | em_UF_APPEND | em_UF_OPAQUE | em_SF_ARCHIVED | em_SF_IMMUTABLE | em_SF_APPEND)) { printf("fchflags: unrecognized flag bits in 0x%08lx\n",(ULI)emflags); top(); } if (fchflags(fd->fd,osflags) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement kill(2). */ static SYSCALL_IMPL(sc_kill) { uint32_t pid; uint32_t emsig; int ossig; int e; pid = scarg(args,0); emsig = scarg(args,1); ossig = em2os_signal(emsig); if ((ossig == 0) && emsig) SYSCALL_ERR(em_EINVAL); trc(TRC_SIGNAL,"kill(2) %lu with %lu->%d\n",(ULI)pid,(ULI)emsig,ossig); e = kill(pid,ossig); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement getppid(2). */ static SYSCALL_IMPL(sc_getppid) { SYSCALL_RET(getppid()); } /* * Implement dup(2). */ static SYSCALL_IMPL(sc_dup) { uint32_t od; FD *ofd; int osnew; uint32_t emnew; od = scarg(args,0); ofd = descriptor_arg(od,0,"dup"); if (! ofd) SYSCALL_ERR(em_EBADF); osnew = dup(ofd->fd); if (osnew < 0) SYSCALL_ERR(os2em_errno(errno)); emnew = new_fd(osnew,0,ofd->prot); SYSCALL_RET(emnew); } /* * Implement pipe(2). * * Arguably should use the underlying OS's pipe(2), but 1.4T * implemented pipes with AF_LOCAL sockets, so we do too, though I * doubt much if any software cares about the differences. */ static SYSCALL_IMPL(sc_pipe) { int osfd[2]; int emfd[2]; if (socketpair(AF_LOCAL,SOCK_STREAM,0,&osfd[0]) < 0) SYSCALL_ERR(os2em_errno(errno)); emfd[0] = new_fd(osfd[0],0,P_R); emfd[1] = new_fd(osfd[1],0,P_W); SYSCALL_RET2(emfd[0],emfd[1]); } /* * Implement getegid(2). 
*/ static SYSCALL_IMPL(sc_getegid) { SYSCALL_RET(getegid()); } /* * Implement getgid(2). */ static SYSCALL_IMPL(sc_getgid) { SYSCALL_RET(getgid()); } /* * Implement __getlogin, the syscall behind getlogin(). * * Fortunately, it's relatively easy to implement __getlogin in terms * of getlogin. */ static SYSCALL_IMPL(sc___getlogin) { uint32_t ptr; uint32_t len; const char *l; ptr = scarg(args,0); len = scarg(args,1); if (len > em_MAXLOGNAME) len = em_MAXLOGNAME; l = getlogin(); if (! l) l = ""; copy_or_nulpad(l,strlen(l),ptr,len,"__getlogin string",0,0); SYSCALL_RET(0); } /* * Implement ioctl(2). * * We implement only a handful of ioctls. */ static SYSCALL_IMPL(sc_ioctl) { uint32_t d; uint32_t ioc; uint32_t arg; FD *fd; int e; d = scarg(args,0); ioc = scarg(args,1); fd = descriptor_arg(d,0,"ioctl"); if (! fd) SYSCALL_ERR(em_EBADF); switch (ioc) { case em_TIOCGETA: { struct termios tio; arg = scarg(args,2); e = ioctl(fd->fd,TIOCGETA,&tio); if (e < 0) SYSCALL_ERR(os2em_errno(e)); os2em_termios(&tio,arg); SYSCALL_RET(0); } break; case em_TIOCGPGRP: { int iv; arg = scarg(args,2); e = ioctl(fd->fd,TIOCGPGRP,&iv); if (e < 0) SYSCALL_ERR(os2em_errno(e)); mem_set_4(arg,iv); SYSCALL_RET(0); } break; case em_TIOCSPGRP: { int iv; arg = scarg(args,2); iv = mem_get_4(arg); e = ioctl(fd->fd,TIOCSPGRP,&iv); if (e < 0) SYSCALL_ERR(os2em_errno(e)); SYSCALL_RET(0); } break; case em_TIOCGWINSZ: { struct winsize wsz; arg = scarg(args,2); e = ioctl(fd->fd,TIOCGWINSZ,&wsz); if (e < 0) SYSCALL_ERR(os2em_errno(e)); mem_set_2(arg,wsz.ws_row); mem_set_2(arg+2,wsz.ws_col); mem_set_2(arg+4,wsz.ws_xpixel); mem_set_2(arg+6,wsz.ws_ypixel); SYSCALL_RET(0); } break; case em_TIOCSWINSZ: { struct winsize wsz; arg = scarg(args,2); wsz.ws_row = mem_get_2(arg); wsz.ws_col = mem_get_2(arg+2); wsz.ws_xpixel = mem_get_2(arg+4); wsz.ws_ypixel = mem_get_2(arg+6); e = ioctl(fd->fd,TIOCSWINSZ,&wsz); if (e < 0) SYSCALL_ERR(os2em_errno(e)); SYSCALL_RET(0); } break; case em_FIOCLEX: fd->flags |= FDF_CLEX; 
SYSCALL_RET(0); break; case em_TIOCSETAW: { struct termios tio; arg = scarg(args,2); em2os_termios(arg,&tio); e = ioctl(fd->fd,TIOCSETAW,&tio); if (e < 0) SYSCALL_ERR(os2em_errno(e)); SYSCALL_RET(0); } break; case em_FIONCLEX: fd->flags &= ~FDF_CLEX; SYSCALL_RET(0); break; case em_TIOCSETA: { struct termios tio; arg = scarg(args,2); em2os_termios(arg,&tio); e = ioctl(fd->fd,TIOCSETA,&tio); if (e < 0) SYSCALL_ERR(os2em_errno(e)); SYSCALL_RET(0); } break; case em_TIOCGETD: arg = scarg(args,2); mem_set_4(arg,0); // 0 is only ldisc we support SYSCALL_RET(0); break; case em_TIOCSETAF: { struct termios tio; arg = scarg(args,2); em2os_termios(arg,&tio); e = ioctl(fd->fd,TIOCSETAF,&tio); if (e < 0) SYSCALL_ERR(os2em_errno(e)); SYSCALL_RET(0); } break; case em_MTIOCGET: // XXX should we do a real MTIOCGET? SYSCALL_ERR(em_ENOTTY); break; case em_FIONREAD: { int iv; arg = scarg(args,2); e = ioctl(fd->fd,FIONREAD,&iv); if (e < 0) SYSCALL_ERR(os2em_errno(e)); mem_set_4(arg,iv); SYSCALL_RET(0); } break; case em_FIONBIO: { int iv; iv = mem_get_4(scarg(args,2)) ? 1 : 0; e = ioctl(fd->fd,FIONBIO,&iv); if (e < 0) SYSCALL_ERR(os2em_errno(e)); SYSCALL_RET(0); } break; // When adding cases to this switch, add them to print_special_IOCTL too } printf("Unimplemented ioctl %08lx = ",(ULI)ioc); print_decoded_ioctl(stdout,ioc); printf("\n"); top(); } /* * Implement revoke(2). */ static SYSCALL_IMPL(sc_revoke) { const char *p; NULTERM_STATUS nts; int e; p = nulterm_scarg(scarg(args,0),&nts); if (revoke(p) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement symlink(2). 
*/ static SYSCALL_IMPL(sc_symlink) { const char *p1; const char *p2; NULTERM_STATUS nts1; NULTERM_STATUS nts2; int e; p1 = nulterm_scarg(scarg(args,0),&nts1); p2 = nulterm_scarg(scarg(args,1),&nts2); if (symlink(p1,p2) < 0) { e = errno; nulterm_done(&nts1); nulterm_done(&nts2); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts1); nulterm_done(&nts2); SYSCALL_RET(0); } /* * Implement readlink(2). */ static SYSCALL_IMPL(sc_readlink) { unsigned char buf[65536]; uint32_t l; int n; const char *path; int i; uint32_t bufp; NULTERM_STATUS nts; path = nulterm_scarg(scarg(args,0),&nts); bufp = scarg(args,1); l = scarg(args,2); if (l > 65536) l = 65536; n = readlink(path,&buf[0],l); if (n < 0) { i = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(i)); } nulterm_done(&nts); trc(TRC_SYSCALL,"readlink result %.*s\n",n,&buf[0]); for (i=0;i=0;i--) nulterm_done(&nts_argv[i]); for (i=nenvp-1;i>=0;i--) nulterm_done(&nts_envp[i]); lastxp = strdup(path); nulterm_done(&nts_path); trc(TRC_PROC,"execve, %s\n",lastxp); fflush(0); /* * As of vforkbreak(), vm must point to the parent's old VM and * vfork_dropvm to the child's new VM. */ vfork_dropvm = vm; vm = oldvm; vforkbreak(); if (during_vfork && forkwait) do_forkwait(); flush_vfork_backout(); during_vfork = 0; postexec = 1; rv->flags = 0; for (i=nfds-1;i>=0;i--) { FD *fd; fd = fds[i]; if (fd && (fd->flags & FDF_CLEX)) { trc(TRC_EXEC,"closing CLEX fd %d\n",i); add_vfork_backout(VFB_CLOSE,i,*fd); close(fd->fd); fds[i] = 0; free(fd); } } vm_postexec(vfork_dropvm); vfork_dropvm = INITVM(); sig_postexec(); // We don't handle set-ID vm_changed = 1; free(s.lastexec); s.lastexec = lastxp; setproctitle("%s",lastxp); SYSCALL_RET(0); } else { for (i=nargv-1;i>=0;i--) nulterm_done(&nts_argv[i]); for (i=nenvp-1;i>=0;i--) nulterm_done(&nts_envp[i]); nulterm_done(&nts_path); vm_destroy(vm); vm = oldvm; s = oldstate; SYSCALL_ERR(e); } } /* * Implement umask(2). 
*/ static SYSCALL_IMPL(sc_umask) { SYSCALL_RET(umask(scarg(args,0)&0777)&0777); } /* * Implement munmap(2). */ static SYSCALL_IMPL(sc_munmap) { uint32_t addr; uint32_t len; uint32_t o; addr = scarg(args,0); len = scarg(args,1); // if (addr & (PAGE_SIZE-1)) SYSCALL_ERR(em_EINVAL); o = addr; addr = ROUND_DOWN(addr,PAGE_SIZE); len += o - addr; len = ROUND_UP(len,PAGE_SIZE); if (len & 0x80000000) SYSCALL_ERR(em_EINVAL); if (! range_exists(addr,len)) SYSCALL_ERR(em_EINVAL); memseg_clear_conflict(addr,len,0); vm_changed = 1; SYSCALL_RET(0); } /* * Implement madvise(2). */ static SYSCALL_IMPL(sc_madvise) { // madvise() is advisory; we always ignore it SYSCALL_RET(0); } /* * Implement getgroups(2). */ static SYSCALL_IMPL(sc_getgroups) { uint32_t ng; uint32_t gp; gid_t *osv; int actng; int i; ng = scarg(args,0); gp = scarg(args,1); actng = getgroups(0,0); if (ng == 0) SYSCALL_RET(actng); if (actng < ng) ng = actng; osv = malloc(ng*sizeof(gid_t)); i = getgroups(ng,osv); if (i < 0) { i = os2em_errno(errno); free(osv); SYSCALL_ERR(i); } if (i > ng) abort(); ng = i; for (i=0;i em_NGROUPS) || (ng > NGROUPS)) SYSCALL_ERR(em_EINVAL); osv = malloc(ng*sizeof(gid_t)); for (i=0;i= nfds) ? 0 : fds[d2]; if (fd2) { add_vfork_backout(VFB_DUP2,d2,*fd2); e = dup2(fd1->fd,fd2->fd); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); fd2->prot = fd1->prot; } else { e = dup(fd1->fd); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); new = new_fd(e,d2,fd1->prot); if (new != d2) panic("impossible dup2: wanted %d, got %d",d2,new); } SYSCALL_RET(d2); } /* * Implement fcntl(2). * * We implement only a very few fcntls. */ static SYSCALL_IMPL(sc_fcntl) { uint32_t d; uint32_t cmd; FD *fd; int e; int i; d = scarg(args,0); cmd = scarg(args,1); switch (cmd) { case em_F_DUPFD: case em_F_GETFD: case em_F_SETFD: case em_F_GETFL: fd = descriptor_arg(d,0,"fcntl"); if (! 
fd) SYSCALL_ERR(em_EBADF); break; case em_F_CLOSEM: break; default: printf("Unrecognized fcntl %lu\n",(ULI)cmd); top(); break; } switch (cmd) { case em_F_DUPFD: { int newos; newos = dup(fd->fd); if (newos < 0) SYSCALL_ERR(os2em_errno(errno)); e = new_fd(newos,scarg(args,2),fd->prot); } break; case em_F_GETFD: e = (fd->flags & FDF_CLEX) ? 1 : 0; break; case em_F_SETFD: if (scarg(args,2) & 1) { fd->flags |= FDF_CLEX; } else { fd->flags &= ~FDF_CLEX; } e = 0; break; case em_F_GETFL: /* * Only a few flags are part of the documented interface for * fcntl (and thus can be counted upon from our underlying OS). * But so much else here depends on our underlying OS being * NetBSD that I can accept depending on its semantics here. */ { int v; v = fcntl(fd->fd,F_GETFL,0); e = v & 3; #define F(bit) do { if (v & bit) e |= em_##bit; } while (0) F(O_NONBLOCK); F(O_APPEND); F(O_ASYNC); F(O_SYNC); F(O_DSYNC); F(O_RSYNC); F(O_ALT_IO); #undef F } break; case em_F_CLOSEM: if (d & 0x80000000) { e = em_EBADF; break; } for (i=nfds-1;i>=d;i--) { fd = fds[i]; if (fd) { add_vfork_backout(VFB_CLOSE,i,*fd); close(fd->fd); fds[i] = 0; free(fd); } } e = 0; break; default: panic("impossible fcntl"); break; } if (e < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(e); } /* * Implement select(2). We actually implement this in terms of * poll(2), because that API makes it easier to map between emulated * descriptors and emulator descriptors. */ static SYSCALL_IMPL(sc_select) { uint32_t nfds; uint32_t rp; uint32_t wp; uint32_t xp; uint32_t tvp; struct timeval tv; static struct pollfd *pfds = 0; static int *efds = 0; static int apfds = 0; int npfds; int vx; uint32_t rval; uint32_t wval; uint32_t xval; int pev; FD *fd; int pt; int prv; int i; int j; int n; uint32_t *rm; uint32_t *wm; uint32_t *xm; uint32_t ret; nfds = scarg(args,0); rp = scarg(args,1); wp = scarg(args,2); xp = scarg(args,3); tvp = scarg(args,4); npfds = 0; vx = -1; for (i=0;i> 5)) { vx = i >> 5; rval = rp ? 
mem_get_4(rp+((i>>5)<<2)) : 0; wval = wp ? mem_get_4(wp+((i>>5)<<2)) : 0; xval = xp ? mem_get_4(xp+((i>>5)<<2)) : 0; } pev = 0; if ((rval >> (i & 31)) & 1) pev |= POLLIN | POLLRDNORM; if ((wval >> (i & 31)) & 1) pev |= POLLOUT | POLLWRNORM; if ((xval >> (i & 31)) & 1) pev |= POLLERR; if (pev) { fd = descriptor_arg(i,0,"select"); if (! fd) SYSCALL_ERR(em_EBADF); if (npfds >= apfds) { apfds = npfds + 8; pfds = realloc(pfds,apfds*sizeof(struct pollfd)); efds = realloc(efds,apfds*sizeof(int)); } pfds[npfds] = (struct pollfd) { .fd = fd->fd, .events = pev }; efds[npfds] = i; npfds ++; } } if (tvp) { tv.tv_sec = mem_get_8(tvp); tv.tv_usec = mem_get_4(tvp+8); if (tv.tv_usec > 1000000) SYSCALL_ERR(em_EINVAL); if (tv.tv_sec > 1000000) { pt = INFTIM; } else { pt = (tv.tv_sec * 1000) + ((tv.tv_usec + 999) / 1000); } } else { pt = INFTIM; } prv = poll(pfds,npfds,pt); if (prv < 0) SYSCALL_ERR(os2em_errno(errno)); n = (nfds + 31) >> 5; rm = rp ? calloc(n,sizeof(uint32_t)) : 0; wm = wp ? calloc(n,sizeof(uint32_t)) : 0; xm = xp ? calloc(n,sizeof(uint32_t)) : 0; for (i=0;i= nfds)) abort(); if (pfds[i].revents & POLLNVAL) abort(); // a can't-happen if (pfds[i].events & POLLERR) { if (pfds[i].revents & (POLLIN | POLLRDNORM | POLLHUP)) { if (rm) rm[j>>5] |= ((uint32_t)1) << (j & 31); } if (pfds[i].revents & (POLLOUT | POLLWRNORM | POLLHUP)) { if (wm) wm[j>>5] |= ((uint32_t)1) << (j & 31); } if (pfds[i].revents & (POLLERR | POLLHUP)) { if (xm) xm[j>>5] |= ((uint32_t)1) << (j & 31); } } else { if (pfds[i].revents & (POLLIN | POLLRDNORM | POLLERR | POLLHUP)) { if (rm) rm[j>>5] |= ((uint32_t)1) << (j & 31); } if (pfds[i].revents & (POLLOUT | POLLWRNORM | POLLERR | POLLHUP)) { if (wm) wm[j>>5] |= ((uint32_t)1) << (j & 31); } } } ret = 0; for (i=n-1;i>=0;i--) { ret += bitcount32( (rm ? rm[i] : 0) | (wm ? wm[i] : 0) | (xm ? xm[i] : 0) ); if (rp) mem_set_4(rp+(i*4),rm[i]); if (wp) mem_set_4(wp+(i*4),wm[i]); if (xp) mem_set_4(xp+(i*4),xm[i]); } SYSCALL_RET(ret); } /* * Implement fsync(2). 
*/ static SYSCALL_IMPL(sc_fsync) { uint32_t d; FD *fd; int e; d = scarg(args,0); fd = descriptor_arg(d,0,"fsync"); if (! fd) SYSCALL_ERR(em_EBADF); e = fsync(fd->fd); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(e); } /* * Implement setpriority(2). */ static SYSCALL_IMPL(sc_setpriority) { uint32_t which; uint32_t who; uint32_t pri; int oswhich; which = scarg(args,0); who = scarg(args,1); pri = scarg(args,2); switch (which) { case em_PRIO_PROCESS: oswhich = PRIO_PROCESS; break; case em_PRIO_PGRP: oswhich = PRIO_PGRP; break; case em_PRIO_USER: oswhich = PRIO_USER; break; default: SYSCALL_ERR(em_EINVAL); break; } if (setpriority(oswhich,(int)(int32_t)who,(int)(int32_t)pri) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement socket(2). */ static SYSCALL_IMPL(sc_socket) { uint32_t dom; uint32_t type; uint32_t proto; int osdom; int ostype; int osproto; int osfd; const char *domstr; const char *typestr; dom = scarg(args,0); type = scarg(args,1); proto = scarg(args,2); switch (dom) { case em_AF_LOCAL: osdom = AF_LOCAL; domstr = "AF_LOCAL"; break; case em_AF_INET: osdom = AF_INET; domstr = "AF_INET"; break; case em_AF_INET6: osdom = AF_INET6; domstr = "AF_INET6"; break; default: printf("socket: unimplemented AF %lu\n",(ULI)dom); top(); break; } // Fortunately, we can support the same set of types regardless of AF. switch (type) { case em_SOCK_STREAM: ostype = SOCK_STREAM; typestr = "SOCK_STREAM"; break; case em_SOCK_DGRAM: ostype = SOCK_DGRAM; typestr = "SOCK_DGRAM"; break; default: printf("socket: unimplemented type %lu\n",(ULI)type); top(); //SYSCALL_ERR(em_ESOCKTNOSUPPORT); break; } if (proto == 0) { osproto = 0; } else { // A few nonzero proto values get used.... 
if ((proto == em_IPPROTO_TCP) && (type == em_SOCK_STREAM) && ((dom == em_AF_INET) || (dom == em_AF_INET6))) { osproto = IPPROTO_TCP; } else { printf("socket: unimplemented protocol %lu for %s/%s\n",(ULI)proto,domstr,typestr); top(); //SYSCALL_ERR(em_EPROTONOSUPPORT); } } osfd = socket(osdom,ostype,osproto); if (osfd < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(new_fd(osfd,0,P_R|P_W)); } /* * Implement connect(2). */ static SYSCALL_IMPL(sc_connect) { uint32_t d; FD *fd; uint32_t addr; uint32_t alen; int osrv; GETSA sa; d = scarg(args,0); fd = descriptor_arg(d,0,"connect"); if (! fd) SYSCALL_ERR(em_EBADF); addr = scarg(args,1); alen = scarg(args,2); sa = get_sockaddr(addr,alen); if (sa.err) SYSCALL_ERR(sa.err); osrv = connect(fd->fd,sa.sa,sa.salen); free(sa.sa); if (osrv < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement getpriority(2). */ static SYSCALL_IMPL(sc_getpriority) { uint32_t which; uint32_t who; int oswhich; int p; which = scarg(args,0); who = scarg(args,1); switch (which) { case em_PRIO_PROCESS: oswhich = PRIO_PROCESS; break; case em_PRIO_PGRP: oswhich = PRIO_PGRP; break; case em_PRIO_USER: oswhich = PRIO_USER; break; default: SYSCALL_ERR(em_EINVAL); break; } errno = 0; p = getpriority(oswhich,(int)(int32_t)who); if (errno) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET((int32_t)p); } /* * Implement setsockopt(2). We don't support very many socket options. */ static SYSCALL_IMPL(sc_setsockopt) { FD *fd; uint32_t emlevel; uint32_t emoptname; uint32_t emoptvalp; uint32_t emoptlen; fd = descriptor_arg(scarg(args,0),0,"setsockopt"); emlevel = scarg(args,1); emoptname = scarg(args,2); emoptvalp = scarg(args,3); emoptlen = scarg(args,4); if ((emlevel == em_IPPROTO_TCP) && (emoptname == em_TCP_NODELAY)) { int osv; if (!emoptvalp || (emoptlen < 4)) SYSCALL_ERR(em_EINVAL); osv = mem_get_4(emoptvalp) ? 
1 : 0; trace_io_data_em("optval",emoptvalp,4); if (setsockopt(fd->fd,IPPROTO_TCP,TCP_NODELAY,&osv,sizeof(int)) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } printf("Unrecognized setsockopt: level=%lld optname=%lld (optvalp=%#08llx optlenp=%lld)\n", (LLI)(int32_t)emlevel, (LLI)(int32_t)emoptname, (ULLI)emoptvalp, (LLI)(int32_t)emoptlen); top(); } /* * Implement gettimeofday(2). */ static SYSCALL_IMPL(sc_gettimeofday) { uint32_t tvp; uint32_t tzp; struct timeval tv; tvp = scarg(args,0); tzp = scarg(args,1); trc(TRC_SYSCALL,"gettimeofday %08llx, %08lx\n",(ULLI)tvp,(ULI)tzp); gettimeofday(&tv,0); if (tvp) { mem_set_8(tvp,tv.tv_sec); mem_set_4(tvp+8,tv.tv_usec); mem_set_4(tvp+12,0); } if (tzp) { mem_set_4(tzp,0); mem_set_4(tzp+4,0); } SYSCALL_RET(0); } /* * Implement getrusage(2). */ static SYSCALL_IMPL(sc_getrusage) { uint32_t who; uint32_t buf; struct rusage ru; int oswho; who = scarg(args,0); buf = scarg(args,1); trc(TRC_SYSCALL,"getrusage %ld, %08lx\n",(LI)(int32_t)who,(ULI)buf); switch (who) { default: SYSCALL_ERR(em_EINVAL); break; case em_RUSAGE_SELF: oswho = RUSAGE_SELF; break; case em_RUSAGE_CHILDREN: oswho = RUSAGE_CHILDREN; break; } if (getrusage(oswho,&ru) < 0) panic("impossible getrusage failure"); store_rusage(buf,&ru); SYSCALL_RET(0); } /* * Implement getsockopt(2). We don't support very many socket options. */ static SYSCALL_IMPL(sc_getsockopt) { FD *fd; uint32_t emlevel; uint32_t emoptname; uint32_t emoptvalp; uint32_t emoptlenp; uint32_t emoptlen; socklen_t osoptlen; int osv_int; int emlen; uint8_t emvbuf[4]; // size of largest supported value int i; fd = descriptor_arg(scarg(args,0),0,"getsockopt"); emlevel = scarg(args,1); emoptname = scarg(args,2); emoptvalp = scarg(args,3); emoptlenp = scarg(args,4); emoptlen = emoptvalp ? 
mem_get_4(emoptlenp) : 0; if ((emlevel == em_SOL_SOCKET) && (emoptname == em_SO_ERROR)) { uint32_t v; osoptlen = sizeof(int); if (getsockopt(fd->fd,SOL_SOCKET,SO_ERROR,&osv_int,&osoptlen) < 0) SYSCALL_ERR(os2em_errno(errno)); v = (osv_int < 0) ? -(uint32_t)-osv_int : osv_int; emvbuf[0] = v >> 24; emvbuf[1] = (v >> 16) & 0xff; emvbuf[2] = (v >> 8) & 0xff; emvbuf[3] = v & 0xff; emlen = 4; } else { printf("Unrecognized getsockopt: level=%lld optname=%lld (optvalp=%#08llx optlenp=%#08llx [%lld]\n", (LLI)(int32_t)emlevel, (LLI)(int32_t)emoptname, (ULLI)emoptvalp, (ULLI)emoptlenp, (LLI)(int32_t)emoptlen); top(); } if (emoptlen < emlen) emlen = emoptlen; for (i=0;i 0\n"); SYSCALL_RET(0); } if (priv.niov > 1024) SYSCALL_ERR(em_EINVAL); priv.iov = malloc(priv.niov*sizeof(*priv.iov)); p = iovbase; for (i=0;i %d\n",n); if (io_trace_size) { int left; int nt; int x; left = n; if (left > io_trace_size) left = io_trace_size; x = 0; while (left > 0) { nt = left; if (nt > priv.iov[x][1]) nt = priv.iov[x][1]; trace_io_data_em("data",priv.iov[x][0],nt); left -= nt; x ++; } } free(priv.iov); SYSCALL_RET(n); } /* * Implement writev(2). */ static SYSCALL_IMPL(sc_writev) { uint32_t d; IO_PRIV_RWV priv; int n; int i; uint32_t p; uint32_t iovbase; d = scarg(args,0); iovbase = scarg(args,1); priv.niov = scarg(args,2); trc(TRC_SYSCALL,"writev %ld, %08lx, %ld\n",(LI)(int32_t)d,(ULI)iovbase,(LI)(int32_t)priv.niov); priv.fd = descriptor_arg(d,P_W,"writev"); if (! 
priv.fd) SYSCALL_ERR(em_EBADF); if (priv.niov < 1) { trc(TRC_SYSCALL,"writev -> 0\n"); SYSCALL_RET(0); } if (priv.niov > 1024) SYSCALL_ERR(em_EINVAL); priv.iov = malloc(priv.niov*sizeof(*priv.iov)); p = iovbase; for (i=0;i %d\n",n); if (io_trace_size) { int left; int nt; int x; left = n; if (left > io_trace_size) left = io_trace_size; x = 0; while (left > 0) { nt = left; if (nt > priv.iov[x][1]) nt = priv.iov[x][1]; trace_io_data_em("data",priv.iov[x][0],nt); left -= nt; x ++; } } free(priv.iov); SYSCALL_RET(n); } /* * Implement fchown(2). */ static SYSCALL_IMPL(sc_fchown) { uint32_t d; FD *fd; uint32_t eu; uint32_t eg; d = scarg(args,0); eu = scarg(args,1); eg = scarg(args,2); fd = descriptor_arg(d,0,"fchown"); if (! fd) SYSCALL_ERR(em_EBADF); if (fchown(fd->fd,(eu==-(uint32_t)1)?-1:eu,(eg==-(uint32_t)1)?-1:eg) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement fchmod(2). */ static SYSCALL_IMPL(sc_fchmod) { uint32_t d; FD *fd; uint32_t mode; d = scarg(args,0); mode = scarg(args,1); fd = descriptor_arg(d,0,"fchmod"); if (! fd) SYSCALL_ERR(em_EBADF); if (fchmod(fd->fd,mode) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement rename(2). */ static SYSCALL_IMPL(sc_rename) { const char *p1; const char *p2; NULTERM_STATUS nts1; NULTERM_STATUS nts2; int r; int e; p1 = nulterm_scarg(scarg(args,0),&nts1); p2 = nulterm_scarg(scarg(args,1),&nts2); r = rename(p1,p2); e = errno; nulterm_done(&nts1); nulterm_done(&nts2); if (r < 0) SYSCALL_ERR(os2em_errno(e)); else SYSCALL_RET(0); } /* * Implement flock(2). */ static SYSCALL_IMPL(sc_flock) { uint32_t d; uint32_t op; int osop; FD *fd; int e; d = scarg(args,0); op = scarg(args,1); osop = ((op & em_LOCK_EX) ? LOCK_EX : 0) | ((op & em_LOCK_SH) ? LOCK_SH : 0) | ((op & em_LOCK_NB) ? LOCK_NB : 0) | ((op & em_LOCK_UN) ? LOCK_UN : 0); fd = descriptor_arg(d,0,"flock"); if (! 
fd) SYSCALL_ERR(em_EBADF); e = flock(fd->fd,osop); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(e); } /* * Implement mkfifo(2). */ static SYSCALL_IMPL(sc_mkfifo) { const char *path; NULTERM_STATUS nts; uint32_t mode; int e; path = nulterm_scarg(scarg(args,0),&nts); mode = scarg(args,1); e = mkfifo(path,mode); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement sendto(2). * * XXX Leverage io_rw maybe? */ static SYSCALL_IMPL(sc_sendto) { uint32_t d; uint32_t msgptr; uint32_t msglen; uint32_t emflags; uint32_t toptr; uint32_t tolen; FD *fd; MEMSEG *ms; int osflags; unsigned char *tbuf; const void *osbuf; GETSA sa; int osrv; d = scarg(args,0); msgptr = scarg(args,1); msglen = scarg(args,2); emflags = scarg(args,3); toptr = scarg(args,4); tolen = scarg(args,5); fd = descriptor_arg(d,P_W,"sendto"); if (! fd) SYSCALL_ERR(em_EBADF); ms = memseg_find(msgptr,0,"sendto"); if (! (ms->prot & P_R)) { printf("%d: sendto %08lx: not accessible\n",mypid,(ULI)msgptr); trc(TRC_ERR,"sendto %08lx: not accessible\n",(ULI)msgptr); top(); } if (ms->end-msgptr >= msglen) { (*ms->ops->check)(ms,msgptr-ms->base,msglen,P_R); osbuf = ms->data + (msgptr - ms->base); tbuf = 0; } else { tbuf = malloc(msglen); if (! tbuf) SYSCALL_ERR(em_ENOBUFS); copyin(tbuf,msgptr,msglen,"sendto dtaa",&free,tbuf); osbuf = tbuf; } if (tolen > 0) { sa = get_sockaddr(toptr,tolen); if (sa.err) { free(tbuf); SYSCALL_ERR(sa.err); } } else { sa.sa = 0; sa.salen = 0; } osflags = em2os_MSG_flags(emflags); osrv = sendto(fd->fd,osbuf,msglen,osflags,sa.sa,sa.salen); if (osrv < 0) { osrv = errno; free(tbuf); free(sa.sa); SYSCALL_ERR(osrv); } free(tbuf); free(sa.sa); SYSCALL_RET(osrv); } /* * Implement socketpair(2). 
*/ static SYSCALL_IMPL(sc_socketpair) { uint32_t dom; uint32_t type; uint32_t proto; uint32_t fdsp; int osdom; int ostype; int osd[2]; uint32_t emd[2]; dom = scarg(args,0); type = scarg(args,1); proto = scarg(args,2); fdsp = scarg(args,3); switch (dom) { case em_AF_LOCAL: osdom = AF_LOCAL; break; case em_AF_INET: osdom = AF_INET; break; case em_AF_INET6: osdom = AF_INET6; break; default: printf("socketpair: unimplemented AF %lu\n",(ULI)dom); top(); //SYSCALL_ERR(em_EPROTONOSUPPORT); break; } // Fortunately, we can support the same set of types regardless of AF. switch (type) { case em_SOCK_STREAM: ostype = SOCK_STREAM; break; case em_SOCK_DGRAM: ostype = SOCK_DGRAM; break; default: printf("socketpair: unimplemented type %lu\n",(ULI)type); top(); //SYSCALL_ERR(em_ESOCKTNOSUPPORT); break; } if (proto != 0) { printf("socketpair: protocol != 0 -> EPROTONOSUPPORT\n"); SYSCALL_ERR(em_EPROTONOSUPPORT); } if (socketpair(osdom,ostype,0,&osd[0]) < 0) SYSCALL_ERR(os2em_errno(errno)); emd[0] = new_fd(osd[0],0,P_R|P_W); emd[1] = new_fd(osd[1],0,P_R|P_W); mem_set_4(fdsp,emd[0]); mem_set_4(fdsp+4,emd[1]); SYSCALL_RET(0); } /* * Implement mkdir(2). */ static SYSCALL_IMPL(sc_mkdir) { const char *path; NULTERM_STATUS nts; uint32_t mode; int e; path = nulterm_scarg(scarg(args,0),&nts); mode = scarg(args,1); if (mkdir(path,mode) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement rmdir(2). */ static SYSCALL_IMPL(sc_rmdir) { const char *path; NULTERM_STATUS nts; int e; path = nulterm_scarg(scarg(args,0),&nts); if (rmdir(path) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement utimes(2). 
*/ static SYSCALL_IMPL(sc_utimes) { const char *path; NULTERM_STATUS nts; uint32_t tp; struct timeval t[2]; struct timeval *tvp; int e; path = nulterm_scarg(scarg(args,0),&nts); tp = scarg(args,1); if (tp == 0) { tvp = 0; } else { t[0].tv_sec = mem_get_8(tp); t[0].tv_usec = mem_get_4(tp+8); t[1].tv_sec = mem_get_8(tp+16); t[1].tv_usec = mem_get_4(tp+24); tvp = &t[0]; } if (utimes(path,tvp) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement statfs(2). * * There is a complication here. For some underlying OS versions, we * have to use statvfs() instead of statfs(); see the comment near the * head of this file, where STATFS_VIA_STATVFS and * GETFSSTAT_VIA_GETVFSSTAT are potentially set. */ static SYSCALL_IMPL(sc_statfs) { #ifdef STATFS_VIA_STATVFS struct statvfs sf; #define osCALL statvfs #define osCOPY store_statvfs_as_statfs #else struct statfs sf; #define osCALL statfs #define osCOPY store_statfs #endif uint32_t bufp; int e; NULTERM_STATUS nts; const char *path; path = nulterm_scarg(scarg(args,0),&nts); bufp = scarg(args,1); e = osCALL(path,&sf); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); osCOPY(bufp,&sf,"statfs",0,0); SYSCALL_RET(0); #undef osCALL #undef osCOPY } /* * Implement fstatfs(2). * * There is a complication here. For some underlying OS versions, we * have to use fstatvfs() instead of fstatfs(); see the comment near * the head of this file, where STATFS_VIA_STATVFS and * GETFSSTAT_VIA_GETVFSSTAT are potentially set. */ static SYSCALL_IMPL(sc_fstatfs) { #ifdef STATFS_VIA_STATVFS struct statvfs sf; #define osCALL fstatvfs #define osCOPY store_statvfs_as_statfs #else struct statfs sf; #define osCALL fstatfs #define osCOPY store_statfs #endif uint32_t d; uint32_t bufp; FD *fd; int e; d = scarg(args,0); bufp = scarg(args,1); fd = descriptor_arg(d,0,"fstatfs"); if (! 
fd) SYSCALL_ERR(em_EBADF); e = osCALL(fd->fd,&sf); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); osCOPY(bufp,&sf,"statfs",0,0); SYSCALL_RET(0); #undef osCALL #undef osCOPY } /* * Implement pread(2). * * The comment in sys_pread() in vfs_syscalls.c, appearing to give the * syscall arguments, lies. The truth, in syscallargs.h, has a pad * value between nbyte and offset. */ static SYSCALL_IMPL(sc_pread) { uint32_t d; IO_PRIV_RW priv; int n; syscall_restartable = 1; d = scarg(args,0); priv.iov.base = scarg(args,1); priv.iov.len = scarg(args,2); // 3 is unused padding priv.off = (((uint64_t)scarg(args,4)) << 32) | scarg(args,5); trc(TRC_SYSCALL,"pread %ld, %08lx, %ld, %016llx\n",(LI)(int32_t)d,(ULI)priv.iov.base,(LI)(int32_t)priv.iov.len,(ULLI)priv.off); priv.fd = descriptor_arg(d,P_R,"pread"); if (! priv.fd) SYSCALL_ERR(em_EBADF); if (priv.iov.len < 1) { trc(TRC_SYSCALL,"pread -> 0\n"); SYSCALL_RET(0); } n = io_rw(1,&getiov_rw,P_W,&doio_pread,&priv,"pread"); if (n < 0) { n = os2em_errno(errno); trc(TRC_SYSCALL,"pread -> error %d (%s)\n",n,em_strerror(n)); SYSCALL_ERR(n); } trc(TRC_SYSCALL,"pread -> %d\n",n); trace_io_data_em("data",priv.iov.base,n); SYSCALL_RET(n); } /* * Implement pwrite(2). * * The comment in sys_pwrite() in vfs_syscalls.c, appearing to give the * syscall arguments, lies. The truth, in syscallargs.h, has a pad * value between nbyte and offset. */ static SYSCALL_IMPL(sc_pwrite) { uint32_t d; IO_PRIV_RW priv; int n; syscall_restartable = 1; d = scarg(args,0); priv.iov.base = scarg(args,1); priv.iov.len = scarg(args,2); // 3 is unused padding priv.off = (((uint64_t)scarg(args,4)) << 32) | scarg(args,5); trc(TRC_SYSCALL,"pwrite %ld, %08lx, %ld, %016llx\n",(LI)(int32_t)d,(ULI)priv.iov.base,(LI)(int32_t)priv.iov.len,(ULLI)priv.off); priv.fd = descriptor_arg(d,P_W,"pwrite"); if (! 
priv.fd) SYSCALL_ERR(em_EBADF); if (priv.iov.len < 1) { trc(TRC_SYSCALL,"pwrite -> 0\n"); SYSCALL_RET(0); } n = io_rw(1,&getiov_rw,P_R,&doio_pwrite,&priv,"pwrite"); if (n < 0) { n = os2em_errno(errno); trc(TRC_SYSCALL,"pwrite -> error %d (%s)\n",n,em_strerror(n)); SYSCALL_ERR(n); } trc(TRC_SYSCALL,"pwrite -> %d\n",n); trace_io_data_em("data",priv.iov.base,n); SYSCALL_RET(n); } /* * Implement setgid(2). */ static SYSCALL_IMPL(sc_setgid) { uint32_t gid; gid = scarg(args,0); if (setgid(gid) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement setegid(2). */ static SYSCALL_IMPL(sc_setegid) { uint32_t gid; gid = scarg(args,0); if (setegid(gid) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement seteuid(2). */ static SYSCALL_IMPL(sc_seteuid) { uint32_t uid; uid = scarg(args,0); if (seteuid(uid) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement getrlimit(2). * * The 1.4T getrlimit manpage lies by omission. It does not list * EINVAL, which is generated if the resource ID value is invalid. */ static SYSCALL_IMPL(sc_getrlimit) { uint32_t res; uint32_t ptr; int osres; struct rlimit rl; res = scarg(args,0); ptr = scarg(args,1); switch (res) { case em_RLIMIT_CPU: osres = RLIMIT_CPU; break; case em_RLIMIT_FSIZE: osres = RLIMIT_FSIZE; break; case em_RLIMIT_DATA: osres = RLIMIT_DATA; break; case em_RLIMIT_STACK: osres = RLIMIT_STACK; break; case em_RLIMIT_CORE: osres = RLIMIT_CORE; break; case em_RLIMIT_RSS: osres = RLIMIT_RSS; break; case em_RLIMIT_MEMLOCK: osres = RLIMIT_MEMLOCK; break; case em_RLIMIT_NPROC: osres = RLIMIT_NPROC; break; case em_RLIMIT_NOFILE: osres = RLIMIT_NOFILE; break; default: SYSCALL_ERR(em_EINVAL); break; } if (getrlimit(osres,&rl) < 0) SYSCALL_ERR(os2em_errno(errno)); store_rlimit(ptr,&rl); SYSCALL_RET(0); } /* * Implement setrlimit(2). * * The 1.4T setrlimit manpage lies by omission. It does not list * EINVAL, which is generated if the resource ID value is invalid. 
*/ static SYSCALL_IMPL(sc_setrlimit) { uint32_t res; uint32_t ptr; int osres; struct rlimit rl; res = scarg(args,0); ptr = scarg(args,1); switch (res) { case em_RLIMIT_CPU: osres = RLIMIT_CPU; break; case em_RLIMIT_FSIZE: osres = RLIMIT_FSIZE; break; case em_RLIMIT_DATA: osres = RLIMIT_DATA; break; case em_RLIMIT_STACK: osres = RLIMIT_STACK; break; case em_RLIMIT_CORE: osres = RLIMIT_CORE; break; case em_RLIMIT_RSS: osres = RLIMIT_RSS; break; case em_RLIMIT_MEMLOCK: osres = RLIMIT_MEMLOCK; break; case em_RLIMIT_NPROC: osres = RLIMIT_NPROC; break; case em_RLIMIT_NOFILE: osres = RLIMIT_NOFILE; break; default: SYSCALL_ERR(em_EINVAL); break; } load_rlimit(ptr,&rl); if (setrlimit(osres,&rl) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement mmap(2). */ static SYSCALL_IMPL(sc_mmap) { uint32_t addr; uint32_t len; uint32_t prot; uint32_t flags; uint32_t fd; uint64_t offset; unsigned int bwp; uint32_t end; void *mmrv; addr = scarg(args,0); len = scarg(args,1); prot = scarg(args,2); flags = scarg(args,3); fd = scarg(args,4); // 5 not used - padding offset = (scarg(args,6) * 0x100000000ULL) | scarg(args,7); if (flags & em_MAP_COPY) flags = (flags & ~em_MAP_COPY) | em_MAP_PRIVATE; if ((flags & (em_MAP_SHARED|em_MAP_PRIVATE)) == (em_MAP_SHARED|em_MAP_PRIVATE)) SYSCALL_ERR(em_EINVAL); if (len & 0x80000000) SYSCALL_ERR(em_EINVAL); bwp = offset & (PAGE_SIZE-1); offset -= bwp; len += bwp; len = ROUND_UP(len,PAGE_SIZE); if (len & 0x80000000) SYSCALL_ERR(em_EINVAL); if (flags & em_MAP_FIXED) { addr -= bwp; if (addr & (PAGE_SIZE-1)) SYSCALL_ERR(em_EINVAL); if (addr >= USRSTACK) SYSCALL_ERR(em_EINVAL); end = addr + len; if (end < addr) SYSCALL_ERR(em_EINVAL); } else { addr = find_space(MAXDSIZE,len,USRSTACK); } if (flags & em_MAP_ANON) { if (fd != -(uint32_t)1) SYSCALL_ERR(em_EINVAL); if (len == 0) SYSCALL_RET(addr); mmrv = mmap( 0, len, ((prot&em_PROT_READ)?PROT_READ:0) | ((prot&em_PROT_WRITE)?PROT_WRITE:0) | ((prot&em_PROT_EXEC)?PROT_EXEC:0), MAP_ANON | ((flags & 
em_MAP_SHARED) ? MAP_SHARED : 0) | ((flags & (em_MAP_PRIVATE|em_MAP_COPY)) ? MAP_PRIVATE : 0), -1, 0 ); if (mmrv == MAP_FAILED) SYSCALL_ERR(os2em_errno(errno)); } else { FD *f; if (offset % PAGE_SIZE) { printf("File mmap with offset not a multiple of %d not implemented\n",PAGE_SIZE); top(); } f = descriptor_arg(fd,0,"mmap"); if (! f) SYSCALL_ERR(em_EBADF); if ((prot & (em_PROT_READ|em_PROT_EXEC)) && !(f->prot & P_R)) SYSCALL_ERR(em_EACCES); if ((prot & em_PROT_WRITE) && (flags & em_MAP_SHARED) && !(f->prot & P_W)) SYSCALL_ERR(em_EACCES); if (len == 0) SYSCALL_RET(addr); mmrv = mmap( 0, len, ((prot&em_PROT_READ)?PROT_READ:0) | ((prot&em_PROT_WRITE)?PROT_WRITE:0) | ((prot&em_PROT_EXEC)?PROT_EXEC:0), MAP_FILE | ((flags & em_MAP_SHARED) ? MAP_SHARED : 0) | ((flags & (em_MAP_PRIVATE|em_MAP_COPY)) ? MAP_PRIVATE : 0), f->fd, offset ); if (mmrv == MAP_FAILED) SYSCALL_ERR(os2em_errno(errno)); } memseg_clear_conflict(addr,len,memseg_new_mmap(addr,len,((prot&em_PROT_READ)?P_R:0)|((prot&em_PROT_WRITE)?P_W:0)|((prot&em_PROT_EXEC)?P_X:0),flags,mmrv)); vm_changed = 1; SYSCALL_RET(addr); } /* * Implement lseek(2). */ static SYSCALL_IMPL(sc_lseek) { uint32_t d; uint64_t off; uint32_t whence; FD *fd; off_t e; int oswhence; d = scarg(args,0); // args[1] is unused padding off = (((uint64_t)scarg(args,2)) << 32) | scarg(args,3); whence = scarg(args,4); fd = descriptor_arg(d,0,"lseek"); if (! fd) SYSCALL_ERR(em_EBADF); switch (whence) { case em_SEEK_SET: oswhence = SEEK_SET; break; case em_SEEK_CUR: oswhence = SEEK_CUR; break; case em_SEEK_END: oswhence = SEEK_END; break; default: SYSCALL_ERR(em_EINVAL); break; } e = lseek(fd->fd,off,oswhence); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET2((e>>16)>>16,e); } /* * Implement ftruncate(2). */ static SYSCALL_IMPL(sc_ftruncate) { uint32_t d; uint64_t off; FD *fd; d = scarg(args,0); // args[1] is unused padding off = (((uint64_t)scarg(args,2)) << 32) | scarg(args,3); fd = descriptor_arg(d,P_W,"ftruncate"); if (! 
fd) SYSCALL_ERR(em_EBADF); if (ftruncate(fd->fd,off) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement __sysctl, the syscall behind sysctl(3). */ static SYSCALL_IMPL(sc___sysctl) { uint32_t mibp; uint32_t v; int nmib; uint32_t mib[16]; int i; // MIB length in range? v = scarg(args,1); if ((v < 2) || (v > 16)) { trc(TRC_SYSCALL,"sysctl: MIB length %lu not in range 2..16 -> EINVAL\n",(ULI)v); SYSCALL_ERR(em_EINVAL); } nmib = v; // We don't support setting (yet) if (scarg(args,4)) { trc(TRC_SYSCALL,"sysctl: setting not yet supported -> EPERM\n"); SYSCALL_ERR(em_EPERM); } // Read MIB mibp = scarg(args,0); for (i=nmib-1;i>=0;i--) mib[i] = mem_get_4(mibp+(i<<2)); trace_io_data_em("MIB",mibp,nmib*4); // Do it! switch (mib[0]) { case em_CTL_KERN: if (em_sysctl_kern(mib+1,nmib-1,scarg(args,2),scarg(args,3),rv)) return; break; case em_CTL_VM: if (em_sysctl_vm(mib+1,nmib-1,scarg(args,2),scarg(args,3),rv)) return; break; case em_CTL_HW: if (em_sysctl_hw(mib+1,nmib-1,scarg(args,2),scarg(args,3),rv)) return; break; } printf("Unsupported sysctl "); for (i=0;ifd,tvp) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement poll(2). */ static SYSCALL_IMPL(sc_poll) { uint32_t epfds; uint32_t npfds; uint32_t timeout; uint32_t *fdv = 0; uint16_t *evv = 0; struct pollfd *pfds = 0; unsigned char *inval = 0; int i; uint32_t efd; FD *fd; uint16_t ev; uint32_t base; int ninval; int retv; epfds = scarg(args,0); npfds = scarg(args,1); timeout = scarg(args,2); // Would npfds*sizeof(struct pollfds) overflow? ENOMEM. 
if ( (npfds >= 0x20000000) || // emulated (npfds >= ((~(size_t)0) / sizeof(struct pollfd))) // emulator ) SYSCALL_ERR(em_ENOMEM); pfds = malloc(npfds*sizeof(struct pollfd)); inval = malloc(npfds); fdv = malloc(npfds*sizeof(uint32_t)); evv = malloc(npfds*sizeof(uint16_t)); #define FREETHINGS() do { free(pfds); free(inval); free(fdv); free(evv); } while (0) if (!pfds || !inval || !fdv || !evv) { FREETHINGS(); SYSCALL_ERR(em_ENOMEM); } ninval = 0; for (i=0;i ",i,(LI)(int32_t)efd,fd->fd,(ULI)ev); inval[i] = 0; pfds[i].fd = fd->fd; if (trc_if(TRC_SYSCALL)) { const char *pref; pref = ""; #define BIT(n) do { if (ev & em_##n) { trc(TRC_SYSCALL,"%s%s",pref,#n); pref = "|"; } } while (0) BIT(POLLIN); BIT(POLLPRI); BIT(POLLOUT); BIT(POLLRDNORM); BIT(POLLWRNORM); BIT(POLLRDBAND); BIT(POLLWRBAND); #undef BIT if (ev & ~(em_POLLIN|em_POLLPRI|em_POLLOUT|em_POLLRDNORM| em_POLLWRNORM|em_POLLRDBAND|em_POLLWRBAND)) { trc(TRC_SYSCALL,"%s0x%lx",pref, (ULI)(ev & ~(em_POLLIN|em_POLLPRI|em_POLLOUT|em_POLLRDNORM| em_POLLWRNORM|em_POLLRDBAND|em_POLLWRBAND)) ); pref = "|"; } if (! pref[0]) trc(TRC_SYSCALL,"0"); trc(TRC_SYSCALL,"\n"); } pfds[i].events = ((ev & em_POLLIN) ? POLLIN : 0) | ((ev & em_POLLPRI) ? POLLPRI : 0) | ((ev & em_POLLOUT) ? POLLOUT : 0) | ((ev & em_POLLRDNORM) ? POLLRDNORM : 0) | ((ev & em_POLLWRNORM) ? POLLWRNORM : 0) | ((ev & em_POLLRDBAND) ? POLLRDBAND : 0) | ((ev & em_POLLWRBAND) ? 
POLLWRBAND : 0); } } if (ninval) { for (i=0;ifd,osbuf,size); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); oso = 0; emo = 0; left = size; while (oso < e) { if (oso+_DIRENT_MINSIZE(osde) > e) panic("getdents() partial (min)"); osde = (void *)(osbuf+oso); if (oso+osde->d_namlen > e) panic("getdents() partial (actual)"); if (osde->d_namlen > 255) { printf("getdents: d_namlen %d > 255, skipping entry\n",(int)osde->d_namlen); } else { l = 8 + ROUND_UP(osde->d_namlen+1,4); if (left < l) panic("getdents() overrun"); mem_set_4(buf+emo,osde->d_fileno); mem_set_2(buf+emo+4,l); switch (osde->d_type) { case DT_UNKNOWN: dt = em_DT_UNKNOWN; break; case DT_FIFO: dt = em_DT_FIFO; break; case DT_CHR: dt = em_DT_CHR; break; case DT_DIR: dt = em_DT_DIR; break; case DT_BLK: dt = em_DT_BLK; break; case DT_REG: dt = em_DT_REG; break; case DT_LNK: dt = em_DT_LNK; break; case DT_SOCK: dt = em_DT_SOCK; break; case DT_WHT: dt = em_DT_WHT; break; default: dt = em_DT_UNKNOWN; break; } mem_set_1(buf+emo+6,dt); mem_set_1(buf+emo+7,osde->d_namlen); copyout(&osde->d_name[0],buf+emo+8,osde->d_namlen,"getdents",&free,osbuf); copyout(&nulbuf[0],buf+emo+8+osde->d_namlen,l-(8+osde->d_namlen),"getdents",&free,osbuf); emo += l; } oso += osde->d_reclen; } SYSCALL_RET(emo); } /* * Implement lchmod(2). */ static SYSCALL_IMPL(sc_lchmod) { const char *path; NULTERM_STATUS nts; uint32_t mode; int e; path = nulterm_scarg(scarg(args,0),&nts); mode = scarg(args,1); if (lchmod(path,mode) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement lchown(2). */ static SYSCALL_IMPL(sc_lchown) { const char *path; NULTERM_STATUS nts; uint32_t eu; uint32_t eg; int e; path = nulterm_scarg(scarg(args,0),&nts); eu = scarg(args,1); eg = scarg(args,2); if (lchown(path,(eu==-(uint32_t)1)?-1:eu,(eg==-(uint32_t)1)?-1:eg) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement lutimes(2). 
*/ static SYSCALL_IMPL(sc_lutimes) { const char *path; NULTERM_STATUS nts; uint32_t tp; struct timeval t[2]; struct timeval *tvp; int e; path = nulterm_scarg(scarg(args,0),&nts); tp = scarg(args,1); if (tp == 0) { tvp = 0; } else { t[0].tv_sec = mem_get_8(tp); t[0].tv_usec = mem_get_4(tp+8); t[1].tv_sec = mem_get_8(tp+16); t[1].tv_usec = mem_get_4(tp+24); tvp = &t[0]; } if (lutimes(path,tvp) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement __stat13, versioned stat(2). */ static SYSCALL_IMPL(sc___stat13) { const char *path; uint32_t stp; struct stat stb; int e; NULTERM_STATUS nts; path = nulterm_scarg(scarg(args,0),&nts); stp = scarg(args,1); e = stat(path,&stb); if (e < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); store_stat(stp,&stb); SYSCALL_RET(0); } /* * Implement __fstat13, versioned fstat(2). */ static SYSCALL_IMPL(sc___fstat13) { uint32_t d; uint32_t stp; struct stat stb; FD *fd; d = scarg(args,0); stp = scarg(args,1); fd = descriptor_arg(d,0,"__fstat13"); if (! fd) SYSCALL_ERR(em_EBADF); if (fstat(fd->fd,&stb) < 0) panic("impossible fstat failure"); store_stat(stp,&stb); SYSCALL_RET(0); } /* * Implement __lstat13, versioned lstat(2). */ static SYSCALL_IMPL(sc___lstat13) { const char *path; uint32_t stp; struct stat stb; int e; NULTERM_STATUS nts; path = nulterm_scarg(scarg(args,0),&nts); stp = scarg(args,1); e = lstat(path,&stb); if (e < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); store_stat(stp,&stb); SYSCALL_RET(0); } /* * Implement __sigaltstack14, versioned sigaltstack(2). * * We don't yet support on-signal-stack signal delivery. 
*/
static SYSCALL_IMPL(sc___sigaltstack14)
{ uint32_t ssp;
  uint32_t ossp;
  uint32_t ss_base;
  uint32_t ss_size;
  uint32_t ss_flags;
  // ssp points at the new (emulated) stack spec, or 0 to just query;
  // ossp, if nonzero, receives the previous settings.
  ssp = scarg(args,0);
  ossp = scarg(args,1);
  if (ssp)
   { // Emulated stack_t layout: ss_base, ss_size, ss_flags, four
     // bytes each.
     ss_base = mem_get_4(ssp);
     ss_size = mem_get_4(ssp+4);
     ss_flags = mem_get_4(ssp+8);
   }
  if (ossp)
   { // Report the old settings before (possibly) changing them.
     mem_set_4(ossp,s.sigstack_base);
     mem_set_4(ossp+4,s.sigstack_size);
     mem_set_4(ossp+8,(s.onsigstack?em_SS_ONSTACK:0)|(s.sigstack_enabled?0:em_SS_DISABLE));
   }
  if (! ssp) SYSCALL_RET(0);
  // Validate the new spec: reject unknown flag bits...
  if (ss_flags & ~(uint32_t)em_SS_ALLBITS) SYSCALL_ERR(em_EINVAL);
  if (ss_flags & em_SS_DISABLE)
   { // ...refuse to disable the stack while running on it...
     if (s.onsigstack) SYSCALL_ERR(em_EINVAL);
   }
  else
   { // ...and require a minimally-sized stack when enabling.
     if (ss_size < em_MINSIGSTKSZ) SYSCALL_ERR(em_ENOMEM);
   }
  s.sigstack_enabled = ! (ss_flags & em_SS_DISABLE);
  if (ss_flags & em_SS_ONSTACK)
   { // See header comment: on-signal-stack delivery is unimplemented.
     printf("Signal stack support incomplete\n");
     top();
   }
  s.sigstack_base = ss_base;
  s.sigstack_size = ss_size;
  SYSCALL_RET(0);
}
/*
 * Implement __vfork14, versioned vfork(2).
 *
 * See the comment on sc_fork for a discussion of return value
 * semantics.
 *
 * We have to help the OS a bit.  Most of the semantics of vfork are
 * taken care of by the underlying OS.  But there are some cases where
 * OS state can be changed and, under real 1.4T, would be handled
 * entirely by the kernel, but in our case has traces in userland
 * memory (which thus survive the return to the parent).  The only
 * example at this writing is file descriptors, which correspond to
 * not only OS file descriptors but small pieces of memory as well.
 * This is why VFORKBACKOUT exists.
 *
 * Furthermore, we can't return from here in a vforked child, or we'll
 * trash the parent's stack (our stack, not the emulated machine's
 * stack).  So the actual OS-level vfork has to happen all the way up
 * in run().
 *
 * We could, maybe, arrange to share emulated VM with an OS child
 * created with fork() rather than vfork(), but then we have the
 * problem of breaking that association when doing an emulated fork().
* * In order to leverage dosyscall()'s logic, though, run() arranges to * reenter us to handle syscall return. Keeping track of the control * flow is what vfork_stage is for. It is VFORK_NONE during normal * operation. We set it to VFORK_START before returning the first * time. run() then sets it to VFORK_FAIL or VFORK_SUCCESS before we * are reentered, with vfork_value set to the errno (VFORK_FAIL) or * PID (VFORK_SUCCESS) to return. */ static SYSCALL_IMPL(sc___vfork14) { trc(TRC_VFORK,"%s entered, stage = %d (%s)\n",__func__,(int)vfork_stage,vfork_stage_str(vfork_stage)); switch (vfork_stage) { case VFORK_NONE: window_flush(); vfork_stage = VFORK_START; alert_run = 1; s.npc = s.pc; s.pc = s.xa; rv->flags |= SCRV_BYPASS; return; break; case VFORK_FAIL: vfork_stage = VFORK_NONE; SYSCALL_ERR(vfork_value); break; case VFORK_SUCCESS: vfork_stage = VFORK_NONE; if (vfork_value) { trc(TRC_PROC,"vfork parent, child %lu\n",(ULI)vfork_value); } else { trc(TRC_PROC,"vfork child, parent %lu\n",(ULI)getppid()); } SYSCALL_RET2(vfork_value?:0,!vfork_value); break; default: panic("invalid vfork stage %d in %s",(int)vfork_stage,__func__); break; } } /* * Implement preadv(2). * * The comment in sys_preadv() in vfs_syscalls.c, appearing to give the * syscall arguments, lies. The truth, in syscallargs.h, has a pad * value between iovcnt and offset. */ static SYSCALL_IMPL(sc_preadv) { uint32_t d; IO_PRIV_RWV priv; int n; int i; uint32_t p; uint32_t iovbase; syscall_restartable = 1; d = scarg(args,0); iovbase = scarg(args,1); priv.niov = scarg(args,2); // 3 is unused padding priv.off = (((uint64_t)scarg(args,4)) << 32) | scarg(args,5); trc(TRC_SYSCALL,"preadv %ld, %08lx, %ld, %016llx\n",(LI)(int32_t)d,(ULI)iovbase,(LI)(int32_t)priv.niov,(ULLI)priv.off); priv.fd = descriptor_arg(d,P_R,"preadv"); if (! 
priv.fd) SYSCALL_ERR(em_EBADF); if (priv.niov < 1) { trc(TRC_SYSCALL,"preadv -> 0\n"); SYSCALL_RET(0); } if (priv.niov > 1024) SYSCALL_ERR(em_EINVAL); priv.iov = malloc(priv.niov*sizeof(*priv.iov)); p = iovbase; for (i=0;i error %d (%s)\n",n,em_strerror(n)); free(priv.iov); SYSCALL_ERR(n); } trc(TRC_SYSCALL,"preadv -> %d\n",n); if (io_trace_size) { int left; int nt; int x; left = n; if (left > io_trace_size) left = io_trace_size; x = 0; while (left > 0) { nt = left; if (nt > priv.iov[x][1]) nt = priv.iov[x][1]; trace_io_data_em("data",priv.iov[x][0],nt); left -= nt; x ++; } } free(priv.iov); SYSCALL_RET(n); } /* * Implement pwritev(2). * * The comment in sys_pwritev() in vfs_syscalls.c, appearing to give * the syscall arguments, lies. The truth, in syscallargs.h, has a * pad value between iovcnt and offset. */ static SYSCALL_IMPL(sc_pwritev) { uint32_t d; IO_PRIV_RWV priv; int n; int i; uint32_t p; uint32_t iovbase; syscall_restartable = 1; d = scarg(args,0); iovbase = scarg(args,1); priv.niov = scarg(args,2); // 3 is unused padding priv.off = (((uint64_t)scarg(args,4)) << 32) | scarg(args,5); trc(TRC_SYSCALL,"pwritev %ld, %08lx, %ld, %016llx\n",(LI)(int32_t)d,(ULI)iovbase,(LI)(int32_t)priv.niov,(ULLI)priv.off); priv.fd = descriptor_arg(d,P_W,"pwritev"); if (! priv.fd) SYSCALL_ERR(em_EBADF); if (priv.niov < 1) { trc(TRC_SYSCALL,"pwritev -> 0\n"); SYSCALL_RET(0); } if (priv.niov > 1024) SYSCALL_ERR(em_EINVAL); priv.iov = malloc(priv.niov*sizeof(*priv.iov)); p = iovbase; for (i=0;i error %d (%s)\n",n,em_strerror(n)); free(priv.iov); SYSCALL_ERR(n); } trc(TRC_SYSCALL,"pwritev -> %d\n",n); if (io_trace_size) { int left; int nt; int x; left = n; if (left > io_trace_size) left = io_trace_size; x = 0; while (left > 0) { nt = left; if (nt > priv.iov[x][1]) nt = priv.iov[x][1]; trace_io_data_em("data",priv.iov[x][0],nt); left -= nt; x ++; } } free(priv.iov); SYSCALL_RET(n); } /* * Implement __sigaction14, versioned sigaction(2). 
*/ static SYSCALL_IMPL(sc___sigaction14) { uint32_t sig; uint32_t act; uint32_t oact; uint32_t flags; uint32_t handler; int i; EMSIGSET mask; sig = scarg(args,0); act = scarg(args,1); oact = scarg(args,2); if ((sig < 1) || (sig >= em__NSIG)) SYSCALL_ERR(em_EINVAL); if (act) { handler = mem_get_4(act); for (i=0;i<4;i++) mask.bits[i] = mem_get_4(act+4+(i<<2)); flags = mem_get_4(act+20); if (flags & em_SA_ONSTACK) { printf("Signal stack support incomplete\n"); top(); } } if (oact) { mem_set_4(oact,s.sigh[sig].handler); for (i=0;i<4;i++) mem_set_4(oact+4+(i*4),s.sigh[sig].mask.bits[i]); mem_set_4(oact+20,s.sigh[sig].flags); } if (act) { switch (sig) { case em_SIGKILL: case em_SIGSTOP: if (handler != em_SIG_DFL) SYSCALL_ERR(em_EINVAL); break; } if (trc_if(TRC_SIGNAL)) { FILE *f; int j; const char *pref; const char *sn; SYM *hsym; uint32_t fv; f = trc_f(TRC_SIGNAL); fprintf(f,"installing handler %08lx",(ULI)handler); switch (handler) { case em_SIG_DFL: fprintf(f," (SIG_DFL)"); break; case em_SIG_IGN: fprintf(f," (SIG_IGN)"); break; case em_SIG_ERR: fprintf(f," (SIG_ERR)"); break; default: hsym = lookup_fxn(handler); if (hsym) fprintf(f," (%s)",hsym->name); break; } fprintf(f," for %lu",(ULI)sig); sn = em_signame(sig,0); if (sn) fprintf(f," (%s)",sn); fprintf(f,", mask "); print_em_sig_mask(f,&mask); fprintf(f," flags %08lx",(ULI)flags); pref = " ("; fv = flags; for (j=0;sigaction_flags[j].name;j++) { if (fv & sigaction_flags[j].bit) { fprintf(f,"%s%s",pref,sigaction_flags[j].name); pref = "|"; fv &= ~sigaction_flags[j].bit; } } if (pref[0] != ' ') { if (fv) fprintf(f,"|%lx",(ULI)fv); fprintf(f,")"); } else { if (fv) fprintf(f," (unrecognized)"); } fprintf(f,"\n"); } s.sigh[sig].handler = handler; s.sigh[sig].mask = mask; s.sigh[sig].flags = flags; if ( (handler == em_SIG_IGN) || ((handler == em_SIG_DFL) && (sigdef[sig] == SIGDEF_IGNORE)) ) { if (! 
(s.ignsigs & (1ULL << sig))) { s.ignsigs |= 1ULL << sig; if (handler == em_SIG_IGN) { trc(TRC_SIGNAL,"emulator now setting %s ignored\n",em_signame(sig,"(unknown)")); set_our_catcher(em2os_signal(sig),SIG_IGN); } else { trc(TRC_SIGNAL,"emulator now setting %s default\n",em_signame(sig,"(unknown)")); set_our_catcher(em2os_signal(sig),SIG_DFL); } } s.sigpend[sig] = 0; } else { if (s.ignsigs & (1ULL << sig)) { s.ignsigs &= ~(1ULL << sig); trc(TRC_SIGNAL,"emulator now catching %s\n",em_signame(sig,"(unknown)")); set_our_catcher(em2os_signal(sig),&catch_signal); } } } SYSCALL_RET(0); } /* * Implement __sigprocmask14, versioned sigprocmask(2). */ static SYSCALL_IMPL(sc___sigprocmask14) { uint32_t how; uint32_t set; uint32_t oset; uint64_t mask; how = scarg(args,0); set = scarg(args,1); oset = scarg(args,2); if (oset) { mem_set_4(oset,s.sigmask>>1); mem_set_4(oset+4,s.sigmask>>33); mem_set_4(oset+8,0); mem_set_4(oset+12,0); } if (set) { switch (how) { default: SYSCALL_ERR(em_EINVAL); break; case em_SIG_BLOCK: case em_SIG_UNBLOCK: case em_SIG_SETMASK: break; } mask = mem_get_4(set); mask |= mem_get_4(set+4) * 0x100000000ULL; mask <<= 1; mask &= SIG_ALLMASK; switch (how) { case em_SIG_BLOCK: s.sigmask |= mask; break; case em_SIG_UNBLOCK: s.sigmask &= ~mask; break; case em_SIG_SETMASK: s.sigmask = mask; break; } s.sigmask &= SIG_CANBLOCK; if (trc_if(TRC_SIGNAL)) { FILE *f; f = trc_f(TRC_SIGNAL); fprintf(f,"signal mask now "); if (s.sigmask == 0) { fprintf(f,"empty"); } else { int i; const char *pref; pref = ""; mask = s.sigmask; for (i=1;i> i) & 1U) { const char *n; n = em_signame(i,0); if (n) fprintf(f,"%s%s",pref,n); else fprintf(f,"%s?%d",pref,i); mask &= ~(((uint64_t)1) << i); pref = "|"; } } if (mask) fprintf(f,"+0x%llx",(ULLI)mask); } fprintf(f,"\n"); } alert_run = 1; } SYSCALL_RET(0); } /* * Implement __sigsuspend14, versioned sigsuspend(2). * * There is a bit of a problem here. 
On real hardware, the syscall * returns and signal delivery happens immediately after (before * userland can do much with the syscall return); the signal handler * runs between the syscall returning to userland and the libc stub * returning to its caller. * * But, here, simply doing this naïvely, calling deliver_signals() * here, does not work. When we do that, signal delivery saves - and * restores - userland state that does not include the syscall return; * syscall return then mangles the machine state set up by signal * delivery. Arguably ideal would be to return from here and let * run()'s check for signals handle delivery. But it's complicated * enough to tell whether deliver_signals() would actually deliver * anything that it's difficult to do. So, instead, we replace the * usual SYSCALL_ERR(em_EINTR) with frobbing of the first-saved signal * state, so that when the first-delivered signal's handler returns it * restores the return-to-userland state. This is why deliver_signals * takes an argument: so that we can fiddle it here. * * Also, there is a race here. If implemented naïvely, as just * while (1) * { if (anysigpend && deliver_signals(&fp) && fp) break; * sigemptyset(&osmask); * sigsuspend(&osmask); * } * then a signal could arrive between checking anysigpend and entering * sigsuspend. So we burn two more syscalls to block signals during * the critical part of that test. 
*/ static SYSCALL_IMPL(sc___sigsuspend14) { uint32_t emset; uint64_t oldmask; sigset_t osmask; uint32_t fp; sigset_t osblock; sigset_t osold; EMSIGSET emmask; FILE *f; emset = scarg(args,0); oldmask = s.sigmask; emmask.bits[0] = mem_get_4(emset); emmask.bits[1] = mem_get_4(emset+4); emmask.bits[2] = mem_get_4(emset+8); emmask.bits[3] = mem_get_4(emset+12); f = trc_f(TRC_SIGNAL); if (f) { fprintf(f,"__sigsuspend14 entry, mask = "); print_em_sig_mask(f,&emmask); fprintf(f,"\n"); } s.sigmask = ((emmask.bits[0] | (((uint64_t)emmask.bits[1]) << 32)) << 1) & SIG_CANBLOCK; trc(TRC_SIGNAL,"__sigsuspend14 entering loop\n"); sigfillset(&osblock); sigemptyset(&osold); // XXX API botch sigprocmask(SIG_BLOCK,&osblock,&osold); while (1) { if (anysigpend) { trc(TRC_SIGNAL,"__sigsuspend14 noticing anysigpend\n"); if (deliver_signals(&fp)) { trc(TRC_SIGNAL,"__sigsuspend14 deliver_signals returned true\n"); if (fp) { trc(TRC_SIGNAL,"__sigsuspend14 breaking from loop\n"); break; } } } sigemptyset(&osmask); trc(TRC_SIGNAL,"__sigsuspend14 calling underlying sigsuspend\n"); sigsuspend(&osmask); trc(TRC_SIGNAL,"__sigsuspend14 underlying sigsuspend returned\n"); } sigprocmask(SIG_SETMASK,&osold,0); trc(TRC_SIGNAL,"__sigsuspend14 out of loop\n"); /* * Error return sets CC_C and leaves the errno in %o0, and we want to * reset the signal mask to oldmask. So, fiddle the * delivered-signal saved state correspondingly. */ mem_set_4(fp+20,oldmask>>1); // sf.sf_sc.__sc_mask13 mem_set_4(fp+36,mem_get_4(fp+36)|(em_PSR_CC_C<>1); // sf.sf_sc.sc_mask, first word mem_set_4(fp+52,oldmask>>33); // sf.sf_sc.sc_mask, second word rv->flags |= SCRV_BYPASS; } /* * Implement __sigreturn14, versioned sigreturn(2). * * There are subtleties here, mostly bearing the use of this in the * implementation of longjmp. On return, real hardware reloads the * ins and locals (%i* and %l*) from the register save area based off * the restored %sp. This is important to get them right when jumping * up the stack. 
In a real kernel it happens as part of syscall
 * return; here we have to do it manually.  But, in case the longjmp
 * doesn't switch stack frames, we have to make sure the current
 * values are in the register save area.
 *
 * But first, we want to flush all windows to the stack, so we can give
 * a clean environment to the returned-to code.  We actually need to,
 * for once, push the current window to the stack too, in case that's
 * the window being restored.  The kernel gets this for free, because
 * the syscall implementation runs in a window inner to the innermost
 * user window.
 */
static SYSCALL_IMPL(sc___sigreturn14)
{ uint32_t ctx;
  uint32_t pc;
  uint32_t npc;
  uint32_t psr;
  uint32_t g1;
  uint32_t o0;
  uint32_t sp;
  uint32_t maskl;
  uint32_t maskh;
  int i;
  window_flush(); // does a save_cwindow()
  spill_window(s.cwp);
  s.iwp = cwp_r(s.cwp);
  ctx = scarg(args,0);
  trc(TRC_SYSCALL,"__sigreturn14 restoring from ctx=%08lx\n",(ULI)ctx);
  // Unpack the context the signal trampoline handed back.  The
  // offsets presumably match the emulated struct sigcontext (sp, pc,
  // npc, psr, g1, o0, then the two-word signal mask) -
  // NOTE(review): confirm against the emulated sigcontext definition.
  sp = mem_get_4(ctx+8);
  pc = mem_get_4(ctx+12);
  npc = mem_get_4(ctx+16);
  psr = mem_get_4(ctx+20);
  g1 = mem_get_4(ctx+24);
  o0 = mem_get_4(ctx+28);
  maskl = mem_get_4(ctx+32);
  maskh = mem_get_4(ctx+36);
  // Both PCs must be word-aligned.
  if ((pc | npc) & 3) SYSCALL_ERR(em_EINVAL);
  s.cc = psr_to_cc(psr);
  if (psr & em_PSR_EF) s.flags |= SF_FPU;
  s.pc = pc;
  s.npc = npc;
  s.regs[R_G1] = g1;
  s.regs[R_O0] = o0;
  s.regs[R_SP] = sp; // no sigstack; on-stack delivery not implemented
  // Rebuild the internal 64-bit mask; the <<1 matches the convention
  // used by sc___sigprocmask14 (which shifts >>1 when exporting).
  s.sigmask = (((((uint64_t)maskh) << 32) | maskl) << 1) & SIG_CANBLOCK;
  if (trc_if(TRC_SYSCALL))
   { FILE *f;
     f = trc_f(TRC_SYSCALL);
     fprintf(f,"__sigreturn14 restored pc %08lx npc %08lx g1 %08lx o0 %08lx sp %08lx cc ", (ULI)s.pc,(ULI)s.npc, (ULI)s.regs[R_G1], (ULI)s.regs[R_O0], (ULI)s.regs[R_SP]);
     print_cc(f,s.cc);
     fprintf(f,"\n");
   }
  // Reload the locals and ins from the register save area at the
  // restored %sp, as real hardware would on returning to that window
  // (see the header comment).
  for (i=0;i<8;i++) s.regs[R_L0+i] = mem_get_4(sp+(i*4));
  for (i=0;i<8;i++) s.regs[R_I0+i] = mem_get_4(sp+32+(i*4));
  rv->flags |= SCRV_BYPASS;
}
/*
 * Implement __getcwd, the syscall behind getcwd(3).
* * __getcwd's API is not documented (I got it by UTSLing); I see no * good way to implement it in terms of getcwd(3), so we call the * underlying OS's __getcwd(). Ugh. * * __getcwd has no declaration visible outside libc. So, we declare it * ourselves. Double ugh. */ extern int __getcwd(char *, size_t); static SYSCALL_IMPL(sc___getcwd) { uint32_t bufp; uint32_t len; char *osbuf; int e; bufp = scarg(args,0); len = scarg(args,1); osbuf = malloc(len?:1); if (! osbuf) { printf("Out of memory allocating __getcwd() buffer\n"); top(); } e = __getcwd(osbuf,len); if (e < 0) { e = errno; free(osbuf); SYSCALL_ERR(os2em_errno(e)); } if (e > len) panic("impossible __getcwd return"); copyout(osbuf,bufp,e,"__getcwd",&free,osbuf); free(osbuf); SYSCALL_RET(e); } #define F(n) [em_SYS_##n] = &sc_##n static void (*sysent_fn[])(SCARGS *, SCRV *) = { F(exit), // 1 F(fork), // 2 F(read), // 3 F(write), // 4 F(open), // 5 F(close), // 6 F(wait4), // 7 F(link), // 9 F(unlink), // 10 F(chdir), // 12 F(fchdir), // 13 F(chmod), // 15 F(chown), // 16 F(break), // 17 F(getfsstat), // 18 F(getpid), // 20 F(setuid), // 23 F(getuid), // 24 F(geteuid), // 25 F(recvfrom), // 29 F(access), // 33 F(fchflags), // 35 F(kill), // 37 F(getppid), // 39 F(dup), // 41 F(pipe), // 42 F(getegid), // 43 F(getgid), // 47 F(__getlogin), // 49 F(ioctl), // 54 F(revoke), // 56 F(symlink), // 57 F(readlink), // 58 F(execve), // 59 F(umask), // 60 F(munmap), // 73 F(madvise), // 75 F(getgroups), // 79 F(setgroups), // 80 F(getpgrp), // 81 F(setpgid), // 82 F(setitimer), // 83 F(dup2), // 90 F(fcntl), // 92 F(select), // 93 F(fsync), // 95 F(setpriority), // 96 F(socket), // 97 F(connect), // 98 F(getpriority), // 100 F(setsockopt), // 105 F(gettimeofday), // 116 F(getrusage), // 117 F(getsockopt), // 118 F(readv), // 120 F(writev), // 121 F(fchown), // 123 F(fchmod), // 124 F(rename), // 128 F(flock), // 131 F(mkfifo), // 132 F(sendto), // 133 F(socketpair), // 135 F(mkdir), // 136 F(rmdir), // 137 F(utimes), // 138 
F(statfs), // 157 F(fstatfs), // 158 F(pread), // 173 F(pwrite), // 174 F(setgid), // 181 F(setegid), // 182 F(seteuid), // 183 F(getrlimit), // 194 F(setrlimit), // 195 F(mmap), // 197 F(lseek), // 199 F(ftruncate), // 201 F(__sysctl), // 202 F(futimes), // 206 F(poll), // 209 F(nanosleep), // 240 F(getdents), // 272 F(lchmod), // 274 F(lchown), // 275 F(lutimes), // 276 F(__stat13), // 278 F(__fstat13), // 279 F(__lstat13), // 280 F(__sigaltstack14), // 281 F(__vfork14), // 282 F(preadv), // 289 F(pwritev), // 290 F(__sigaction14), // 291 F(__sigprocmask14), // 293 F(__sigsuspend14), // 294 F(__sigreturn14), // 295 F(__getcwd), // 296 [0] = 0 }; #undef F /* * Perform a syscall. This is called when a trap instruction specifies * the code that means "do a syscall". We set up an SCARGS for any * arguments, extract the call number, handle __syscall here (we don't * cascade __syscall references), trace the arguments, perform the * call, trace the return values or error, and return by whichever * method is appropriate. 
*/
static void dosyscall(uint32_t id)
{
 SCARGS args;
 uint32_t callno;
 SCRV rv;
 uint32_t g2;
 uint32_t g7;
 void (*fn)(SCARGS *, SCRV *);
 FILE *f;

 // Snapshot %g2/%g7 before the call can change register state; they
 // are the alternative return addresses selected by the G2R/G7R flags.
 g2 = s.regs[R_G2];
 g7 = s.regs[R_G7];
 // Arguments arrive in %o0-%o5, overflow on the stack (args.sp).
 args.nreg = 6;
 args.regs[0] = s.regs[R_O0];
 args.regs[1] = s.regs[R_O1];
 args.regs[2] = s.regs[R_O2];
 args.regs[3] = s.regs[R_O3];
 args.regs[4] = s.regs[R_O4];
 args.regs[5] = s.regs[R_O5];
 args.sp = s.regs[R_SP];
 // The call number may carry return-convention flag bits; strip them.
 callno = id & ~(em_SYSCALL_G2RFLAG | em_SYSCALL_G7RFLAG);
 trc(TRC_SYSCALL,"(%llu) syscall CALL %d (",s.instrs,callno);
 if (callno == em_SYS___syscall)
  { // __syscall(2): real call number is in the second argument slot;
    // shift the remaining register arguments down.  We do not cascade
    // __syscall-of-__syscall.
    callno = args.regs[1];
    args.regs[0] = args.regs[2];
    args.regs[1] = args.regs[3];
    args.regs[2] = args.regs[4];
    args.regs[3] = args.regs[5];
    args.nreg = 4;
    trc(TRC_SYSCALL,"__syscall -> %d (",callno);
    if ((callno >= nsysent) || !sysent[callno].name)
     { trc(TRC_SYSCALL,"?)");
     }
    else
     { trc(TRC_SYSCALL,"%s)",sysent[callno].name);
     }
  }
 else
  { if ((callno >= nsysent) || !sysent[callno].name)
     { trc(TRC_SYSCALL,"?");
     }
    else
     { trc(TRC_SYSCALL,"%s",sysent[callno].name);
     }
  }
 // Trace the argument values only when syscall tracing is live.
 f = trc_f(TRC_SYSCALL);
 if (f)
  { fprintf(f,") (");
    print_syscall_values(f,sysent[callno].args,&args);
    fprintf(f,")\n");
  }
 rv.err = 0;
 rv.flags = ((id & em_SYSCALL_G2RFLAG) ? SCRV_G2R : 0) |
            ((id & em_SYSCALL_G7RFLAG) ? SCRV_G7R : 0);
 if (callno >= nsysent)
  { printf("Unknown syscall %08lx\n",(ULI)callno);
    top();
  }
 fn = sysent_fn[callno];
 if (! fn)
  { printf("Unknown syscall %08lx\n",(ULI)callno);
    top();
  }
 // Implementations set syscall_restartable if EINTR should restart.
 syscall_restartable = 0;
 (*fn)(&args,&rv);
 if (rv.flags & SCRV_BYPASS)
  { // Implementation handled return-state itself (eg, exec, sigreturn).
    trc(TRC_SYSCALL,"(%llu) syscall BYPASS\n",s.instrs);
    return;
  }
 trc(TRC_SYSCALL,"(%llu) syscall RET ",s.instrs);
 if (rv.err == 0)
  { f = trc_f(TRC_SYSCALL);
    if (f)
     { fprintf(f,"success");
       // 'V' in the rv descriptor means void: no values to print.
       if (sysent[callno].rv[0] != 'V')
        { SCARGS a;
          a.regs[0] = rv.rv;
          a.regs[1] = rv.rv2;
          a.sp = 0;
          a.nreg = 2;
          fprintf(f," ");
          print_syscall_values(f,sysent[callno].rv,&a);
        }
       fprintf(f,", returning to ");
     }
    // Three return conventions: jump to saved %g2, jump to saved %g7,
    // or fall through to pc/npc with the carry bit cleared (success).
    if (rv.flags & SCRV_G2R)
     { s.pc = g2;
       s.npc = s.pc + 4;
       trc(TRC_SYSCALL,"%%g2\n");
     }
    else if (rv.flags & SCRV_G7R)
     { s.pc = g7;
       s.npc = s.pc + 4;
       trc(TRC_SYSCALL,"%%g7\n");
     }
    else
     { s.cc &= ~CC_C;
       trc(TRC_SYSCALL,"pc/npc\n");
     }
    if (rv.flags & SCRV_RVSET) s.regs[R_O0] = rv.rv;
    if (rv.flags & SCRV_RV2SET) s.regs[R_O1] = rv.rv2;
  }
 else
  { if ((rv.err == em_EINTR) && syscall_restartable)
     { // Re-execute the trap instruction instead of reporting EINTR.
       trc(TRC_SYSCALL,"restartable syscall showing EINTR\n");
       s.npc = s.pc;
       s.pc = s.xa;
       s.flags |= SF_SIGRESTART;
       alert_run = 1;
       return;
     }
    trc(TRC_SYSCALL,"error %lu (%s)\n",(ULI)rv.err,em_strerror(rv.err));
    // Must match no-SA_RESTART code in deliver_signals()
    s.regs[R_O0] = rv.err;
    s.cc |= CC_C;
  }
}

/*
 * Implement a trap: either an unconditional trap or a taken
 * conditional trap.
 */
static void trap(uint32_t arg)
{
 switch (arg)
  { case 0: // syscall
     dosyscall(s.regs[R_G1]);
     break;
    case 3: // flush windows
     window_flush();
     break;
    case 0x30: // get %wim stuff
#if NWINDOWS > 32
#error "Trap 0x30 assumes NWINDOWS is at most 32"
#endif
     s.regs[R_O0] = ((uint32_t)1) << s.iwp;
#if NWINDOWS == 32
     s.regs[R_O1] = ~(uint32_t)0;
#else
     // NOTE(review): the text from here to the end of the next
     // function appears mangled in this copy -- content between '<'
     // and '>' characters has been lost (likely an HTML-stripping
     // accident), eating the rest of trap() (presumably
     // "<<NWINDOWS); #endif ... break; default: ...") and the head of
     // the conditional-trap helper whose tail survives below.
     // Recover this region from a pristine copy of the source before
     // building; it is preserved verbatim here.
     s.regs[R_O1] = ~((~(uint32_t)0)<> (s.cc & 15)) & 1) trap(arg); }

/*
 * Implement mulscc.  The arguments are the DREG number, the SREG1
 * number, and the value from I, SIMM13, and/or regs[SREG2].
*/ static void mulscc(int dr, int sr1, uint32_t v) { uint32_t t; uint32_t t2; t2 = s.regs[sr1] & 1; t = s.regs[sr1] >> 1; switch (s.cc & (CC_N | CC_V)) { case 0: case CC_N | CC_V: break; case CC_N: case CC_V: t |= 0x80000000; break; } s.regs[dr] = addcc(t,(s.y&1)?v:0); s.y = (s.y >> 1) | (t2 << 31); } /* * Check that a register number is even (eg, for ldd/std). If not, * handle it as an unimplemented instruction. */ static void even_regno(int reg, uint32_t inst) { if (reg & 1) { unimp(s.xa,inst); top(); } } /* * Take a single, v, and pull it apart into an FPNUM. */ static FPNUM crack_single(uint32_t v) { int e; FPNUM n; n.raw = v; e = (v >> MANTBITS_S) & 0x1ff; n.sign = (v >> 31) & 1; n.bexp = e; n.exp = e - EXPBIAS_S; n.mant = (v & ((1ULL << MANTBITS_S) - 1)); switch (e) { case 0: n.kind = n.mant ? FPK_DENORM : FPK_ZERO; break; case MAXBEXP_S: n.kind = n.mant ? FPK_NaN : FPK_INFTY; if (0) { default: n.kind = FPK_NORMAL; } n.mant |= 1ULL << MANTBITS_S; break; } return(n); } /* * Take a double, v, and pull it apart into an FPNUM. */ static FPNUM crack_double(uint64_t v) { int e; FPNUM n; n.raw = v; e = (v >> MANTBITS_D) & 0x7ff; n.sign = (v >> 63) & 1; n.bexp = e; n.exp = e - EXPBIAS_D; n.mant = (v & ((1ULL << MANTBITS_D) - 1)); switch (e) { case 0: n.kind = n.mant ? FPK_DENORM : FPK_ZERO; break; case MAXBEXP_D: n.kind = n.mant ? FPK_NaN : FPK_INFTY; if (0) { default: n.kind = FPK_NORMAL; } n.mant |= 1ULL << MANTBITS_D; break; } return(n); } /* * Take a single represented as an FPNUM and collapse it back into a * 32-bit single. Both exp and bexp must be set correctly (this code * may use whichever it finds more convenient), but raw need not. * kind must be correct. */ static uint32_t merge_single(FPNUM n) { switch (n.kind) { default: abort(); break; case FPK_NORMAL: return( (n.sign ? 0x80000000 : 0) | (((uint32_t)n.bexp) << 23) | (n.mant & 0x007fffff) ); break; case FPK_ZERO: return(n.sign?0x80000000:0); break; case FPK_DENORM: return( (n.sign ? 
0x80000000 : 0) | n.mant ); break; case FPK_INFTY: return(n.sign?0xff800000:0x7f800000); break; case FPK_NaN: return( (n.sign ? 0xff800000 : 0x7f800000) | (n.mant & 0x007fffff) ); break; } } /* * Take a double represented as an FPNUM and collapse it back into a * 64-bit double. Both exp and bexp must be set correctly (this code * may use whichever it finds more convenient), but raw need not. * kind must be correct. */ static uint64_t merge_double(FPNUM n) { switch (n.kind) { default: abort(); break; case FPK_NORMAL: return( (n.sign ? 0x8000000000000000ULL : 0) | (((uint64_t)n.bexp) << 52) | (n.mant & 0x000fffffffffffffULL) ); break; case FPK_ZERO: return(n.sign?0x8000000000000000ULL:0); break; case FPK_DENORM: return( (n.sign ? 0x8000000000000000ULL : 0) | n.mant ); break; case FPK_INFTY: return(n.sign?0xfff0000000000000ULL:0x7ff0000000000000ULL); break; case FPK_NaN: return( (n.sign ? 0xfff0000000000000ULL : 0x7ff0000000000000ULL) | (n.mant & 0x000fffffffffffffULL) ); break; } } /* * Dump out a single FPNUM, in a form appropriate for tracing. */ static void dump_single(FILE *to, FPNUM n, int rawvalid) { uint32_t hosti; float hostf; if (rawvalid) fprintf(to,"%08llx",(ULLI)n.raw); fprintf(to,"