/* * There is a problem here. The sigset_t API defines no way to tell * what the maximum signal/bit number supported by a sigset_t is. At * least one implementation doesn't check bit number arguments, just * accessing outside of the sigset_t if the number is out of range, so * we can't do something like sigfillset and then counting the set * bits. * * We blindly assume that all implementations support at least 128 * bits, and that, for the os2em conversion, we can ignore bits above * 128. * * Goddammit. 5.2/amd64's implementation of nested functions is just * straight-up broken. Sometimes they don't even work; other times, * they mostly-work but produce grossly broken debugging info. So we * don't use them unless we have to (eg, throw-out functions). It's a * pity, because they really do make for a significantly more * expressive language. * * SPARC's floating point is IEEE 754. This represents numbers as * * (sign)(exponent)(mantissa) * * where the sign is one bit, 0 for +ve and 1 for -ve; exponent is 8 * bits for single and 11 for double, and mantissa does not include * the hidden bit and is the remainder of the bits: 23 for single and * 52 for double. SPARC v8 also includes quad, a 128-bit format for * which the exponent is 15 bits and the mantissa is 112, but what * we're emulating doesn't include them. The exponent is stored with * a bias added to it: 127 for single and 1023 for double. That is, * for example, 1.0 is represented as 0x3f800000 for single or * 0x3ff0000000000000 for double. * * Special cases in floating point: * * Biased exponent 0, mantissa 0: zero, +ve or -ve per sign * * Biased exponent ~0, mantissa 0: infinity, +ve or -ve per sign * * Biased exponent 0, mantissa !=0: denormalized * There are also NaNs, but one reference says IEEE 754 does not * specify what bit patterns are or are not NaNs(!). I find that hard * to believe. 
In any case, the SPARC document specifies that NaNs * are values with biased exponent ~0 and mantissa !=0, with the high * bit of the mantissa field set for a quiet NaN and clear for * a signaling NaN. * * A normalized number with biased exponent E and mantissa field M * represents 2^(E-B)*1.M, where B is the exponent bias for the format * in use. But if the biased exponent is 0, the number is * denormalized (zero can be thought of as a denormal), representing * instead 2^(1-B)*0.M - that is, the hidden bit is 0 instead of 1 and * the effective exponent is what it would be if the biased exponent * were actually 1. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * As of 1.4T, which is what we emulate, filesystem info comes from * statfs(), for which we need to include . As of 4.0.1 * and 5.2, we need to call statvfs(), from . Neither * has compatability with the other. Hence this dance. :-þ * * Similar remarks apply to getfsstat()/getvfsstat(). */ #include #if __NetBSD_Version__ >= 400000003 // 4.0.1 and later #define STATFS_VIA_STATVFS #define GETFSSTAT_VIA_GETVFSSTAT #elif __NetBSD_Version__ <= 104200000 // 1.4T and earlier #undef STATFS_VIA_STATVFS #undef GETFSSTAT_VIA_GETVFSSTAT #else #error "Figure out whether STATFS_VIS_STATVFS and/or GETFSSTAT_VIA_GETVFSSTAT should be defined" #endif #ifdef STATFS_VIA_STATVFS #include #else #include #endif #ifdef GETFSSTAT_VIA_GETVFSSTAT #include #else #include #endif #include "user.h" #include "types.h" #include "tracemgr.h" #include "realthing.h" #include "stdio-util.h" /* * The revs I use that don't have WNOREAP have WNOWAIT, with the same * functionality. It's not documented, but it's there, apparently for * compatability with other OSes but usable natively. 
*/
#if defined(WNOWAIT) && !defined(WNOREAP)
#define WNOREAP WNOWAIT
#endif

/*
 * I see no clean way to implement __getcwd as a wrapper around getcwd,
 * and __getcwd has no declaration visible outside libc.  So, this.
 *
 * Ugh.
 */
extern int __getcwd(char *, size_t);

// Number of emulated SPARC register windows.
#define NWINDOWS 8

extern const char *__progname;

#include "sysent.h"
#include "em-const.h"

// PAGE_SIZE must be a power of two
#define PAGE_SIZE 4096
#define USRSTACK 0xf0000000
#define MAXSSIZE (1U<<24)
#define MAXDSIZE 0x04000000
#define MAXFDS 4096
#define STACKGAPLEN 400

// Second arg must be a power of two
#define ROUND_UP(a,b) (((a)+(b)-1) & ~((b)-1))
#define ROUND_DOWN(a,b) ((a) & ~((b)-1))

// REPn(x) expands to n comma-separated copies of x; REPkb(x) expands
// to 2^k copies (one per possible value of a k-bit field).
#define REP4(x) x, x, x, x
#define REP8(x) REP4(x), REP4(x)
#define REP16(x) REP8(x), REP8(x)
#define REP32(x) REP16(x), REP16(x)
#define REP64(x) REP32(x), REP32(x)
#define REP128(x) REP64(x), REP64(x)
#define REP256(x) REP128(x), REP128(x)
#define REP2b(x) REP4(x)
#define REP3b(x) REP8(x)
#define REP4b(x) REP16(x)
#define REP5b(x) REP32(x)
#define REP6b(x) REP64(x)
#define REP7b(x) REP128(x)
#define REP8b(x) REP256(x)

// Field extractors for SPARC instruction words.
#define OPC(opc) (((opc)>>30)&3) /* format select bits */
#define OP2(opc) (((opc)>>22)&7) /* opcode bits, format 2 */
#define DREG(opc) (((opc)>>25)&31) /* dest reg bits, formats 2 and 3 */
#define A(opc) (((opc)>>29)&1) /* annul bit, format 2 */
#define COND(opc) (((opc)>>25)&15) /* condition code bits, format 2 */
#define IMM22(opc) ((opc)&0x003fffff) /* immediate data, format 2 */
#define DISP22(opc) signextend(IMM22(opc),22) /* displacement, format 2 */
#define OP3(opc) (((opc)>>19)&0x3f) /* opcode bits, format 3 */
#define SREG1(opc) (((opc)>>14)&31) /* source reg 1 bits, format 3 */
#define SREG2(opc) ((opc)&31) /* source reg 2 bits, format 3 */
#define I(opc) (((opc)>>13)&1) /* immediate bit, format 3 */
#define ASI(opc) (((opc)>>5)&0xff) /* alternative space bits, format 3 */
#define SIMM13(opc) signextend((opc)&0x1fff,13) /* immediate data, format 3 */
#define OPF(opc) (((opc)>>5)&0x1ff) /* FPU opcode bits, format 3 */

// Memory protection bits, used for both MEMSEGs and FDs.
#define P_R 0x01
#define P_W 0x02
#define P_X 0x04

// Boilerplate for defining and returning from emulated syscalls.
#define SYSCALL_IMPL(fn) void fn(SCARGS *args __attribute__((__unused__)), SCRV *rv __attribute__((__unused__)))
#define SYSCALL_SETERR(e) do { rv->err = (e); } while (0)
#define SYSCALL_ERR(e) do { SYSCALL_SETERR((e)); return; } while (0)
#define SYSCALL_SETRET(v) do { rv->rv = (v); rv->flags |= SCRV_RVSET; } while (0)
#define SYSCALL_RET(v) do { SYSCALL_SETRET((v)); return; } while (0)
#define SYSCALL_RET2(v1,v2) do { rv->rv = (v1); rv->rv2 = (v2); rv->flags |= SCRV_RVSET | SCRV_RV2SET; return; } while (0)

/*
 * The state of the initial-exec machinery:
 *
 * INITIAL means nothing has been done with it yet.
 * DELAY means it should be delayed to give the UI a crack at it.
 * WORKED means it worked.
 * FAILED means it failed.
 */
typedef enum {
	IES_INITIAL = 1,
	IES_DELAY,
	IES_WORKED,
	IES_FAILED,
} INIT_EXEC_STATE;

/*
 * Kinds of operation we may need to back out (or otherwise fixup)
 * during a vfork.  See the comments on sc___vfork14 and struct
 * vforkbackout for more.
 */
typedef enum {
	VFB_OPEN,
	VFB_CLOSE,
	VFB_DUP2,
	VFB_TRCMGR,
} VFBKIND;

/*
 * The stages of a vfork operation.  See the comment on sc___vfork14
 * for more.
 */
typedef enum {
	VFORK_NONE = 1,
	VFORK_START,
	VFORK_FAIL,
	VFORK_SUCCESS,
} VFORKSTAGE;

/*
 * The possible kinds of floating-point number.
 *
 * FPK_NORMAL is most numbers.
 * FPK_ZERO is zero (+ve or -ve).
 * FPK_DENORM is denormals.
 * FPK_INFTY is infinity (+ve or -ve).
 * FPK_NaN is a NaN (quiet or signaling).
*/
typedef enum {
	FPK_NORMAL = 1,
	FPK_ZERO,
	FPK_DENORM,
	FPK_INFTY,
	FPK_NaN,
} FPKIND;

// Forward typedefs for all the structs defined below.
typedef struct memseg MEMSEG;
typedef struct memsegops MEMSEGOPS;
typedef struct state STATE;
typedef struct fd FD;
typedef struct memseg_priv_malloc MEMSEG_PRIV_MALLOC;
typedef struct memseg_priv_mmap MEMSEG_PRIV_MMAP;
typedef struct memseg_priv_arena MEMSEG_PRIV_ARENA;
typedef struct malblock MALBLOCK;
typedef struct trc TRC;
typedef struct memacc MEMACC;
typedef struct mfblk MFBLK;
typedef struct scrv SCRV;
typedef struct sig SIG;
typedef struct nulterm_status NULTERM_STATUS;
typedef struct vforkbackout VFORKBACKOUT;
typedef struct emsigset EMSIGSET;
typedef struct statestack STATESTACK;
typedef struct elf_ctx ELF_CTX;
typedef struct psect_ops PSECT_OPS;
typedef struct memwatch MEMWATCH;
typedef struct sym SYM;
typedef struct stab STAB;
typedef struct symlist SYMLIST;
typedef struct regwin REGWIN;
typedef struct bpt BPT;
typedef struct bitval BITVAL;
typedef struct fpnum FPNUM;
typedef struct iov IOV;
typedef struct io_priv_rw IO_PRIV_RW;
typedef struct io_priv_rwv IO_PRIV_RWV;
typedef struct int128 INT128;
typedef struct vm VM;

/*
 * A VM space.  m is the list of MEMSEGs; dbrk is the current data
 * break.
 */
struct vm {
	MEMSEG *m;
	uint32_t dbrk;
} ;
#define INITVM() ((VM){.m=0,.dbrk=0})

/*
 * A 128-bit integer.  We use these occasionally when doing floating
 * point operations.
 */
struct int128 {
	uint64_t h;
	uint64_t l;
} ;

/*
 * Represents one segment of data for an emulated I/O operation.
 * Inspired, in both function and name, by struct iovec, as used by,
 * eg, readv/writev.
 */
struct iov {
	uint32_t base;
	uint32_t len;
} ;

/*
 * I/O private for sc_{,p}{read,write}().
 */
struct io_priv_rw {
	FD *fd;
	IOV iov;
	uint64_t off;
} ;

/*
 * I/O private for sc_{,p}{read,write}v().
 */
struct io_priv_rwv {
	FD *fd;
	uint64_t off;
	uint32_t niov;
	uint32_t (*iov)[2];
} ;

/*
 * A floating-point number, broken apart into its pieces.
 *
 * This is used for both single and double floats; the difference is
 * the sizes of the parts when (re)assembled.
 *
 * raw is the raw bitpattern (in the low 32 bits, for single); it is
 * defined only sometimes (in general, for FPNUMs obtained by cracking
 * binary representations).  sign is 0 for +ve or 1 for -ve.  bexp is
 * the biased exponent, ie, the value in the IEEE bitpattern.  exp is
 * bexp with the bias subtracted off.  mant is the mantissa; it has
 * had the hidden bit (0 for denormals and zeros, 1 for everything
 * else) restored, but is otherwise just the bitpattern from the IEEE
 * value.  The mantissa is in the low 24 (single) or 53 (double) bits
 * of mant.  kind is the general class of the number; see FPKIND,
 * above.
 *
 * Note that an FPNUM does not inherently know whether it's holding a
 * single or a double (though it's implicit in the difference between
 * exp and bexp).  Code using these is expected to know a priori what
 * precision number it's dealing with.
 */
struct fpnum {
	uint64_t raw;
	int sign;
	int bexp;
	int exp;
	uint64_t mant;
	FPKIND kind;
} ;

// EXPBIAS_x is the bias value
// MAXBEXP_x is the bexp value for infinities and NaNs
// MANTBITS_x is the number of mantissa bits, not counting hidden bit
// SIGNAN_x() on the mant field of a NaN is true iff it's signaling.
#define EXPBIAS_S 127
#define MAXBEXP_S 255
#define EXPBIAS_D 1023
#define MAXBEXP_D 2047
#define MANTBITS_S 23
#define MANTBITS_D 52
#define SIGNAN_D(m) (! ((m) & (1ULL << (MANTBITS_D-1))))
#define SIGNAN_S(m) (! ((m) & (1ULL << (MANTBITS_S-1))))

/*
 * A bit with a name.
 */
struct bitval {
	const char *name;
	uint32_t bit;
} ;

/*
 * A breakpoint.  This doesn't really need to be a struct, but it makes
 * the code easier to extend to add additional stuff to breakpoints,
 * like ignore counts or actions to take when hit.
 */
struct bpt {
	uint32_t addr;
} ;

/*
 * A symbol from a symbol table (see STAB).
 */
struct sym {
	char *name;
	uint32_t val;
} ;

/*
 * A symbol table, a collection of SYMs.
*
 * In order to support (relatively) fast lookups, we make this a
 * separate data structure, instead of just keeping SYMs in a linked
 * list.  We use a sorted array, with searches using binary search.
 * (But see also SYMLIST.)
 */
struct stab {
	SYM *syms;
	int nsyms;
	char *strs;
	int strslen;
	uint32_t textbeg;
	uint32_t textend;
} ;
#define STAB_INIT_EMPTY { .syms = 0, .nsyms = 0, .strs = 0 }

/*
 * A linked list of SYMs.  This is used while reading symbols in; the
 * resulting list is processed into a STAB for routine lookups.
 */
struct symlist {
	SYMLIST *link;
	SYM *sym;
} ;

/*
 * A memory watchpoint.
 */
struct memwatch {
	MEMWATCH *link;
	uint32_t base;
	uint32_t len;
	uint32_t end;
	unsigned int flags;
#define MWF_TRIPPED 0x00000001
} ;

/*
 * Common data for "read an ELF file" operations.  We want to do one of
 * these while doing another (for PT_INTERP dynamic-linker loading
 * when execing dynamically-linked executables), necessitating
 * something at least a bit like this.
 */
struct elf_ctx {
	// Path of file being read
	const char *path;
	// (Underlying) OS file descriptor open onto path.
	int fd;
	// The Elf32_Ehdr of the file, as read off disk.
	Elf32_Ehdr eh;
	// The Elf32_Phdrs of the file, as read off disk, and their count.
	Elf32_Phdr *ph;
	int phn;
	// The Elf32_Shdrs of the file, as read off disk, and their count.
	Elf32_Shdr *sh;
	int shn;
	// PT_INTERP content, a zero-length string if none (yet).
	char interp[em_MAXPATHLEN+1];
	// The value of e_entry, internalized.
	uint32_t entry;
	// VA of the beginning of the text segment.
	uint32_t taddr;
	// VA of the beginning of the data segment.
	uint32_t daddr;
	// Offset to relocate all loaded sections by.
	uint32_t loadbase;
	// End of data section.
	uint32_t dend;
	// Value for AT_PHDR Aux32Info struct.
	uint32_t dli_pha;
	// Value for AT_PHENT Aux32Info struct.
	uint32_t dli_phes;
	// Value for AT_PHNUM Aux32Info struct.
	uint32_t dli_phn;
	// Value for AT_BASE Aux32Info struct.
	uint32_t dli_interp;
	// Value for AT_ENTRY Aux32Info struct.
	uint32_t dli_entry;
	// PT_PHDR value, sometimes copied to dli_pha.
	uint32_t phdr;
} ;

/*
 * There are three program headers we care about.  When scanning
 * different files' headers, we want different operations.  This
 * collects them together.
 */
struct psect_ops {
	void (*pt_load)(ELF_CTX *, Elf32_Phdr *, void (*)(void));
	void (*pt_interp)(ELF_CTX *, Elf32_Phdr *, void (*)(void));
	void (*pt_phdr)(ELF_CTX *, Elf32_Phdr *, void (*)(void));
} ;
#define PSECT_OPS_INIT(name) {\
	&psect_pt_load_##name, \
	&psect_pt_interp_##name, \
	&psect_pt_phdr_##name, \
	}

/*
 * An emulated-OS sigset_t.  Indexed by emulated signal number - 1.
 */
struct emsigset {
	uint32_t bits[4];
} ;

/*
 * When reading NUL-terminated strings out of emulated-machine memory,
 * we need to take cleanup actions before returning.  This
 * encapsulates the state for one such string.  That is, there is
 * normally one of these for each such string.
 */
struct nulterm_status {
	char *tofree;
} ;

/*
 * One signal's worth of pseudo-kernel signal handling state.  This is
 * basically a mirror of emulated struct sigaction.
 */
struct sig {
	uint32_t handler;
	EMSIGSET mask;
	uint32_t flags;
} ;

/*
 * State needed to return from a syscall.  A syscall can return zero,
 * one, or two values, or it can return an error.  If err is nonzero,
 * it's returning an error; otherwise, rv and/or rv2 indicate the
 * return values, if any (SCRV_RVSET/SCRV_RV2SET indicate which are
 * meaningful).  SCRV_G2R and SCRV_G7R support the SYSCALL_G2RFLAG and
 * SYSCALL_G7RFLAG bits userland can OR into the syscall number (they
 * indicate that, in the no-error case, the syscall is to return to
 * %g2 or %g7 rather than to %pc).  SCRV_BYPASS indicates that the
 * syscall return code should bypass all the state fiddling that
 * normally happens and just return to the emulator main loop.
*/
struct scrv {
	uint32_t err;
	unsigned int flags;
#define SCRV_G2R 0x00000001
#define SCRV_G7R 0x00000002
#define SCRV_RVSET 0x00000004
#define SCRV_RV2SET 0x00000008
#define SCRV_BYPASS 0x00000010
	uint32_t rv;
	uint32_t rv2;
} ;

#if 0
struct mfblk {
	char *buf;
	int alloc;
	int len;
	char **strptr;
	int *allocptr;
} ;
#endif

/*
 * A block of accesses to memory.  Memory accesses tend to occur in
 * contiguous blocks; collapsing them has proved pragmatically useful.
 * A MEMACC represents such a block of accesses.
 */
struct memacc {
	uint32_t a1;
	uint32_t a2;
	uint8_t *vp;
	int a;
	int n;
	char rw;
} ;

/*
 * There is one of these per type of tracing.  flags is various flags:
 *
 * TRCF_NO_GENERIC_UI
 *	This type of tracing should not use the generic UI
 *	code; it is managed by special-case code.
 *
 * f is a FILE * for writing tracing of this type to, or nil if it's
 * turned off.  inx is the index of this TRC in the trace[] array.
 * dest is a human-readable string describing where output goes, for
 * reporting.
 */
struct trc {
	const char * const name;
	unsigned int flags;
#define TRCF_NO_GENERIC_UI 0x00000001
	FILE *f;
	int inx;
	char *dest;
	char *pl;
	int pla;
	int pln;
} ;

/*
 * Private data for a MEMSEG allocated with malloc, suitable for such
 * things as non-shared anonymous mmap and grown data segments.
 */
struct memseg_priv_malloc {
	void *tofree;
} ;

/*
 * Private data for a MEMSEG arising from an emulated mmap() call.  We
 * refcount these, since splits can lead to multiple MEMSEGs referring
 * to a single underlying mmap()ped area.
 */
struct memseg_priv_mmap {
	int refcnt;
	char *mapped;
	uint32_t size;
	uint32_t mapflags;
} ;

/*
 * Private data for a malloc-arena MEMSEG.  There normally is only one
 * such MEMSEG, but there can be multiple if something punches a hole
 * in it.  But there's only one arena.
 *
 * refcnt is the reference count.
 * mem is the underlying memory.
 * free is the list of free space.
 * live is the AVL tree of live blocks.
 * old is the DLL of freed but not yet reclaimed blocks.
 *
 * See the comment on MALBLOCK for more.
 */
struct memseg_priv_arena {
#define ARENA_SIZE (1U<<28)
#define ARENA_STACK_GAP (1U<<24)
	MEMSEG *seg;
	MALBLOCK *free;
	MALBLOCK *live;
	MALBLOCK *old;
} ;

/*
 * A block in the emulated malloc arena.
 *
 * Free blocks are kept in a doubly linked list.  Live blocks are kept
 * in an AVL tree.  Freed but not yet reused blocks are kept in a
 * doubly linked list; when we run out of unused space, the oldest
 * half of this list is reclaimed, with adjacent blocks merged to the
 * extent possible, to become the free list.
 *
 * In a DLL, l is the backward link and r is the forward link.  In an
 * AVL tree, l/r/u are the left/right/up pointers and bal is the
 * balance value, with negative indicating the left subtree is deeper.
 * kind indicates which of these a given block is.
 *
 * For free blocks, base/size/end describe the block; rz1 and rz2 are
 * meaningless.  For live blocks, base/size/end describe the emulated
 * program's view of the block; the redzone before the block is
 * [rz1..base) and the redzone after it is [end..rz2).  For old
 * blocks, rz1/base/size/end/rz2 retain the values they had when it
 * was live.
 */
struct malblock {
	char kind;
#define MBK_FREE 1
#define MBK_LIVE 2
#define MBK_OLD 3
	signed char bal;
	MALBLOCK *l;
	MALBLOCK *r;
	MALBLOCK *u;
	uint32_t rz1;
	uint32_t base;
	uint32_t size;
	uint32_t end;
	uint32_t rz2;
} ;
#define REDZONE 64 // in bytes
#define ALLOC_GRAIN 8

/*
 * An emulated file descriptor.
 */
struct fd {
	int fd;
	unsigned int prot; // P_R and/or P_W
	unsigned int flags;
#define FDF_CLEX 0x00000001
} ;

/*
 * A record of something done in a vfork child that we may need to back
 * out, or otherwise fix up, to compensate for some "kernel" state
 * being kept in userland and thus not auto-backed-out during vfork.
 */
struct vforkbackout {
	VFORKBACKOUT *link;
	VFBKIND kind;
	uint32_t emfd;
	FD fd;
	int level;
} ;

/*
 * This is the method vector for a MEMSEG (qv).
 *
 * done is used to destroy whatever is backing the MEMSEG.
It is
 * responsible for freeing any underlying resources, including the
 * private data pointer when applicable.
 *
 * curtail shrinks a MEMSEG at the end (highest addresses), given the
 * number of bytes to shrink it by.  That is, this lowers end and
 * size, leaving base unchanged.
 *
 * behead shrinks a MEMSEG at the beginning (lowest addresses), given
 * the number of bytes to shrink it by.  That is, this raises base and
 * lowers size, leaving end unchanged.
 *
 * split punches a hole in the middle of a MEMSEG.  The second arg is
 * the number of bytes to retain at the low end; the third, at the
 * high end.  This assumes the old MEMSEG is modified and a new MEMSEG
 * created; the new MEMSEG, which must not be linked into vm, is
 * returned.
 *
 * postexec handles any cleanup after an exec().  Most MEMSEGs go away
 * upon exec(), though some can be set up to stick around.  If this
 * returns nonzero, the MEMSEG is preserved upon exec(); if zero, it
 * is destroyed.
 *
 * merge is responsible for merging sufficiently similar MEMSEGs when
 * they abut.  When (*merge)(a,b) is called, a and b will (a) have the
 * same MEMSEGOPS, (b) have identical protection bits, and (c) a->end
 * will equal b->base.  The merge function should either (i) do
 * nothing and return zero or (ii) destroy b (including updating link
 * fields and freeing it), merging it into the enlarged a, and return
 * nonzero.
 *
 * check checks an attempted memory access.  It is passed the MEMSEG
 * pointer, the address of the first byte accessed relative to the
 * MEMSEG's base, the number of bytes, and the kind of access
 * contemplated (P_R, P_W, or P_X).  It can assume that the access
 * lies entirely within the MEMSEG and that the MEMSEG's protection
 * has already been checked.
 *
 * desc prints a text description of the MEMSEG.  It is passed the
 * MEMSEG pointer and a FILE * to print the text to.  It should not
 * include a newline in its output.
 */
struct memsegops {
	const char *name;
	void (*done)(MEMSEG *);
	void (*curtail)(MEMSEG *, uint32_t);
	void (*behead)(MEMSEG *, uint32_t);
	MEMSEG *(*split)(MEMSEG *, uint32_t, uint32_t);
	int (*postexec)(MEMSEG *);
	int (*merge)(MEMSEG *, MEMSEG *);
	void (*check)(MEMSEG *, uint32_t, uint32_t, unsigned int);
	void (*desc)(MEMSEG *, FILE *);
} ;
#define MEMSEGOPS_INIT(name) {\
	#name, \
	&memseg_done_##name, \
	&memseg_curtail_##name, \
	&memseg_behead_##name, \
	&memseg_split_##name, \
	&memseg_postexec_##name, \
	&memseg_merge_##name, \
	&memseg_check_##name, \
	&memseg_desc_##name, \
	}

/*
 * A MEMSEG is a piece of emulated virtual space.  An important
 * invariant is end = base + size.  Another is that MEMSEGs do not
 * overlap - at most one MEMSEG maps any particular virtual address.
 * A third is any given MEMSEG always maps an integral number of
 * PAGE_SIZE pages; base, size, and end must all be multiples of
 * PAGE_SIZE.
 *
 * MEMSEGs are implemented in an OO style; this struct contains common
 * data and an ops vector, with any type-specific private data being
 * behind the priv pointer.
 */
struct memseg {
	MEMSEG *link;
	uint32_t base;
	uint32_t size;
	uint32_t end;
	unsigned char prot; // zero or more of P_[RWX]
	uint8_t *data;
	void *priv;
	MEMSEGOPS *ops;
} ;

/*
 * One window's worth of windowed registers.  This is the unit of stack
 * spill and fill.
 */
struct regwin {
	uint32_t l[8];
	uint32_t i[8];
} ;

/*
 * Emulated machine state.  This includes (by intent, at least)
 * everything that needs to be kept separate between parent and child
 * in a vfork(), but it gets used most heavily for the sort of thing
 * that would normally be thought of as hardware state, mostly meaning
 * machine registers.
 *
 * There is no particular reason we have to match the hardware's CWP
 * and WIM semantics.  Our CWP works like the hardware's, but we don't
 * actually keep a WIM.  This is because, the way we manage it, the
 * conceptual WIM always has exactly one bit set, so we can save
 * bother by just recording the bit number of that bit.
 *
 * For reference, here is what the hardware doc says.
 *
 *	The CWP is incremented by a RESTORE (or RETT) instruction and
 *	decremented by a SAVE instruction or a trap.  ...  Each window
 *	shares its ins and outs with the two adjacent windows.  The
 *	outs of the CWP+1 window are addressable as the ins of the
 *	current window, and the outs in the current window are the ins
 *	of the CWP-1 window.  ...  CWP arithmetic is performed modulo
 *	NWINDOWS ...  If [a] SAVE, RESTORE, or RETT instruction would
 *	cause the CWP to point to an "invalid" register set, that is,
 *	one whose corresponding WIM bit equals 1 [], a window_overflow
 *	or window_underflow trap is caused.
 *
 * Four registers have architecture-fixed functionality.  %g0 is fixed
 * at zero.  CALL writes its own address into %o7.  Traps write PC and
 * nPC into %l1 and %l2 of the trap window.
 *
 * Traps, including window overflow and underflow traps, do the effect
 * of a SAVE but without checking for window overflow.
 *
 * If a window overflow or underflow trap is taken, CWP is not changed
 * by the trapping instruction, but is changed by the usual trap
 * handling.  The ADD effect of save/restore also does not happen on
 * window underflow/overflow.  But, after the spill/fill happens, the
 * save/restore will normally be re-executed, and its effects will
 * happen then.
 *
 * Of course, the stuff about traps doesn't matter to us, because we
 * are strictly a userland emulator.  Anything that traps gets
 * emulated.
 *
 * We do maintain a "we use the FPU" bit.  But we don't maintain the
 * "FPU disabled" bit the hardware does; instead, we initialize the
 * emulated FPU registers along with all the others in clean_regs()
 * and just set SF_FPU when code touches them.  If you like, you can
 * think of our "FP disabled" implementation as being partly in
 * clean_regs() and partly in the SF_FPU settings.
 */
struct state {
	// The condition code bits.
	unsigned int cc;
#define CC_N 8
#define CC_Z 4
#define CC_V 2
#define CC_C 1
	// The FPU condition code bits.
	unsigned int fcc;
#define FCC_UN 3 // unordered
#define FCC_GT 2 // >
#define FCC_LT 1 // <
#define FCC_EQ 0 // =
	// Flags.
	// SF_ANNUL means "next instruction is annulled".
	// SF_FPU means "touched FPU" (eg, must save/restore state)
	// SF_SIGRESTART means "pending syscall restart on EINTR"
	// SF_EMU_MAGIC means "emulator-detect magic is enabled"
	unsigned int flags;
#define SF_ANNUL 0x00000001
#define SF_FPU 0x00000002
#define SF_SIGRESTART 0x00000004
#define SF_EMU_MAGIC 0x00000008
	// pc, npc, and y emulate the hardware registers of the same names.
	uint32_t pc;
	uint32_t npc;
	uint32_t y;
	// xa is the address of the instruction currently being executed.
	uint32_t xa;
	// The general-purpose hardware registers.
	uint32_t regs[32];
	REGWIN rw[NWINDOWS];
	unsigned int cwp;
	unsigned int iwp;
#define R_G0 0
#define R_G1 1
#define R_G2 2
#define R_G3 3
#define R_G4 4
#define R_G5 5
#define R_G6 6
#define R_G7 7
#define R_O0 8
#define R_O1 9
#define R_O2 10
#define R_O3 11
#define R_O4 12
#define R_O5 13
#define R_O6 14
#define R_O7 15
#define R_L0 16
#define R_L1 17
#define R_L2 18
#define R_L3 19
#define R_L4 20
#define R_L5 21
#define R_L6 22
#define R_L7 23
#define R_I0 24
#define R_I1 25
#define R_I2 26
#define R_I3 27
#define R_I4 28
#define R_I5 29
#define R_I6 30
#define R_I7 31
#define R_SP R_O6
#define R_FP R_I6
	// The FPU registers.
	uint32_t fregs[32];
	// Count of instructions executed.
	unsigned long long int instrs;
	// Signal mask (in the sigprocmask sense).
	// Indexed by emulated signal number.
	uint64_t sigmask;
	// Signal handling settings.
	// Indexed by emulated signal number.
	SIG sigh[em__NSIG];
	// True iff cannot interact with the user.
int noninteractive;
	// sigpend[i] true iff signal i is awaiting delivery.
	// Indexed by emulated signal number.
	volatile sig_atomic_t sigpend[em__NSIG];
	// ignsigs is a mask of signals which are set ignored and thus _we_
	// ignore.  Indexed by emulated signal number.
	uint64_t ignsigs;
	// True iff we're currently executing on the signal stack.
	// (We don't currently implement signal stacks.)
	int onsigstack;
	// Is delivery on the signal stack enabled?
	int sigstack_enabled;
	// The signal stack base and size.
	uint32_t sigstack_base;
	uint32_t sigstack_size;
	// Last path successfully exec()ed.  For debugging.
	char *lastexec;
} ;

/*
 * Saved machine states are kept in a stack during vfork()s.  The stack
 * rarely gets very deep - there's only one saved state unless a
 * vforked child itself vforks - but the generality is cheap.
 */
struct statestack {
	STATESTACK *link;
	STATE state;
} ;

// Command-line args.
static const char *exe = 0;
static char **cl_args = 0;
static int cl_nargs = 0;
static char **cl_envp = 0;
static int cl_nenvp = 0;

// Live machine state.
static STATE s;

/*
 * Breakpoint state variables.
 *
 * bpts points to the array of breakpoints, which are kept sorted by
 * address (to speed checking whether a pc value is in the list).
 *
 * abpts is the number of BPTs bpts points to (the number malloc()ed).
 *
 * nbpts is the number of BPTs in bpts which are valid (always in the
 * range [0..abpts]).
 *
 * bpt_suppress is a count of emulator cycles during which we should
 * ignore all breakpoints.  This gets reset each time we enter the UI.
 *
 * We do breakpoints transparently.  Real debuggers typically replace
 * instructions with trap instructions; we could do that, but if some
 * code reads the instruction stream it could notice that.  Instead,
 * we check the pc value against the breakpoint list every emulator
 * cycle.  (If the time penalty of checking the whole list each cycle
 * gets too large, we may need to use a smarter data structure to
 * store breakpoints.)
 */
static BPT *bpts;
static int abpts;
static int nbpts;
static int bpt_suppress;

// The VM environment.
static VM vm;

// File descriptors.
static FD **fds;
static int nfds;

// The PID of this process.
static int mypid;

// Call this to throw out on error.
static void (*err_jmp)(void) = 0;

// Types of tracing.
static TRC trace[] = {
	{ "instr" },
#define TRC_INSTR 0
	{ "chg" },
#define TRC_CHG 1
	{ "mem" },
#define TRC_MEM 2
	{ "syscall" },
#define TRC_SYSCALL 3
	{ "stack" },
#define TRC_STACK 4
	{ "vfork" },
#define TRC_VFORK 5
	{ "signal" },
#define TRC_SIGNAL 6
	{ "exec" },
#define TRC_EXEC 7
	{ "vm" },
#define TRC_VM 8
	{ "window" },
#define TRC_WINDOW 9
	{ "err" },
#define TRC_ERR 10
	{ "fp" },
#define TRC_FP 11
	{ "proc" },
#define TRC_PROC 12
	{ "arena" },
#define TRC_ARENA 13
	{ "io", TRCF_NO_GENERIC_UI },
#define TRC_IO 14
	{ "magic" },
#define TRC_MAGIC 15
	{ 0 } };
#define TRC__N 16

// Size of I/O data to be dumped.
static int io_trace_size;

// Elastic array holding memory accesses.
static MEMACC *memacc;
static int amemacc;
static int nmemacc;

/*
 * If this is set, memory accesses aren't recorded even when they
 * normally would be (ie, when TRC_MEM tracing is on).  This is used
 * when, for example, printing syscall arguments in the syscall entry
 * code.
 */
int nomemacc;

// Active memory watchpoints.
static MEMWATCH *memwatches;

/*
 * Address of the signal-handling trampoline.  Conceptually, this
 * should be part of STATE, but it's always at the same place, so
 * there's no point.
 */
static uint32_t sigtramp;

// All signals we have values for.  Indexed by emulated signal number.
#define SIG_ALLMASK ((uint64_t)(\ (1ULL << em_SIGHUP) | \ (1ULL << em_SIGINT) | \ (1ULL << em_SIGQUIT) | \ (1ULL << em_SIGILL) | \ (1ULL << em_SIGTRAP) | \ (1ULL << em_SIGABRT) | \ (1ULL << em_SIGEMT) | \ (1ULL << em_SIGFPE) | \ (1ULL << em_SIGKILL) | \ (1ULL << em_SIGBUS) | \ (1ULL << em_SIGSEGV) | \ (1ULL << em_SIGSYS) | \ (1ULL << em_SIGPIPE) | \ (1ULL << em_SIGALRM) | \ (1ULL << em_SIGTERM) | \ (1ULL << em_SIGURG) | \ (1ULL << em_SIGSTOP) | \ (1ULL << em_SIGTSTP) | \ (1ULL << em_SIGCONT) | \ (1ULL << em_SIGCHLD) | \ (1ULL << em_SIGTTIN) | \ (1ULL << em_SIGTTOU) | \ (1ULL << em_SIGIO) | \ (1ULL << em_SIGXCPU) | \ (1ULL << em_SIGXFSZ) | \ (1ULL << em_SIGVTALRM) | \ (1ULL << em_SIGPROF) | \ (1ULL << em_SIGWINCH) | \ (1ULL << em_SIGINFO) | \ (1ULL << em_SIGUSR1) | \ (1ULL << em_SIGUSR2) | \ (1ULL << em_SIGPWR) )) // All signals userland can block. Indexed by emulated signal number. #define SIG_CANBLOCK (SIG_ALLMASK & ~(uint64_t)((1ULL << em_SIGKILL) | (1ULL << em_SIGSTOP))) /* * Default signal actions, that is, the actions taken when the handler * is set to SIG_DFL. The SIGDEF_* values must all be nonzero, so * that holes in sigdef[] can be detected by noticing zero values. * There are four possible default actions: kill the process (eg, * SIGTERM), kill the process with a coredump (eg, SIGABRT), ignore * the signal (sg, SIGCONT), and stop the process (eg, SIGTTIN). * * Because _we_ can't catch SIGKILL and SIGSTOP, their entries here * never matter; they're here for completeness more than correctness. * * Indexed by emulated signal number. 
*/ #define SIGDEF_HOLE 0 // used to detect holes #define SIGDEF_KILL 1 #define SIGDEF_CORE 2 #define SIGDEF_IGNORE 3 #define SIGDEF_STOP 4 static const unsigned int sigdef[] = { [em_SIGHUP] = SIGDEF_KILL, [em_SIGINT] = SIGDEF_KILL, [em_SIGQUIT] = SIGDEF_CORE, [em_SIGILL] = SIGDEF_CORE, [em_SIGTRAP] = SIGDEF_CORE, [em_SIGABRT] = SIGDEF_CORE, [em_SIGEMT] = SIGDEF_CORE, [em_SIGFPE] = SIGDEF_CORE, [em_SIGKILL] = SIGDEF_KILL, // never matters [em_SIGBUS] = SIGDEF_CORE, [em_SIGSEGV] = SIGDEF_CORE, [em_SIGSYS] = SIGDEF_CORE, [em_SIGPIPE] = SIGDEF_KILL, [em_SIGALRM] = SIGDEF_KILL, [em_SIGTERM] = SIGDEF_KILL, [em_SIGURG] = SIGDEF_IGNORE, [em_SIGSTOP] = SIGDEF_STOP, // never matters [em_SIGTSTP] = SIGDEF_STOP, [em_SIGCONT] = SIGDEF_IGNORE, [em_SIGCHLD] = SIGDEF_IGNORE, [em_SIGTTIN] = SIGDEF_STOP, [em_SIGTTOU] = SIGDEF_STOP, [em_SIGIO] = SIGDEF_IGNORE, [em_SIGXCPU] = SIGDEF_KILL, [em_SIGXFSZ] = SIGDEF_KILL, [em_SIGVTALRM] = SIGDEF_KILL, [em_SIGPROF] = SIGDEF_KILL, [em_SIGWINCH] = SIGDEF_IGNORE, [em_SIGINFO] = SIGDEF_IGNORE, [em_SIGUSR1] = SIGDEF_KILL, [em_SIGUSR2] = SIGDEF_KILL, [em_SIGPWR] = SIGDEF_IGNORE }; /* * Normally zero. Immediately after a restartable syscall which * returns EINTR, set true; signal delivery notices this and arranges * to restart the syscall. */ static int syscall_restartable; /* * When set nonzero, anysigpend indicates there is probably a signal * pending delivery. Basically, it means "it's worth checking". */ static volatile sig_atomic_t anysigpend; /* * Indicates run() should check for rare events. There are various * things that happen rarely and asynchronously but that require run() * to handle them. Rather than have a bunch of variables that run() * checks each time around its loop, we have this, which reduces the * overhead on most loops to one check. */ static volatile sig_atomic_t alert_run; /* * A bunch of 0x00 octets. 
This is useful mostly for padding syscall * output buffers; for internal things, we can just bzero(), but for * userland, it's easier to copyout() from here than to write a * bzeroout(). */ static const char nulbuf[PAGE_SIZE] = { 0 }; // Indicates run should do just-post-execve() actions, like TRC_STACK. static int postexec; /* * When set, we do a debugger-assist loop after forking. With vfork, * this happens when exec breaks the vfork link (we do it right after * vforkbreak()). */ static int forkwait; /* * When set, panic() does a debugger-assist infinite loop. */ static int panicloop; /* * State of the initial-exec machinery. See the comment on * INIT_EXEC_STATE, above. */ static INIT_EXEC_STATE initial_exec_state = IES_INITIAL; /* * vfork() requires some careful attention to control flow, to ensure * we don't return from the stack frame in which _we_ vfork until the * vfork sharing-and-wait assocation is broken. We also have to be * careful to deal with the effects of our underlying OS's memory * sharing on vfork - there are a number of things that a real OS * keeps separate between parent and child, but which we keep in our * VM and thus need to fix up after vfork(). Most of these have been * moved into STATE and thus are dealt with the same way (emulated) * machine registers are, by restoring pre-vfork state once the parent * resumes, but some require special attention. * * vfork_stage simply says where we are in the control flow, which * bounces around a bit more than we might wish (the underlying * vfork() has to happen in run(), but that's something like three * call frames above the point where we discover the emulated machine * is vforking). It is VFORK_NONE during ordinary execution. When * the emulator vforks, the syscall implementation sets it to * VFORK_START. run() then vforks in the emulator and sets * vfork_stage to VFORK_FAIL (if the vfork failed) or VFORK_SUCCESS * (if it worked) and arranges for sc___vfork14 to be re-entered. 
* This last is not strictly necessary; run() could set up the * emulator state directly - but doing it this way lets us leverage * the existing syscall-return code, rather than having to duplicate * it in run() or factor it out. * * vfork_states is used to restore machine state (and some emulated-OS * state) in the parent, as sketched above. It's a stack, not just a * single saved state, to deal with the case where a vforked child * vforks again while it's still borrowing the parent's resources. * * vfork_dropvm is used to deal with execve(). Normally, execve just * drops the old VM, replacing it with the new. But it's not that * simple for execve in a vforked child, because that would leave us, * in the parent, with the child's VM. So when the implementation of * emulated execve breaks the underlying-OS vfork association, it * makes sure the parent's VM is in the global vm variable. After * vforkbreak()ing, it then drops the parent VM and switches to the * new one - but the parent is then stuck with having both VM spaces * set up. The execve() code sticks the child's VM in vfork_dropvm * before vforkbreak()ing; when the parent resumes post-vfork, it * discards any VM it finds in vfork_dropvm. * * vfork_value is used to communicate the errno (if vfork_stage is * VFORK_FAIL) or return PID (if vfork_stage is VFORK_SUCCESS) to * sc___vfork14 (see the above discussion of vfork_stage). * * during_vfork is zero during normal operation; it is nonzero if the * emulator is currently emulating a vforked child. To handle vfork * from a vfork child, it is a counter, not just a boolean, * incremented when vfork() succeeds and decremented when a vfork * child execs. * * vfb and vfbtm are lists of things done by a vforked child that need * fixups in the parent. For example, if a vforked child closes a * file descriptor, in a real OS the file descriptor in the parent is * unaffected. 
For us, the underlying descriptor is unaffected, but * we have state in VM as well (see fds, above), so we need to fix * things up a bit. vfb holds most of these records. But, in order * to get tracing back in order as soon as possible, we have to do the * TRCMGR entries first. Rather than scan the list twice, doing the * TRCMGR entries the first time and the rest the second, we keep two * lists, vfb for most entries and vfbtm for TRCMGR entries. */ static VFORKSTAGE vfork_stage; static STATESTACK *vfork_states; static VM vfork_dropvm; static uint32_t vfork_value; static int during_vfork; static VFORKBACKOUT *vfb; static VFORKBACKOUT *vfbtm; /* * vm_changed is set to indicate that the VM mapping has changed and, * if TRC_VM, should be reported. */ static int vm_changed; /* * elf_stab holds the symbol table entries from the ELF file currently * being executed, so that, for example, a disasebled call instruction * can report the symbol, if any, corresponding to the target. It * doesn't work when executing out of a .so, but it's enough to help. */ static STAB elf_stab = STAB_INIT_EMPTY; /* * Byteswap values from ELF files. These are needed because loading * values from ELF files is done with read() (or moral equivalent), * not mem_get_*(), so byte-sex differences between the emulated SPARC * and the emulator CPU, if any, become visible. We are perhaps * fortunate that the machine we're emulating uses network byte order, * letting us (ab)use the ntoh*() macros; if we were emulating a * little-endian machine, we'd need to write some kind of letoh*() * operations. */ #define ELF_HALF_TO_NATIVE(x) ntohs((x)) #define ELF_WORD_TO_NATIVE(x) ntohl((x)) #define ELF_ADDR_TO_NATIVE(x) ntohl((x)) #define ELF_OFFSET_TO_NATIVE(x) ntohl((x)) /* * Names of machine registers. The extra four at the end are mostly * for the benefit of print_regs(). 
For the main 32 registers, this * is indexed by the R_* values defined in the STATE struct, above; * the FPU registers are indexed starting at PRINT_REGS_Fbase. The * extra values (y, etc) have PRINT_REGS_* definitions for their * indices. The other PRINT_REGS* values are for the convenience of * print_regs() and related code. */ static const char * const regnames[] = { "%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7", "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7", "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7", "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7", "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7", "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15", "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23", "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31", "y", "pc", "npc", "cc" }; #define PRINT_REGS_Fbase 32 #define PRINT_REGS_Y 64 #define PRINT_REGS_PC 65 #define PRINT_REGS_NPC 66 #define PRINT_REGS_CC 67 #define PRINT_REGS__N (sizeof(regnames)/sizeof(regnames[0])) // Printed forms of integer condition code conditions. static const char * const icc[] = { "n", "e", "le", "l", "leu", "lu/cs", "neg", "vs", "a", "ne", "g", "ge", "gu", "geu/cc", "pos", "vc" }; // Printed forms of FPU condition code conditions. static const char * const fcc[] = { "n", "ne", "lg", "ul", "l", "ug", "g", "u", "a", "e", "ue", "ge", "uge", "le", "ule", "o" }; // Printed forms of coprocessor condition code conditions. static const char * const ccc[] = { "n", "123", "12", "13", "1", "23", "2", "3", "a", "0", "03", "02", "023", "01", "013", "012" }; /* * The signal trampoline code. This gets copied out to each executed * process's stack; signal delivery sets %pc to point to the beginning * of it. The comments are from 1.4T's sys/arch/sparc/sparc/locore.s, * where this comes from. 
 */
static const uint32_t sigcode[] = {
	0x9de3bf18,	// save %sp, -CCFSZ-136, %sp
	0xa4100002,	// mov %g2, %l2
	0xa6100003,	// mov %g3, %l3
	0xa8100004,	// mov %g4, %l4
	0xaa100005,	// mov %g5, %l5
	0xac100006,	// mov %g6, %l6
	0xae100007,	// mov %g7, %l7
	0xe007a064,	// ld [%fp+64+16+SC_PSR_OFFSET], %l0
	0x23000004,	// sethi %hi(PSR_EF), %l1
	0xa08c0011,	// andcc %l0, %l1, %l0
	0x02800013,	// be 1f
	0xa3400000,	// rd %y, %l1
	0xc12ba060,	// st %fsr, [%sp+CCFSZ+0]
	0xc13ba068,	// std %f0, [%sp+CCFSZ+8]
	0xc53ba070,	// std %f2, [%sp+CCFSZ+16]
	0xc93ba078,	// std %f4, [%sp+CCFSZ+24]
	0xcd3ba080,	// std %f6, [%sp+CCFSZ+32]
	0xd13ba088,	// std %f8, [%sp+CCFSZ+40]
	0xd53ba090,	// std %f10, [%sp+CCFSZ+48]
	0xd93ba098,	// std %f12, [%sp+CCFSZ+56]
	0xdd3ba0a0,	// std %f14, [%sp+CCFSZ+64]
	0xe13ba0a8,	// std %f16, [%sp+CCFSZ+72]
	0xe53ba0b0,	// std %f18, [%sp+CCFSZ+80]
	0xe93ba0b8,	// std %f20, [%sp+CCFSZ+88]
	0xed3ba0c0,	// std %f22, [%sp+CCFSZ+96]
	0xf13ba0c8,	// std %f24, [%sp+CCFSZ+104]
	0xf53ba0d0,	// std %f26, [%sp+CCFSZ+112]
	0xf93ba0d8,	// std %f28, [%sp+CCFSZ+120]
	0xfd3ba0e0,	// std %f30, [%sp+CCFSZ+128]
	0xd01fa040,	// 1: ldd [%fp+64], %o0
	0xd607a04c,	// ld [%fp+76], %o3
	0x9fc04000,	// call %g1
	0x9407a050,	// add %fp, 64+16, %o2
	0x80940000,	// tst %l0
	0x02800013,	// be 1f
	0x81844000,	// wr %l1, %g0, %y
	0xc10ba060,	// ld [%sp+CCFSZ+0], %fsr
	0xc11ba068,	// ldd [%sp+CCFSZ+8], %f0
	0xc51ba070,	// ldd [%sp+CCFSZ+16], %f2
	0xc91ba078,	// ldd [%sp+CCFSZ+24], %f4
	0xcd1ba080,	// ldd [%sp+CCFSZ+32], %f6
	0xd11ba088,	// ldd [%sp+CCFSZ+40], %f8
	0xd51ba090,	// ldd [%sp+CCFSZ+48], %f10
	0xd91ba098,	// ldd [%sp+CCFSZ+56], %f12
	0xdd1ba0a0,	// ldd [%sp+CCFSZ+64], %f14
	0xe11ba0a8,	// ldd [%sp+CCFSZ+72], %f16
	0xe51ba0b0,	// ldd [%sp+CCFSZ+80], %f18
	0xe91ba0b8,	// ldd [%sp+CCFSZ+88], %f20
	0xed1ba0c0,	// ldd [%sp+CCFSZ+96], %f22
	0xf11ba0c8,	// ldd [%sp+CCFSZ+104], %f24
	0xf51ba0d0,	// ldd [%sp+CCFSZ+112], %f26
	0xf91ba0d8,	// ldd [%sp+CCFSZ+120], %f28
	0xfd1ba0e0,	// ldd [%sp+CCFSZ+128], %f30
	0x84100012,	// mov %l2, %g2 (rd field of 0x84100012 is 2)
	0x86100013,	// mov %l3, %g3
	0x88100014,	// mov %l4, %g4
	0x8a100015,	// mov %l5, %g5
	0x8c100016,	// mov %l6, %g6
	0x8e100017,	// mov %l7, %g7
	0x83e82127,	// restore %g0, SYS___sigreturn14, %g1
	0x9003a050,	// add %sp, 64+16, %o0 (rd field of 0x9003a050 is 8)
	0x91d02000,	// t ST_SYSCALL
	0x82102001,	// mov SYS_EXIT, %g1
	0x91d02000,	// t ST_SYSCALL
	};
// Size, in bytes, of the trampoline copied out to the process's stack.
#define SZSIGCODE sizeof(sigcode)
/*
 * conds[] is here to automate condition-code testing.  It is indexed
 * by the condition value from a branch instruction; the resulting
 * value is, conceptually, an array of 16 bits indexed by the four-bit
 * number formed by concatenating the condition-code bits.  The
 * resulting bit says whether the condition passes or not.
 *
 * The CMASK_* definitions here assume that the treatment of the 16-bit
 * value as an array of 16 bits maps the LSB to the [0] element, the
 * MSB to the [15] element.
 */
/* The conds[] initialization assumes these */
#if (CC_N != 8) || (CC_Z != 4) || (CC_V != 2) || (CC_C != 1)
#error "conds[] assumptions invalid"
#endif
/*
 * CMASK_FROM_CC(f), for f a single-bit flag value (1, 2, 4, or 8),
 * produces a 16-bit mask whose bit i is set iff (i & f) is nonzero:
 * 0xffff/((1<<f)+1) is the alternating pattern with bit f clear
 * (eg, 0x5555 for f=1, 0x3333 for f=2), which xor 0xffff inverts.
 */
#define CMASK_FROM_CC(x) ((0xffff/((1<<(x))+1))^0xffff)
#define CMASK_N CMASK_FROM_CC(CC_N)
#define CMASK_Z CMASK_FROM_CC(CC_Z)
#define CMASK_V CMASK_FROM_CC(CC_V)
#define CMASK_C CMASK_FROM_CC(CC_C)
static const uint16_t conds[16] = {
	0,						// never
	CMASK_Z,					// eq
	CMASK_Z | (CMASK_N ^ CMASK_V),			// le
	CMASK_N ^ CMASK_V,				// lt
	CMASK_C | CMASK_Z,				// leu
	CMASK_C,					// ltu, cs
	CMASK_N,					// neg
	CMASK_V,					// vs
	0xffff,						// always
	0xffff ^ CMASK_Z,				// ne
	0xffff ^ (CMASK_Z | (CMASK_N ^ CMASK_V)),	// gt
	0xffff ^ CMASK_N ^ CMASK_V,			// ge
	0xffff ^ (CMASK_C | CMASK_Z),			// gtu
	0xffff ^ CMASK_C,				// geu, cc
	0xffff ^ CMASK_N,				// pos
	0xffff ^ CMASK_V };				// vc
#undef CMASK_N
#undef CMASK_Z
#undef CMASK_V
#undef CMASK_C
#undef CMASK_FROM_CC
/*
 * fconds[] is just like conds[], except that it's for floating-point
 * conditional branches rather than integer conditional branches.
 * This means there are only two cc bits, not four, and thus each
 * entry needs only four bits, not sixteen.
*/ /* * The fconds[] initialization assumes these. Actually, strictly, it * can deal with FCC_* being any permutation of the numbers 0,1,2,3, * but that is difficult to express compactly in cpp. */ #if (FCC_EQ != 0) || (FCC_LT != 1) || (FCC_GT != 2) || (FCC_UN != 3) #error "fconds[] assumptions invalid" #endif #define FCM_EQ (1< tests we can pass plain char to. #define Cisspace(x) isspace((unsigned char)(x)) #define Cisdigit(x) isdigit((unsigned char)(x)) /* * This is called upon the emulator bugchecking. This actually getting * called indicates there is a bug somewhere. * * We fflush multiple times because we use stdio wrapper streams and we * have no reason to think fflush(0) will flush the inner streams * after flushing the outer streams. */ void (panic)(const char *fn, int lno, const char *fmt, ...) { va_list ap; fprintf(stderr,"%d: panic (\"%s\", line %d): ",mypid,fn,lno); va_start(ap,fmt); vfprintf(stderr,fmt,ap); va_end(ap); fprintf(stderr,"\n"); fflush(0); fflush(0); fflush(0); if (panicloop) { volatile int v; fprintf(stderr,"%s: panic PID %d\n",__progname,(int)getpid()); fflush(0); v = 1; while (v) poll(0,0,100); } signal(SIGSEGV,SIG_DFL); signal(SIGBUS,SIG_DFL); (void)*(volatile char *)0; abort(); exit(1); } static void gdbloop(void) { volatile int go; go = 0; while (! go) poll(0,0,100); } // Forward. See the comment on the definition, below. static void full_tracing(void) __attribute__((__used__)); /* * This is called to return to the top-level loop. The test is to * handle the case where it's called before the top-level loop is * entered; I'm not sure this can happen, but it's a cheap check. */ static void top(void) __attribute__((__noreturn__)); static void top(void) { if (! err_jmp) exit(1); (*err_jmp)(); panic("err_jmp returned"); } /* * Most tracing is generated with trc(TRC_*,...) calls. But sometimes * it's more convenient to write to a FILE * (as, for example, when * calling a print-something function that takes a FILE * for the * destination). 
This returns the FILE * output for the given tracing * kind should be sent to, or nil if that tracing is turned off. * * The returned FILE * should never be closed by the caller. */ static FILE *trc_f(int which) { if ((which < 0) || (which >= TRC__N)) abort(); return(trace[which].f); } /* * Usually, tracing should just call trc(). But, sometimes, tracing * does something complicated or expensive enough that it should be * skipped if tracing is off. This performs that test. */ static int trc_if(int which) { if ((which < 0) || (which >= TRC__N)) abort(); return(!!trace[which].f); } /* * Generate trace output. Conceptually, this is semantically * equivalent to calling trc_f() and, if the returned value is * non-nil, fprintf()ing to it - but this is more convenient in msot * cases. */ static void trc(int, const char *, ...) __attribute__((__format__(__printf__,2,3))); static void trc(int which, const char *fmt, ...) { FILE *f; va_list ap; f = trc_f(which); if (f == 0) return; va_start(ap,fmt); vfprintf(f,fmt,ap); va_end(ap); } /* * Record a memory access. This is not called unless TRC_MEM tracing * is turned on. This handles collapsing successive adjacent accesses * into a single MEMACC - though it does so only when the later access * is after the earlier; some memory accesses are done upwards instead * of downwards specifically so that they will collapse nicely here. * (Arguably we should arrange to handle accesses immediately below, * as well as immediately above, existing MEMACCs, though the * realloc() interface makes that a bit annoying.) 
*/ static void mem_rw(char rw, uint32_t a, uint8_t v) { MEMACC *m; if (nomemacc) return; if (nmemacc >= amemacc) { int i; i = amemacc; memacc = realloc(memacc,(amemacc=nmemacc+16)*sizeof(*memacc)); for (;ivp = malloc((m->a=8)*sizeof(uint8_t)); } } if ( (nmemacc > 0) && (a == (m=&memacc[nmemacc-1])->a2) && (rw == m->rw) ) { if (m->n >= m->a) m->vp = realloc(m->vp,(m->a=m->n+8)*sizeof(uint8_t)); m->vp[m->n++] = v; m->a2 ++; return; } m = &memacc[nmemacc++]; m->a1 = a; m->a2 = a + 1; m->n = 1; m->vp[0] = v; m->rw = rw; } /* * Find the MEMSEG that maps a given address. This also centralizes * alignment checks. * * If op is nil, this returns false for unmapped addresses; if not, it * complains and throws out (and thus cannot return nil). * * XXX Arguably we should do better than linear search, even with the * optimization that accesses will tend to stay nearby and thus moving * the accessed MEMSEG to the head of the list will cut down on * searches. Maybe an array indexed by address/PAGE_SIZE? But, so * far, there are typically few enough MEMSEGs I'm not sure it's worth * the bother. */ static MEMSEG *memseg_find(uint32_t addr, uint32_t align, const char *op) { MEMSEG *ms; MEMSEG **msp; if (addr & align) { printf("%d: %s %08lx: not aligned\n",mypid,op,(ULI)addr); trc(TRC_ERR,"%s %08lx: not aligned\n",op,(ULI)addr); top(); } msp = &vm.m; while ((ms = *msp)) { if ((addr >= ms->base) && (addr < ms->end)) { *msp = ms->link; ms->link = vm.m; vm.m = ms; return(ms); } else { msp = &ms->link; } } if (! op) return(0); printf("%d: %s %08lx: not mapped\n",mypid,op,(ULI)addr); trc(TRC_ERR,"%s %08lx: not mapped\n",op,(ULI)addr); top(); } /* * Return a uint8_t pointer to the memory at emulated virtual address * addr. align is the alignment mask (0 for no alignment, 1 for * 2-byte, 3 for 4-byte, 7 for 8-byte, etc). op is a text name for * the operation, for error messages. prot is the type of access * contemplated. 
* * This is like memseg_find except that it does protection checks and * the return value is a pointer to the memory rather than the * relevant MEMSEG pointer. */ static uint8_t *mem_find(uint32_t addr, uint32_t align, const char *op, unsigned int prot) { MEMSEG *ms; ms = memseg_find(addr,align,op); if (! (ms->prot & prot)) { printf("%d: %s %08lx: not accessible\n",mypid,op,(ULI)addr); trc(TRC_ERR,"%s %08lx: not accessible\n",op,(ULI)addr); top(); } (*ms->ops->check)(ms,addr-ms->base,align+1,prot); return(ms->data+(addr-ms->base)); } /* * Set and get memory values. Addresses must be aligned correctly for * the data type in question, and these handle the way the emulated * machine is big-endian regardless of the emulating CPU's endianness. */ // Get an 8-byte data value. uint64_t mem_get_8(uint32_t addr) { uint8_t *p; p = mem_find(addr,7,"get_8",P_R); if (trc_if(TRC_MEM)) { mem_rw('r',addr,p[0]); mem_rw('r',addr+1,p[1]); mem_rw('r',addr+2,p[2]); mem_rw('r',addr+3,p[3]); mem_rw('r',addr+4,p[4]); mem_rw('r',addr+5,p[5]); mem_rw('r',addr+6,p[6]); mem_rw('r',addr+7,p[7]); } return( (p[0] * 0x0100000000000000ULL) | (p[1] * 0x0001000000000000ULL) | (p[2] * 0x0000010000000000ULL) | (p[3] * 0x0000000100000000ULL) | (p[4] * 0x0000000001000000ULL) | (p[5] * 0x0000000000010000ULL) | (p[6] * 0x0000000000000100ULL) | p[7] ); } // Get a 4-byte data value. uint32_t mem_get_4(uint32_t addr) { uint8_t *p; p = mem_find(addr,3,"get_4",P_R); if (trc_if(TRC_MEM)) { mem_rw('r',addr,p[0]); mem_rw('r',addr+1,p[1]); mem_rw('r',addr+2,p[2]); mem_rw('r',addr+3,p[3]); } return((p[0]*0x01000000)|(p[1]*0x00010000)|(p[2]*0x00000100)|p[3]); } // Get a 4-byte code value. // Just like mem_get_4 except that it's P_X instead of P_R. 
uint32_t mem_exe_4(uint32_t addr) { uint8_t *p; p = mem_find(addr,3,"exe_4",P_X); if (trc_if(TRC_MEM)) { mem_rw('x',addr,p[0]); mem_rw('x',addr+1,p[1]); mem_rw('x',addr+2,p[2]); mem_rw('x',addr+3,p[3]); } return((p[0]*0x01000000)|(p[1]*0x00010000)|(p[2]*0x00000100)|p[3]); } // Set a 4-byte value. void mem_set_4(uint32_t addr, uint32_t v) { uint8_t *p; p = mem_find(addr,3,"set_4",P_W); p[0] = v >> 24; p[1] = v >> 16; p[2] = v >> 8; p[3] = v; if (trc_if(TRC_MEM)) { mem_rw('w',addr,p[0]); mem_rw('w',addr+1,p[1]); mem_rw('w',addr+2,p[2]); mem_rw('w',addr+3,p[3]); } } // Set an 8-byte value. void mem_set_8(uint32_t addr, uint64_t v) { uint8_t *p; p = mem_find(addr,7,"set_8",P_W); p[0] = v >> 56; p[1] = v >> 48; p[2] = v >> 40; p[3] = v >> 32; p[4] = v >> 24; p[5] = v >> 16; p[6] = v >> 8; p[7] = v; if (trc_if(TRC_MEM)) { mem_rw('w',addr,p[0]); mem_rw('w',addr+1,p[1]); mem_rw('w',addr+2,p[2]); mem_rw('w',addr+3,p[3]); mem_rw('w',addr+4,p[4]); mem_rw('w',addr+5,p[5]); mem_rw('w',addr+6,p[6]); mem_rw('w',addr+7,p[7]); } } // Get a 2-byte data value. uint16_t mem_get_2(uint32_t addr) { uint8_t *p; p = mem_find(addr,1,"get_2",P_R); if (trc_if(TRC_MEM)) { mem_rw('r',addr,p[0]); mem_rw('r',addr+1,p[1]); } return((p[0]*0x0100)|p[1]); } // Set a 2-byte data value. void mem_set_2(uint32_t addr, uint16_t v) { uint8_t *p; p = mem_find(addr,1,"set_2",P_W); p[0] = v >> 8; p[1] = v; if (trc_if(TRC_MEM)) { mem_rw('w',addr,p[0]); mem_rw('w',addr+1,p[1]); } } // Get a 1-byte data value. uint8_t mem_get_1(uint32_t addr) { uint8_t *p; p = mem_find(addr,0,"get_1",P_R); if (trc_if(TRC_MEM)) mem_rw('r',addr,*p); return(*p); } // Set a 1-byte data value. void mem_set_1(uint32_t addr, uint8_t v) { uint8_t *p; p = mem_find(addr,0,"set_1",P_W); if (trc_if(TRC_MEM)) mem_rw('w',addr,v); *p = v; } /* * Convert an underlying OS errno to an emulated-OS errno. * * We do not assume the underlying OS supports all the errnos the * emulator does. 
*/ static uint32_t os2em_errno(int err) { switch (errno) { #ifdef EPERM case EPERM: return(em_EPERM); break; #endif #ifdef ENOENT case ENOENT: return(em_ENOENT); break; #endif #ifdef ESRCH case ESRCH: return(em_ESRCH); break; #endif #ifdef EINTR case EINTR: return(em_EINTR); break; #endif #ifdef EIO case EIO: return(em_EIO); break; #endif #ifdef ENXIO case ENXIO: return(em_ENXIO); break; #endif #ifdef E2BIG case E2BIG: return(em_E2BIG); break; #endif #ifdef ENOEXEC case ENOEXEC: return(em_ENOEXEC); break; #endif #ifdef EBADF case EBADF: return(em_EBADF); break; #endif #ifdef ECHILD case ECHILD: return(em_ECHILD); break; #endif #ifdef EDEADLK case EDEADLK: return(em_EDEADLK); break; #endif #ifdef ENOMEM case ENOMEM: return(em_ENOMEM); break; #endif #ifdef EACCES case EACCES: return(em_EACCES); break; #endif #ifdef EFAULT case EFAULT: return(em_EFAULT); break; #endif #ifdef ENOTBLK case ENOTBLK: return(em_ENOTBLK); break; #endif #ifdef EBUSY case EBUSY: return(em_EBUSY); break; #endif #ifdef EEXIST case EEXIST: return(em_EEXIST); break; #endif #ifdef EXDEV case EXDEV: return(em_EXDEV); break; #endif #ifdef ENODEV case ENODEV: return(em_ENODEV); break; #endif #ifdef ENOTDIR case ENOTDIR: return(em_ENOTDIR); break; #endif #ifdef EISDIR case EISDIR: return(em_EISDIR); break; #endif #ifdef EINVAL case EINVAL: return(em_EINVAL); break; #endif #ifdef ENFILE case ENFILE: return(em_ENFILE); break; #endif #ifdef EMFILE case EMFILE: return(em_EMFILE); break; #endif #ifdef ENOTTY case ENOTTY: return(em_ENOTTY); break; #endif #ifdef ETXTBSY case ETXTBSY: return(em_ETXTBSY); break; #endif #ifdef EFBIG case EFBIG: return(em_EFBIG); break; #endif #ifdef ENOSPC case ENOSPC: return(em_ENOSPC); break; #endif #ifdef ESPIPE case ESPIPE: return(em_ESPIPE); break; #endif #ifdef EROFS case EROFS: return(em_EROFS); break; #endif #ifdef EMLINK case EMLINK: return(em_EMLINK); break; #endif #ifdef EPIPE case EPIPE: return(em_EPIPE); break; #endif #ifdef EDOM case EDOM: return(em_EDOM); break; 
#endif #ifdef ERANGE case ERANGE: return(em_ERANGE); break; #endif #ifdef EAGAIN case EAGAIN: return(em_EAGAIN); break; #endif #if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EAGAIN != EWOULDBLOCK)) case EWOUDBLOCK: return(em_EWOULDBLOCK); break; #endif #ifdef EINPROGRESS case EINPROGRESS: return(em_EINPROGRESS); break; #endif #ifdef EALREADY case EALREADY: return(em_EALREADY); break; #endif #ifdef ENOTSOCK case ENOTSOCK: return(em_ENOTSOCK); break; #endif #ifdef EDESTADDRREQ case EDESTADDRREQ: return(em_EDESTADDRREQ); break; #endif #ifdef EMSGSIZE case EMSGSIZE: return(em_EMSGSIZE); break; #endif #ifdef EPROTOTYPE case EPROTOTYPE: return(em_EPROTOTYPE); break; #endif #ifdef ENOPROTOOPT case ENOPROTOOPT: return(em_ENOPROTOOPT); break; #endif #ifdef EPROTONOSUPPORT case EPROTONOSUPPORT: return(em_EPROTONOSUPPORT); break; #endif #ifdef ESOCKTNOSUPPORT case ESOCKTNOSUPPORT: return(em_ESOCKTNOSUPPORT); break; #endif #ifdef EOPNOTSUPP case EOPNOTSUPP: return(em_EOPNOTSUPP); break; #endif #ifdef EPFNOSUPPORT case EPFNOSUPPORT: return(em_EPFNOSUPPORT); break; #endif #ifdef EAFNOSUPPORT case EAFNOSUPPORT: return(em_EAFNOSUPPORT); break; #endif #ifdef EADDRINUSE case EADDRINUSE: return(em_EADDRINUSE); break; #endif #ifdef EADDRNOTAVAIL case EADDRNOTAVAIL: return(em_EADDRNOTAVAIL); break; #endif #ifdef ENETDOWN case ENETDOWN: return(em_ENETDOWN); break; #endif #ifdef ENETUNREACH case ENETUNREACH: return(em_ENETUNREACH); break; #endif #ifdef ENETRESET case ENETRESET: return(em_ENETRESET); break; #endif #ifdef ECONNABORTED case ECONNABORTED: return(em_ECONNABORTED); break; #endif #ifdef ECONNRESET case ECONNRESET: return(em_ECONNRESET); break; #endif #ifdef ENOBUFS case ENOBUFS: return(em_ENOBUFS); break; #endif #ifdef EISCONN case EISCONN: return(em_EISCONN); break; #endif #ifdef ENOTCONN case ENOTCONN: return(em_ENOTCONN); break; #endif #ifdef ESHUTDOWN case ESHUTDOWN: return(em_ESHUTDOWN); break; #endif #ifdef ETOOMANYREFS case ETOOMANYREFS: return(em_ETOOMANYREFS); 
break; #endif #ifdef ETIMEDOUT case ETIMEDOUT: return(em_ETIMEDOUT); break; #endif #ifdef ECONNREFUSED case ECONNREFUSED: return(em_ECONNREFUSED); break; #endif #ifdef ELOOP case ELOOP: return(em_ELOOP); break; #endif #ifdef ENAMETOOLONG case ENAMETOOLONG: return(em_ENAMETOOLONG); break; #endif #ifdef EHOSTDOWN case EHOSTDOWN: return(em_EHOSTDOWN); break; #endif #ifdef EHOSTUNREACH case EHOSTUNREACH: return(em_EHOSTUNREACH); break; #endif #ifdef ENOTEMPTY case ENOTEMPTY: return(em_ENOTEMPTY); break; #endif #ifdef EPROCLIM case EPROCLIM: return(em_EPROCLIM); break; #endif #ifdef EUSERS case EUSERS: return(em_EUSERS); break; #endif #ifdef EDQUOT case EDQUOT: return(em_EDQUOT); break; #endif #ifdef ESTALE case ESTALE: return(em_ESTALE); break; #endif #ifdef EREMOTE case EREMOTE: return(em_EREMOTE); break; #endif #ifdef EBADRPC case EBADRPC: return(em_EBADRPC); break; #endif #ifdef ERPCMISMATCH case ERPCMISMATCH: return(em_ERPCMISMATCH); break; #endif #ifdef EPROGUNAVAIL case EPROGUNAVAIL: return(em_EPROGUNAVAIL); break; #endif #ifdef EPROGMISMATCH case EPROGMISMATCH: return(em_EPROGMISMATCH); break; #endif #ifdef EPROCUNAVAIL case EPROCUNAVAIL: return(em_EPROCUNAVAIL); break; #endif #ifdef ENOLCK case ENOLCK: return(em_ENOLCK); break; #endif #ifdef ENOSYS case ENOSYS: return(em_ENOSYS); break; #endif #ifdef EFTYPE case EFTYPE: return(em_EFTYPE); break; #endif #ifdef EAUTH case EAUTH: return(em_EAUTH); break; #endif #ifdef ENEEDAUTH case ENEEDAUTH: return(em_ENEEDAUTH); break; #endif #ifdef EIDRM case EIDRM: return(em_EIDRM); break; #endif #ifdef ENOMSG case ENOMSG: return(em_ENOMSG); break; #endif #ifdef EOVERFLOW case EOVERFLOW: return(em_EOVERFLOW); break; #endif #ifdef ENOTPLAIN case ENOTPLAIN: return(em_ENOTPLAIN); break; #endif } printf("Unmappable errno %d\n",err); top(); } /* * Convert an emulated-OS signal number to an underlying-OS signal * number. 
*/ static int em2os_signal(uint32_t sig) { switch (sig) { case em_SIGHUP: return(SIGHUP); break; case em_SIGINT: return(SIGINT); break; case em_SIGQUIT: return(SIGQUIT); break; case em_SIGILL: return(SIGILL); break; case em_SIGTRAP: return(SIGTRAP); break; case em_SIGABRT: return(SIGABRT); break; case em_SIGEMT: return(SIGEMT); break; case em_SIGFPE: return(SIGFPE); break; case em_SIGKILL: return(SIGKILL); break; case em_SIGBUS: return(SIGBUS); break; case em_SIGSEGV: return(SIGSEGV); break; case em_SIGSYS: return(SIGSYS); break; case em_SIGPIPE: return(SIGPIPE); break; case em_SIGALRM: return(SIGALRM); break; case em_SIGTERM: return(SIGTERM); break; case em_SIGURG: return(SIGURG); break; case em_SIGSTOP: return(SIGSTOP); break; case em_SIGTSTP: return(SIGTSTP); break; case em_SIGCONT: return(SIGCONT); break; case em_SIGCHLD: return(SIGCHLD); break; case em_SIGTTIN: return(SIGTTIN); break; case em_SIGTTOU: return(SIGTTOU); break; case em_SIGIO: return(SIGIO); break; case em_SIGXCPU: return(SIGXCPU); break; case em_SIGXFSZ: return(SIGXFSZ); break; case em_SIGVTALRM: return(SIGVTALRM); break; case em_SIGPROF: return(SIGPROF); break; case em_SIGWINCH: return(SIGWINCH); break; case em_SIGINFO: return(SIGINFO); break; case em_SIGUSR1: return(SIGUSR1); break; case em_SIGUSR2: return(SIGUSR2); break; case em_SIGPWR: return(SIGPWR); break; } return(0); } /* * Convert an underlying-OS signal number to an emulated-OS signal * number. 
 */
static int os2em_signal(uint32_t sig)
{
    /* Underlying signals with no emulated equivalent convert to 0. */
    switch (sig) {
    case SIGHUP: return(em_SIGHUP); break;
    case SIGINT: return(em_SIGINT); break;
    case SIGQUIT: return(em_SIGQUIT); break;
    case SIGILL: return(em_SIGILL); break;
    case SIGTRAP: return(em_SIGTRAP); break;
    case SIGABRT: return(em_SIGABRT); break;
    case SIGEMT: return(em_SIGEMT); break;
    case SIGFPE: return(em_SIGFPE); break;
    case SIGKILL: return(em_SIGKILL); break;
    case SIGBUS: return(em_SIGBUS); break;
    case SIGSEGV: return(em_SIGSEGV); break;
    case SIGSYS: return(em_SIGSYS); break;
    case SIGPIPE: return(em_SIGPIPE); break;
    case SIGALRM: return(em_SIGALRM); break;
    case SIGTERM: return(em_SIGTERM); break;
    case SIGURG: return(em_SIGURG); break;
    case SIGSTOP: return(em_SIGSTOP); break;
    case SIGTSTP: return(em_SIGTSTP); break;
    case SIGCONT: return(em_SIGCONT); break;
    case SIGCHLD: return(em_SIGCHLD); break;
    case SIGTTIN: return(em_SIGTTIN); break;
    case SIGTTOU: return(em_SIGTTOU); break;
    case SIGIO: return(em_SIGIO); break;
    case SIGXCPU: return(em_SIGXCPU); break;
    case SIGXFSZ: return(em_SIGXFSZ); break;
    case SIGVTALRM: return(em_SIGVTALRM); break;
    case SIGPROF: return(em_SIGPROF); break;
    case SIGWINCH: return(em_SIGWINCH); break;
    case SIGINFO: return(em_SIGINFO); break;
    case SIGUSR1: return(em_SIGUSR1); break;
    case SIGUSR2: return(em_SIGUSR2); break;
    case SIGPWR: return(em_SIGPWR); break;
    }
    return(0);
}
/*
 * Convert an emulated-OS struct termios to an underlying-OS struct
 * termios.
 *
 * em is the emulated address of the emulated struct; its layout
 * (32-bit flag words at offsets 0/4/8/12, c_cc bytes at 16+em_V*,
 * speeds at 36/40) is read via the mem_get_* accessors.
 */
static void em2os_termios(uint32_t em, struct termios *os)
{
    uint32_t v;
    int i;

    /* c_iflag: translate bit-by-bit. */
    v = mem_get_4(em);
    os->c_iflag =
        ((v & em_IGNBRK) ? IGNBRK : 0) |
        ((v & em_BRKINT) ? BRKINT : 0) |
        ((v & em_IGNPAR) ? IGNPAR : 0) |
        ((v & em_PARMRK) ? PARMRK : 0) |
        ((v & em_INPCK) ? INPCK : 0) |
        ((v & em_ISTRIP) ? ISTRIP : 0) |
        ((v & em_INLCR) ? INLCR : 0) |
        ((v & em_IGNCR) ? IGNCR : 0) |
        ((v & em_ICRNL) ? ICRNL : 0) |
        ((v & em_IXON) ? IXON : 0) |
        ((v & em_IXOFF) ? IXOFF : 0) |
        ((v & em_IXANY) ? IXANY : 0) |
        ((v & em_IMAXBEL) ? IMAXBEL : 0);
    /* c_oflag */
    v = mem_get_4(em+4);
    os->c_oflag =
        ((v & em_OPOST) ? OPOST : 0) |
        ((v & em_ONLCR) ? ONLCR : 0) |
        ((v & em_OXTABS) ? OXTABS : 0) |
        ((v & em_ONOEOT) ? ONOEOT : 0) |
        ((v & em_OCRNL) ? OCRNL : 0) |
        ((v & em_ONOCR) ? ONOCR : 0) |
        ((v & em_ONLRET) ? ONLRET : 0);
    /* c_cflag: CSIZE is a multi-bit field, hence the == tests. */
    v = mem_get_4(em+8);
    os->c_cflag =
        ((v & em_CIGNORE) ? CIGNORE : 0) |
        (((v & em_CSIZE) == em_CS5) ? CS5 : 0) |
        (((v & em_CSIZE) == em_CS6) ? CS6 : 0) |
        (((v & em_CSIZE) == em_CS7) ? CS7 : 0) |
        (((v & em_CSIZE) == em_CS8) ? CS8 : 0) |
        ((v & em_CSTOPB) ? CSTOPB : 0) |
        ((v & em_CREAD) ? CREAD : 0) |
        ((v & em_PARENB) ? PARENB : 0) |
        ((v & em_PARODD) ? PARODD : 0) |
        ((v & em_HUPCL) ? HUPCL : 0) |
        ((v & em_CLOCAL) ? CLOCAL : 0) |
        ((v & em_CRTSCTS) ? CRTSCTS : 0) |
        ((v & em_CDTRCTS) ? CDTRCTS : 0) |
        ((v & em_MDMBUF) ? MDMBUF : 0);
    /* c_lflag */
    v = mem_get_4(em+12);
    os->c_lflag =
        ((v & em_ECHOKE) ? ECHOKE : 0) |
        ((v & em_ECHOE) ? ECHOE : 0) |
        ((v & em_ECHOK) ? ECHOK : 0) |
        ((v & em_ECHO) ? ECHO : 0) |
        ((v & em_ECHONL) ? ECHONL : 0) |
        ((v & em_ECHOPRT) ? ECHOPRT : 0) |
        ((v & em_ECHOCTL) ? ECHOCTL : 0) |
        ((v & em_ISIG) ? ISIG : 0) |
        ((v & em_ICANON) ? ICANON : 0) |
        ((v & em_ALTWERASE) ? ALTWERASE : 0) |
        ((v & em_IEXTEN) ? IEXTEN : 0) |
        ((v & em_EXTPROC) ? EXTPROC : 0) |
        ((v & em_TOSTOP) ? TOSTOP : 0) |
        ((v & em_FLUSHO) ? FLUSHO : 0) |
        ((v & em_NOKERNINFO) ? NOKERNINFO : 0) |
        ((v & em_PENDIN) ? PENDIN : 0) |
        ((v & em_NOFLSH) ? NOFLSH : 0);
    /* c_cc: disable every slot, then fill in the ones we emulate. */
    for (i=(sizeof(os->c_cc)/sizeof(os->c_cc[0]))-1;i>=0;i--) os->c_cc[i] = _POSIX_VDISABLE;
    os->c_cc[VEOF] = mem_get_1(em+16+em_VEOF);
    os->c_cc[VEOL] = mem_get_1(em+16+em_VEOL);
    os->c_cc[VEOL2] = mem_get_1(em+16+em_VEOL2);
    os->c_cc[VERASE] = mem_get_1(em+16+em_VERASE);
    os->c_cc[VWERASE] = mem_get_1(em+16+em_VWERASE);
    os->c_cc[VKILL] = mem_get_1(em+16+em_VKILL);
    os->c_cc[VREPRINT] = mem_get_1(em+16+em_VREPRINT);
    os->c_cc[VINTR] = mem_get_1(em+16+em_VINTR);
    os->c_cc[VQUIT] = mem_get_1(em+16+em_VQUIT);
    os->c_cc[VSUSP] = mem_get_1(em+16+em_VSUSP);
    os->c_cc[VDSUSP] = mem_get_1(em+16+em_VDSUSP);
    os->c_cc[VSTART] = mem_get_1(em+16+em_VSTART);
    os->c_cc[VSTOP] = mem_get_1(em+16+em_VSTOP);
    os->c_cc[VLNEXT] = mem_get_1(em+16+em_VLNEXT);
    os->c_cc[VDISCARD] = mem_get_1(em+16+em_VDISCARD);
    os->c_cc[VMIN] = mem_get_1(em+16+em_VMIN);
    os->c_cc[VTIME] = mem_get_1(em+16+em_VTIME);
    os->c_cc[VSTATUS] = mem_get_1(em+16+em_VSTATUS);
    os->c_ispeed = mem_get_4(em+36);
    os->c_ospeed = mem_get_4(em+40);
}
/*
 * Convert an underlying-OS struct termios to an emulated-OS struct
 * termios.
 *
 * Inverse of em2os_termios(); writes the emulated layout described
 * there via the mem_set_* accessors.
 */
static void os2em_termios(struct termios *os, uint32_t em)
{
    mem_set_4(em, // c_iflag
        ((os->c_iflag & IGNBRK) ? em_IGNBRK : 0) |
        ((os->c_iflag & BRKINT) ? em_BRKINT : 0) |
        ((os->c_iflag & IGNPAR) ? em_IGNPAR : 0) |
        ((os->c_iflag & PARMRK) ? em_PARMRK : 0) |
        ((os->c_iflag & INPCK) ? em_INPCK : 0) |
        ((os->c_iflag & ISTRIP) ? em_ISTRIP : 0) |
        ((os->c_iflag & INLCR) ? em_INLCR : 0) |
        ((os->c_iflag & IGNCR) ? em_IGNCR : 0) |
        ((os->c_iflag & ICRNL) ? em_ICRNL : 0) |
        ((os->c_iflag & IXON) ? em_IXON : 0) |
        ((os->c_iflag & IXOFF) ? em_IXOFF : 0) |
        ((os->c_iflag & IXANY) ? em_IXANY : 0) |
        ((os->c_iflag & IMAXBEL) ? em_IMAXBEL : 0) );
    mem_set_4(em+4, // c_oflag
        ((os->c_oflag & OPOST) ? em_OPOST : 0) |
        ((os->c_oflag & ONLCR) ? em_ONLCR : 0) |
        ((os->c_oflag & OXTABS) ? em_OXTABS : 0) |
        ((os->c_oflag & ONOEOT) ? em_ONOEOT : 0) |
        ((os->c_oflag & OCRNL) ? em_OCRNL : 0) |
        ((os->c_oflag & ONOCR) ? em_ONOCR : 0) |
        ((os->c_oflag & ONLRET) ? em_ONLRET : 0) );
    mem_set_4(em+8, // c_cflag
        ((os->c_cflag & CIGNORE) ? em_CIGNORE : 0) |
        (((os->c_cflag & CSIZE) == CS5) ? em_CS5 : 0) |
        (((os->c_cflag & CSIZE) == CS6) ? em_CS6 : 0) |
        (((os->c_cflag & CSIZE) == CS7) ? em_CS7 : 0) |
        (((os->c_cflag & CSIZE) == CS8) ? em_CS8 : 0) |
        ((os->c_cflag & CSTOPB) ? em_CSTOPB : 0) |
        ((os->c_cflag & CREAD) ? em_CREAD : 0) |
        ((os->c_cflag & PARENB) ? em_PARENB : 0) |
        ((os->c_cflag & PARODD) ? em_PARODD : 0) |
        ((os->c_cflag & HUPCL) ? em_HUPCL : 0) |
        ((os->c_cflag & CLOCAL) ? em_CLOCAL : 0) |
        ((os->c_cflag & CRTSCTS) ? em_CRTSCTS : 0) |
        ((os->c_cflag & CDTRCTS) ? em_CDTRCTS : 0) |
        ((os->c_cflag & MDMBUF) ? em_MDMBUF : 0) );
    mem_set_4(em+12, // c_lflag
        ((os->c_lflag & ECHOKE) ? em_ECHOKE : 0) |
        ((os->c_lflag & ECHOE) ? em_ECHOE : 0) |
        ((os->c_lflag & ECHOK) ? em_ECHOK : 0) |
        ((os->c_lflag & ECHO) ? em_ECHO : 0) |
        ((os->c_lflag & ECHONL) ? em_ECHONL : 0) |
        ((os->c_lflag & ECHOPRT) ? em_ECHOPRT : 0) |
        ((os->c_lflag & ECHOCTL) ? em_ECHOCTL : 0) |
        ((os->c_lflag & ISIG) ? em_ISIG : 0) |
        ((os->c_lflag & ICANON) ? em_ICANON : 0) |
        ((os->c_lflag & ALTWERASE) ? em_ALTWERASE : 0) |
        ((os->c_lflag & IEXTEN) ? em_IEXTEN : 0) |
        ((os->c_lflag & EXTPROC) ? em_EXTPROC : 0) |
        ((os->c_lflag & TOSTOP) ? em_TOSTOP : 0) |
        ((os->c_lflag & FLUSHO) ? em_FLUSHO : 0) |
        ((os->c_lflag & NOKERNINFO) ? em_NOKERNINFO : 0) |
        ((os->c_lflag & PENDIN) ? em_PENDIN : 0) |
        ((os->c_lflag & NOFLSH) ? em_NOFLSH : 0) );
    mem_set_1(em+16+em_VEOF,os->c_cc[VEOF]);
    mem_set_1(em+16+em_VEOL,os->c_cc[VEOL]);
    mem_set_1(em+16+em_VEOL2,os->c_cc[VEOL2]);
    mem_set_1(em+16+em_VERASE,os->c_cc[VERASE]);
    mem_set_1(em+16+em_VWERASE,os->c_cc[VWERASE]);
    mem_set_1(em+16+em_VKILL,os->c_cc[VKILL]);
    mem_set_1(em+16+em_VREPRINT,os->c_cc[VREPRINT]);
    mem_set_1(em+16+em_VINTR,os->c_cc[VINTR]);
    mem_set_1(em+16+em_VQUIT,os->c_cc[VQUIT]);
    mem_set_1(em+16+em_VSUSP,os->c_cc[VSUSP]);
    mem_set_1(em+16+em_VDSUSP,os->c_cc[VDSUSP]);
    mem_set_1(em+16+em_VSTART,os->c_cc[VSTART]);
    mem_set_1(em+16+em_VSTOP,os->c_cc[VSTOP]);
    mem_set_1(em+16+em_VLNEXT,os->c_cc[VLNEXT]);
    mem_set_1(em+16+em_VDISCARD,os->c_cc[VDISCARD]);
    mem_set_1(em+16+em_VMIN,os->c_cc[VMIN]);
    mem_set_1(em+16+em_VTIME,os->c_cc[VTIME]);
    mem_set_1(em+16+em_VSTATUS,os->c_cc[VSTATUS]);
    mem_set_4(em+36,os->c_ispeed);
    mem_set_4(em+40,os->c_ospeed);
}
/*
 * Convert emulated-OS MSG_* flags (recvmsg/sendmsg and friends) to
 * underlying-OS MSG_* flags.  Unrecognized flag bits are reported and
 * drop to the emulator's toplevel via top().
 */
static int em2os_MSG_flags(uint32_t emf)
{
    int os;

    os = 0;
    if (emf & em_MSG_OOB) os |= MSG_OOB;
    if (emf & em_MSG_PEEK) os |= MSG_PEEK;
    if (emf & em_MSG_DONTROUTE) os |= MSG_DONTROUTE;
    if (emf & em_MSG_EOR) os |= MSG_EOR;
    if (emf & em_MSG_TRUNC) os |= MSG_TRUNC;
    if (emf & em_MSG_CTRUNC) os |= MSG_CTRUNC;
    if (emf & em_MSG_WAITALL) os |= MSG_WAITALL;
    if (emf & em_MSG_DONTWAIT) os |= MSG_DONTWAIT;
    if (emf & em_MSG_BCAST) os |= MSG_BCAST;
    if (emf & em_MSG_MCAST) os |= MSG_MCAST;
    if (emf & em_MSG_NOSIGNAL) os |= MSG_NOSIGNAL;
    if (emf & ~(em_MSG_OOB | em_MSG_PEEK | em_MSG_DONTROUTE | em_MSG_EOR | em_MSG_TRUNC | em_MSG_CTRUNC | em_MSG_WAITALL | em_MSG_DONTWAIT | em_MSG_BCAST | em_MSG_MCAST | em_MSG_NOSIGNAL)) {
        printf("Unrecognized emulated MSG_* flag(s): %08lx\n",(ULI)emf);
        top();
    }
    return(os);
}
/*
 * strerror(), except it takes an emulated-OS errno (and the returned
 * strings include the short names as well as the messages).
*/ static const char *em_strerror(uint32_t e) { switch (e) { case em_EPERM: return("EPERM, Operation not permitted"); break; case em_ENOENT: return("ENOENT, No such file or directory"); break; case em_ESRCH: return("ESRCH, No such process"); break; case em_EINTR: return("EINTR, Interrupted system call"); break; case em_EIO: return("EIO, Input/output error"); break; case em_ENXIO: return("ENXIO, Device not configured"); break; case em_E2BIG: return("E2BIG, Argument list too long"); break; case em_ENOEXEC: return("ENOEXEC, Exec format error"); break; case em_EBADF: return("EBADF, Bad file descriptor"); break; case em_ECHILD: return("ECHILD, No child processes"); break; case em_EDEADLK: return("EDEADLK, Resource deadlock avoided"); break; case em_ENOMEM: return("ENOMEM, Cannot allocate memory"); break; case em_EACCES: return("EACCES, Permission denied"); break; case em_EFAULT: return("EFAULT, Bad address"); break; case em_ENOTBLK: return("ENOTBLK, Block device required"); break; case em_EBUSY: return("EBUSY, Device busy"); break; case em_EEXIST: return("EEXIST, File exists"); break; case em_EXDEV: return("EXDEV, Cross-device link"); break; case em_ENODEV: return("ENODEV, Operation not supported by device"); break; case em_ENOTDIR: return("ENOTDIR, Not a directory"); break; case em_EISDIR: return("EISDIR, Is a directory"); break; case em_EINVAL: return("EINVAL, Invalid argument"); break; case em_ENFILE: return("ENFILE, Too many open files in system"); break; case em_EMFILE: return("EMFILE, Too many open files"); break; case em_ENOTTY: return("ENOTTY, Inappropriate ioctl for device"); break; case em_ETXTBSY: return("ETXTBSY, Text file busy"); break; case em_EFBIG: return("EFBIG, File too large"); break; case em_ENOSPC: return("ENOSPC, No space left on device"); break; case em_ESPIPE: return("ESPIPE, Illegal seek"); break; case em_EROFS: return("EROFS, Read-only file system"); break; case em_EMLINK: return("EMLINK, Too many links"); break; case em_EPIPE: return("EPIPE, 
Broken pipe"); break; case em_EDOM: return("EDOM, Numerical argument out of domain"); break; case em_ERANGE: return("ERANGE, Result too large"); break; case em_EAGAIN: return("EAGAIN/EWOULDBLOCK, Resource temporarily unavailable"); break; case em_EINPROGRESS: return("EINPROGRESS, Operation now in progress"); break; case em_EALREADY: return("EALREADY, Operation already in progress"); break; case em_ENOTSOCK: return("ENOTSOCK, Socket operation on non-socket"); break; case em_EDESTADDRREQ: return("EDESTADDRREQ, Destination address required"); break; case em_EMSGSIZE: return("EMSGSIZE, Message too long"); break; case em_EPROTOTYPE: return("EPROTOTYPE, Protocol wrong type for socket"); break; case em_ENOPROTOOPT: return("ENOPROTOOPT, Protocol not available"); break; case em_EPROTONOSUPPORT: return("EPROTONOSUPPORT, Protocol not supported"); break; case em_ESOCKTNOSUPPORT: return("ESOCKTNOSUPPORT, Socket type not supported"); break; case em_EOPNOTSUPP: return("EOPNOTSUPP, Operation not supported"); break; case em_EPFNOSUPPORT: return("EPFNOSUPPORT, Protocol family not supported"); break; case em_EAFNOSUPPORT: return("EAFNOSUPPORT, Address family not supported by protocol family"); break; case em_EADDRINUSE: return("EADDRINUSE, Address already in use"); break; case em_EADDRNOTAVAIL: return("EADDRNOTAVAIL, Can't assign requested address"); break; case em_ENETDOWN: return("ENETDOWN, Network is down"); break; case em_ENETUNREACH: return("ENETUNREACH, Network is unreachable"); break; case em_ENETRESET: return("ENETRESET, Network dropped connection on reset"); break; case em_ECONNABORTED: return("ECONNABORTED, Software caused connection abort"); break; case em_ECONNRESET: return("ECONNRESET, Connection reset by peer"); break; case em_ENOBUFS: return("ENOBUFS, No buffer space available"); break; case em_EISCONN: return("EISCONN, Socket is already connected"); break; case em_ENOTCONN: return("ENOTCONN, Socket is not connected"); break; case em_ESHUTDOWN: return("ESHUTDOWN, Can't 
send after socket shutdown"); break; case em_ETOOMANYREFS: return("ETOOMANYREFS, Too many references: can't splice"); break; case em_ETIMEDOUT: return("ETIMEDOUT, Connection timed out"); break; case em_ECONNREFUSED: return("ECONNREFUSED, Connection refused"); break; case em_ELOOP: return("ELOOP, Too many levels of symbolic links"); break; case em_ENAMETOOLONG: return("ENAMETOOLONG, File name too long"); break; case em_EHOSTDOWN: return("EHOSTDOWN, Host is down"); break; case em_EHOSTUNREACH: return("EHOSTUNREACH, No route to host"); break; case em_ENOTEMPTY: return("ENOTEMPTY, Directory not empty"); break; case em_EPROCLIM: return("EPROCLIM, Too many processes"); break; case em_EUSERS: return("EUSERS, Too many users"); break; case em_EDQUOT: return("EDQUOT, Disc quota exceeded"); break; case em_ESTALE: return("ESTALE, Stale NFS file handle"); break; case em_EREMOTE: return("EREMOTE, Too many levels of remote in path"); break; case em_EBADRPC: return("EBADRPC, RPC struct is bad"); break; case em_ERPCMISMATCH: return("ERPCMISMATCH, RPC version wrong"); break; case em_EPROGUNAVAIL: return("EPROGUNAVAIL, RPC prog. 
not avail"); break; case em_EPROGMISMATCH: return("EPROGMISMATCH, Program version wrong"); break; case em_EPROCUNAVAIL: return("EPROCUNAVAIL, Bad procedure for program"); break; case em_ENOLCK: return("ENOLCK, No locks available"); break; case em_ENOSYS: return("ENOSYS, Function not implemented"); break; case em_EFTYPE: return("EFTYPE, Inappropriate file type or format"); break; case em_EAUTH: return("EAUTH, Authentication error"); break; case em_ENEEDAUTH: return("ENEEDAUTH, Need authenticator"); break; case em_EIDRM: return("EIDRM, Identifier removed"); break; case em_ENOMSG: return("ENOMSG, No message of desired type"); break; case em_EOVERFLOW: return("EOVERFLOW, Value too large to be stored in data type"); break; case em_ENOTPLAIN: return("ENOTPLAIN, Not a plain file"); break; } return("unknown"); } /* * Given a VFBKIND, return a short string for human consumption * describing it. */ static const char *vfb_kind_str(VFBKIND k) { switch (k) { case VFB_OPEN: return("OPEN"); break; case VFB_CLOSE: return("CLOSE"); break; case VFB_DUP2: return("DUP2"); break; case VFB_TRCMGR: return("TRCMGR"); break; } return("unknown"); } /* * Given a VFORKSTAGE, return a short string for human consumption * describing it. */ static const char *vfork_stage_str(VFORKSTAGE s) { switch (s) { case VFORK_NONE: return("NONE"); break; case VFORK_START: return("START"); break; case VFORK_FAIL: return("FAIL"); break; case VFORK_SUCCESS: return("SUCCESS"); break; } return("unknown"); } /* * Given an emulated-OS signal number, return its short string name. 
*/ static const char *em_signame(uint32_t s, const char *unk) { switch (s) { case em_SIGHUP: return("SIGHUP"); break; case em_SIGINT: return("SIGINT"); break; case em_SIGQUIT: return("SIGQUIT"); break; case em_SIGILL: return("SIGILL"); break; case em_SIGTRAP: return("SIGTRAP"); break; case em_SIGABRT: return("SIGABRT"); break; case em_SIGEMT: return("SIGEMT"); break; case em_SIGFPE: return("SIGFPE"); break; case em_SIGKILL: return("SIGKILL"); break; case em_SIGBUS: return("SIGBUS"); break; case em_SIGSEGV: return("SIGSEGV"); break; case em_SIGSYS: return("SIGSYS"); break; case em_SIGPIPE: return("SIGPIPE"); break; case em_SIGALRM: return("SIGALRM"); break; case em_SIGTERM: return("SIGTERM"); break; case em_SIGURG: return("SIGURG"); break; case em_SIGSTOP: return("SIGSTOP"); break; case em_SIGTSTP: return("SIGTSTP"); break; case em_SIGCONT: return("SIGCONT"); break; case em_SIGCHLD: return("SIGCHLD"); break; case em_SIGTTIN: return("SIGTTIN"); break; case em_SIGTTOU: return("SIGTTOU"); break; case em_SIGIO: return("SIGIO"); break; case em_SIGXCPU: return("SIGXCPU"); break; case em_SIGXFSZ: return("SIGXFSZ"); break; case em_SIGVTALRM: return("SIGVTALRM"); break; case em_SIGPROF: return("SIGPROF"); break; case em_SIGWINCH: return("SIGWINCH"); break; case em_SIGINFO: return("SIGINFO"); break; case em_SIGUSR1: return("SIGUSR1"); break; case em_SIGUSR2: return("SIGUSR2"); break; case em_SIGPWR: return("SIGPWR"); break; } return(unk); } /* * Handle the command line. 
 */
static void handleargs(int ac, char **av)
{
    /*
     * Results land in file-scope state: option flags (forkwait,
     * panicloop, ...), the executable path (exe), NAME=VALUE
     * environment settings (cl_envp/cl_nenvp), and the remaining
     * arguments (cl_args/cl_nargs).  Exits nonzero on usage errors.
     */
    int skip;
    int errs;

    skip = 0;
    errs = 0;
    for (ac--,av++;ac;ac--,av++) {
        /* skip counts option arguments consumed by WANTARG(). */
        if (skip > 0) {
            skip --;
            continue;
        }
        /* First non-option argument ends option parsing. */
        if (**av != '-') {
            exe = *av;
            break;
        }
        /* Never entered directly; WANTARG() jumps here when an
         * option is missing its required argument. */
        if (0) {
            needarg:;
            fprintf(stderr,"%s: %s needs a following argument\n",__progname,*av);
            errs = 1;
            continue;
        }
#define WANTARG() do { if (++skip >= ac) goto needarg; } while (0)
        if (!strcmp(*av,"-forkwait") || !strcmp(*av,"-fork-wait")) {
            forkwait = 1;
            continue;
        }
        if (!strcmp(*av,"-panicloop") || !strcmp(*av,"-panic-loop")) {
            panicloop = 1;
            continue;
        }
        if (!strcmp(*av,"-debugwait") || !strcmp(*av,"-debug-wait")) {
            gdbloop();
            continue;
        }
        if (!strcmp(*av,"-delayexec") || !strcmp(*av,"-delay-exec")) {
            initial_exec_state = IES_DELAY;
            continue;
        }
        if (!strcmp(*av,"-real")) {
            WANTARG();
            realthing_setup(av[skip]);
            continue;
        }
#undef WANTARG
        fprintf(stderr,"%s: unrecognized option `%s'\n",__progname,*av);
        errs = 1;
    }
    /* Leading NAME=VALUE arguments are environment settings for the
     * emulated program.  (index() is the legacy BSD spelling of
     * strchr(), consistent with the rest of this file.) */
    while (av[0]) {
        char *equal;
        equal = index(av[0],'=');
        if (! equal) break;
        cl_nenvp ++;
        cl_envp = realloc(cl_envp,cl_nenvp*sizeof(*cl_envp));
        cl_envp[cl_nenvp-1] = av[0];
        ac --;
        av ++;
    }
    exe = *av;
    if (! exe) {
        fprintf(stderr,"%s: need executable filename\n",__progname);
        errs = 1;
    }
    ac --;
    av ++;
    cl_nargs = ac;
    cl_args = av;
    if (errs) exit(1);
}
/*
 * Find the live block whose base address is given, or nil if no such
 * live block exists.
 */
static MALBLOCK *arena_find_live(MEMSEG_PRIV_ARENA *a, uint32_t addr)
{
    MALBLOCK *b;

    /* Binary search down the AVL tree of live blocks, keyed on base. */
    b = a->live;
    while (1) {
        if (! b) return(0);
        if (addr == b->base) return(b);
        if (addr < b->base) b = b->l; else b = b->r;
    }
}
/*
 * Find the live block into which addr falls, or nil if no such live
 * block exists.  Redzones are not part of blocks for the purposes of
 * this function.
 */
static MALBLOCK *arena_find_containing(MEMSEG_PRIV_ARENA *a, uint32_t addr)
{
    MALBLOCK *b;

    b = a->live;
    while (1) {
        if (! b) return(0);
        if ((addr >= b->base) && (addr < b->end)) return(b);
        if (addr < b->base) b = b->l; else b = b->r;
    }
}
/* Free an AVL tree of MALBLOCKs (l/r are the child links). */
static void free_malblocks_avl(MALBLOCK *b)
{
    if (! b) return;
    free_malblocks_avl(b->l);
    free_malblocks_avl(b->r);
    free(b);
}
/* Free a singly-linked list of MALBLOCKs (chained through r). */
static void free_malblocks_list(MALBLOCK *l)
{
    MALBLOCK *b;

    while ((b = l)) {
        l = b->r;
        free(b);
    }
}
static MEMSEGOPS memseg_ops_malloc; // forward
/*
 * Create and return a new malloc()-backed MEMSEG, given base, size,
 * and protection.  This does not deal with maintaining the
 * no-overlapping invariant; the caller must have already dealt with
 * that.  This does link the new MEMSEG into vm, though.
 *
 * Note that this does not promise anything about the contents of the
 * new memory.
 *
 * XXX Maybe collapse with any adjacent malloc MEMSEGs with identical
 * protection?
 *
 * NOTE(review): the malloc() results are not checked; on allocation
 * failure the stores below crash.  Confirm whether panic() should be
 * called here instead.
 */
static MEMSEG *memseg_new_malloc(uint32_t base, uint32_t size, unsigned char prot)
{
    MEMSEG *n;
    MEMSEG_PRIV_MALLOC *p;

    n = malloc(sizeof(MEMSEG));
    p = malloc(sizeof(MEMSEG_PRIV_MALLOC));
    n->base = base;
    n->size = size;
    n->end = base + size;
    n->prot = prot;
    p->tofree = malloc(size);
    n->data = p->tofree;
    n->ops = &memseg_ops_malloc;
    n->priv = p;
    n->link = vm.m;
    vm.m = n;
    return(n);
}
/*
 * The done method for malloc memsegs.  Free the underlying memory and
 * the private pointer.
 */
static void memseg_done_malloc(MEMSEG *ms)
{
    free(((MEMSEG_PRIV_MALLOC *)ms->priv)->tofree);
    free(ms->priv);
}
/*
 * The curtail method for malloc MEMSEGs.  Just adjust size and end;
 * don't worry about freeing partial memory blocks - the code
 * simplicity wins over the memory resource saving.
 */
static void memseg_curtail_malloc(MEMSEG *ms, uint32_t by)
{
    if (by >= ms->size) panic("impossible malloc curtail");
    ms->size -= by;
    ms->end -= by;
}
/*
 * The behead method for malloc MEMSEGs.  Just adjust base, size, and
 * data; as for curtail, above, it's not worth trying to economize on
 * memory.
 */
static void memseg_behead_malloc(MEMSEG *ms, uint32_t by)
{
    if (by >= ms->size) panic("impossible malloc behead");
    ms->size -= by;
    ms->base += by;
    ms->data += by;
}
/*
 * The split method for malloc MEMSEGs.  Just allocate a new memory
 * MEMSEG, copy, and adjust the old one.
 *
 * XXX Arguably should copy the smaller piece.
 *
 * XXX Arguably should refcount the backing memory and have both
 * MEMSEGs refer to it.
 */
static MEMSEG *memseg_split_malloc(MEMSEG *ms, uint32_t part1, uint32_t part2)
{
    MEMSEG *n;

    if ((part1 > ms->size) || (part2 > ms->size) || (part1+part2 > ms->size)) panic("impossible malloc split");
    /* New segment holds the last part2 bytes; old keeps the first part1. */
    n = memseg_new_malloc(ms->end-part2,part2,ms->prot);
    bcopy(ms->data+(ms->size-part2),n->data,part2);
    ms->size = part1;
    ms->end = ms->base + part1;
    return(n);
}
/*
 * The postexec method for malloc MEMSEGs.  This is boring; malloc
 * MEMSEGs never survive exec()s.
 */
static int memseg_postexec_malloc(MEMSEG *ms __attribute__((__unused__)))
{
    return(0);
}
/*
 * The merge method for malloc MEMSEGs.  Declines (returns 0) when
 * either segment is 1MB or larger; otherwise copies both into a fresh
 * buffer, absorbs b into a, and returns 1.
 *
 * NOTE(review): b's MEMSEG struct itself is not free()d here, unlike
 * the done+free pattern in memseg_clear_conflict; confirm whether the
 * caller frees b or whether this leaks the struct.
 */
static int memseg_merge_malloc(MEMSEG *a, MEMSEG *b)
{
    MEMSEG_PRIV_MALLOC *ap;
    char *newdata;

    if ( (a->ops != &memseg_ops_malloc) ||
         (b->ops != &memseg_ops_malloc) ||
         (a->end != b->base) ||
         (a->prot != b->prot) ) panic("impossible %s",__func__);
    if ((a->size >= (1U<<20)) || (b->size >= (1U<<20))) return(0);
    ap = a->priv;
    newdata = malloc(a->size+b->size);
    bcopy(a->data,newdata,a->size);
    bcopy(b->data,newdata+a->size,b->size);
    free(ap->tofree);
    ap->tofree = newdata;
    a->data = newdata;
    a->size += b->size;
    a->end = a->base + a->size;
    a->link = b->link;
    memseg_done_malloc(b);
    return(1);
}
/*
 * The check method for malloc MEMSEGs.  (Any access within the
 * segment is fine; nothing to check.)
 */
static void memseg_check_malloc(MEMSEG *ms, uint32_t addr, uint32_t len, unsigned int prot)
{
    (void)ms;
    (void)addr;
    (void)len;
    (void)prot;
}
/*
 * The desc method for malloc MEMSEGs.
 */
static void memseg_desc_malloc(MEMSEG *ms, FILE *to)
{
    (void)ms;
    fprintf(to,"malloc");
}
/*
 * The MEMSEGOPS for malloc MEMSEGs.
 */
static MEMSEGOPS memseg_ops_malloc = MEMSEGOPS_INIT(malloc);

static MEMSEGOPS memseg_ops_mmap; // forward
/*
 * Create and return a new mmap()-backed MEMSEG, given the usual base,
 * size, and protection, and mmap-specific values for the mmap flags
 * and the void * returned by the underlying-OS mmap().
 */
static MEMSEG *memseg_new_mmap(uint32_t base, uint32_t size, unsigned char prot, uint32_t mapflags, void *mapped)
{
    MEMSEG *n;
    MEMSEG_PRIV_MMAP *p;

    n = malloc(sizeof(MEMSEG));
    p = malloc(sizeof(MEMSEG_PRIV_MMAP));
    n->base = base;
    n->size = size;
    n->end = base + size;
    n->prot = prot;
    /* The private struct is refcounted so split segments can share
     * one underlying mapping; see memseg_split_mmap. */
    p->refcnt = 1;
    p->mapped = mapped;
    p->size = size;
    p->mapflags = mapflags;
    n->data = mapped;
    n->ops = &memseg_ops_mmap;
    n->priv = p;
    n->link = vm.m;
    vm.m = n;
    return(n);
}
/*
 * The done method for mmap()-backed MEMSEGs.  Drop a reference, and,
 * if it was the last reference, munmap() the underlying memory and
 * free the private data struct.
 */
static void memseg_done_mmap(MEMSEG *ms)
{
    MEMSEG_PRIV_MMAP *p;

    p = ms->priv;
    p->refcnt --;
    if (p->refcnt < 1) {
        munmap(p->mapped,p->size);
        free(p);
    }
}
/*
 * The curtail method for mmap()-backed MEMSEGs.  Just leave the
 * underlying mmap()ped memory alone and adjust the descriptive
 * values.
 */
static void memseg_curtail_mmap(MEMSEG *ms, uint32_t by)
{
    if (by >= ms->size) panic("impossible mmap curtail");
    ms->size -= by;
    ms->end -= by;
}
/*
 * The behead method for mmap()-backed MEMSEGs.  Just leave the
 * underlying mmap()ped memory alone and adjust the descriptive
 * values.
 */
static void memseg_behead_mmap(MEMSEG *ms, uint32_t by)
{
    if (by >= ms->size) panic("impossible mmap behead");
    ms->size -= by;
    ms->base += by;
    ms->data += by;
}
/*
 * The split method for mmap()-backed MEMSEGs.  This is why
 * MEMSEG_PRIV_MMAPs have refcounts: so that we can, here, generate
 * two MEMSEGs backed by the same underlying mmap().
 */
static MEMSEG *memseg_split_mmap(MEMSEG *ms, uint32_t part1, uint32_t part2)
{
    MEMSEG *n;
    MEMSEG_PRIV_MMAP *p;

    if ((part1 > ms->size) || (part2 > ms->size) || (part1+part2 > ms->size)) panic("impossible mmap split");
    p = ms->priv;
    /* New segment covers the last part2 bytes, sharing the mapping. */
    n = malloc(sizeof(MEMSEG));
    n->base = ms->base + ms->size - part2;
    n->size = part2;
    n->end = ms->end;
    n->prot = ms->prot;
    n->data = ms->data + (ms->size - part2);
    n->ops = ms->ops;
    p->refcnt ++;
    n->priv = p;
    ms->size = part1;
    ms->end = ms->base + part1;
    n->link = vm.m;
    vm.m = n;
    return(n);
}
/*
 * The postexec method for mmap()-backed MEMSEGs.  Generally, these go
 * away on exec() just as malloc MEMSEGs do, but if mapped with
 * MAP_INHERIT they stay around.
 */
static int memseg_postexec_mmap(MEMSEG *ms)
{
    return((((MEMSEG_PRIV_MMAP *)ms->priv)->mapflags & em_MAP_INHERIT) ? 1 : 0);
}
/*
 * The merge method for mmap MEMSEGs.  (mmap segments are never
 * merged; always declines.)
 */
static int memseg_merge_mmap(MEMSEG *a __attribute__((__unused__)), MEMSEG *b __attribute__((__unused__)))
{
    return(0);
}
/*
 * The check method for mmap MEMSEGs.  (Nothing to check.)
 */
static void memseg_check_mmap(MEMSEG *ms, uint32_t addr, uint32_t len, unsigned int prot)
{
    (void)ms;
    (void)addr;
    (void)len;
    (void)prot;
}
/*
 * The desc method for mmap MEMSEGs.
 */
static void memseg_desc_mmap(MEMSEG *ms, FILE *to)
{
    MEMSEG_PRIV_MMAP *p;
    uint32_t f;
    uint32_t m;
    /* Flag-decoding table; the last two entries decode the
     * FILE/ANON field, which is why mask and value can differ. */
    static const struct {
        uint32_t mask;
        uint32_t value;
        const char *text;
    } bits[]
#define BIT(name) { em_MAP_##name, em_MAP_##name, #name }
        = { BIT(SHARED),
            BIT(PRIVATE),
            BIT(COPY),
            BIT(FIXED),
            BIT(RENAME),
            BIT(NORESERVE),
            BIT(INHERIT),
            BIT(NOEXTEND),
            BIT(HASSEMAPHORE),
            { em_MAP_FILE|em_MAP_ANON, em_MAP_FILE, "FILE" },
            { em_MAP_FILE|em_MAP_ANON, em_MAP_ANON, "ANON" } };
    int i;
    const char *pref;

    p = ms->priv;
    fprintf(to,"mmap: %p (%d), %lu@%p, ",(void *)p,p->refcnt,(ULI)p->size,(void *)p->mapped);
    f = p->mapflags;
    /* m accumulates mask bits already decoded, so FILE/ANON prints
     * at most once; f retains any bits we don't recognize. */
    m = 0;
    pref = "";
    for (i=0;i<(sizeof(bits)/sizeof(bits[0]));i++) {
        if (bits[i].mask & m) continue;
        if ((f & bits[i].mask) == bits[i].value) {
            m |= bits[i].mask;
            f &= ~bits[i].mask;
            fprintf(to,"%s%s",pref,bits[i].text);
            pref = "|";
        }
    }
    if (f) fprintf(to,"|%#lx",(ULI)f);
#undef BIT
}
/*
 * The MEMSEGOPS for mmap MEMSEGs.
 */
static MEMSEGOPS memseg_ops_mmap = MEMSEGOPS_INIT(mmap);

static MEMSEGOPS memseg_ops_arena; // forward
/*
 * Create and return the malloc-arena MEMSEG. This does not set up any
 * MALBLOCKs; that must be handled elsewhere.
 */
static MEMSEG *memseg_new_arena(void)
{
    MEMSEG *n;
    MEMSEG_PRIV_ARENA *p;
    void *mmrv;

    mmrv = mmap(0,ARENA_SIZE,PROT_READ|PROT_WRITE,MAP_ANON|MAP_PRIVATE,-1,0);
    if (mmrv == MAP_FAILED) {
        printf("can't create malloc arena: mmap: %s\n",strerror(errno));
        top();
    }
    n = malloc(sizeof(MEMSEG));
    p = malloc(sizeof(MEMSEG_PRIV_ARENA));
    /* The arena sits ARENA_STACK_GAP below the stack region. */
    n->base = USRSTACK - MAXSSIZE - ARENA_STACK_GAP - ARENA_SIZE;
    n->size = ARENA_SIZE;
    n->end = USRSTACK - MAXSSIZE;
    n->prot = P_R | P_W | P_X;
    p->seg = n;
    p->free = 0;
    p->live = 0;
    p->old = 0;
    n->data = mmrv;
    n->ops = &memseg_ops_arena;
    n->priv = p;
    n->link = vm.m;
    vm.m = n;
    return(n);
}
/*
 * The done method for the malloc-arena MEMSEG.
 */
static void memseg_done_arena(MEMSEG *ms)
{
    MEMSEG_PRIV_ARENA *a;

    a = ms->priv;
    free_malblocks_avl(a->live);
    free_malblocks_list(a->free);
    free_malblocks_list(a->old);
    munmap(ms->data,ARENA_SIZE);
    free(a);
}
/*
 * The curtail method for the malloc-arena MEMSEG.  (Never legal.)
 */
static void memseg_curtail_arena(MEMSEG *ms, uint32_t by)
{
    (void)ms;
    (void)by;
    panic("curtailing malloc-arena memseg");
}
/*
 * The behead method for the malloc-arena MEMSEG.  (Never legal.)
 */
static void memseg_behead_arena(MEMSEG *ms, uint32_t by)
{
    (void)ms;
    (void)by;
    panic("beheading malloc-arena memseg");
}
/*
 * The split method for the malloc-arena MEMSEG.  (Never legal.)
 */
static MEMSEG *memseg_split_arena(MEMSEG *ms, uint32_t part1, uint32_t part2)
{
    (void)ms;
    (void)part1;
    (void)part2;
    panic("splitting malloc-arena memseg");
}
/*
 * The postexec method for the malloc-arena MEMSEG.  (Does not
 * survive exec().)
 */
static int memseg_postexec_arena(MEMSEG *ms)
{
    (void)ms;
    return(0);
}
/*
 * The merge method for the malloc-arena MEMSEG.  (Never legal.)
 */
static int memseg_merge_arena(MEMSEG *a, MEMSEG *b)
{
    (void)a;
    (void)b;
    panic("merging malloc-arena memsegs");
}
/*
 * The check method for the malloc-arena MEMSEG.  This function is the
 * reason the malloc-arena MEMSEG exists - and the reason MEMSEGs have
 * check methods.
 */
static void memseg_check_arena(MEMSEG *ms, uint32_t addr, uint32_t len, unsigned int prot)
{
    MALBLOCK *b;

    (void)prot;
    /* addr arrives segment-relative; make it absolute for lookup. */
    addr += ms->base;
    b = arena_find_containing(ms->priv,addr);
    /* The whole access must fall inside one live block. */
    if (b && (addr+len <= b->end)) return;
    printf("bad reference to malloc arena region [%08lx..%08lx)\n",(ULI)addr,(ULI)(addr+len));
    top();
}
/*
 * The desc method for the malloc-arena MEMSEG.
 */
static void memseg_desc_arena(MEMSEG *ms, FILE *to)
{
    (void)ms;
    fprintf(to,"arena");
}
/*
 * The MEMSEGOPS for the malloc-arena MEMSEGs.
 */
static MEMSEGOPS memseg_ops_arena = MEMSEGOPS_INIT(arena);
/*
 * Test whether a whole range of VM exists.
 *
 * This tests whether there is a mapping for each page overlapping the
 * range [addr,addr+size).
*/ static int range_exists(uint32_t addr, uint32_t size) { uint32_t a; uint32_t end; MEMSEG *ms; if (size & 0x80000000) return(0); end = addr + size; if (addr > end) return(0); a = ROUND_DOWN(addr,PAGE_SIZE); while (a < end) { ms = memseg_find(a,0,0); if (! ms) return(0); a = ms->end; } return(1); } /* * Go through vm and remove anything that overlaps with the area * described by newbase and newsize, except that ignore, if non-nil, * is a MEMSEG that is to be left untouched even if it does overlap. * This is used to punch a hole in the VM space, if necessary, to * accommodate a new MEMSEG. (This is why ignore exists, so that this * can be called after the new MEMSEG is created and linked in.) */ static void memseg_clear_conflict(uint32_t newbase, uint32_t newsize, MEMSEG *ignore) { uint32_t newend; MEMSEG *ms; MEMSEG **msp; newend = newbase + newsize; if (newsize < 1) panic("empty memseg"); if ((newbase & (PAGE_SIZE-1)) || (newsize & (PAGE_SIZE-1))) panic("misaligned memseg"); if (newend < newbase) panic("va wraparound"); msp = &vm.m; while ((ms = *msp)) { if (ms != ignore) { /* * Each existing memseg may relate to the new memseg in one of * 13 ways (nnn=new, ooo=old, ***=both): * * (a) ...ooo...nnn... * (b) ...ooonnn... * (c) ...ooo***nnn... * (d) ...ooo***... * (e) ...ooo***ooo... * (f) ...***nnn... * (g) ...******... * (h) ...******ooo... * (i) ...nnn***nnn... * (j) ...nnn***... * (k) ...nnn***ooo... * (l) ...nnnooo... * (m) ...nnn...ooo... 
*/ if ((ms->base >= newbase) && (ms->end <= newend)) { // Cases f, g, i, j: destroy old entirely *msp = ms->link; (*ms->ops->done)(ms); free(ms); continue; } else if ((ms->end <= newbase) || (ms->base >= newend)) { // Cases a, b, l, m: do nothing } else if (ms->end <= newend) { // Cases c, d: curtail old (*ms->ops->curtail)(ms,ms->end-newbase); } else if (ms->base >= newbase) { // Cases h, k: behead old (*ms->ops->behead)(ms,newend-ms->base); } else { // Case e: split old (*ms->ops->split)(ms,newbase-ms->base,ms->end-newend); } } msp = &ms->link; } } /* * Just like memseg_clear_conflict, except it never changes anything; * it just returns true if memseg_clear_conflict would have done * anything. */ static int memseg_check_conflict(uint32_t newbase, uint32_t newsize, MEMSEG *ignore) { uint32_t newend; MEMSEG *ms; newend = newbase + newsize; if (newsize < 1) panic("empty memseg"); if ((newbase & (PAGE_SIZE-1)) || (newsize & (PAGE_SIZE-1))) panic("misaligned memseg"); if (newend < newbase) panic("va wraparound"); for (ms=vm.m;ms;ms=ms->link) { if (ms != ignore) { /* * Each existing memseg may relate to the new memseg in one of * 13 ways (nnn=new, ooo=old, ***=both): * * (a) ...ooo...nnn... * (b) ...ooonnn... * (c) ...ooo***nnn... * (d) ...ooo***... * (e) ...ooo***ooo... * (f) ...***nnn... * (g) ...******... * (h) ...******ooo... * (i) ...nnn***nnn... * (j) ...nnn***... * (k) ...nnn***ooo... * (l) ...nnnooo... * (m) ...nnn...ooo... */ if ((ms->base >= newbase) && (ms->end <= newend)) return(1); else if ((ms->end <= newbase) || (ms->base >= newend)) continue; } } return(0); } /* * Destroy a memory space. This is used upon a successful exec() to * dispose of the old VM, and, during vfork, by the parent to dispose * of its copy of the child's VM. */ static void vm_destroy(VM vm) { MEMSEG *ms; while ((ms = vm.m)) { vm.m = ms->link; (*ms->ops->done)(ms); free(ms); } } /* * Used after a successful exec() to replace the old VM with the new. 
* The old VM is in vm; the new VM is passed in. First we go through * the old VM and throw out everything that shouldn't remain (which in * most cases means all of it, but mmap() with MAP_INHERIT can create * segments which survive). Then we drop the new VM on top of it. * * In case of a conflict between a surviving piece of the old space and * a piece of the new space, the new space wins. I'm not sure what * real NetBSD/sparc 1.4T does in this case; it matters only if mmap() * was given an address it must map at which overlaps part of the new * text, data, or stack. (System-selected mmap locations can't * overlap any of those.) */ static void vm_postexec(VM newvm) { MEMSEG *list; MEMSEG **tail; MEMSEG *s; list = vm.m; tail = &vm.m; while (vm.m) { s = vm.m; vm.m = s->link; if ((*s->ops->postexec)(s)) { *tail = s; tail = &s->link; } else { (*s->ops->done)(s); free(s); } } *tail = 0; while (newvm.m) { s = newvm.m; newvm.m = s->link; memseg_clear_conflict(s->base,s->size,0); s->link = vm.m; vm.m = s; } vm.dbrk = newvm.dbrk; vm_changed = 1; } /* * Create and return a new malloc MEMSEG for a given base address, * size, and protection. This is suitable for use when setting up the * address space for exec() and is currently just memseg_new_malloc() * plus memseg_clear_conflict(). * * The returned MEMSEG will always be a malloc MEMSEG. * * This does not promise anything about the contents of the new memory. */ static MEMSEG *memseg_mem(uint32_t va, uint32_t size, unsigned int prot) { MEMSEG *n; n = memseg_new_malloc(va,size,prot); memseg_clear_conflict(n->base,n->size,n); return(n); } /* * Sort a list of MEMSEGs in order of increasing addresses. We sort by * base address, but the no-overlap invariant means that the order * induced by this is equivalent to the one induced by sorting by end * address instead (or (base+end)/2, or pretty much anything else even * vaguely sensible). 
* * This is used when we're looking for holes in the address space, such * as for an mmap() with no address specified. */ static MEMSEG *sort_vm_list(MEMSEG *list) { MEMSEG *a; MEMSEG *b; MEMSEG *t; MEMSEG **lp; if (!list || !list->link) return(list); a = 0; b = 0; while (list) { t = list; list = t->link; t->link = b; b = a; a = t; } a = sort_vm_list(a); b = sort_vm_list(b); lp = &list; while (a || b) { if (a && (!b || (a->base < b->base))) { t = a; a = a->link; } else { t = b; b = b->link; } *lp = t; lp = &t->link; } *lp = 0; return(list); } /* * Sort the current VM a la sort_vm_list, above. */ static void sort_vm(void) { vm.m = sort_vm_list(vm.m); } /* * Look for a hole at least size bytes large in the VM space, whose * base address is at least base and whose end is no higher than max. * Return the base of the located space. If no such space can be * found, we panic (arguably we should complain and top()). */ static uint32_t find_space(uint32_t base, uint32_t size, uint32_t max) { MEMSEG *ms; uint32_t lastend; uint32_t rv; if (size & 0x80000000) panic("impossible find_space (size)"); if ((uint32_t)(base+size) < base) panic("impossible find_space (wrap)"); if (base+size > max) panic("impossible find_space (max)"); do <"found"> { sort_vm(); lastend = PAGE_SIZE; for (ms=vm.m;ms;ms=ms->link) { rv = (lastend < base) ? base : lastend; if ((ms->base >= base+size) && (ms->base-rv >= size)) break <"found">; lastend = ms->end; if (lastend+size > max) break; } if (lastend+size <= max) { rv = (lastend < base) ? base : lastend; break <"found">; } panic("can't find space: base %08lx size %08lx max %08lx",(ULI)base,(ULI)size,(ULI)max); } while( 0); return(rv); } /* * This is used during exec() to read something out of the file being * loaded into the address space. 
fd is our (underlying-OS) fd onto * the executable file, buf is where to read the data into, len is the * amount to read, off is the offset into the file (suitable for * passing to pread(2)), and path and what are the path to the file * and an indication of what's being read, the latter two for error * messages, if generated. * * On success this returns 0. On failure, it returns -1, with, if * TRC_EXEC tracing is turned on, an indication of what went wrong. */ static int read_exe(int fd, void *buf, int len, off_t off, const char *path, const char *what) { int rv; rv = pread(fd,buf,len,off); if (rv < 0) { if (what) trc(TRC_EXEC,"%s: %s: read: %s\n",path,what,strerror(errno)); return(-1); } if (rv == 0) { if (what) trc(TRC_EXEC,"%s: %s: read EOF\n",path,what); return(-1); } if (rv != len) { if (what) trc(TRC_EXEC,"%s: %s: read wanted %d, got %d\n",path,what,len,rv); return(-1); } return(0); } /* * Print the condition code bits to a FILE *. This is used when, for * example, printing (emulated) machine registers. */ /* The output generated doesn't really make sense otherwise... */ #if (CC_N != 8) || (CC_Z != 4) || (CC_V != 2) || (CC_C != 1) #error "print_cc assumptions invalid" #endif static void print_cc(FILE *to, unsigned int cc) { fprintf(to,"%c%c%c%c", (cc & CC_N) ? 'N' : '.', (cc & CC_Z) ? 'Z' : '.', (cc & CC_V) ? 'V' : '.', (cc & CC_C) ? 'C' : '.' ); } /* * Print the FPU condition code bits to a FILE *. This is used when, * for example, printing (emulated) machine registers. */ static void print_fcc(FILE *to, unsigned int fcc) { const char *s; switch (fcc) { case FCC_EQ: s = "EQ"; break; case FCC_LT: s = "LT"; break; case FCC_GT: s = "GT"; break; case FCC_UN: s = "UN"; break; default: abort(); break; } fprintf(to,"%s",s); } /* * Print some or all emulated machine registers. to is the FILE * to * print them to and test is a test routine indicating which registers * to print; it is passed an index into the regnames[] array. 
*/ static void print_regs(FILE *to, int (*test)(int)) { int nix; int ixv[PRINT_REGS__N]; int nr; int r; int n; int i; nix = 0; for (i=0;i= nr) fprintf(to," "); fprintf(to,"%-3s = ",regnames[i]); switch (i) { case PRINT_REGS_Y: fprintf(to,"%08lx",(ULI)s.y); break; case PRINT_REGS_PC: fprintf(to,"%08lx",(ULI)s.pc); break; case PRINT_REGS_NPC: fprintf(to,"%08lx",(ULI)s.npc); break; case PRINT_REGS_CC: print_cc(to,s.cc); break; default: if (i < PRINT_REGS_Fbase) { fprintf(to,"%08lx",(ULI)s.regs[i]); } else { fprintf(to,"%08lx",(ULI)s.fregs[i-PRINT_REGS_Fbase]); } break; } } fprintf(to,"\n"); } } /* * Set up a clean set of processor registers, including all windows. * * This is called at startup and when emulating execve(). */ static void clean_regs(void) { int i; int j; s.cc = 0; s.y = 0; for (i=32-1;i>=0;i--) s.regs[i] = 0; for (i=32-1;i>=0;i--) s.fregs[i] = 0; for (i=NWINDOWS-1;i>=0;i--) { for (j=8-1;j>=0;j--) { s.rw[i].l[j] = 0; s.rw[i].i[j] = 0; } } s.cwp = 0; s.iwp = 1; s.flags &= ~SF_FPU; } /* * Initialize emulator state on startup. */ static void setup(void) { clean_regs(); s.cc = 0; s.flags = 0; s.pc = 0; s.npc = s.pc + 4; vm = INITVM(); vfork_dropvm = INITVM(); s.instrs = 0; s.noninteractive = 0; s.lastexec = 0; mypid = getpid(); trcmgr_newpid(mypid); bpts = 0; abpts = 0; nbpts = 0; bpt_suppress = 0; vm_changed = 0; memwatches = 0; } /* * Copy data from the emulator into emulated memory. (The name was * inspired by the copyout() kernel routine, which performs the * analogous operation.) osbuf is the buffer to copy from, embuf is * the address in emulated VM to copy to, n is the number of bytes to * copy, what is what's being copied (for errors), and prefail is used * to handle cleanup in error cases: it is called if a memory * protection fault occurs, after the fault is detected and before the * error is printed and top() is called. prefail is designed to * permit things such as freeing malloc()ed temporaries. 
* * XXX prefail should go away in favour of a more general way of * wrapping err_jmp. */ static void copyout(const void *osbuf, uint32_t embuf, uint32_t n, const char *what, void (*prefail)(void *), void *pfarg) { uint32_t part; MEMSEG *ms; int left; uint32_t bp; int i; left = n; bp = embuf; while (left > 0) { ms = memseg_find(bp,0,what); if (! (ms->prot & P_W)) { if (prefail) (*prefail)(pfarg); printf("%d: %s: %08lx: not accessible\n",mypid,what,(ULI)bp); trc(TRC_ERR,"%s: %08lx: not accessible\n",what,(ULI)bp); top(); } part = ms->end - bp; if (part > left) part = left; (*ms->ops->check)(ms,bp-ms->base,part,P_W); bcopy((bp-embuf)+(const char *)osbuf,ms->data+(bp-ms->base),part); if (trc_if(TRC_MEM)) for (i=0;idata[i+bp-ms->base]); bp += part; left -= part; } } /* * Copy data from emulated memory into the emulator. (The name was * inspired by the copyin() kernel routine, which performs the * analogous operation.) osbuf is the buffer to copy into, embuf is * the address in emulated VM to copy from, n is the number of bytes * to copy, what is what's being copied (for errors), and prefail is * used to handle cleanup in error cases: it is called if a memory * protection fault occurs, after the fault is detected and before the * error is printed and top() is called. prefail is designed to * permit things such as freeing malloc()ed temporaries. * * XXX prefail should go away in favour of a more general way of * wrapping err_jmp. */ static void copyin(void *osbuf, uint32_t embuf, uint32_t n, const char *what, void (*prefail)(void *), void *pfarg) { uint32_t part; MEMSEG *ms; int left; uint32_t bp; int i; left = n; bp = embuf; while (left > 0) { ms = memseg_find(bp,0,what); if (! 
(ms->prot & P_R)) { if (prefail) (*prefail)(pfarg); printf("%d: %s: %08lx: not accessible\n",mypid,what,(ULI)bp); trc(TRC_ERR,"%s: %08lx: not accessible\n",what,(ULI)bp); top(); } part = ms->end - bp; if (part > left) part = left; (*ms->ops->check)(ms,bp-ms->base,part,P_R); bcopy(ms->data+(bp-ms->base),(bp-embuf)+(char *)osbuf,part); if (trc_if(TRC_MEM)) for (i=0;idata[i+bp-ms->base]); bp += part; left -= part; } } /* * Open an ELF file and check that it's suitable for our use (eg, that * it's for 32-bit SPARC). This is used by exec(), factored out * because it's used both for the file being exec()ed and for the * dynamic loader used by dynamically linked executables. */ static uint32_t elf_start_load(ELF_CTX *elf, int exptype, const char *exptypestr, int load_p_s) { __label__ enoexec_; void enoexec(void) { goto enoexec_; } elf->fd = open(elf->path,O_RDONLY,0); if (elf->fd < 0) return(os2em_errno(errno)); if (0) { enoexec_:; close(elf->fd); return(em_ENOEXEC); } if (read_exe(elf->fd,&elf->eh,sizeof(elf->eh),0,elf->path,"bad ELF file (can't read header)") < 0) enoexec(); if ( (elf->eh.e_ident[EI_MAG0] != ELFMAG0) || (elf->eh.e_ident[EI_MAG1] != ELFMAG1) || (elf->eh.e_ident[EI_MAG2] != ELFMAG2) || (elf->eh.e_ident[EI_MAG3] != ELFMAG3) ) { trc(TRC_EXEC,"%s: bad ELF file (bad magic number)\n",elf->path); enoexec(); } if (elf->eh.e_ident[EI_CLASS] != ELFCLASS32) { trc(TRC_EXEC,"%s: bad ELF file (class isn't 32-bit)\n",elf->path); enoexec(); } if (ELF_HALF_TO_NATIVE(elf->eh.e_machine) != EM_SPARC) { trc(TRC_EXEC,"%s: bad ELF file (machine isn't SPARC)\n",elf->path); enoexec(); } if (ELF_HALF_TO_NATIVE(elf->eh.e_type) != exptype) { trc(TRC_EXEC,"%s: bad ELF file (type isn't %s)\n",elf->path,exptypestr); enoexec(); } if (ELF_HALF_TO_NATIVE(elf->eh.e_phentsize) != sizeof(Elf32_Phdr)) { trc(TRC_EXEC,"%s: bad ELF file (phentsize isn't sizeof(Elf32_Phdr))\n",elf->path); enoexec(); } elf->entry = ELF_ADDR_TO_NATIVE(elf->eh.e_entry); if (load_p_s) { if 
(ELF_HALF_TO_NATIVE(elf->eh.e_shentsize) != sizeof(Elf32_Shdr)) { trc(TRC_EXEC,"%s: bad ELF file (shentsize isn't sizeof(Elf32_Shdr))\n",elf->path); enoexec(); } elf->phn = ELF_HALF_TO_NATIVE(elf->eh.e_phnum); elf->ph = malloc(elf->phn*sizeof(Elf32_Phdr)); if (elf->ph == 0) { trc(TRC_EXEC,"%s: bad ELF file (can't malloc %d for phdrs)\n",elf->path,(int)(elf->phn*sizeof(Elf32_Phdr))); enoexec(); } elf->shn = ELF_HALF_TO_NATIVE(elf->eh.e_shnum); elf->sh = malloc(elf->shn*sizeof(Elf32_Shdr)); if (elf->sh == 0) { trc(TRC_EXEC,"%s: bad ELF file (can't malloc %d for shdrs)\n",elf->path,(int)(elf->shn*sizeof(Elf32_Shdr))); enoexec(); } if (read_exe(elf->fd,elf->ph,elf->phn*sizeof(Elf32_Phdr),ELF_OFFSET_TO_NATIVE(elf->eh.e_phoff),elf->path,"bad ELF file (can't read phdrs)") < 0) enoexec(); if (read_exe(elf->fd,elf->sh,elf->shn*sizeof(Elf32_Shdr),ELF_OFFSET_TO_NATIVE(elf->eh.e_shoff),elf->path,"bad ELF file (can't read shdrs)") < 0) enoexec(); } return(0); } /* * Read the program headers from an ELF file and iterate over them, * calling the appropriate methods from ops for the entries of * interest. enoexec is called if something is found which should * provoke an ENOEXEC failure from exec(). 
*/ static void map_psect(ELF_CTX *elf, const PSECT_OPS *ops, void (*enoexec)(void)) { int nph; Elf32_Phdr *ph; int i; void noexec(void) { free(ph); (*enoexec)(); } nph = ELF_HALF_TO_NATIVE(elf->eh.e_phnum); ph = malloc(nph*sizeof(Elf32_Phdr)); if (read_exe(elf->fd,ph,nph*sizeof(Elf32_Phdr),ELF_ADDR_TO_NATIVE(elf->eh.e_phoff),elf->path,"bad ELF file (can't read program headers)") < 0) noexec(); for (i=nph-1;i>=0;i--) { unsigned int t; t = ELF_WORD_TO_NATIVE(ph[i].p_type); switch (t) { case PT_INTERP: (*ops->pt_interp)(elf,&ph[i],&noexec); break; case PT_LOAD: (*ops->pt_load)(elf,&ph[i],&noexec); break; case PT_PHDR: (*ops->pt_phdr)(elf,&ph[i],&noexec); break; } } } /* * The PT_INTERP handler for the main executable: read and record the * "interpreter" (really, dynamic linker) pathname in the ELF_CTX for * later. */ static void psect_pt_interp_main(ELF_CTX *elf, Elf32_Phdr *ph, void (*err)(void)) { uint32_t fsz; fsz = ELF_WORD_TO_NATIVE(ph->p_filesz); if (fsz > em_MAXPATHLEN) { trc(TRC_EXEC,"%s: bad ELF file (PT_INTERP section length %lu > max %d)\n",elf->path,(ULI)fsz,em_MAXPATHLEN); (*err)(); } if (read_exe(elf->fd,&elf->interp[0],fsz,ELF_WORD_TO_NATIVE(ph->p_offset),elf->path,"PT_INTERP") < 0) err(); elf->interp[fsz] = '\0'; trc(TRC_EXEC,"%s saved interp = %s\n",__func__,&elf->interp[0]); } /* * The PT_INTERP handler when reading the "interpreter" specified by * the main executable. Since we don't handle cascaded * "interpreter"s, this just always errors. */ static void psect_pt_interp_interp(ELF_CTX *elf, Elf32_Phdr *ph __attribute__((__unused__)), void (*err)(void)) { trc(TRC_EXEC,"%s: bad PT_INTERP file (has its own PT_INTERP)\n",elf->path); (*err)(); } /* * The PT_LOAD handler. This is used for both the main executable and * the "interpreter", since it turns out they both need the same thing * here - hence the _common naming. 
*/ static void psect_pt_load_common(ELF_CTX *elf, Elf32_Phdr *ph, void (*err)(void)) { uint32_t align; uint32_t va; uint32_t fa; uint32_t diff; uint32_t fo; uint32_t fsz; uint32_t msz; uint32_t psz; uint32_t flags; MEMSEG *ms; uint32_t filesz; trc(TRC_EXEC,"%s entry, loadbase %08lx\n",__func__,(ULI)elf->loadbase); flags = ELF_WORD_TO_NATIVE(ph->p_flags); fa = ELF_ADDR_TO_NATIVE(ph->p_vaddr); align = ELF_WORD_TO_NATIVE(ph->p_align); trc(TRC_EXEC,"flags %08lx fa %08lx align %08lx\n", (ULI)flags, (ULI)fa, (ULI)align); if (!align || (align & (align-1))) { trc(TRC_EXEC,"%s: p_align (%#lx) isn't a power of two\n",elf->path,(ULI)align); (*err)(); } filesz = ELF_WORD_TO_NATIVE(ph->p_filesz); if (align > 1) elf->loadbase = ROUND_UP(elf->loadbase,align); va = fa; if (align > 1) va = ROUND_DOWN(va,align); diff = fa - va; trc(TRC_EXEC,"fa %08lx va %08lx diff %08lx\n", (ULI)fa, (ULI)va, (ULI)diff); fo = ELF_OFFSET_TO_NATIVE(ph->p_offset) - diff; fsz = filesz + diff; msz = ELF_WORD_TO_NATIVE(ph->p_memsz) + diff; psz = ROUND_UP(msz,PAGE_SIZE); trc(TRC_EXEC,"fo %08lx fsz %08lx msz %08lx psz %08lx\n", (ULI)fo, (ULI)fsz, (ULI)msz, (ULI)psz); va += elf->loadbase; trc(TRC_EXEC,"calling memseg_mem, va %08lx psz %08lx\n", (ULI)va, (ULI)psz); ms = memseg_mem(va,psz,((flags&PF_R)?P_R:0)|((flags&PF_W)?P_W:0)|((flags&PF_X)?P_X:0)); if (read_exe(elf->fd,ms->data,fsz,fo,elf->path,"bad ELF file (can't read program segment)") < 0) (*err)(); if (fsz < psz) bzero(ms->data+fsz,psz-fsz); if (va+psz > elf->dend) elf->dend = va + psz; if ((elf->entry >= fa) && (elf->entry <= fa+filesz)) { elf->taddr = fa; if (elf->daddr == ~(uint32_t)0) elf->daddr = elf->taddr; elf->dli_interp = fa + elf->loadbase; } else { elf->daddr = fa; } } // Use psect_pt_load_common for PT_LOAD for both _main and _interp. #define psect_pt_load_main psect_pt_load_common #define psect_pt_load_interp psect_pt_load_common /* * The PT_PHDR handler for the main executable. Just record the value * for possible later use. 
*/ static void psect_pt_phdr_main(ELF_CTX *elf, Elf32_Phdr *ph, void (*err)(void) __attribute__((__unused__))) { elf->phdr = ELF_WORD_TO_NATIVE(ph->p_vaddr); trc(TRC_EXEC,"%s saved phdr = %08lx\n",__func__,(ULI)elf->phdr); } /* * The PT_PHDR handler for the dynamic linker. We do nothing here, * since it's only the main executable that PT_PHDR matters for. */ static void psect_pt_phdr_interp(ELF_CTX *elf __attribute__((__unused__)), Elf32_Phdr *ph __attribute__((__unused__)), void (*err)(void) __attribute__((__unused__))) { } /* * The PSECT_OPS structs for the main executable (main) and the * PT_INTERP "interpreter" (interp). */ static const PSECT_OPS psect_ops_main = PSECT_OPS_INIT(main); static const PSECT_OPS psect_ops_interp = PSECT_OPS_INIT(interp); /* * A print_regs test function for printing all registers. */ static int print_regs_all(int rno __attribute__((__unused__))) { return(1); } /* * Empty a STAB, preparatory to freeing it or relaoding it or the like. */ static void stab_empty(STAB *s) { free(s->syms); free(s->strs); s->syms = 0; s->nsyms = 0; s->strs = 0; } /* * Sort a vector of SYMs by value. On return, v[i].val will be <= * v[i+1].val for any i from 0 through n-2. l and h are bounds (not * necessarily tight) on the min and max val values in v. */ static void sort_syms(SYM *v, int n, uint32_t min, uint32_t max) { int a; int b; SYM t; uint32_t mid; while (1) { if (n < 2) return; if (max-min < 1) return; mid = (min + max) / 2; a = 0; b = n - 1; while (1) { while ((a < b) && (v[a].val <= mid)) a ++; while ((b > a) && (v[b].val > mid)) b --; if (a == b) break; t = v[a]; v[a] = v[b]; v[b] = t; } if (v[b].val <= mid) b ++; if (max-min == 1) return; if (n-b < b) { sort_syms(v+b,n-b,mid,max); n = b; max = mid; } else { sort_syms(v,b,min,mid); v += b; n -= b; min = mid; } } } /* * Nested function pulled out of elf_reload_symbols(). */ static void reload_syms_nosyms(void (*)(void), ELF_CTX *, const char *, ...) 
__attribute__((__format__(__printf__,3,4),__noreturn__)); static void reload_syms_nosyms(void (*throw)(void), ELF_CTX *c, const char *fmt, ...) { char *s; va_list ap; if (trc_if(TRC_EXEC)) { va_start(ap,fmt); asprintf(&s,fmt,ap); va_end(ap); trc(TRC_EXEC,"%s: no symbols loaded: %s\n",c->path,s); free(s); } (*throw)(); abort(); } /* * Load the text symbols from an ELF file; record the limits of the * text segment as well, so we can tell when to do symbol lookup. */ static void elf_reload_symbols(ELF_CTX *c) { __label__ nosyms_; int i; int j; int shx; int link; Elf32_Sym sym; char *symv; unsigned int u; unsigned int v; void nosyms_throw(void) { goto nosyms_; } #define NOSYMS(...) reload_syms_nosyms(&nosyms_throw,c,__VA_ARGS__) if (0) { nosyms_:; return; } stab_empty(&elf_stab); shx = -1; for (i=c->shn-1;i>=0;i--) { if (ELF_WORD_TO_NATIVE(c->sh[i].sh_type) == SHT_SYMTAB) { if (shx >= 0) NOSYMS("multiple SYMTAB sections"); shx = i; } } if (shx < 0) NOSYMS("no SYMTAB section found"); link = ELF_WORD_TO_NATIVE(c->sh[shx].sh_link); if (! link) NOSYMS("SYMTAB section has no link"); if (link >= c->shn) NOSYMS("SYMTAB section link out of range"); i = ELF_WORD_TO_NATIVE(c->sh[link].sh_size); elf_stab.strslen = i; elf_stab.strs = malloc(i+1); if (! elf_stab.strs) NOSYMS("can't malloc(%d) for symbol strings",i); if (read_exe(c->fd,elf_stab.strs,i,ELF_WORD_TO_NATIVE(c->sh[link].sh_offset),c->path,0) < 0) NOSYMS("can't read symbol strings"); elf_stab.strs[i] = '\0'; i = ELF_WORD_TO_NATIVE(c->sh[shx].sh_size); if (i % sizeof(Elf32_Sym)) { trc(TRC_EXEC,"%s: warning: symtab size %d isn't a multiple of symbol size %d\n",c->path,i,(int)sizeof(Elf32_Sym)); } i /= sizeof(Elf32_Sym); elf_stab.nsyms = i; symv = malloc(i*sizeof(Elf32_Sym)); if (! symv) NOSYMS("can't malloc(%d) for file symbol table",i*(int)sizeof(Elf32_Sym)); elf_stab.syms = malloc(i*sizeof(SYM)); if (! 
elf_stab.syms) NOSYMS("can't malloc(%d) for in-core symbol table",i*(int)sizeof(SYM)); if (read_exe(c->fd,symv,i*sizeof(Elf32_Sym),ELF_WORD_TO_NATIVE(c->sh[shx].sh_offset),c->path,0) < 0) NOSYMS("can't read symbols"); elf_stab.textbeg = c->taddr; elf_stab.textend = c->daddr; j = 0; for (i=0;i= elf_stab.textbeg) && (v < elf_stab.textend) ) { elf_stab.syms[j].name = elf_stab.strs + u; elf_stab.syms[j].val = v; j ++; } } elf_stab.nsyms = j; sort_syms(&elf_stab.syms[0],elf_stab.nsyms,elf_stab.textbeg,elf_stab.textend); trc(TRC_EXEC,"symbols loaded, count=%d\n",elf_stab.nsyms); free(symv); #undef NOSYMS } /* * Lookup a function name by address. */ static SYM *lookup_fxn(uint32_t addr) { int l; int h; int m; l = -1; h = elf_stab.nsyms; while (h-l > 1) { m = (h + l) / 2; if (elf_stab.syms[m].val <= addr) l = m; if (elf_stab.syms[m].val >= addr) h = m; } return((l==h)?&elf_stab.syms[m]:0); } /* * Failure function for places where we want to free two things. */ static void free2(void *vv) { free(((void **)vv)[0]); free(((void **)vv)[1]); } /* * Try to exec xpath as an ELF executable. argvstrs and envpstrs, with * narg and nenv as their respective counts, are the argument and * environment vectors, already read out of emulated memory. * * Returns an emulated-system errno on failure, zero on success. */ static int try_exec_elf(const char *xpath, const char **argvstrs, int narg, const char **envpstrs, int nenv) { __label__ enoexec_; int i; int l; MEMSEG *ms; uint32_t argv; uint32_t envp; uint32_t *argvv; uint32_t *envpv; uint32_t ps_strings; int stacklen; uint32_t sfp; ELF_CTX ctx; uint32_t ee; void *freev[2]; void enoexec(void) { goto enoexec_; } #define CTX_DONE() do { close(ctx.fd); free(ctx.ph); free(ctx.sh); } while (0) trc(TRC_EXEC,"%s %s\n",__func__,xpath); trc(TRC_EXEC,"narg %d\n",narg); for (i=0;idata,MAXSSIZE); /* * For dynamically-linked executables, we need Aux32Info structs as * well. 
The stack has to be laid out with sp pointing to argc, * followed by argc+1 pointers to arg strings (and a trailing nil), * then envp pointers (terminated by a trailing nil), then Aux32Info * structs (terminated by one with a_type set to AT_NULL). After * that comes whatever else - which here means the argv and envp * strings, the stack gap, the signal-delivery trampoline, and * ps_strings. * * For the sake of comparisons, we want our stack layout to exactly * match the kernel's. In aid of this, we do things in a slightly * strange order, so as to exactly match the kernel's computations. */ vm.dbrk = ctx.dend; stacklen = 0; for (i=nenv-1;i>=0;i--) stacklen += strlen(envpstrs[i]) + 1; for (i=narg-1;i>=0;i--) stacklen += strlen(argvstrs[i]) + 1; stacklen = ROUND_UP(stacklen,8); stacklen = ((narg+nenv+2)*4) + // argv/envp pointers (8 * 2 * 4) + // Aux32Info structs 4 + // not sure stacklen + // argv/envp strings STACKGAPLEN + // stack gap SZSIGCODE + // signal trampoline 16; // ps_strings stacklen = ROUND_UP(stacklen,8); sfp = USRSTACK - stacklen; s.regs[R_SP] = sfp; // argc mem_set_4(sfp,narg); // argv/envp strings argvv = malloc((narg+1)*sizeof(uint32_t)); argvv[narg] = 0; envpv = malloc((nenv+1)*sizeof(uint32_t)); envpv[nenv] = 0; freev[0] = argvv; freev[1] = envpv; sfp = s.regs[R_SP] + (1 + narg + nenv + 2 + 16) * 4; for (i=0;i= nl) { trc(TRC_EXEC,"%s: no shell name present on #! line\n",path); enoexec(); } for (sh1=sh0;(sh1=0;i--) free(newargv[i]); free(newargv); return(e); } /* * Try to exec a program. We first try to handle it as an ELF * executable; if that fails, we then try it as a #! script. If that * fails too, we return failure. * * Returns 0 on success or an (emulated-OS) errno on failure. 
*/ static int do_execve(const char *path, const char **argvstrs, const char **envpstrs) { int e; int narg; int nenv; struct stat stb; for (narg=0;argvstrs[narg];narg++) ; for (nenv=0;envpstrs[nenv];nenv++) ; if (stat(path,&stb) < 0) return(os2em_errno(errno)); // To fully support running as non-root this needs to change. if (! (stb.st_mode & 0111)) return(em_EACCES); if ((stb.st_mode & S_IFMT) != S_IFREG) return(em_EACCES); e = try_exec_elf(path,argvstrs,narg,envpstrs,nenv); if (e) e = try_exec_script(path,argvstrs,narg,envpstrs,nenv); return(e); } /* * Do the initial exec in accordance with the command-line args. */ static void initial_exec(void) { const char **argv; int i; const char **envp; int e; argv = malloc((cl_nargs+1)*sizeof(const char *)); argv[cl_nargs] = 0; for (i=cl_nargs-1;i>=0;i--) argv[i] = cl_args[i]; envp = malloc((cl_nenvp+1)*sizeof(const char *)); envp[cl_nenvp] = 0; for (i=cl_nenvp-1;i>=0;i--) envp[i] = cl_envp[i]; e = do_execve(exe,argv,envp); postexec = 1; if (e) { printf("Initial exec failed %d (%s)\n",e,em_strerror(e)); free(argv); initial_exec_state = IES_FAILED; top(); } free(argv); initial_exec_state = IES_WORKED; } /* * This is called when an operation that potentially needs vfork fixup * is done from a vforked child process. This adds a VFORKBACKOUT * record, so the parent can do the fixup. */ static void add_vfork_backout(VFBKIND k, uint32_t emfd, FD fd) { VFORKBACKOUT *b; if (during_vfork < 1) return; trc(TRC_VFORK,"adding vfork backout: kind=%d(%s) level=%d emfd=%lu fd fd=%d prot=%u flags=%u\n",(int)k,vfb_kind_str(k),during_vfork,(ULI)emfd,fd.fd,fd.prot,fd.flags); b = malloc(sizeof(VFORKBACKOUT)); b->kind = k; b->emfd = emfd; b->fd = fd; b->level = during_vfork; if (k == VFB_TRCMGR) { b->link = vfbtm; vfbtm = b; } else { b->link = vfb; vfb = b; } } /* * Used in a vfork()ed child after exec, to clean up backout records * made between vforking and execing. during_vfork is cleared * elsewhere. 
*/ static void flush_vfork_backout(void) { VFORKBACKOUT *b; while ((b = vfb)) { vfb = b->link; free(b); } while ((b = vfbtm)) { vfbtm = b->link; free(b); } } /* * Do post-vfork cleanup in the parent process. Because * add_vfork_backout pushes things onto vfb, doing things this way * does them last-done first-restored, which is exactly what we want. * * This does just one list; see vfork_cleanup(), below. */ static void vfork_cleanup_list(VFORKBACKOUT **rootp) { VFORKBACKOUT *b; FD *fd; VFORKBACKOUT *root; root = *rootp; while (root && (root->level > during_vfork)) { b = root; root = b->link; switch (b->kind) { default: panic("impossible kind %d in %s",(int)b->kind,__func__); break; case VFB_OPEN: /* * The file was opened during the vfork. The OS's file * descriptor went away with the switch back to the parent, * but the FD is still around. Fix that. */ trc(TRC_VFORK,"backing out OPEN: emfd=%lu\n",(ULI)b->emfd); if (b->emfd >= nfds) panic("impossible VFB_OPEN backout 1"); fd = fds[b->emfd]; if (! fd) panic("impossible VFB_OPEN backout 2"); fds[b->emfd] = 0; free(fd); break; case VFB_CLOSE: /* * The file was closed during the vfork. The switch back to * the parent has resurrected the file descriptor, but the * FD is still nonexistent. Fix that. */ trc(TRC_VFORK,"backing out CLOSE: emfd=%lu\n",(ULI)b->emfd); if (b->emfd >= nfds) panic("impossible VFB_CLOSE backout 1"); if (fds[b->emfd]) panic("impossible VFB_CLOSE backout 2"); fd = malloc(sizeof(FD)); *fd = b->fd; fds[b->emfd] = fd; break; case VFB_DUP2: /* * A dup2() was done during the vfork. The file descriptors * have been restored, but the FD needs updating - its fd is * correct, but its prot needs restoring. */ trc(TRC_VFORK,"backing out DUP2: emfd=%lu\n",(ULI)b->emfd); if (b->emfd >= nfds) panic("impossible VFB_DUP2 backout 1"); fd = fds[b->emfd]; if (! fd) panic("impossible VFB_DUP2 backout 2"); fd->prot = b->fd.prot; break; case VFB_TRCMGR: /* * Clean up the tracing manager connection fd. 
Unusually, we * have to log _after_ doing the work, because, until this * trcmgr_set_fd happens, logging is broken. */ trcmgr_set_fd(b->fd.fd); trc(TRC_VFORK,"cleaning up tracing: fd=%d\n",b->fd.fd); break; } free(b); } *rootp = root; } /* * Do post-vfork cleanup in the parent process. This calls * vfork_cleanup_list to clean up TRCMGR entries first, then for all * other entries. */ static void vfork_cleanup(void) { vfork_cleanup_list(&vfbtm); vfork_cleanup_list(&vfb); } /* * Open a new (emulated) file descriptor. osfd is the underlying OS * file desciprtor. minfd is the point in the open file table that * the search for an available fd should start. rw is P_R and/or P_W, * indicating how the descriptor should be opened. * * This makes the add_vfork_backout() call for the new fd. */ static int new_fd(int osfd, int minfd, unsigned int rw) { int d; FD *fd; int i; for (d=minfd;(d MAXFDS) { printf("Out of fds\n"); top(); } if (d >= nfds) { i = nfds; nfds = d + 8; fds = realloc(fds,nfds*sizeof(*fds)); for (;ifd = osfd; fd->prot = rw & (P_R | P_W); fd->flags = 0; add_vfork_backout(VFB_OPEN,d,*fd); return(d); } /* * Set up the initial file descriptors during startup. */ static void init_fds(void) { int i; int d; struct stat stb; nfds = 0; fds = 0; during_vfork = 0; vfb = 0; vfbtm = 0; for (i=2;i>=0;i--) { if (fstat(i,&stb) >= 0) { d = dup(i); new_fd(d,i,(i==0)?P_R:P_W); } } } static int siglogfd; // This needs to be reentrant and signal-safe.... static void sig_log(const char *fmt, ...) 
{ va_list ap; const char *fp; char obuf[256]; int ox; int v_int; uint32_t v_u32; void gen(char c) { if (ox < 255) obuf[ox++] = c; } void gen_u_dec(unsigned long long int v) { if (v >= 10) gen_u_dec(v/10); gen("0123456789"[v%10]); } if (siglogfd < 0) return; ox = 0; va_start(ap,fmt); for (fp=fmt;*fp;fp++) { if (*fp == '~') { switch (*++fp) { case '\0': panic("sig_log: format ends with ~"); break; default: panic("sig_log: unrecognized format ~%c",*fp); break; case 'd': v_int = va_arg(ap,int); if (v_int < 0) { gen('-'); gen_u_dec((unsigned int)-v_int); } else { gen_u_dec((unsigned int)v_int); } break; case '3': v_u32 = va_arg(ap,uint32_t); gen_u_dec(v_u32); break; } } else { gen(*fp); } } va_end(ap); write(siglogfd,&obuf[0],ox); } /* * This is the signal handler routine for all signals we catch. We * install this handler for all signals we can; it just records them * in s.sigpend[] and then sets anysigpend and alert_run so they'll be * noticed at the next emulated instruction boundary. */ static void catch_signal(int sig) { uint32_t emsig; sig_log("catch_signal: sig = ~d\n",sig); emsig = os2em_signal(sig); if (emsig == 0) { sig_log("catch_signal: no em sig\n"); return; } if (emsig >= em__NSIG) panic("handling impossible emsig %lu",(ULI)emsig); sig_log("setting sigpend[~3], anysigpend, and alert_run\n",emsig); s.sigpend[emsig] = 1; anysigpend = 1; alert_run = 1; } /* * Set our catcher for a given signal. This is used both during * initial signal setup and when (un)ignoring when the emulated * program changes their settings. */ static void set_our_catcher(int ossig, void (*catcher)(int)) { struct sigaction sa; sa.sa_handler = catcher; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; sigaction(ossig,&sa,0); } /* * Set up signals on initial startup. The only thing of note here is * that we skip trying to install handlers for not just SIGKILL and * SIGSTOP (which we're not allowed to do anything with) but also * SIGSEGV. 
 * This is because the sort of memory fault that leads to
 * SIGSEGV on real hardware is, here, instead noticed by the memory
 * access code.
 */
static void init_signals(void)
{
 int i;
 int ossig;

 // Start with nothing blocked, nothing pending, nothing ignored.
 s.sigmask = 0;
 anysigpend = 0;
 s.ignsigs = 0;
 for (i=em__NSIG-1;i>=1;i--)
  { // Default emulated disposition for every signal.
    s.sigh[i].handler = em_SIG_DFL;
    s.sigh[i].flags = 0;
    s.sigpend[i] = 0;
    switch (i)
     { case em_SIGKILL:
       case em_SIGSTOP:
       case em_SIGSEGV:
	  // Can't (KILL, STOP) or deliberately don't (SEGV - see the
	  // block comment above) install our catcher for these.
	  continue;
	  break;
     }
    ossig = em2os_signal(i);
    if (sigdef[i] == SIGDEF_IGNORE)
     { // Default-ignored signals are recorded in the ignsigs bitmask
       // and left at the OS default rather than caught.
       s.ignsigs |= 1ULL << i;
       set_our_catcher(ossig,SIG_DFL);
     }
    else
     { set_our_catcher(ossig,&catch_signal);
     }
  }
 // No emulated signal stack initially.
 s.onsigstack = 0;
 s.sigstack_enabled = 0;
 s.sigstack_base = 0;
 s.sigstack_size = 0;
 // Fd for the signal-safe logger; on open() failure it's -1 and
 // sig_log becomes a no-op.
 siglogfd = open("/sparc.sig.log",O_WRONLY|O_APPEND,0);
}

/*
 * Sign-extend a value.  v is the value and bits is the number of
 * significant bits in it, including the sign bit.  For example,
 * signextend(9,4) will return 0xfffffff9, while signextend(9,5) will
 * return 0x00000009.
 *
 * This assumes two's-complement representation; this is correct for
 * the emulated machine, and this is for emulator use.
 */
static uint32_t signextend(uint32_t v, int bits)
{
 if ((v >> (bits-1)) & 1)
  { // Sign bit set: force all bits above the field to 1.
    v |= (~(uint32_t)0) << bits;
  }
 else
  { // Sign bit clear: force all bits above the field to 0.
    v &= ~((~(uint32_t)0) << bits);
  }
 return(v);
}

/*
 * Report an unimplemented opcode.  Just print the details and throw
 * out.
 */
static void unimp(uint32_t xa, uint32_t inst)
{
 FILE *f;

 // Write to stdout, tee'd into the instruction trace if that's on.
 if (trc_if(TRC_INSTR))
  { f = fwrap_tee(stdout,trc_f(TRC_INSTR),(FILE *)0);
  }
 else
  { f = fwrap_tee(stdout,(FILE *)0);
  }
 fprintf(f,"Unimplemented: at %08lx inst=%08lx\n",(ULI)xa,(ULI)inst);
 // Print the instruction's fields decoded both ways, since we can't
 // tell which layout the unimplemented word was meant as.
 fprintf(f," OPC=%d OP2=%d DREG=%d A=%d COND=%d IMM22=%d, DISP22=%d\n",
	(int)OPC(inst), (int)OP2(inst), (int)DREG(inst), (int)A(inst),
	(int)COND(inst), (int)IMM22(inst), (int)DISP22(inst));
 fprintf(f," OP3=%d SREG1=%d SREG2=%d I=%d ASI=%d SIMM13=%d OPF=%d\n",
	(int)OP3(inst), (int)SREG1(inst), (int)SREG2(inst), (int)I(inst),
	(int)ASI(inst), (int)SIMM13(inst), OPF(inst));
 fclose(f);
 top();
}

/*
 * Return the next window number after v in the `save' direction.  That
 * is, if CWP were v, after a save it would be cwp_s(v).
 */
static unsigned int cwp_s(unsigned int v)
{
 // Decrement mod NWINDOWS (the ?: handles the wrap at 0).
 return((v?:NWINDOWS)-1);
}

/*
 * Return the next window number after v in the `restore' direction.
 * That is, if CWP were v, after a restore it would be cwp_r(v).
 */
static unsigned int cwp_r(unsigned int v)
{
 // Increment mod NWINDOWS.
 return((v==NWINDOWS-1)?0:(v+1));
}

/*
 * Save the current window's registers into s.rw[].
 *
 * Note the %o registers: per the window-overlap diagram below, the
 * current window's %o's live in the %i slots of the next window in
 * the save direction.
 */
static void save_cwindow(void)
{
 bcopy(&s.regs[R_L0],&s.rw[s.cwp].l[0],8*sizeof(uint32_t));
 bcopy(&s.regs[R_I0],&s.rw[s.cwp].i[0],8*sizeof(uint32_t));
 bcopy(&s.regs[R_O0],&s.rw[cwp_s(s.cwp)].i[0],8*sizeof(uint32_t));
}

/*
 * Load the current window's registers from s.rw[].
 *
 * Exact inverse of save_cwindow(), including the %o-from-next-window
 * overlap.
 */
static void load_cwindow(void)
{
 bcopy(&s.rw[s.cwp].l[0],&s.regs[R_L0],8*sizeof(uint32_t));
 bcopy(&s.rw[s.cwp].i[0],&s.regs[R_I0],8*sizeof(uint32_t));
 bcopy(&s.rw[cwp_s(s.cwp)].i[0],&s.regs[R_O0],8*sizeof(uint32_t));
}

/*
 * Spill a window's worth of registers to the stack.
 *
 * This code doesn't need to test whether any of the registers affected
 * are in s.regs[] instead of s.rw[].  At first sight it appears to,
 * but it's obviously unnecessary when you note that it's never called
 * except after a save_cwindow() and before load_cwindow() (or
 * equivalent) - so that all register values are in s.rw[].
(Some of
 * them are also in s.regs[], but that doesn't matter; the ones in
 * s.regs[] will be replaced before we use them for anything.)
 *
 * Conceptually, we have
 *
 *	                  |         |
 *	                  +-  ...  -+
 *	                  |         |
 *	                  +---------+
 *	                  |         | %i0-%i7 \
 *	                  +-  CWP  -+          \
 *	                  |         | %l0-%l7   > current window
 *	                  +---------+          /
 *	       / %i0-%i7  |         | %o0-%o7 /
 *	      /           +- CWP-1 -+
 *	invalid window <  %l0-%l7 |         |
 *	      \           +---------+
 *	       \ %o0-%o7  |         | %i0-%i7 \
 *	                  +- CWP-2 -+          \
 *	                  |         | %l0-%l7   > being spilled
 *	                  +---------+          /
 *	       / %i0-%i7  |         | %o0-%o7 /
 *	      /           +- CWP-3 -+
 *	has %i6 used   <  %l0-%l7 |         |
 *	      \           +---------+
 *	       \ %o0-%o7  |         |
 *	                  +-  ...  -+
 *	                  |         |
 *
 * If NWINDOWS is at least 4, none of these overlap.  If NWINDOWS is 3,
 * the "has %i6 used" window is the current window, but that doesn't
 * hurt anything.  If NWINDOWS is only 2, the being-spilled window is
 * the current window and the has-%i6-used window is the invalid
 * window, but everything still has the correct values in s.rw[].
 *
 * NWINDOWS does need to be at least 2, though.  (It has to be anyway
 * in order that the current window and the invalid window be
 * distinct.)
 */
#if NWINDOWS < 2
#error "Must have at least two register windows!"
#endif
static void spill_window(int w)
{
 uint32_t sp;
 int i;

 // Window w's %sp is its %o6, which lives as %i6 of the next window
 // in the save direction.
 sp = s.rw[cwp_s(w)].i[6]; // %o6 of window w
 if (sp & 3)
  { // A misaligned %sp would fault on real hardware; give up loudly.
    fprintf(stderr,"window spill: %%sp low two bits are %d%d\n",(int)((sp>>1)&1),(int)(sp&1));
    top();
  }
 trc(TRC_WINDOW,"saving window %d to %08lx\n",w,(ULI)sp);
 // Standard SPARC frame layout: %l0-%l7 at sp+0, %i0-%i7 at sp+32.
 for (i=0;i<8;i++) mem_set_4(sp+(i*4),s.rw[w].l[i]);
 for (i=0;i<8;i++) mem_set_4(sp+32+(i*4),s.rw[w].i[i]);
}
/*
 * Fill a window's worth of registers from the stack.  Inverse of
 * spill_window(); same frame layout.
 */
static void fill_window(int w)
{
 uint32_t sp;
 int i;

 sp = s.rw[cwp_s(w)].i[6]; // %o6 of window w
 if (sp & 3)
  { fprintf(stderr,"window fill: %%sp low two bits are %d%d\n",(int)((sp>>1)&1),(int)(sp&1));
    top();
  }
 trc(TRC_WINDOW,"restoring window %d from %08lx\n",w,(ULI)sp);
 for (i=0;i<8;i++) s.rw[w].l[i] = mem_get_4(sp+(i*4));
 for (i=0;i<8;i++) s.rw[w].i[i] = mem_get_4(sp+32+(i*4));
}
/*
 * Do a window save.  On real hardware, the hardware just traps to the
 * kernel upon attempting to save (or restore) into an invalid window.
 * We are a userland-only emulator, so we do the stuff the kernel
 * does: we implement window spills on save and fills on restore when
 * attempting to shift into the invalid window.  ("The" invalid
 * window, not "an" invalid window - unlike the hardware, which has a
 * bitmask, we have exactly one invalid window at any given time.)
 */
static void window_save(void)
{
 int i;
 int j;

 save_cwindow();
 i = cwp_s(s.cwp);
 if (i == s.iwp)
  { // Saving into the invalid window: spill the window beyond it,
    // advance the invalid pointer, and hand the new window fresh
    // (zeroed) locals/outs with the old outs as its ins.
    j = cwp_s(i);
    spill_window(j);
    s.iwp = j;
    s.cwp = i;
    bcopy(&s.regs[R_O0],&s.regs[R_I0],8*sizeof(uint32_t));
    bzero(&s.regs[R_L0],8*sizeof(uint32_t));
    bzero(&s.regs[R_O0],8*sizeof(uint32_t));
  }
 else
  { s.cwp = i;
    load_cwindow();
  }
}
/*
 * Flush all valid windows, except the current one, to memory.
 *
 * The kernel implements this by doing NWINDOWS-1 saves, then
 * NWINDOWS-1 restores.  We just spill_window() each window between
 * cwp and iwp - in the restore direction; the ones between cwp and
 * iwp in the save direction aren't valid.  We don't write cwp; we
 * reset iwp to cwp_r(cwp) to match the state the kernel leaves things
 * in.
 *
 * We could do what the kernel does and do multiple window_save()s and
 * window_restore()s.  But we'd have to keep copying between %sp and
 * %fp, and that also pays the price of save_cwindow/load_cwindow
 * repeatedly.
 *
 * sc___sigreturn14 assumes this does a save_cwindow().
 */
static void window_flush(void)
{
 int i;

 save_cwindow();
 for (i=cwp_s(s.iwp);i!=s.cwp;i=cwp_s(i)) spill_window(i);
 s.iwp = cwp_r(s.cwp);
 // Don't need to load_cwindow(); s.rw[] haven't changed.
}
/*
 * Do a window restore.  See window_save for further comments.
 */
static void window_restore(void)
{
 int i;

 save_cwindow();
 i = cwp_r(s.cwp);
 if (i == s.iwp)
  { // Restoring into the invalid window: fill it from the stack and
    // advance the invalid pointer.
    s.iwp = cwp_r(i);
    fill_window(i);
  }
 s.cwp = i;
 load_cwindow();
}
/*
 * Implement addcc: compute a+b and return the result, affecting the
 * condition codes correspondingly.
*/ static uint32_t addcc(uint32_t a, uint32_t b) { uint64_t v; v = (uint64_t)a + (uint64_t)b; s.cc = ((v & 0x80000000) ? CC_N : 0) | (((uint32_t)v == 0) ? CC_Z : 0) | ((0x80000000&(a^v)&~(a^b)) ? CC_V : 0) | ((v & 0x100000000ULL) ? CC_C : 0); return(v); } /* * Implement subcc: compute a-b and return the result, affecting the * condition codes correspondingly. */ static uint32_t subcc(uint32_t a, uint32_t b) { uint64_t v; v = (uint64_t)a - (uint64_t)b; s.cc = ((v & 0x80000000) ? CC_N : 0) | (((uint32_t)v == 0) ? CC_Z : 0) | (((a^b)&(v^a)&0x80000000) ? CC_V : 0) | ((v & 0x100000000ULL) ? CC_C : 0); return(v); } /* * Implement addxcc: compute a+b+c and return the result, affecting the * condition codes correspondingly. c must be 0 or 1. */ static uint32_t addxcc(uint32_t a, uint32_t b, uint32_t c) { uint64_t v; v = (uint64_t)a + (uint64_t)b + (uint64_t)c; s.cc = ((v & 0x80000000) ? CC_N : 0) | (((uint32_t)v == 0) ? CC_Z : 0) | ((0x80000000&(a^v)&~(a^b)) ? CC_V : 0) | ((v & 0x100000000ULL) ? CC_C : 0); return(v); } /* * Implement subxcc: compute a-b-c and return the result, affecting the * condition codes correspondingly. c must be 0 or 1. */ static uint32_t subxcc(uint32_t a, uint32_t b, uint32_t c) { uint64_t v; v = (uint64_t)a - (uint64_t)b - (uint64_t)c; s.cc = ((v & 0x80000000) ? CC_N : 0) | (((uint32_t)v == 0) ? CC_Z : 0) | (((a^b)&(v^a)&0x80000000) ? CC_V : 0) | ((v & 0x100000000ULL) ? CC_C : 0); return(v); } /* * Implement sra: compute v>>n, where the shift is arithmetic, and * return the result. */ static uint32_t sra(uint32_t v, uint32_t n) { n &= 31; if (v & 0x80000000) { v = ~((~v) >> n); } else { v >>= n; } return(v); } /* * Implement andcc: compute a&b and return the result, affecting the * condition codes correspondingly. */ static uint32_t andcc(uint32_t a, uint32_t b) { uint32_t v; v = a & b; s.cc = ((v & 0x80000000) ? CC_N : 0) | ((v == 0) ? 
CC_Z : 0); return(v); } /* * Implement orcc: compute a|b and return the result, affecting the * condition codes correspondingly. */ static uint32_t orcc(uint32_t a, uint32_t b) { uint32_t v; v = a | b; s.cc = ((v & 0x80000000) ? CC_N : 0) | ((v == 0) ? CC_Z : 0); return(v); } /* * Implement xorcc: compute a^b and return the result, affecting the * condition codes correspondingly. */ static uint32_t xorcc(uint32_t a, uint32_t b) { uint32_t v; v = a ^ b; s.cc = ((v & 0x80000000) ? CC_N : 0) | ((v == 0) ? CC_Z : 0); return(v); } /* * Implement andncc: compute a&~b and return the result, affecting the * condition codes correspondingly. */ static uint32_t andncc(uint32_t a, uint32_t b) { uint32_t v; v = a & ~b; s.cc = ((v & 0x80000000) ? CC_N : 0) | ((v == 0) ? CC_Z : 0); return(v); } /* * Implement xnorcc: compute a^~b and return the result, affecting the * condition codes correspondingly. */ static uint32_t xnorcc(uint32_t a, uint32_t b) { uint32_t v; v = a ^ ~b; s.cc = ((v & 0x80000000) ? CC_N : 0) | ((v == 0) ? CC_Z : 0); return(v); } /* * Implement (integer) conditional branch instructions. Except for * branch-always (8) and branch-never (0), which are special-cased, * just look up the condition in conds[] and pick out the bit * corresponding to the current state of the condition codes, using * that to either branch or not. We special-case 0 and 8 because the * handling of the annul bit is backwards for those two. */ // The ">> (s.cc & 15)" part below depends on this.... #if (CC_N | CC_Z | CC_V | CC_C) != 15 #error "cbranch assumptions wrong" #endif static void cbranch(int cond, int annul, uint32_t to) { switch (cond) { case 8: s.npc = to; /* fall through */ case 0: if (annul) s.flags |= SF_ANNUL; break; default: if ((conds[cond&15] >> (s.cc & 15)) & 1) { s.npc = to; } else { if (annul) s.flags |= SF_ANNUL; } break; } } /* * Implement floating-point conditional branch instructions. 
Except * for branch-always (8) and branch-never (0), which are * special-cased, just look up the condition in fconds[] and pick out * the bit corresponding to the current state of the FPU condition * codes, using that to either branch or not. We special-case 0 and 8 * because the handling of the annul bit is backwards for those two. */ // The ">> (s.fcc & 3)" below depends on this.... #if (FCC_EQ < 0) || (FCC_EQ > 3) ||\ (FCC_LT < 0) || (FCC_LT > 3) ||\ (FCC_GT < 0) || (FCC_GT > 3) ||\ (FCC_UN < 0) || (FCC_UN > 3) #error "fcbranch assumptions invalid" #endif static void fcbranch(int cond, int annul, uint32_t to) { switch (cond) { case 8: s.npc = to; /* fall through */ case 0: if (annul) s.flags |= SF_ANNUL; break; default: if ((fconds[cond&15] >> (s.fcc & 3)) & 1) { s.npc = to; } else { if (annul) s.flags |= SF_ANNUL; } break; } } /* * Load a NUL-terminated string out of emulated memory (at ptr) and * return a malloc()ed copy of it in emulator memory. This also sets * up the NULTERM_STATUS for later cleanup when the string is no * longer needed. * * We frob nomemacc to ensure we read the memory exactly once as far as * TRC_MEM is concerned. * * XXX Possible bug lurking: what if mem_get_1 errors and throws out? * If nothing else, we'll leave nomemacc incremented. */ static const char *nulterm_scarg(uint32_t ptr, NULTERM_STATUS *nts) { int l; unsigned char *s; int i; l = 0; while (mem_get_1(ptr+l)) l ++; s = malloc(l+1); nomemacc ++; for (i=0;i<=l;i++) s[i] = mem_get_1(ptr+i); nomemacc --; nts->tofree = s; return(s); } /* * Do any appropriate cleanup for a NUL-terminated string copied out of * emulator memory once it's no longer needed. */ static void nulterm_done(NULTERM_STATUS *nts) { free(nts->tofree); } /* * Fetch a 32-bit syscall argument. Narrower arguments are padded; * wider arguments are treated as multiple 32-bit arguments. 
This
 * fetches out of register shadows or memory depending on the argument
 * number and how many arguments are in registers, as described by the
 * SCARGS.
 */
uint32_t scarg(SCARGS *args, int n)
{
 if (n < 0) panic("impossible scarg");
 if (n < args->nreg) return(args->regs[n]);
 // Beyond the register arguments: read from the caller's stack.
 // The 23-word offset presumably skips the register-save area and
 // hidden-parameter slot of the SPARC frame — TODO confirm against
 // the syscall stub ABI.
 if (! args->sp) panic("scarg overrun");
 return(mem_get_4(args->sp+((23+n-args->nreg)*4)));
}
/*
 * Copy from/fromlen, in emulator memory, to to/tolen, in emulated
 * memory.  If fromlen is longer, this truncates; if tolen is longer,
 * this NUL-pads.  what is a text description of what's being copied,
 * in case an error message is generated.  prefail is called on any
 * failure, before throwing out, as described in the comment on
 * copyout().
 */
static void copy_or_nulpad(const void *from, int fromlen, uint32_t to, int tolen, const char *what, void (*prefail)(void *), void *pfarg)
{
 int o;
 int n;

 if (fromlen < tolen)
  { copyout(from,to,fromlen,what,prefail,pfarg);
    // Pad the remainder with NULs, a nulbuf-sized chunk at a time.
    o = fromlen;
    while (o < tolen)
     { n = sizeof(nulbuf);
       if (n > tolen-o) n = tolen - o;
       copyout(&nulbuf[0],to+o,n,what,prefail,pfarg);
       o += n;
     }
  }
 else
  { copyout(from,to,tolen,what,prefail,pfarg);
  }
}
/*
 * Build a SPARC PSR value from our state.  Only some bits are
 * emulated: the condition codes and the "FPU used" bit.  The other
 * bits of the PSR are not emulated.  This is used, eg, when building
 * a sigcontext structure for signal delivery.
 */
static uint32_t build_psr(void)
{
 uint32_t v;

 // When our internal cc bit layout matches the PSR's icc field, just
 // use it directly; otherwise translate bit by bit.
#if (CC_N == em_PSR_CC_N) &&\
    (CC_Z == em_PSR_CC_Z) &&\
    (CC_V == em_PSR_CC_V) &&\
    (CC_C == em_PSR_CC_C)
 v = s.cc;
#else
 v = ((s.cc & CC_N) ? em_PSR_CC_N : 0) |
	((s.cc & CC_Z) ? em_PSR_CC_Z : 0) |
	((s.cc & CC_V) ? em_PSR_CC_V : 0) |
	((s.cc & CC_C) ? em_PSR_CC_C : 0);
#endif
 v <<= em_PSR_CC_S;
 if (s.flags & SF_FPU) v |= em_PSR_EF;
 return(v);
}
/*
 * Build a SPARC FSR value from our state.  Only some bits are
 * emulated, notably the condition codes.
*/ static uint32_t build_fsr(void) { return( (em_FSR_RD_NEAREST << em_FSR_RD_S) | (0 << em_FSR_TEM_S) | // no em_FSR_NS (0 << em_FSR_VER_S) | (0 << em_FSR_FTT_S) | // no em_FSR_QNE #if (em_FSR_FCC_EQ == FCC_EQ) &&\ (em_FSR_FCC_LT == FCC_LT) &&\ (em_FSR_FCC_GT == FCC_GT) &&\ (em_FSR_FCC_UN == FCC_UN) (s.fcc << em_FSR_FCC_S) | #else ( (s.fcc == FCC_EQ) ? (em_FSR_FCC_EQ << em_FSR_FCC_S) : (s.fcc == FCC_LT) ? (em_FSR_FCC_LT << em_FSR_FCC_S) : (s.fcc == FCC_GT) ? (em_FSR_FCC_GT << em_FSR_FCC_S) : (s.fcc == FCC_UN) ? (em_FSR_FCC_UN << em_FSR_FCC_S) ) | #endif (0 << em_FSR_AEXC_S) | (0 << em_FSR_CEXC_S) ); } /* * Given a SPARC FSR value, install it in the emulator state. Attempts * to set anything we don't emulate throw out. */ static void set_fsr(uint32_t v) { int throw; throw = 0; if (v & em_FSR_MBZ) { printf("MBZ bits aren't all zero\n"); throw = 1; } if (((v >> em_FSR_RD_S) & em_FSR_RD_M) != em_FSR_RD_NEAREST) { printf("rounding direction other than nearest not supported\n"); throw = 1; } if (((v >> em_FSR_TEM_S) & em_FSR_TEM_M) != 0) { printf("floating trap enables not supported\n"); throw = 1; } if (throw) { printf("can't set %%fsr\n"); top(); } #if (em_FSR_FCC_EQ == FCC_EQ) &&\ (em_FSR_FCC_LT == FCC_LT) &&\ (em_FSR_FCC_GT == FCC_GT) &&\ (em_FSR_FCC_UN == FCC_UN) s.fcc = (v >> em_FSR_FCC_S) & em_FSR_FCC_M; #else switch ((v >> em_FSR_FCC_S) & em_FSR_FCC_M) { case em_FSR_FCC_EQ: s.fcc = FCC_EQ; break; case em_FSR_FCC_LT: s.fcc = FCC_LT; break; case em_FSR_FCC_GT: s.fcc = FCC_GT; break; case em_FSR_FCC_UN: s.fcc = FCC_UN; break; default: abort(); break; } #endif } /* * Convert a SPARC PSR value's condition codes to the cc representation * we use. The other bits of the PSR are ignored. This is used * during signal return. */ static unsigned int psr_to_cc(uint32_t psr) { #if (CC_N == em_PSR_CC_N) &&\ (CC_Z == em_PSR_CC_Z) &&\ (CC_V == em_PSR_CC_V) &&\ (CC_C == em_PSR_CC_C) return((psr>>em_PSR_CC_S)&em_PSR_CC_M); #else return( ((psr & (em_PSR_CC_N << em_PSR_CC_S)) ? 
CC_N : 0) | ((psr & (em_PSR_CC_Z << em_PSR_CC_S)) ? CC_Z : 0) | ((psr & (em_PSR_CC_V << em_PSR_CC_S)) ? CC_V : 0) | ((psr & (em_PSR_CC_C << em_PSR_CC_S)) ? CC_C : 0) ); #endif } /* * Deliver a signal. The signal number is am emulated-OS signal * number. Our caller must have made sure the signal is not blocked * or ignored; we handle SIG_DFL actions and user-provided handlers. */ static void deliver_signal(uint32_t sig, uint32_t *context) { int onstack; SIG *sh; uint32_t fp; int how; if ((sig < 1) || (sig >= em__NSIG)) panic("delivery of impossible signal %lu",(ULI)sig); sh = &s.sigh[sig]; onstack = 0; // Signal stack support not yet implemented trc(TRC_SIGNAL,"delivering signal %lu, handler %lx (context %p)\n",(ULI)sig,(ULI)sh->handler,(void *)context); if (sh->handler == em_SIG_DFL) { if ((sig < 1) || (sig >= (sizeof(sigdef)/sizeof(sigdef[0])))) { trc(TRC_SIGNAL,"SIG_DFL handler for out-of-range signal %ld\n",(LI)(int32_t)sig); how = SIGDEF_KILL; } else { how = sigdef[sig]; if (how == SIGDEF_HOLE) { trc(TRC_SIGNAL,"SIG_DFL handler for unknown signal %ld\n",(LI)(int32_t)sig); how = SIGDEF_KILL; } } switch (how) { case SIGDEF_KILL: trc(TRC_SIGNAL,"signal %ld (%s) SIG_DFL: killing process\n",(LI)(int32_t)sig,em_signame(sig,"unknown")); exit(0); break; case SIGDEF_CORE: trc(TRC_SIGNAL,"signal %ld (%s) SIG_DFL: core dump\n",(LI)(int32_t)sig,em_signame(sig,"unknown")); exit(0); break; case SIGDEF_IGNORE: trc(TRC_SIGNAL,"signal %ld (%s) SIG_DFL: ignore\n",(LI)(int32_t)sig,em_signame(sig,"unknown")); return; break; case SIGDEF_STOP: trc(TRC_SIGNAL,"signal %ld (%s) SIG_DFL: stop\n",(LI)(int32_t)sig,em_signame(sig,"unknown")); exit(0); break; default: panic("SIG_DFL finds unknown sigdef %d",how); break; } } window_flush(); spill_window(s.cwp); fp = onstack ? s.sigstack_base + s.sigstack_size : s.regs[R_SP]; fp -= 64; fp &= ~(uint32_t)7; // Return the stacked stuff's address if desired. 
if (context) *context = fp; trc(TRC_SIGNAL,"setting context = %08lx\n",(ULI)fp); // Build the signal frame. mem_set_4(fp,sig); // sf.sf_signo; mem_set_4(fp+4,0); // sf.sf_code mem_set_4(fp+8,0); // sf.sf_scp mem_set_4(fp+12,0); // sf.sf_addr // Build the context to be used by sigreturn. mem_set_4(fp+16,0); // sf.sf_sc.sc_onstack (on-stack not implemented) mem_set_4(fp+20,s.sigmask>>1); // sf.sf_sc.__sc_mask13 mem_set_4(fp+24,s.regs[R_SP]); // sf.sf_sc.sc_sp mem_set_4(fp+28,s.pc); // sf.sf_sc.sc_pc mem_set_4(fp+32,s.npc); // sf.sf_sc.sc_npc mem_set_4(fp+36,build_psr()); // sf.sf_sc.sc_psr mem_set_4(fp+40,s.regs[R_G1]); // sf.sf_sc.sc_g1 mem_set_4(fp+44,s.regs[R_O0]); // sf.sf_sc.sc_o0 mem_set_4(fp+48,s.sigmask>>1); // sf.sf_sc.sc_mask, first word mem_set_4(fp+52,s.sigmask>>33); // sf.sf_sc.sc_mask, second word mem_set_4(fp+56,0); // sf.sf_sc.sc_mask, third word mem_set_4(fp+60,0); // sf.sf_sc.sc_mask, fourth word s.regs[R_G1] = sh->handler; s.pc = sigtramp; s.npc = sigtramp + 4; s.regs[R_SP] = fp - 64; s.sigmask = ( s.sigmask | ( ( sh->mask.bits[0] | (((uint64_t)sh->mask.bits[1]) << 32) ) << 1 ) ) & SIG_CANBLOCK; // any other flags? if (sh->flags & em_SA_RESETHAND) { sh->handler = em_SIG_DFL; bzero(&sh->mask.bits[0],sizeof(sh->mask.bits)); sh->flags = 0; } // if (onstack) record that we're on the signal stack } /* * Deliver pending signals, if any. If any pending signals remain * undelivered, leave anysigpend set true; otherwise, false. The * return value is a count of signals delivered. * * We deliver all signals with code set to 0. Signal delivery with * code set to anything else occurs only for signals reflecting * hardware traps, which we currently never generate. If we make * segfaults and illegal instructions and the like into signals, this * will need to change. * * If we have just interrupted a restartable syscall, then we need to * do extra stuff. 
The machine state has been backed up to restart * the syscall; if any of the delivered signals are not marked * SA_RESTART, we need to re-advance it and arrange for an EINTR * return. */ static int deliver_signals(uint32_t *firstcontext) { int sig; int any; int n; uint32_t fc; uint32_t *fcp; int allrestart; any = 0; n = 0; anysigpend = 0; fc = 0; fcp = &fc; allrestart = 1; for (sig=em__NSIG-1;sig>=1;sig--) { sig_log("deliver_signals sig ~d pend ~d\n",sig,(int)s.sigpend[sig]); if (s.sigpend[sig]) { if (s.sigh[sig].handler == em_SIG_IGN) { trc(TRC_SIGNAL,"deliver_signals sees %d (%s) pending: ignored\n",sig,em_signame(sig,"unknown")); s.sigpend[sig] = 0; } else if (! ((s.sigmask >> sig) & 1U)) { trc(TRC_SIGNAL,"deliver_signals sees %d (%s) pending: deliverable\n",sig,em_signame(sig,"unknown")); s.sigpend[sig] = 0; if (! (s.sigh[sig].flags & em_SA_RESTART)) allrestart = 0; deliver_signal(sig,fcp); if (fcp && fc) fcp = 0; n ++; } else { trc(TRC_SIGNAL,"deliver_signals sees %d (%s) pending: blocked\n",sig,em_signame(sig,"unknown")); any = 1; } } } if (firstcontext) *firstcontext = fc; if ((s.flags & SF_SIGRESTART) && !allrestart) { s.flags &= ~SF_SIGRESTART; s.pc = s.npc; s.npc += 4; // Must match the error path in dosyscall() trc(TRC_SYSCALL,"returning EINTR after all\n"); mem_set_4(fc+36,mem_get_4(fc+36)|(em_PSR_CC_C<0;i--) fprintf(f," .."); } for (;len>0;len--,ptr++) { switch (ptr & 15) { case 0: fprintf(f,"%08lx:",(ULI)ptr); break; case 8: fprintf(f," "); break; } fprintf(f," %02x",(*getb)(ptr)); if ((ptr & 15) == 15) fprintf(f,"\n"); } if (ptr & 15) fprintf(f,"\n"); } /* * Trace I/O data, pulling the data from emulated memory. * * XXX Possible bug lurking: what if mem_get_1 errors and throws out? * If nothing else, we'll leave nomemacc incremented. 
*/ static void trace_io_data_em(const char *what, uint32_t ptr, int len) { uint8_t getb(uint32_t a) { return(mem_get_1(a)); } trace_io_data(what,ptr,(len>io_trace_size)?io_trace_size:len,&getb); } /* * Trace I/O data, pulling the data from emulator memory. */ static void trace_io_data_os(const char *what, uint32_t ptr, const void *data, int len) { uint8_t getb(uint32_t a) { return(((const unsigned char *)data)[a-ptr]); } trace_io_data(what,ptr,(len>io_trace_size)?io_trace_size:len,&getb); } /* * Given a uint32_t, return the number of set bits in it. */ static unsigned int bitcount32(uint32_t v) { v = (v & 0x55555555) + ((v >> 1) & 0x55555555); v = (v & 0x33333333) + ((v >> 2) & 0x33333333); v = (v & 0x0f0f0f0f) + ((v >> 4) & 0x0f0f0f0f); v = (v & 0x00ff00ff) + ((v >> 8) & 0x00ff00ff); return((v&0x0000ffff)+((v>>16)&0x0000ffff)); } /* * Emulate an integer sysctl() value. valp and lenp are the * emulated-memory locations where we should write the value and * length. val is the value. rv is the SCRV for the syscall. */ static int em_sc_int(uint32_t valp, uint32_t lenp, uint32_t val, SCRV *rv) { uint32_t len; int i; if (valp == 0) { mem_set_4(lenp,4); trace_io_data_em("length",lenp,4); SYSCALL_SETRET(4); return(1); } len = mem_get_4(lenp); trace_io_data_em("input length",lenp,4); if (len < 4) { for (i=0;i>24); val <<= 8; } trace_io_data_em("data",valp,len); SYSCALL_SETERR(em_ENOMEM); } else { for (i=0;i<4;i++) { mem_set_1(valp+i,val>>24); val <<= 8; } trace_io_data_em("data",valp,4); mem_set_4(lenp,4); trace_io_data_em("output length",lenp,4); SYSCALL_SETRET(4); } return(1); } /* * Emulate a string sysctl() value. valp and lenp are the * emulated-memory locations where we should write the value and * length. str and strlen are the value string. rv is the SCRV for * the syscall. 
*/ static int em_sc_string(uint32_t valp, uint32_t lenp, const void *str, int strlen, SCRV *rv) { uint32_t len; if (valp == 0) { mem_set_4(lenp,strlen); trace_io_data_em("length",lenp,4); SYSCALL_SETRET(strlen); return(1); } len = mem_get_4(lenp); trace_io_data_em("input length",lenp,4); if (len < strlen) { copyout(str,valp,len,"sysctl string",0,0); trace_io_data_em("data",valp,len); SYSCALL_SETERR(em_ENOMEM); } else { copyout(str,valp,strlen,"sysctl string",0,0); trace_io_data_em("data",valp,strlen); mem_set_4(lenp,strlen); trace_io_data_em("output length",lenp,4); SYSCALL_SETRET(strlen); } return(1); } /* * Handle a hw.* sysctl. mib/miblen are the MIB (already copied out of * emulated memory), valp and lenp are the places (addresses in * emulated memory) to write the value and length, and rv is the SCRV * for the syscall. */ static int em_sysctl_hw(uint32_t *mib, int miblen, uint32_t valp, uint32_t lenp, SCRV *rv) { if (miblen != 1) return(0); switch (mib[0]) { case em_HW_MACHINE: return(em_sc_string(valp,lenp,"sparc",5,rv)); break; case em_HW_PAGESIZE: return(em_sc_int(valp,lenp,PAGE_SIZE,rv)); break; case em_HW_MACHINE_ARCH: return(em_sc_string(valp,lenp,"sparc",5,rv)); break; } return(0); } /* * Implementation of vm.loadavg sysctl. 
*/ static int em_sc_vm_loadavg(uint32_t valp, uint32_t lenp, SCRV *rv) { struct loadavg osla; int osmib[2]; size_t osvalsize; uint32_t len; uint32_t vals[4]; int i; osmib[0] = CTL_VM; osmib[1] = VM_LOADAVG; osvalsize = sizeof(osla); if (sysctl(&osmib[0],2,&osla,&osvalsize,0,0) < 0) { SYSCALL_SETERR(os2em_errno(errno)); return(1); } if (valp == 0) { mem_set_4(lenp,16); trace_io_data_em("length",lenp,4); SYSCALL_SETRET(16); return(1); } vals[0] = osla.ldavg[0]; vals[1] = osla.ldavg[1]; vals[2] = osla.ldavg[2]; vals[3] = osla.fscale; len = mem_get_4(lenp); trace_io_data_em("input length",lenp,4); if (len < 16) { for (i=0;i>2]>>(8*(3-(i&3)))); } trace_io_data_em("data",valp,len); SYSCALL_SETERR(em_ENOMEM); } else { mem_set_4(valp,vals[0]); mem_set_4(valp+4,vals[1]); mem_set_4(valp+16,vals[2]); mem_set_4(valp+24,vals[3]); trace_io_data_em("data",valp,16); mem_set_4(lenp,16); trace_io_data_em("output length",lenp,4); SYSCALL_SETRET(16); } return(1); } /* * Handle a vm.* sysctl. mib/miblen are the MIB (already copied out of * emulated memory), valp and lenp are the places (addresses in * emulated memory) to write the value and length, and rv is the SCRV * for the syscall. */ static int em_sysctl_vm(uint32_t *mib, int miblen, uint32_t valp, uint32_t lenp, SCRV *rv) { if (miblen != 1) return(0); switch (mib[0]) { case em_VM_LOADAVG: return(em_sc_vm_loadavg(valp,lenp,rv)); break; } return(0); } /* * Handle a kern.* sysctl. mib/miblen are the MIB (already copied out * of emulated memory), valp and lenp are the places (addresses in * emulated memory) to write the value and length, and rv is the SCRV * for the syscall. * * Arguably KERN_NGROUPS should return the 1.4T value, but if the * underlying OS doesn't support that many.... 
 */
static int em_sysctl_kern(uint32_t *mib, int miblen, uint32_t valp, uint32_t lenp, SCRV *rv)
{
 if (miblen != 1) return(0);
 switch (mib[0])
  { case em_KERN_OSTYPE:
       // Lengths include the terminating NUL ("NetBSD" = 6+1).
       return(em_sc_string(valp,lenp,"NetBSD",7,rv));
       break;
    case em_KERN_OSRELEASE:
       return(em_sc_string(valp,lenp,"1.4T",5,rv));
       break;
    case em_KERN_VERSION:
       return(em_sc_string(valp,lenp,"NetBSD 1.4T (GENERIC) #0: Mon Aug 13 23:49:34 EDT 2018\n mouse@Sparkle.Rodents-Montreal.ORG:/home/mouse/kbuild/GENERIC\n",122,rv));
       break;
    case em_KERN_HOSTNAME:
       // Append "-SPARC" to make it a little easier to tell whether
       // I'm typing to the emulated host or the underlying host.
       { static char *hn;
	 static int hnl = 0;
	 int l;
	 char *dot;
	 // Grow the (static, cached) buffer until the hostname plus
	 // "-SPARC" and the NUL fit.
	 // NOTE(review): malloc() results are not checked here.
	 if (hnl < 1)
	  { hnl = 8;
	    hn = malloc(hnl);
	  }
	 while (1)
	  { gethostname(hn,hnl-1);
	    hn[hnl-1] = '\0';
	    l = strlen(hn);
	    if (l < hnl-6-1) break;
	    hnl <<= 1;
	    free(hn);
	    hn = malloc(hnl);
	  }
	 // Insert "-SPARC" before the first dot, or append it if the
	 // name has no dot.
	 dot = index(hn,'.');
	 if (dot)
	  { bcopy(dot,dot+6,l+1-(dot-hn));
	    bcopy("-SPARC",dot,6);
	  }
	 else
	  { bcopy("-SPARC",hn+l,6+1);
	  }
	 return(em_sc_string(valp,lenp,hn,l+6+1,rv));
       }
       break;
    case em_KERN_NGROUPS:
       { static int ngroups = -1;
	 // Cache the underlying OS's value on first use.
	 if (ngroups < 0)
	  { int osmib[2];
	    size_t osvalsize;
	    osmib[0] = CTL_KERN;
	    osmib[1] = KERN_NGROUPS;
	    osvalsize = sizeof(ngroups);
	    if (sysctl(&osmib[0],2,&ngroups,&osvalsize,0,0) < 0)
	     { SYSCALL_SETERR(os2em_errno(errno));
	       return(1);
	     }
	  }
	 return(em_sc_int(valp,lenp,ngroups,rv));
       }
       break;
    case em_KERN_IOV_MAX:
       { static int iovmax = -1;
	 // Cache the underlying OS's value on first use.
	 if (iovmax < 0)
	  { int osmib[2];
	    size_t osvalsize;
	    osmib[0] = CTL_KERN;
	    osmib[1] = KERN_IOV_MAX;
	    osvalsize = sizeof(iovmax);
	    if (sysctl(&osmib[0],2,&iovmax,&osvalsize,0,0) < 0)
	     { SYSCALL_SETERR(os2em_errno(errno));
	       return(1);
	     }
	  }
	 return(em_sc_int(valp,lenp,iovmax,rv));
       }
       break;
  }
 return(0);
}
/*
 * Store an underlying-OS struct rusage (at ru) into an emulated struct
 * rusage (at buf).
 */
static void store_rusage(uint32_t buf, const struct rusage *ru)
{
 // Emulated layout: 64-bit tv_sec, 32-bit tv_usec, 32 bits padding,
 // for each of utime/stime; then 32-bit counters.
 mem_set_8(buf,ru->ru_utime.tv_sec);
 mem_set_4(buf+8,ru->ru_utime.tv_usec);
 mem_set_4(buf+12,0); // struct padding
 mem_set_8(buf+16,ru->ru_stime.tv_sec);
 mem_set_4(buf+24,ru->ru_stime.tv_usec);
 mem_set_4(buf+28,0); // struct padding
 mem_set_4(buf+32,ru->ru_maxrss);
 mem_set_4(buf+36,ru->ru_ixrss);
 mem_set_4(buf+40,ru->ru_idrss);
 mem_set_4(buf+44,ru->ru_isrss);
 mem_set_4(buf+48,ru->ru_minflt);
 mem_set_4(buf+52,ru->ru_majflt);
 mem_set_4(buf+56,ru->ru_nswap);
 mem_set_4(buf+60,ru->ru_inblock);
 mem_set_4(buf+64,ru->ru_oublock);
 mem_set_4(buf+68,ru->ru_msgsnd);
 mem_set_4(buf+72,ru->ru_msgrcv);
 mem_set_4(buf+76,ru->ru_nsignals);
 mem_set_4(buf+80,ru->ru_nvcsw);
 mem_set_4(buf+84,ru->ru_nivcsw);
}
/*
 * Store an underlying-OS struct stat (at stb) into an emulated struct
 * stat (at stp).
 */
static void store_stat(uint32_t stp, const struct stat *stb)
{
 mem_set_4(stp,stb->st_dev);
 mem_set_4(stp+4,stb->st_ino);
 mem_set_4(stp+8,stb->st_mode);
 mem_set_4(stp+12,stb->st_nlink);
 mem_set_4(stp+16,stb->st_uid);
 mem_set_4(stp+20,stb->st_gid);
 mem_set_4(stp+24,stb->st_rdev);
 mem_set_4(stp+28,0); // padding
 mem_set_8(stp+32,stb->st_atimespec.tv_sec);
 mem_set_4(stp+40,stb->st_atimespec.tv_nsec);
 mem_set_4(stp+44,0); // padding
 mem_set_8(stp+48,stb->st_mtimespec.tv_sec);
 mem_set_4(stp+56,stb->st_mtimespec.tv_nsec);
 mem_set_4(stp+60,0); // padding
 mem_set_8(stp+64,stb->st_ctimespec.tv_sec);
 mem_set_4(stp+72,stb->st_ctimespec.tv_nsec);
 mem_set_4(stp+76,0); // padding
 mem_set_8(stp+80,stb->st_size);
 mem_set_8(stp+88,stb->st_blocks);
 mem_set_4(stp+96,stb->st_blksize);
 mem_set_4(stp+100,0); // XXX should be st_flags
 mem_set_4(stp+104,stb->st_gen);
 mem_set_4(stp+108,0); // padding
 mem_set_8(stp+112,0); // st_qspare[0]
 mem_set_8(stp+120,0); // st_qspare[1]
}
#if defined(STATFS_VIA_STATVFS) || defined(GETFSSTAT_VIA_GETVFSSTAT)
/*
 * Store an underlying-OS struct statvfs (at sf) into an emulated
 * struct statfs (at buf).
what and prefail are passed on to * copy_or_nulpad for the strings. */ static void store_statvfs_as_statfs(uint32_t buf, const struct statvfs *sf, const char *what, void (*prefail)(void *), void *pfarg) { uint32_t emflags; emflags = ((sf->f_flag & MNT_RDONLY) ? em_MNT_RDONLY : 0) | ((sf->f_flag & MNT_SYNCHRONOUS) ? em_MNT_SYNCHRONOUS : 0) | ((sf->f_flag & MNT_NOEXEC) ? em_MNT_NOEXEC : 0) | ((sf->f_flag & MNT_NOSUID) ? em_MNT_NOSUID : 0) | ((sf->f_flag & MNT_NODEV) ? em_MNT_NODEV : 0) | ((sf->f_flag & MNT_UNION) ? em_MNT_UNION : 0) | ((sf->f_flag & MNT_ASYNC) ? em_MNT_ASYNC : 0) | ((sf->f_flag & MNT_EXRDONLY) ? em_MNT_EXRDONLY : 0) | ((sf->f_flag & MNT_EXPORTED) ? em_MNT_EXPORTED : 0) | ((sf->f_flag & MNT_DEFEXPORTED) ? em_MNT_DEFEXPORTED : 0) | ((sf->f_flag & MNT_EXPORTANON) ? em_MNT_EXPORTANON : 0) | ((sf->f_flag & MNT_EXKERB) ? em_MNT_EXKERB : 0) | ((sf->f_flag & MNT_LOCAL) ? em_MNT_LOCAL : 0) | ((sf->f_flag & MNT_QUOTA) ? em_MNT_QUOTA : 0) | ((sf->f_flag & MNT_ROOTFS) ? em_MNT_ROOTFS : 0) | ((sf->f_flag & MNT_NOCOREDUMP) ? em_MNT_NOCOREDUMP : 0) | ((sf->f_flag & MNT_NOATIME) ? em_MNT_NOATIME : 0) | ((sf->f_flag & MNT_EXNORESPORT) ? em_MNT_EXNORESPORT : 0) | ((sf->f_flag & MNT_EXPUBLIC) ? em_MNT_EXPUBLIC : 0) | ((sf->f_flag & MNT_SYMPERM) ? em_MNT_SYMPERM : 0) | ((sf->f_flag & MNT_NODEVMTIME) ? em_MNT_NODEVMTIME : 0) | ((sf->f_flag & MNT_SOFTDEP) ? 
em_MNT_SOFTDEP : 0); mem_set_2(buf,0); mem_set_2(buf+2,emflags&0xffff); mem_set_4(buf+4,sf->f_bsize); mem_set_4(buf+8,sf->f_iosize); mem_set_4(buf+12,sf->f_blocks); mem_set_4(buf+16,sf->f_bfree); mem_set_4(buf+20,sf->f_bavail); mem_set_4(buf+24,sf->f_files); mem_set_4(buf+28,sf->f_ffree); mem_set_4(buf+32,sf->f_fsidx.__fsid_val[0]); mem_set_4(buf+36,sf->f_fsidx.__fsid_val[1]); mem_set_4(buf+40,sf->f_owner); mem_set_4(buf+44,emflags); mem_set_4(buf+48,sf->f_syncwrites); mem_set_4(buf+52,sf->f_asyncwrites); mem_set_4(buf+56,0); copy_or_nulpad(&sf->f_fstypename[0],sizeof(sf->f_fstypename),buf+60,em_MFSNAMELEN,what,prefail,pfarg); copy_or_nulpad(&sf->f_mntonname[0],sizeof(sf->f_mntonname),buf+76,em_MNAMELEN,what,prefail,pfarg); copy_or_nulpad(&sf->f_mntfromname[0],sizeof(sf->f_mntfromname),buf+166,em_MNAMELEN,what,prefail,pfarg); } #endif #if !defined(STATFS_VIA_STATVFS) || !defined(GETFSSTAT_VIA_GETVFSSTAT) /* * Store an underlying-OS struct statfs (at sf) into an emulated struct * statfs (at buf). what and prefail are passed on to copy_or_nulpad * for the strings. */ static void store_statfs(uint32_t buf, const struct statfs *sf, const char *what, void (*prefail)(void)) { uint32_t emflags; emflags = ((sf->f_flags & MNT_RDONLY) ? em_MNT_RDONLY : 0) | ((sf->f_flags & MNT_SYNCHRONOUS) ? em_MNT_SYNCHRONOUS : 0) | ((sf->f_flags & MNT_NOEXEC) ? em_MNT_NOEXEC : 0) | ((sf->f_flags & MNT_NOSUID) ? em_MNT_NOSUID : 0) | ((sf->f_flags & MNT_NODEV) ? em_MNT_NODEV : 0) | ((sf->f_flags & MNT_UNION) ? em_MNT_UNION : 0) | ((sf->f_flags & MNT_ASYNC) ? em_MNT_ASYNC : 0) | ((sf->f_flags & MNT_EXRDONLY) ? em_MNT_EXRDONLY : 0) | ((sf->f_flags & MNT_EXPORTED) ? em_MNT_EXPORTED : 0) | ((sf->f_flags & MNT_DEFEXPORTED) ? em_MNT_DEFEXPORTED : 0) | ((sf->f_flags & MNT_EXPORTANON) ? em_MNT_EXPORTANON : 0) | ((sf->f_flags & MNT_EXKERB) ? em_MNT_EXKERB : 0) | ((sf->f_flags & MNT_LOCAL) ? em_MNT_LOCAL : 0) | ((sf->f_flags & MNT_QUOTA) ? em_MNT_QUOTA : 0) | ((sf->f_flags & MNT_ROOTFS) ? 
em_MNT_ROOTFS : 0) | ((sf->f_flags & MNT_NOCOREDUMP) ? em_MNT_NOCOREDUMP : 0) | ((sf->f_flags & MNT_NOATIME) ? em_MNT_NOATIME : 0) | ((sf->f_flags & MNT_EXNORESPORT) ? em_MNT_EXNORESPORT : 0) | ((sf->f_flags & MNT_EXPUBLIC) ? em_MNT_EXPUBLIC : 0) | ((sf->f_flags & MNT_SYMPERM) ? em_MNT_SYMPERM : 0) | ((sf->f_flags & MNT_NODEVMTIME) ? em_MNT_NODEVMTIME : 0) | ((sf->f_flags & MNT_SOFTDEP) ? em_MNT_SOFTDEP : 0); mem_set_2(buf,0); mem_set_2(buf+2,emflags&0xffff); mem_set_4(buf+4,sf->f_bsize); mem_set_4(buf+8,sf->f_iosize); mem_set_4(buf+12,sf->f_blocks); mem_set_4(buf+16,sf->f_bfree); mem_set_4(buf+20,sf->f_bavail); mem_set_4(buf+24,sf->f_files); mem_set_4(buf+28,sf->f_ffree); mem_set_4(buf+32,sf->f_fsid.val[0]); mem_set_4(buf+36,sf->f_fsid.val[1]); mem_set_4(buf+40,sf->f_owner); mem_set_4(buf+44,emflags); mem_set_4(buf+48,sf->f_syncwrites); mem_set_4(buf+52,sf->f_asyncwrites); mem_set_4(buf+56,0); copy_or_nulpad(&sf->f_fstypename[0],sizeof(sf->f_fstypename),buf+60,em_MFSNAMELEN,what,prefail); copy_or_nulpad(&sf->f_mntonname[0],sizeof(sf->f_mntonname),buf+76,em_MNAMELEN,what,prefail); copy_or_nulpad(&sf->f_mntfromname[0],sizeof(sf->f_mntfromname),buf+166,em_MNAMELEN,what,prefail); } #endif /* * Store an underlying-OS struct rlimit (at rl) into an emulated struct * rlimit (at buf). */ static void store_rlimit(uint32_t buf, const struct rlimit *rl) { uint64_t v; v = (rl->rlim_cur == RLIM_INFINITY) ? em_RLIM_INFINITY : rl->rlim_cur; mem_set_8(buf,v); v = (rl->rlim_max == RLIM_INFINITY) ? em_RLIM_INFINITY : rl->rlim_max; mem_set_8(buf+8,v); } /* * Load an underlying-OS struct rlimit (at rl) from an emulated struct * rlimit (at buf). */ static void load_rlimit(uint32_t buf, struct rlimit *rl) { uint64_t v; v = mem_get_8(buf); rl->rlim_cur = (v == em_RLIM_INFINITY) ? RLIM_INFINITY : v; v = mem_get_8(buf+8); rl->rlim_max = (v == em_RLIM_INFINITY) ? RLIM_INFINITY : v; } /* * Handle common code for file descriptor syscall arguments. 
arg is
 * the argument value, as returned by (eg) scarg().  prot is either 0,
 * meaning that no protection check should be done, or P_R or P_W,
 * meaning that the descriptor has to be readable or writable.  call
 * is the text name of the syscall, for potential error messages.
 *
 * Returns the FD, or nil (the caller then fails with EBADF) after
 * tracing why.
 */
static FD *descriptor_arg(uint32_t arg, unsigned int prot, const char *call)
{
 FD *fd;

 // Descriptor number out of range for the emulated fd table.
 if (arg >= nfds)
  { trc(TRC_SYSCALL,"%s fd %lu out of range -> EBADF\n",call,(ULI)arg);
    return(0);
  }
 fd = fds[arg];
 // In range but not open.
 if (! fd)
  { trc(TRC_SYSCALL,"%s fd %lu not open -> EBADF\n",call,(ULI)arg);
    return(0);
  }
 // Optional access check (prot is 0, P_R, or P_W; anything else panics).
 if (prot && !(fd->prot & prot))
  { const char *pkind;
    switch (prot)
     { case P_R:
	  pkind = "readable";
	  break;
       case P_W:
	  pkind = "writable";
	  break;
       default:
	  panic("bad prot to descriptor_arg");
	  break;
     }
    trc(TRC_SYSCALL,"%s fd %lu not %s -> EBADF\n",call,(ULI)arg,pkind);
    return(0);
  }
 return(fd);
}
/*
 * Do a forkwait-style loop.
 *
 * We could leave the body of the loop empty, but that would burn CPU
 * unnecessarily.  1/10 second per iteration is long enough that we're
 * not hogging CPU but short enough that humans don't get impatient.
 *
 * v is never changed here; the loop is exited by something external
 * (eg, a debugger attaching and storing 0 into v) - hence the
 * volatile sig_atomic_t.
 */
static void do_forkwait(void)
{
 volatile sig_atomic_t v;

 v = 1;
 while (v) poll(0,0,100);
}
/*
 * Do post-exec() signal-handling stuff.  This mostly means resetting
 * caught signals to SIG_DFL (SIG_IGN and SIG_DFL dispositions survive
 * exec and are left alone).
 */
static void sig_postexec(void)
{
 int i;

 for (i=em__NSIG-1;i>=1;i--)
  { switch (s.sigh[i].handler)
     { case em_SIG_IGN:
       case em_SIG_DFL:
	  break;
       default:
	  // Caught signal: back to default, clear mask/flags.
	  trc(TRC_SIGNAL,"postexec resetting signal %d (%s) to SIG_DFL\n",i,em_signame(i,"unknown"));
	  s.sigh[i].handler = em_SIG_DFL;
	  bzero(&s.sigh[i].mask,sizeof(s.sigh[i].mask));
	  s.sigh[i].flags = 0;
	  // If the default disposition is to ignore, stop catching it
	  // ourselves and drop any pending instance.
	  if (sigdef[i] == SIGDEF_IGNORE)
	   { s.ignsigs |= 1ULL << i;
	     set_our_catcher(em2os_signal(i),SIG_DFL);
	     s.sigpend[i] = 0;
	   }
	  break;
     }
    // NOTE(review): these sigstack resets are inside the for loop, so
    // they run once per signal; they look as though they were meant to
    // run once, after the loop (misplaced brace?).  Harmless as-is -
    // confirm intent before moving.
    s.sigstack_enabled = 0;
    s.sigstack_base = 0;
    s.sigstack_size = 0;
  }
}
/*
 * Print an emulated-OS signal mask.
Note that NetBSD/sparc 1.4T * signal masks are shifted by one bit, so that the low bit * corresponds to signal 1, not (nonexistent) signal 0. */ static void print_em_sig_mask(FILE *f, const EMSIGSET *mask) { const char *pref; int j; pref = ""; for (j=0;jbits[j>>5] >> (j & 31)) & 1) { const char *n; n = em_signame(j+1,0); if (n) fprintf(f,"%s%s",pref,n); else fprintf(f,"%s?%d",pref,j+1); pref = "|"; } } if (! pref[0]) fprintf(f,"0"); } /* * Fetch a struct sockaddr out of emulator memory, converting it to an * emulator-OS struct sockaddr. * * Arguments are the pointer and length values, usually syscall * arguments (eg, the second and third arguments to connect(2)). * * Return value is a struct containing: * * err * If this is zero, it worked, and the rest of the fields * are valid. If this is nonzero, it is an emulator * errno, and the rest of the fields are garbage. * emlen * The sockaddr length field from the emulated struct. * emfam * The sockaddr family field from the emulated struct. * sa * Pointer (in emulator memory) to malloc()ed space * holding the emulator version of the struct sockaddr. * salen * Size of the emulator version of the struct sockaddr. 
*/ typedef struct { int err; uint8_t emlen; uint8_t emfam; void *sa; int salen; } GETSA; static GETSA get_sockaddr(uint32_t addr, uint32_t len) { GETSA r; int i; if ((len < 2) || (len > 255)) return((GETSA){.err=em_EADDRNOTAVAIL}); r.emlen = mem_get_1(addr); r.emfam = mem_get_1(addr+1); switch (r.emfam) { case em_AF_INET: { struct sockaddr_in *p; if (r.emlen != 16) return((GETSA){.err=em_EINVAL}); p = malloc(sizeof(*p)); bzero(p,sizeof(*p)); // XXX API botch p->sin_len = sizeof(*p); p->sin_family = AF_INET; p->sin_port = htons(mem_get_2(addr+2)); for (i=0;i<4;i++) ((unsigned char *)&p->sin_addr)[i] = mem_get_1(addr+4+i); r.sa = p; r.salen = sizeof(*p); } break; case em_AF_INET6: { struct sockaddr_in6 *p; if (r.emlen != 28) return((GETSA){.err=em_EINVAL}); p = malloc(sizeof(*p)); bzero(p,sizeof(*p)); // XXX API botch still needed for v6? p->sin6_len = sizeof(*p); p->sin6_family = AF_INET; p->sin6_port = htons(mem_get_2(addr+2)); // XXX Does sin6_flowinfo need byteswapping? p->sin6_flowinfo = mem_get_4(addr+4); for (i=0;i<16;i++) ((unsigned char *)&p->sin6_addr)[i] = mem_get_1(addr+8+i); // XXX Does sin6_scope_id need byteswapping? p->sin6_scope_id = mem_get_4(addr+24); r.sa = p; r.salen = sizeof(*p); } break; case em_AF_LOCAL: { struct sockaddr_un *p; int i; p = malloc(256+offsetof(struct sockaddr_un,sun_path)); p->sun_len = r.emlen - 2 + offsetof(struct sockaddr_un,sun_path); p->sun_family = AF_LOCAL; for (i=2;isun_path[i-2] = mem_get_1(addr+i); r.sa = p; r.salen = r.emlen - 2 + offsetof(struct sockaddr_un,sun_path); } break; default: return((GETSA){.err=em_EADDRNOTAVAIL}); break; } r.err = 0; return(r); } /* * Take an emulator-OS struct sockaddr and convert it to an emulated-OS * struct sockaddr, storing it into emulated memory. * * Arguments are the sockaddr pointer and length values from the * emulator OS (eg, as returned by getsockname) and the sockaddr * pointer and length pointer values in the emulated machine (eg, as * passed to emulated getsockname). 
The fifth argument, if non-nil, * is a pointer through which the value stored through emlenp is * written, so our caller can get that length without having to pull * it out of emulated memory (which would involve brittle assumptions * about the order of our writes in cases of overlap). * * Return value is normally zero; if it is nonzero, it is an emulator * errno describing the error. */ static uint32_t put_sockaddr(const void *ossa, int oslen, uint32_t emsa, uint32_t emlenp, uint32_t *plenp) { int i; uint32_t emlen; if (emlenp == 0) return(0); emlen = mem_get_4(emlenp); if (emlen == 0) return(0); switch (((const struct sockaddr *)ossa)->sa_family) { case AF_LOCAL: { const struct sockaddr_un *p; int pl; p = ossa; pl = ((oslen < p->sun_len) ? oslen : p->sun_len) - offsetof(struct sockaddr_un,sun_path); if (pl < 0) { printf("%s: oslen = %d < offsetof(struct sockaddr_un,sun_path) = %d\n",__func__,oslen,(int)offsetof(struct sockaddr_un,sun_path)); top(); } mem_set_1(emsa,pl+2); if (emlen > 1) { mem_set_1(emsa+1,em_AF_LOCAL); if (pl > emlen-2) pl = emlen - 2; for (i=0;isun_path[i]); emlen = pl + 2; } mem_set_4(emlenp,emlen); if (plenp) *plenp = emlen; } break; case AF_INET: { const struct sockaddr_in *p; uint8_t b[16]; if (oslen < sizeof(struct sockaddr_in)) { printf("%s: oslen = %d < sizeof(struct sockaddr_in) = %d\n",__func__,oslen,(int)sizeof(struct sockaddr_in)); top(); } p = ossa; b[0] = 16; b[1] = em_AF_INET; b[2] = ((const uint8_t *)&p->sin_port)[0]; b[3] = ((const uint8_t *)&p->sin_port)[1]; b[4] = ((const uint8_t *)&p->sin_addr)[0]; b[5] = ((const uint8_t *)&p->sin_addr)[1]; b[6] = ((const uint8_t *)&p->sin_addr)[2]; b[7] = ((const uint8_t *)&p->sin_addr)[3]; bzero(&b[8],8); i = (emlen < 16) ? 
emlen : 16; copyout(&b[0],emsa,i,"struct sockaddr_in",0,0); mem_set_4(emlenp,i); if (plenp) *plenp = i; } break; case AF_INET6: { const struct sockaddr_in6 *p; uint8_t b[28]; if (oslen < sizeof(struct sockaddr_in6)) { printf("%s: oslen = %d < sizeof(struct sockaddr_in6) = %d\n",__func__,oslen,(int)sizeof(struct sockaddr_in6)); top(); } p = ossa; b[0] = 28; b[1] = em_AF_INET6; b[2] = ((const uint8_t *)&p->sin6_port)[0]; b[3] = ((const uint8_t *)&p->sin6_port)[1]; // XXX Does sin6_flowinfo need byteswapping? b[4] = (p->sin6_flowinfo >> 24) & 0xff; b[5] = (p->sin6_flowinfo >> 16) & 0xff; b[6] = (p->sin6_flowinfo >> 8) & 0xff; b[7] = p->sin6_flowinfo & 0xff; bcopy(&p->sin6_addr,&b[8],16); // XXX Does sin6_scope_id need byteswapping? b[24] = (p->sin6_scope_id >> 24) & 0xff; b[25] = (p->sin6_scope_id >> 16) & 0xff; b[26] = (p->sin6_scope_id >> 8) & 0xff; b[27] = p->sin6_scope_id & 0xff; i = (emlen < 28) ? emlen : 28; copyout(&b[0],emsa,i,"struct sockaddr_in6",0,0); mem_set_4(emlenp,i); if (plenp) *plenp = i; } break; default: return(em_EAFNOSUPPORT); break; } return(0); } /* * Dump out the current memory map in a human-readable form. The vm * is printed in linked-list order, so it usually should be sorted * before calling this. * * XXX We should print some indication of each MEMSEG's type and maybe * how it arose. */ static void dump_vm(FILE *to) { MEMSEG *ms; fprintf(to,"base size end prot (brk at %08lx)\n",(ULI)vm.dbrk); for (ms=vm.m;ms;ms=ms->link) { fprintf(to,"%08lx %08lx %08lx %c%c%c ", (ULI)ms->base, (ULI)ms->size, (ULI)ms->end, (ms->prot&P_R)?'R':'-', (ms->prot&P_W)?'W':'-', (ms->prot&P_X)?'X':'-'); (*ms->ops->desc)(ms,to); fprintf(to,"\n"); } } /* * A malloc implementation for the emulated program that makes it * impossible for the program to corrupt the arena, because the * bookkeeping data isn't even in its address space - and with true * redzones, not just data the corruption of which is only probably * detected. 
 *
 * The emu_*() routines are called when the emulated machine executes a
 * trap instruction with the magic trap number appropriate to the
 * routine in question.  They expect to find their arguments in the %o
 * registers (ie, the trap occurs in a leaf-routine context).
 */
/*
 * Find - creating if necessary - the emulated malloc arena.
 *
 * On creation, the whole arena becomes a single MBK_FREE MALBLOCK on
 * the arena's free list.
 */
static MEMSEG *malloc_arena(void)
{
 MEMSEG *m;
 MEMSEG_PRIV_ARENA *a;
 MALBLOCK *b;

 // Already have one?  The arena is identified by its ops vector.
 for (m=vm.m;m;m=m->link) if (m->ops == &memseg_ops_arena) return(m);
 trc(TRC_ARENA,"creating malloc arena\n");
 m = memseg_new_arena();
 if (memseg_check_conflict(m->base,m->size,m))
  { printf("malloc arena conflicts with existing space somehow\n");
    dump_vm(stdout);
    top();
  }
 a = m->priv;
 // Seed the free list with one block covering the whole arena.
 b = malloc(sizeof(MALBLOCK));
 b->kind = MBK_FREE;
 b->l = 0;
 b->r = 0;
 b->base = m->base;
 b->size = m->size;
 b->end = m->end;
 a->free = b;
 return(m);
}
/*
 * Rebalance an AVL (sub)tree after an insertion or deletion.  *pp is
 * the root of the (sub)tree and pptr is the correct thing to put in
 * the u field of an element that replaces *pp.  The tree must be
 * self-consistent; that is, the only permissible violation of the AVL
 * invariants is that pp[0]->bal may be 2 or -2.
 *
 * Return value is true if the result is unbalanced (by 1, necessarily)
 * or 0 if it's balanced.
 *
 * See rebalance.txt if you're not familiar with AVL tree rebalancing.
 * The case numbers in comments below refer the cases listed there.
 */
static int arena_rebalance(MALBLOCK **pp, MALBLOCK *pptr)
{
 MALBLOCK *p;
 MALBLOCK *f;
 MALBLOCK *b;
 MALBLOCK *c;

 p = *pp;
 if (pptr != p->u) panic("pptr wrong");
 switch (p->bal)
  { case 0:
       return(0);
       break;
    case -1: case 1:
       return(1);
       break;
    case -2:
       // Left-heavy by two.
       if (p->l->bal <= 0)
	{ // case 1: single right rotation.
	  p->bal = -1 - p->l->bal;
	  p->l->bal ++;
	  *pp = p->l;
	  p->l->u = pptr;
	  f = p->l->r;
	  p->l->r = p;
	  p->u = p->l;
	  p->l = f;
	  if (f) f->u = p;
	  if (p->bal) return(1);
	}
       else
	{ // case 2: left-right double rotation.
	  f = p->l->r;
	  b = f->l;
	  c = f->r;
	  *pp = f;
	  f->u = pptr;
	  f->l = p->l;
	  f->l->u = f;
	  f->r = p;
	  p->u = f;
	  f->l->r = b;
	  if (b) b->u = f->l;
	  p->l = c;
	  if (c) c->u = p;
	  f->l->bal = (f->bal > 0) ? -1 : 0;
	  f->r->bal = (f->bal < 0) ? 1 : 0;
	  f->bal = 0;
	}
       break;
    case 2:
       // Right-heavy by two (mirror image of the above).
       if (p->r->bal >= 0)
	{ // case 3: single left rotation.
	  p->bal = 1 - p->r->bal;
	  p->r->bal --;
	  *pp = p->r;
	  p->r->u = pptr;
	  f = p->r->l;
	  p->r->l = p;
	  p->u = p->r;
	  p->r = f;
	  if (f) f->u = p;
	  if (p->bal) return(1);
	}
       else
	{ // case 4: right-left double rotation.
	  f = p->r->l;
	  b = f->r;
	  c = f->l;
	  *pp = f;
	  f->u = pptr;
	  f->r = p->r;
	  f->r->u = f;
	  f->l = p;
	  p->u = f;
	  f->r->l = b;
	  if (b) b->u = f->r;
	  p->r = c;
	  if (c) c->u = p;
	  f->r->bal = (f->bal < 0) ? 1 : 0;
	  f->l->bal = (f->bal > 0) ? -1 : 0;
	  f->bal = 0;
	}
       break;
    default:
       panic("impossible rebalance");
       break;
  }
 return(0);
}
/*
 * Insert b into the tree rooted at *pp.  u is the correct thing to put
 * into the u pointer of a MALBLOCK stored into *pp.  Return value is
 * true if the tree deepened, false if the new MALBLOCK was absorbed
 * without deepening.  This code knows that u cannot be nil unless *pp
 * also is (though the converse is not true).
 *
 * The tree is ordered by [rz1,rz2) extent; overlap means the arena
 * bookkeeping is corrupt.
 */
static int arena_insert(MALBLOCK **pp, MALBLOCK *b, MALBLOCK *u)
{
 MALBLOCK *p;

 p = *pp;
 if (! p)
  { *pp = b;
    b->u = u;
    return(1);
  }
 if (b->rz2 <= p->rz1)
  { // Entirely to the left.
    if (arena_insert(&p->l,b,p))
     { p->bal --;
       return(arena_rebalance(pp,u));
     }
  }
 else if (b->rz1 >= p->rz2)
  { // Entirely to the right.
    if (arena_insert(&p->r,b,p))
     { p->bal ++;
       return(arena_rebalance(pp,u));
     }
  }
 else
  { printf("corrupt arena: new block [%08lx..%08lx) overlaps existing block [%08lx..%08lx)\n",(ULI)b->rz1,(ULI)b->rz2,(ULI)p->rz1,(ULI)p->rz2);
    top();
  }
 return(0);
}
/*
 * Add a new MBK_LIVE MALBLOCK to the arena.
 */
static void arena_add_live(MEMSEG_PRIV_ARENA *a, MALBLOCK *b)
{
 b->bal = 0;
 b->l = 0;
 b->r = 0;
 arena_insert(&a->live,b,0);
}
/*
 * Remove an MBK_LIVE MALBLOCK from the arena.
 *
 * Standard AVL deletion: splice b out (using its in-order successor
 * when it has two children), then walk back up adjusting balance
 * factors and rebalancing.  The <"delrebal"> labeled break/while is a
 * local compiler extension (labeled loops).
 *
 * NOTE(review): locals l and r are assigned but never used.
 */
static void arena_remove_live(MEMSEG_PRIV_ARENA *a, MALBLOCK *b)
{
 MALBLOCK *p;
 MALBLOCK *l;
 MALBLOCK *r;
 MALBLOCK **pp;
 int dr;
 MALBLOCK *s;

 p = b->u;
 l = b->l;
 r = b->r;
 // pp = the pointer that points to b; dr = how p's balance changes
 // when b's subtree shallows (+1 if b is p's left child, -1 if right).
 pp = p ? (p->l == b) ? &p->l : &p->r : &a->live;
 dr = p ? (p->l == b) ? 1 : -1 : 0;
 if (! b->r)
  { if (! b->l)
     { *pp = 0;
     }
    else
     { b->l->u = p;
       *pp = b->l;
     }
  }
 else if (! b->l)
  { b->r->u = p;
    *pp = b->r;
  }
 else if (! b->r->l)
  { // Right child has no left child: it replaces b directly.
    b->r->l = b->l;
    b->l->u = b->r;
    b->r->u = p;
    *pp = b->r;
    p = b->r;
    p->bal = b->bal;
    dr = -1;
  }
 else
  { // General case: splice in b's in-order successor s.
    s = b->r;
    while (s->l) s = s->l;
    s->u->l = s->r;
    if (s->r) s->r->u = s->u;
    s->l = b->l;
    b->l->u = s;
    s->r = b->r;
    b->r->u = s;
    s->bal = b->bal;
    b = s->u;
    s->u = p;
    *pp = s;
    p = b;
    dr = 1;
  }
 if (p)
  { p->bal += dr;
    // Propagate the depth change up the tree, rebalancing as needed.
    while <"delrebal"> (1)
     { switch (p->bal)
	{ case 0:
	     // This subtree shallowed; keep propagating.
	     if (p->u)
	      { p->u->bal += (p == p->u->l) ? 1 : -1;
		p = p->u;
		continue;
	      }
	     break <"delrebal">;
	  case -1: case 1:
	     // Depth unchanged overall; done.
	     break <"delrebal">;
	  case -2: case 2:
	     s = p->u;
	     if (s)
	      { dr = s->bal;
		s->bal += (p == s->l) ? 1 : -1;
		if (arena_rebalance((p==s->l)?&s->l:&s->r,s))
		 { s->bal = dr;
		   break <"delrebal">;
		 }
		p = s;
		continue;
	      }
	     arena_rebalance(&a->live,0);
	     break <"delrebal">;
	  default:
	     panic("impossible delete balance");
	     break;
	}
     }
  }
}
/*
 * The guts of emulated malloc(), factored out because realloc() also
 * wants to call it.  Assumes size has already been checked against
 * ARENA_SIZE.
If size is zero, returns an allocation that is nothing
 * but redzones.
 *
 * Returns the new MALBLOCK, or nil if no free block is big enough.
 * The <"found"> labeled do/break is a local compiler extension.
 */
static MALBLOCK *emu_malloc_internal(MEMSEG_PRIV_ARENA *a, uint32_t size)
{
 uint32_t want;
 MALBLOCK *f;
 MALBLOCK *b;

 // Every allocation carries a redzone on each side.
 want = size + (2 * REDZONE);
 // First-fit search of the free list; the labeled break skips the
 // failure return below.
 do <"found">
  { for (f=a->free;f;f=f->r) if (f->size >= want) break <"found">;
    trc(TRC_ARENA,"no free block is large enough, failing\n");
    return(0);
  } while (0);
 if (f->size < want + REDZONE + ALLOC_GRAIN + REDZONE)
  { // Remainder would be too small to be useful: take the whole free
    // block, unlinking it from the (doubly-linked) free list and
    // reusing its MALBLOCK.
    trc(TRC_ARENA,"using whole free block: %08lx at %08lx\n",(ULI)f->size,(ULI)f->base);
    if (f->r) f->r->l = f->l;
    if (f->l) f->l->r = f->r; else a->free = f->r;
    b = f;
  }
 else
  { // Split: carve the allocation off the front of the free block.
    trc(TRC_ARENA,"found free block: %08lx at %08lx\n",(ULI)f->size,(ULI)f->base);
    b = malloc(sizeof(MALBLOCK));
  }
 b->kind = MBK_LIVE;
 b->rz1 = f->base;
 b->base = b->rz1 + REDZONE;
 b->size = size;
 b->end = b->base + size;
 // rz2 is rounded up so the block consumes a whole ALLOC_GRAIN unit.
 b->rz2 = (b->end + REDZONE + ALLOC_GRAIN - 1) & ~(uint32_t)(ALLOC_GRAIN-1);
 if (f != b)
  { f->size -= b->rz2 - b->rz1;
    f->base = b->rz2;
  }
 arena_add_live(a,b);
 trc(TRC_ARENA,"returning %08lx (internal %p)\n",(ULI)b->base,(void *)b);
 return(b);
}
/*
 * The guts of emulated free(), factored out because realloc() also
 * wants to call it.  Freed blocks go onto the "old" list rather than
 * back to the free list, so stale pointers can be recognized.
 */
static void emu_free_internal(MEMSEG_PRIV_ARENA *a, MALBLOCK *b)
{
 arena_remove_live(a,b);
 b->kind = MBK_OLD;
 b->l = 0;
 b->r = a->old;
 if (b->r) b->r->l = b;
 a->old = b;
}
/*
 * Emulated malloc().  Input size is in %o0, return value replaces it.
 */
#if ALLOC_GRAIN & (ALLOC_GRAIN-1)
#error "emu_malloc code assumes ALLOC_GRAIN is a power of two"
#endif
static void emu_malloc(void)
{
 uint32_t size;
 MALBLOCK *b;

 size = s.regs[R_O0];
 trc(TRC_ARENA,"malloc(%lu)\n",(ULI)size);
 // Reject sizes that could not fit even in an empty arena (also
 // protects the want computation in emu_malloc_internal from
 // overflowing).
 if (size > ARENA_SIZE-(2*REDZONE))
  { trc(TRC_ARENA,"huge allocation, failing\n");
    s.regs[R_O0] = 0;
    return;
  }
 b = emu_malloc_internal(malloc_arena()->priv,size);
 s.regs[R_O0] = b ? b->base : 0;
}
/*
 * Emulated free().  Input block pointer is in %o0.
*/ static void emu_free(void) { MEMSEG *arena; MEMSEG_PRIV_ARENA *a; uint32_t eb; MALBLOCK *b; eb = s.regs[R_O0]; trc(TRC_ARENA,"free(%08lx)\n",(ULI)eb); if (! eb) return; arena = malloc_arena(); if ((eb < arena->base) || (eb >= arena->end)) { trc(TRC_ARENA,"outside arena\n"); printf("wild free(%08lx)\n",(ULI)eb); return; } a = arena->priv; b = arena_find_live(a,eb); if (! b) { trc(TRC_ARENA,"no block found\n"); printf("unfound free(%08lx)\n",(ULI)eb); return; } trc(TRC_ARENA,"found, internal %p\n",(void *)b); emu_free_internal(a,b); } /* * Emulated realloc(). Input block pointer is in %o0, new size in %o1. */ static void emu_realloc(void) { MEMSEG *arena; MEMSEG_PRIV_ARENA *a; uint32_t blk; uint32_t siz; MALBLOCK *b; MALBLOCK *n; uint32_t c; blk = s.regs[R_O0]; siz = s.regs[R_O1]; trc(TRC_ARENA,"realloc(%08lx,%08lx)\n",(ULI)blk,(ULI)siz); arena = malloc_arena(); a = arena->priv; if (blk && ((blk < arena->base) || (blk >= arena->end))) { trc(TRC_ARENA,"outside arena\n"); printf("wild realloc(%08lx)\n",(ULI)blk); return; } if (blk) { b = arena_find_live(a,blk); if (! b) { trc(TRC_ARENA,"no block found\n"); printf("unfound realloc(%08lx)\n",(ULI)blk); s.regs[R_O0] = 0; return; } } else { b = 0; } n = emu_malloc_internal(a,siz); if (! b) { s.regs[R_O0] = 0; return; } if (b) { c = (siz < b->size) ? siz : b->size; if (c > 0) bcopy(&arena->data[b->base-arena->base],&arena->data[n->base-arena->base],c); emu_free_internal(a,b); } s.regs[R_O0] = n->base; } /* * Emulated calloc(). Input numbers are in %o0 and %o1. 
 */
static void emu_calloc(void)
{
 MEMSEG *arena;
 uint32_t sz1;
 uint32_t sz2;
 uint64_t size;
 MALBLOCK *b;

 sz1 = s.regs[R_O0];
 sz2 = s.regs[R_O1];
 trc(TRC_ARENA,"calloc(%08lx,%08lx)\n",(ULI)sz1,(ULI)sz2);
 // 64-bit product so the multiply cannot overflow; the size check
 // below then rejects anything that can't fit.
 size = sz1 * (uint64_t)sz2;
 if (size > ARENA_SIZE-(2*REDZONE))
  { trc(TRC_ARENA,"huge allocation, failing\n");
    s.regs[R_O0] = 0;
    return;
  }
 arena = malloc_arena();
 b = emu_malloc_internal(arena->priv,size);
 // calloc zeroes the allocation; malloc doesn't.
 if (b) bzero(&arena->data[b->base-arena->base],size);
 s.regs[R_O0] = b ? b->base : 0;
}
/*
 * Native exec: used to run a program in the underlying OS.  Most
 * useful for programs like nc or copytolog for which the
 * functionality is what matters and which OS they run under is more
 * or less irrelevant.  (Of course, doing this reduces the ability to
 * test the emulator.  That's inevitable.)
 *
 * Unlike execve(), this returns an errno on failure.  (On success, of
 * course, it doesn't return at all.)
 *
 * This is simpler than sc_execve because we don't have the same vfork
 * headaches; if we're in a vforked child, cleanup is automatic upon
 * exec().
 *
 * We do, though, need to do a file-descriptor dance, to put
 * underlying-OS file descriptors in the places the emulated program
 * thinks they are, in case we are surrounded by redirections.  There
 * are three kinds of file descriptors:
 *	(A) File descriptors that are open in the emulated world and
 *	    are not marked close-on-exec.  For these, we need to
 *	    arrange for the underlying fd to be at the emulated
 *	    descriptor number if the exec succeeds.
 *	(B) File descriptors that are open in the emulated world and
 *	    are marked close-on-exec.  For these, we need to arrange
 *	    for the underlying descriptor to be closed if the exec
 *	    succeeds.
 *	(C) File descriptors that are open in the emulator but that do
 *	    not exist in the emulated world.  For these, we need to
 *	    arrange for the underlying descriptor to be closed if the
 *	    exec succeeds.
 * In all cases, everything should remain untouched if the exec fails.
 * We run with emulator-world close-on-exec clear on everything, so we
 * don't, for example, need to save the current state of close-on-exec
 * on emulator-world descriptors.
 *
 * We have to be careful to not, for example, call trc() while we
 * potentially have the trace-manager communication descriptor moved
 * somewhere else.
 */
static void native_exec(void)
{
#if 0
 /*
  * NOTE(review): this whole arm is compiled out; the live
  * implementation is the #else arm, which just fails with em_ENOSYS.
  * Before this can be enabled, note at least: "reallco" for realloc;
  * "nsave >= save" should presumably be "nsave >= asave";
  * "save_move"/"e2o_move" vs "save_moved"/"e2o_moved" naming
  * mismatches; and several loop headers appear damaged
  * ("for (i=0;i=0;i--)").  Left byte-for-byte as found.
  */
 typedef struct dmove DMOVE;
 struct dmove {
  int from;
  int to;
 } ;
 uint32_t eav;
 uint32_t eep;
 int nargv;
 int nenvp;
 char *path;
 char **argv;
 char **envp;
 NULTERM_STATUS nts_path;
 NULTERM_STATUS *nts_av;
 NULTERM_STATUS *nts_ep;
 int i;
 path = nulterm_scarg(s.regs[R_O0],&nts_path);
 eav = s.regs[R_O1];
 eep = s.regs[R_O2];
 for (nargv=0;mem_get_4(eav+(nargv<<2));nargv++) ;
 for (nenvp=0;mem_get_4(eep+(nenvp<<2));nenvp++) ;
 trc(TRC_EXEC,"%s: path %s nargv %d nenvp %d\n",__func__,path,nargv,nenvp);
 argv = malloc((nargv+1)*sizeof(char *));
 envp = malloc((nenvp+1)*sizeof(char *));
 nts_av = malloc(nargv*sizeof(NULTERM_STATUS));
 nts_ep = malloc(nenvp*sizeof(NULTERM_STATUS));
 argv[nargv] = 0;
 for (i=0;i=0;i--) {
  if (fds[i]) {
   if (i > emaxfd) emaxfd = i;
   efds[i] = (fds[i]->flags & FDF_CLEX) ? 2 : 1;
  } else {
   efds[i] = 0;
  }
 }
 /*
  * Scan all emulator-world fds, recording which are actually open.
  */
 omaxfd = fcntl(0,F_MAXFD,0);
 ofds = malloc(omaxfd+1);
 for (i=omaxfd;i>=0;i--) {
  ofds[i] = ! ((fcntl(i,F_GETFD,0) == -1) && (errno == EBADF));
 }
 /*
  * Find emulated-world fds which we want to keep open, but which
  * aren't already at the correct emulator-world descriptor.  These
  * are descriptors we need to move.  Also figure out which of them
  * have something else there in the emulator already; these need
  * saving elsewhere.
  */
 ne2o = 0;
 nsave = 0;
 e2o_move = 0;
 ae2o = 0;
 save_move = 0;
 asave = 0;
 spare = -1;
 for (i=nfds-1;i>=0;i--) {
  if ((efds[i] == 1) && (fds[i]->fd != i)) {
   if (ne2o >= ae2o) e2o_move = realloc(e2o_move,(ae2o=ne2o+8)*sizeof(DMOVE));
   e2o_move[ne2o] = (DMOVE) { .from = fds[i]->fd, .to = i };
   ne2o ++;
   if ((i <= omaxfd) && ofds[i]) {
    if (nsave >= save) save_moved = reallco(save_moved,(asave=nsave+8)*sizeof(DMOVE));
    do spare ++; while (((spare < nfds) && efds[spare]) || ((spare <= omaxfd) && ofds[spare]));
    save_moved[nsave] = (DMOVE) { .from = i, .to = spare };
    nsave ++;
   }
  }
 }
 /*
  * Do all stashing of descriptors we need to.
  */
 for (i=nsave-1;i>=0;i--) {
  if (dup2(save_moved[i].from,save_moved[i].to) < 0) {
   /*
    * Aak, we can't save it!  Record errno, close any stashed
    * copies we've already made, and fail.  We don't need to dup2
    * back because all saved copies are to unused descriptors; the
    * original descriptors are all still open.  We also don't need
    * to fiddle CLEX because we set CLEX on the copy, not the
    * original.
    */
   e = errno;
   for (i++;i=0;i--) {
    if (dup2(e2o_moved[i].from,e2o_moved[i].to) < 0) {
     /*
      * Aak!  This should never happen.  The only way I can see it
      * happening is if we're moving to a high descriptor that calls
      * for expanding the fd array and that expansion failed.  In
      * any case, save errno, back out what we've done, and fail.
      * That's unlikely enough I'm willing to crash the emulator in
      * this case.
      */
     fprintf(stderr,"Impossible dup2() failure rearranging descriptors: %s\n",strerror(errno));
     exit(1);
    }
   }
#else
 // Native exec is not currently supported; fail the magic trap.
 s.regs[R_O0] = em_ENOSYS;
#endif
}
/*
 * Do I/O: construct a struct iovec array for the emulated buffer(s),
 * breaking it up at MEMSEG boundaries as necessary, and do the I/O.
 *
 * The way this tests accessibility is not quite right.  I think you
 * can, for example, read() into a buffer of which only the beginning
 * is accessible, provided the data read doesn't actually spill over
 * into the inaccessible part.
This errors if any part of the * provided buffer is inaccessible. But so far this seems to be good * enough in practice. */ static int io_rw(int niov, IOV (*getiov)(int, void *), int prot, int (*doio)(struct iovec *, int, void *), void *priv, const char *call) { static int iov_a = 0; static struct iovec *iov_v = 0; int iov_n; int i; IOV iov; MEMSEG *ms; uint32_t part; iov_n = 0; for (i=0;i 0) { ms = memseg_find(iov.base,0,call); if (! (ms->prot & prot)) { printf("%d: %s: %08lx: not accessible\n",mypid,call,(ULI)iov.base); trc(TRC_ERR,"%s: %08lx: not accessible\n",call,(ULI)iov.base); top(); } part = ms->end - iov.base; if (part > iov.len) part = iov.len; (*ms->ops->check)(ms,iov.base-ms->base,part,prot); if (iov_n >= iov_a) iov_v = realloc(iov_v,(iov_a=iov_n+8)*sizeof(*iov_v)); iov_v[iov_n++] = (struct iovec) { .iov_base = ms->data + (iov.base - ms->base), .iov_len = part }; iov.base += part; iov.len -= part; } } return((*doio)(iov_v,iov_n,priv)); } /* * Internal to sc_{,p}{read,write}: get the IOV. */ static IOV getiov_rw(int n, void *pv) { if (n) panic("impossible"); return(((IO_PRIV_RW *)pv)->iov); } /* * Internal to sc_{,p}{read,write}v: get an IOV. */ static IOV getiov_rwv(int n, void *pv) { return((IOV){.base=((IO_PRIV_RWV *)pv)->iov[n][0],.len=((IO_PRIV_RWV *)pv)->iov[n][1]}); } /* * Internal to sc_read and sc_readv: do the read. */ static int doio_read(struct iovec *iov, int niov, void *pv) { return(readv(((IO_PRIV_RW *)pv)->fd->fd,iov,niov)); } /* * Internal to sc_pread and sc_preadv: do the read. */ static int doio_pread(struct iovec *iov, int niov, void *pv) { return(preadv(((IO_PRIV_RW *)pv)->fd->fd,iov,niov,((IO_PRIV_RW *)pv)->off)); } /* * Internal to sc_write and sc_writev: do the write. */ static int doio_write(struct iovec *iov, int niov, void *pv) { return(writev(((IO_PRIV_RW *)pv)->fd->fd,iov,niov)); } /* * Internal to sc_pwrite and sc_pwritev: do the write. 
*/ static int doio_pwrite(struct iovec *iov, int niov, void *pv) { return(pwritev(((IO_PRIV_RW *)pv)->fd->fd,iov,niov,((IO_PRIV_RW *)pv)->off)); } /* * Implement exit(2). */ static SYSCALL_IMPL(sc_exit) { uint32_t ec; ec = scarg(args,0); trc(TRC_SYSCALL,"exit %lu\n",(ULI)ec); exit(ec); } /* * Implement fork(2). * * The return semantics of fork()-the-syscall are undocumented. * UTSLing reveals a somewhat schizoid mismatch. Comments in the libc * wrapper claim the child returns while the parent returns * . But, looking at the kernel source, it looks to me as * though the child returns <0,1>, a mismatch which has probably gone * unnoticed because the libc wrapper immediately throws away the * first return value in the child. This is also true of __vfork14, * and probably any other fork()ish syscalls. Why this, rather than * just returning what is documented as the return value in %o0, I * have no idea. * * We go with what the kernel actually does. */ static SYSCALL_IMPL(sc_fork) { pid_t kid; pid_t parent; fflush(0); kid = fork(); if (kid < 0) SYSCALL_ERR(os2em_errno(errno)); if (kid == 0) { parent = mypid; s.noninteractive = 1; if (forkwait) do_forkwait(); mypid = getpid(); trcmgr_newpid(mypid); trc(TRC_PROC,"fork child, parent %lu\n",(ULI)parent); } else { trc(TRC_PROC,"fork parent, child %lu\n",(ULI)kid); } SYSCALL_RET2(kid?:0,!kid); } /* * Implement read(2). */ static SYSCALL_IMPL(sc_read) { uint32_t d; IO_PRIV_RW priv; int n; syscall_restartable = 1; d = scarg(args,0); priv.iov.base = scarg(args,1); priv.iov.len = scarg(args,2); trc(TRC_SYSCALL,"read %ld, %08lx, %lu\n",(LI)(int32_t)d,(ULI)priv.iov.base,(ULI)priv.iov.len); priv.fd = descriptor_arg(d,P_R,"read"); if (! 
priv.fd) SYSCALL_ERR(em_EBADF); if (priv.iov.len < 1) { trc(TRC_SYSCALL,"read -> 0\n"); SYSCALL_RET(0); } n = io_rw(1,&getiov_rw,P_W,&doio_read,&priv,"read"); if (n < 0) { n = os2em_errno(errno); trc(TRC_SYSCALL,"read -> error %d (%s)\n",n,em_strerror(n)); SYSCALL_ERR(n); } trc(TRC_SYSCALL,"read -> %d\n",n); trace_io_data_em("data",priv.iov.base,n); SYSCALL_RET(n); } /* * Implement write(2). */ static SYSCALL_IMPL(sc_write) { uint32_t d; IO_PRIV_RW priv; int n; syscall_restartable = 1; d = scarg(args,0); priv.iov.base = scarg(args,1); priv.iov.len = scarg(args,2); trc(TRC_SYSCALL,"write %ld, %08lx, %lu\n",(LI)(int32_t)d,(ULI)priv.iov.base,(ULI)priv.iov.len); priv.fd = descriptor_arg(d,P_W,"write"); if (! priv.fd) SYSCALL_ERR(em_EBADF); if (priv.iov.len < 1) { trc(TRC_SYSCALL,"write -> 0\n"); SYSCALL_RET(0); } n = io_rw(1,&getiov_rw,P_R,&doio_write,&priv,"write"); if (n < 0) { n = os2em_errno(errno); trc(TRC_SYSCALL,"write err %d (%s)\n",n,em_strerror(n)); SYSCALL_ERR(n); } trc(TRC_SYSCALL,"write -> %d\n",n); trace_io_data_em("data",priv.iov.base,n); SYSCALL_RET(n); } /* * Implement open(2). * * This is an interesting case, because, when O_CREAT is not set, the * third arg is not actually used. We load it only when we have to, * for the sake of unset-value tracking, and pass 0 to the underlying * open() if O_CREAT is not used. * * There is an ugly hack here. If we just do this naïvely, opening * /dev/stdout opens the emulator's stdout, not the emulated * program's. To make /dev/stdout, /dev/fd/, etc, work right, we * kludge it here: if the thing being open()ed stats as a character * special device, and its major matches /dev/stdout's, then we * convert the open into a dup, somewhat a la the kernel's handling of * such opens. (If there is no /dev/stdout, then the whole thing is * suppressed; we assume that any system with fd-duping opens is set * up with /dev/stdout.) * * We could do string comparison sensing instead, special-casing * "/dev/stdout", "/dev/fd/4", etc. 
I think I prefer this, though * it's a close call. */ static SYSCALL_IMPL(sc_open) { const char *path; uint32_t how; uint32_t perm; int oshow; int osfd; int e; int fdp; NULTERM_STATUS nts; uint32_t d; static int stdio_major = -1; struct stat stb; FD *ofd; if (stdio_major == -1) { if ( (stat("/dev/stdout",&stb) < 0) || ((stb.st_mode & S_IFMT) != S_IFCHR) ) { stdio_major = -2; } else { stdio_major = major(stb.st_rdev); } } path = nulterm_scarg(scarg(args,0),&nts); how = scarg(args,1); switch (how & em_O_ACCMODE) { case em_O_RDONLY: oshow = O_RDONLY; fdp = P_R; break; case em_O_WRONLY: oshow = O_WRONLY; fdp = P_W; break; case em_O_RDWR: oshow = O_RDWR; fdp = P_R|P_W; break; case em_O_NOACCESS: oshow = O_NOACCESS; fdp = 0; break; } #define F(x) do { if (how & em_##x) oshow |= x; } while (0) F(O_NONBLOCK); F(O_APPEND); F(O_SHLOCK); F(O_EXLOCK); F(O_ASYNC); F(O_SYNC); F(O_CREAT); F(O_TRUNC); F(O_EXCL); F(O_DSYNC); F(O_RSYNC); F(O_ALT_IO); F(O_NOCTTY); F(O_DIRECTORY); F(O_PLAIN); #undef F perm = (how & em_O_CREAT) ? scarg(args,2) : 0; if ( (stat(path,&stb) >= 0) && ((stb.st_mode & S_IFMT) == S_IFCHR) && (major(stb.st_rdev) == stdio_major) ) { ofd = descriptor_arg(minor(stb.st_rdev),0,"dup-open"); if (! ofd) SYSCALL_ERR(em_EBADF); if (fdp & ~ofd->prot) SYSCALL_ERR(em_EACCES); osfd = dup(ofd->fd); if (osfd < 0) SYSCALL_ERR(os2em_errno(errno)); } else { osfd = open(path,oshow,perm); if (osfd < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } } d = new_fd(osfd,0,fdp); nulterm_done(&nts); SYSCALL_RET(d); } /* * Implement close(2). */ static SYSCALL_IMPL(sc_close) { uint32_t d; FD *fd; int e; d = scarg(args,0); fd = descriptor_arg(d,0,"close"); if (! fd) SYSCALL_ERR(em_EBADF); add_vfork_backout(VFB_CLOSE,d,*fd); e = close(fd->fd); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); fds[d] = 0; free(fd); SYSCALL_RET(0); } /* * Implement wait4(2). * * The most complicated part here is converting the status value. 
 */
static SYSCALL_IMPL(sc_wait4)
{
 uint32_t wpid;
 uint32_t statusp;
 uint32_t options;
 uint32_t rusagep;
 int e;
 int st;
 struct rusage ru;

 trc(TRC_MAGIC,"wait4(%08lx,%08lx,%08lx,%08lx)\n",(ULI)scarg(args,0),(ULI)scarg(args,1),(ULI)scarg(args,2),(ULI)scarg(args,3));
 // Magic handshake: wait4("Emul","ator","Magi","c:-)") marks the
 // process emulator-aware and fails with EINPROGRESS.
 if ( (scarg(args,0) == 0x456d756c) && // ASCII Emul
      (scarg(args,1) == 0x61746f72) && // ASCII ator
      (scarg(args,2) == 0x4d616769) && // ASCII Magi
      (scarg(args,3) == 0x633a2d29) )  // ASCII c:-)
  { s.flags |= SF_EMU_MAGIC;
    trc(TRC_MAGIC,"Magic syscall set EMU_MAGIC\n");
    SYSCALL_ERR(em_EINPROGRESS);
  }
 wpid = scarg(args,0);
 statusp = scarg(args,1);
 options = scarg(args,2);
 rusagep = scarg(args,3);
 // The contorted negation converts a negative emulated pid without
 // tripping signed-overflow issues on the 32->native int conversion.
 // NOTE(review): rusagep is fetched and ru is filled in by wait4(),
 // but ru is never copied out to rusagep - emulated rusage appears
 // unimplemented; confirm before relying on it.
 e = wait4(
	(wpid & 0x80000000) ? -(int)(int32_t)(uint32_t)-wpid : (int)wpid,
	&st,
	((options & em_WUNTRACED) ? WUNTRACED : 0) |
	((options & em_WNOHANG) ? WNOHANG : 0) |
	((options & em_WALTSIG) ? WALTSIG : 0) |
	((options & em_WNOREAP) ? WNOREAP : 0),
	&ru );
 if (e < 0) SYSCALL_ERR(os2em_errno(errno));
 if (e == 0) SYSCALL_RET(0);
 if (statusp)
  { // Convert the native status to the emulated encoding.  The em_WIF*
    // macros are applied to the native value; presumably the layouts
    // coincide - confirm if a mismatch is ever suspected.
    if (em_WIFEXITED(st))
     { mem_set_4(statusp,em_W_EXITCODE(WEXITSTATUS(st),0));
     }
    else if (em_WIFSIGNALED(st))
     { mem_set_4(statusp,em_W_DEADSIG(WTERMSIG(st),WCOREDUMP(st)));
     }
    else if (em_WIFSTOPPED(st))
     { mem_set_4(statusp,em_W_STOPCODE(WSTOPSIG(st)));
     }
    else
     { printf("Undecipherable wait4 status %#x\n",st);
       top();
     }
  }
 SYSCALL_RET(e);
}
/*
 * Implement link(2).
 */
static SYSCALL_IMPL(sc_link)
{
 const char *p1;
 const char *p2;
 NULTERM_STATUS nts1;
 NULTERM_STATUS nts2;
 int e;

 p1 = nulterm_scarg(scarg(args,0),&nts1);
 p2 = nulterm_scarg(scarg(args,1),&nts2);
 if (link(p1,p2) < 0)
  { e = errno;
    nulterm_done(&nts1);
    nulterm_done(&nts2);
    SYSCALL_ERR(os2em_errno(e));
  }
 nulterm_done(&nts1);
 nulterm_done(&nts2);
 SYSCALL_RET(0);
}
/*
 * Implement unlink(2).
*/ static SYSCALL_IMPL(sc_unlink) { const char *path; NULTERM_STATUS nts; int e; path = nulterm_scarg(scarg(args,0),&nts); if (unlink(path) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement chdir(2). */ static SYSCALL_IMPL(sc_chdir) { const char *path; NULTERM_STATUS nts; int e; path = nulterm_scarg(scarg(args,0),&nts); if (chdir(path) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement fchdir(2). */ static SYSCALL_IMPL(sc_fchdir) { uint32_t d; FD *fd; d = scarg(args,0); fd = descriptor_arg(d,0,"fchdir"); if (! fd) SYSCALL_ERR(em_EBADF); if (fchdir(fd->fd) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement chmod(2). */ static SYSCALL_IMPL(sc_chmod) { const char *path; NULTERM_STATUS nts; uint32_t mode; int e; path = nulterm_scarg(scarg(args,0),&nts); mode = scarg(args,1); if (chmod(path,mode) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement chown(2). */ static SYSCALL_IMPL(sc_chown) { const char *path; NULTERM_STATUS nts; uint32_t eu; uint32_t eg; int e; path = nulterm_scarg(scarg(args,0),&nts); eu = scarg(args,1); eg = scarg(args,2); if (chown(path,(eu==-(uint32_t)1)?-1:eu,(eg==-(uint32_t)1)?-1:eg) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement break(2). This is the actual syscall underlying brk() and * sbrk(), with the differences being dealt with by libc. 
*/ static SYSCALL_IMPL(sc_break) { uint32_t newbrk; MEMSEG *ms; newbrk = scarg(args,0); newbrk = ROUND_UP(newbrk,PAGE_SIZE); if (newbrk > MAXDSIZE) { printf("break: %08lx exceeds data size limit\n",(ULI)newbrk); top(); } if (vm.dbrk == newbrk) { trc(TRC_SYSCALL,"break unchanged at %08lx\n",(ULI)vm.dbrk); } else { trc(TRC_SYSCALL,"break %08lx -> %08lx\n",(ULI)vm.dbrk,(ULI)newbrk); } if (newbrk > vm.dbrk) { ms = memseg_new_malloc(vm.dbrk,newbrk-vm.dbrk,P_R|P_W); bzero(ms->data,ms->size); memseg_clear_conflict(ms->base,ms->size,ms); } else if (newbrk < vm.dbrk) { memseg_clear_conflict(newbrk,vm.dbrk-newbrk,0); } vm.dbrk = newbrk; vm_changed = 1; SYSCALL_RET(0); } /* * Implement getfsstat(2). * * There is a complication here. For some underlying OS versions, we * have to use getvfsstat() instead of getfsstat(); see the comment * near the head of this file, where STATFS_VIA_STATVFS and * GETFSSTAT_VIA_GETVFSSTAT are potentially set. */ static SYSCALL_IMPL(sc_getfsstat) { #ifdef GETFSSTAT_VIA_GETVFSSTAT struct statvfs *osbuf; #define osWAIT ST_WAIT #define osNOWAIT ST_NOWAIT #define osCALL getvfsstat #define osSTORE store_statvfs_as_statfs #else struct statfs *osbuf; #define osWAIT MNT_WAIT #define osNOWAIT MNT_NOWAIT #define osCALL getfsstat #define osSTORE store_statfs #endif uint32_t embuf; uint32_t n; uint32_t emflags; int osflags; int e; int i; embuf = scarg(args,0); n = scarg(args,1); emflags = scarg(args,2); osflags = ((emflags & em_MNT_WAIT) ? osWAIT : 0) | ((emflags & em_MNT_NOWAIT) ? osNOWAIT : 0); n /= 256; if (embuf == 0) { e = osCALL(0,n,osflags); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(e); } else { osbuf = malloc(n*sizeof(*osbuf)); if (! 
osbuf) { printf("Out of memory allocating getfsstat() buffer\n"); top(); } e = osCALL(osbuf,n*sizeof(*osbuf),osflags); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); for (i=0;ifd,osbuf,len,osflags,(void *)osfrom,&osfromlen); } else { n = recvfrom(fd->fd,osbuf,len,osflags,0,0); osfrom = 0; } if (n < 0) { n = os2em_errno(errno); free(osbuf); free(osfrom); trc(TRC_SYSCALL,"recvfrom -> error %d (%s)\n",n,em_strerror(n)); SYSCALL_ERR(n); } freev[0] = osbuf; freev[1] = osfrom; copyout(osbuf,buf,n,"recvfrom",&free2,&freev[0]); trace_io_data_os("data",buf,osbuf,n); if (fromlen) { /* * Ideally, I would like to trace the from address and its length * separately, reporting the data ctually written in each case, * even if the address buffer and the length overlap. But that * requires more code restructuring than I want to get into now, * so I'm punting and just reporting the final values of each. */ put_sockaddr(osfrom,osfromlen,from,fromlen,&plen); trace_io_data_em("from address",from,plen); trace_io_data_em("from length",fromlen,4); } free(osbuf); free(osfrom); trc(TRC_SYSCALL,"recvfrom -> %d\n",n); SYSCALL_RET(n); } /* * Implement access(2). */ static SYSCALL_IMPL(sc_access) { const char *path; uint32_t how; int oshow; int e; NULTERM_STATUS nts; path = nulterm_scarg(scarg(args,0),&nts); how = scarg(args,1); trc(TRC_SYSCALL,"access %s, %08lx (%c%c%c)",path,(ULI)how,(how&em_R_OK)?'R':'-',(how&em_W_OK)?'W':'-',(how&em_X_OK)?'X':'-'); oshow = ((how & em_R_OK) ? R_OK : 0) | ((how & em_W_OK) ? W_OK : 0) | ((how & em_X_OK) ? X_OK : 0); e = access(path,oshow); if (e < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement fchflags(2). */ static SYSCALL_IMPL(sc_fchflags) { uint32_t d; FD *fd; uint32_t emflags; unsigned long int osflags; d = scarg(args,0); emflags = scarg(args,1); fd = descriptor_arg(d,0,"fchmod"); if (! 
fd) SYSCALL_ERR(em_EBADF); osflags = 0; if (emflags & em_UF_NODUMP) osflags |= UF_NODUMP; if (emflags & em_UF_IMMUTABLE) osflags |= UF_IMMUTABLE; if (emflags & em_UF_APPEND) osflags |= UF_APPEND; if (emflags & em_UF_OPAQUE) osflags |= UF_OPAQUE; if (emflags & em_SF_ARCHIVED) osflags |= SF_ARCHIVED; if (emflags & em_SF_IMMUTABLE) osflags |= SF_IMMUTABLE; if (emflags & em_SF_APPEND) osflags |= SF_APPEND; if (emflags & ~(em_UF_NODUMP | em_UF_IMMUTABLE | em_UF_APPEND | em_UF_OPAQUE | em_SF_ARCHIVED | em_SF_IMMUTABLE | em_SF_APPEND)) { printf("fchflags: unrecognized flag bits in 0x%08lx\n",(ULI)emflags); top(); } if (fchflags(fd->fd,osflags) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement kill(2). */ static SYSCALL_IMPL(sc_kill) { uint32_t pid; uint32_t emsig; int ossig; int e; pid = scarg(args,0); emsig = scarg(args,1); ossig = em2os_signal(emsig); if ((ossig == 0) && emsig) SYSCALL_ERR(em_EINVAL); trc(TRC_SIGNAL,"kill(2) %lu with %lu->%d\n",(ULI)pid,(ULI)emsig,ossig); e = kill(pid,ossig); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement getppid(2). */ static SYSCALL_IMPL(sc_getppid) { SYSCALL_RET(getppid()); } /* * Implement dup(2). */ static SYSCALL_IMPL(sc_dup) { uint32_t od; FD *ofd; int osnew; uint32_t emnew; od = scarg(args,0); ofd = descriptor_arg(od,0,"dup"); if (! ofd) SYSCALL_ERR(em_EBADF); osnew = dup(ofd->fd); if (osnew < 0) SYSCALL_ERR(os2em_errno(errno)); emnew = new_fd(osnew,0,ofd->prot); SYSCALL_RET(emnew); } /* * Implement pipe(2). * * Arguably should use the underlying OS's pipe(2), but 1.4T * implemented pipes with AF_LOCAL sockets, so we do too, though I * doubt much if any software cares about the differences. */ static SYSCALL_IMPL(sc_pipe) { int osfd[2]; int emfd[2]; if (socketpair(AF_LOCAL,SOCK_STREAM,0,&osfd[0]) < 0) SYSCALL_ERR(os2em_errno(errno)); emfd[0] = new_fd(osfd[0],0,P_R); emfd[1] = new_fd(osfd[1],0,P_W); SYSCALL_RET2(emfd[0],emfd[1]); } /* * Implement getegid(2). 
*/ static SYSCALL_IMPL(sc_getegid) { SYSCALL_RET(getegid()); } /* * Implement getgid(2). */ static SYSCALL_IMPL(sc_getgid) { SYSCALL_RET(getgid()); } /* * Implement __getlogin, the syscall behind getlogin(). * * Fortunately, it's relatively easy to implement __getlogin in terms * of getlogin. */ static SYSCALL_IMPL(sc___getlogin) { uint32_t ptr; uint32_t len; const char *l; ptr = scarg(args,0); len = scarg(args,1); if (len > em_MAXLOGNAME) len = em_MAXLOGNAME; l = getlogin(); if (! l) l = ""; copy_or_nulpad(l,strlen(l),ptr,len,"__getlogin string",0,0); SYSCALL_RET(0); } /* * Implement ioctl(2). * * We implement only a handful of ioctls. */ static SYSCALL_IMPL(sc_ioctl) { uint32_t d; uint32_t ioc; uint32_t arg; FD *fd; int e; d = scarg(args,0); ioc = scarg(args,1); fd = descriptor_arg(d,0,"ioctl"); if (! fd) SYSCALL_ERR(em_EBADF); switch (ioc) { case em_TIOCGETA: { struct termios tio; arg = scarg(args,2); e = ioctl(fd->fd,TIOCGETA,&tio); if (e < 0) SYSCALL_ERR(os2em_errno(e)); os2em_termios(&tio,arg); SYSCALL_RET(0); } break; case em_TIOCGPGRP: { int iv; arg = scarg(args,2); e = ioctl(fd->fd,TIOCGPGRP,&iv); if (e < 0) SYSCALL_ERR(os2em_errno(e)); mem_set_4(arg,iv); SYSCALL_RET(0); } break; case em_TIOCSPGRP: { int iv; arg = scarg(args,2); iv = mem_get_4(arg); e = ioctl(fd->fd,TIOCSPGRP,&iv); if (e < 0) SYSCALL_ERR(os2em_errno(e)); SYSCALL_RET(0); } break; case em_TIOCGWINSZ: { struct winsize wsz; arg = scarg(args,2); e = ioctl(fd->fd,TIOCGWINSZ,&wsz); if (e < 0) SYSCALL_ERR(os2em_errno(e)); mem_set_2(arg,wsz.ws_row); mem_set_2(arg+2,wsz.ws_col); mem_set_2(arg+4,wsz.ws_xpixel); mem_set_2(arg+6,wsz.ws_ypixel); SYSCALL_RET(0); } break; case em_TIOCSWINSZ: { struct winsize wsz; arg = scarg(args,2); wsz.ws_row = mem_get_2(arg); wsz.ws_col = mem_get_2(arg+2); wsz.ws_xpixel = mem_get_2(arg+4); wsz.ws_ypixel = mem_get_2(arg+6); e = ioctl(fd->fd,TIOCSWINSZ,&wsz); if (e < 0) SYSCALL_ERR(os2em_errno(e)); SYSCALL_RET(0); } break; case em_FIOCLEX: fd->flags |= FDF_CLEX; 
SYSCALL_RET(0); break; case em_TIOCSETAW: { struct termios tio; arg = scarg(args,2); em2os_termios(arg,&tio); e = ioctl(fd->fd,TIOCSETAW,&tio); if (e < 0) SYSCALL_ERR(os2em_errno(e)); SYSCALL_RET(0); } break; case em_FIONCLEX: fd->flags &= ~FDF_CLEX; SYSCALL_RET(0); break; case em_TIOCSETA: { struct termios tio; arg = scarg(args,2); em2os_termios(arg,&tio); e = ioctl(fd->fd,TIOCSETA,&tio); if (e < 0) SYSCALL_ERR(os2em_errno(e)); SYSCALL_RET(0); } break; case em_TIOCGETD: arg = scarg(args,2); mem_set_4(arg,0); // 0 is only ldisc we support SYSCALL_RET(0); break; case em_TIOCSETAF: { struct termios tio; arg = scarg(args,2); em2os_termios(arg,&tio); e = ioctl(fd->fd,TIOCSETAF,&tio); if (e < 0) SYSCALL_ERR(os2em_errno(e)); SYSCALL_RET(0); } break; case em_MTIOCGET: // XXX should we do a real MTIOCGET? SYSCALL_ERR(em_ENOTTY); break; case em_FIONREAD: { int iv; arg = scarg(args,2); e = ioctl(fd->fd,FIONREAD,&iv); if (e < 0) SYSCALL_ERR(os2em_errno(e)); mem_set_4(arg,iv); SYSCALL_RET(0); } break; case em_FIONBIO: { int iv; iv = mem_get_4(scarg(args,2)) ? 1 : 0; e = ioctl(fd->fd,FIONBIO,&iv); if (e < 0) SYSCALL_ERR(os2em_errno(e)); SYSCALL_RET(0); } break; // When adding cases to this switch, add them to print_special_IOCTL too } printf("Unimplemented ioctl %08lx = ",(ULI)ioc); print_decoded_ioctl(stdout,ioc); printf("\n"); top(); } /* * Implement revoke(2). */ static SYSCALL_IMPL(sc_revoke) { const char *p; NULTERM_STATUS nts; int e; p = nulterm_scarg(scarg(args,0),&nts); if (revoke(p) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement symlink(2). 
*/ static SYSCALL_IMPL(sc_symlink) { const char *p1; const char *p2; NULTERM_STATUS nts1; NULTERM_STATUS nts2; int e; p1 = nulterm_scarg(scarg(args,0),&nts1); p2 = nulterm_scarg(scarg(args,1),&nts2); if (symlink(p1,p2) < 0) { e = errno; nulterm_done(&nts1); nulterm_done(&nts2); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts1); nulterm_done(&nts2); SYSCALL_RET(0); } /* * Implement readlink(2). */ static SYSCALL_IMPL(sc_readlink) { unsigned char buf[65536]; uint32_t l; int n; const char *path; int i; uint32_t bufp; NULTERM_STATUS nts; path = nulterm_scarg(scarg(args,0),&nts); bufp = scarg(args,1); l = scarg(args,2); if (l > 65536) l = 65536; n = readlink(path,&buf[0],l); if (n < 0) { i = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(i)); } nulterm_done(&nts); trc(TRC_SYSCALL,"readlink result %.*s\n",n,&buf[0]); for (i=0;i=0;i--) nulterm_done(&nts_argv[i]); for (i=nenvp-1;i>=0;i--) nulterm_done(&nts_envp[i]); lastxp = strdup(path); nulterm_done(&nts_path); trc(TRC_PROC,"execve, %s\n",lastxp); fflush(0); /* * As of vforkbreak(), vm must point to the parent's old VM and * vfork_dropvm to the child's new VM. */ vfork_dropvm = vm; vm = oldvm; vforkbreak(); if (during_vfork && forkwait) do_forkwait(); flush_vfork_backout(); during_vfork = 0; postexec = 1; rv->flags = 0; for (i=nfds-1;i>=0;i--) { FD *fd; fd = fds[i]; if (fd && (fd->flags & FDF_CLEX)) { trc(TRC_EXEC,"closing CLEX fd %d\n",i); add_vfork_backout(VFB_CLOSE,i,*fd); close(fd->fd); fds[i] = 0; free(fd); } } vm_postexec(vfork_dropvm); vfork_dropvm = INITVM(); sig_postexec(); // We don't handle set-ID vm_changed = 1; free(s.lastexec); s.lastexec = lastxp; setproctitle("%s",lastxp); SYSCALL_RET(0); } else { for (i=nargv-1;i>=0;i--) nulterm_done(&nts_argv[i]); for (i=nenvp-1;i>=0;i--) nulterm_done(&nts_envp[i]); nulterm_done(&nts_path); vm_destroy(vm); vm = oldvm; s = oldstate; SYSCALL_ERR(e); } } /* * Implement umask(2). 
*/ static SYSCALL_IMPL(sc_umask) { SYSCALL_RET(umask(scarg(args,0)&0777)&0777); } /* * Implement munmap(2). */ static SYSCALL_IMPL(sc_munmap) { uint32_t addr; uint32_t len; uint32_t o; addr = scarg(args,0); len = scarg(args,1); // if (addr & (PAGE_SIZE-1)) SYSCALL_ERR(em_EINVAL); o = addr; addr = ROUND_DOWN(addr,PAGE_SIZE); len += o - addr; len = ROUND_UP(len,PAGE_SIZE); if (len & 0x80000000) SYSCALL_ERR(em_EINVAL); if (! range_exists(addr,len)) SYSCALL_ERR(em_EINVAL); memseg_clear_conflict(addr,len,0); vm_changed = 1; SYSCALL_RET(0); } /* * Implement madvise(2). */ static SYSCALL_IMPL(sc_madvise) { // madvise() is advisory; we always ignore it SYSCALL_RET(0); } /* * Implement getgroups(2). */ static SYSCALL_IMPL(sc_getgroups) { uint32_t ng; uint32_t gp; gid_t *osv; int actng; int i; ng = scarg(args,0); gp = scarg(args,1); actng = getgroups(0,0); if (ng == 0) SYSCALL_RET(actng); if (actng < ng) ng = actng; osv = malloc(ng*sizeof(gid_t)); i = getgroups(ng,osv); if (i < 0) { i = os2em_errno(errno); free(osv); SYSCALL_ERR(i); } if (i > ng) abort(); ng = i; for (i=0;i em_NGROUPS) || (ng > NGROUPS)) SYSCALL_ERR(em_EINVAL); osv = malloc(ng*sizeof(gid_t)); for (i=0;i= nfds) ? 0 : fds[d2]; if (fd2) { add_vfork_backout(VFB_DUP2,d2,*fd2); e = dup2(fd1->fd,fd2->fd); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); fd2->prot = fd1->prot; } else { e = dup(fd1->fd); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); new = new_fd(e,d2,fd1->prot); if (new != d2) panic("impossible dup2: wanted %d, got %d",d2,new); } SYSCALL_RET(d2); } /* * Implement fcntl(2). * * We implement only a very few fcntls. */ static SYSCALL_IMPL(sc_fcntl) { uint32_t d; uint32_t cmd; FD *fd; int e; int i; d = scarg(args,0); cmd = scarg(args,1); switch (cmd) { case em_F_DUPFD: case em_F_GETFD: case em_F_SETFD: case em_F_GETFL: fd = descriptor_arg(d,0,"fcntl"); if (! 
fd) SYSCALL_ERR(em_EBADF); break; case em_F_CLOSEM: break; default: printf("Unrecognized fcntl %lu\n",(ULI)cmd); top(); break; } switch (cmd) { case em_F_DUPFD: { int newos; newos = dup(fd->fd); if (newos < 0) SYSCALL_ERR(os2em_errno(errno)); e = new_fd(newos,scarg(args,2),fd->prot); } break; case em_F_GETFD: e = (fd->flags & FDF_CLEX) ? 1 : 0; break; case em_F_SETFD: if (scarg(args,2) & 1) { fd->flags |= FDF_CLEX; } else { fd->flags &= ~FDF_CLEX; } e = 0; break; case em_F_GETFL: /* * Only a few flags are part of the documented interface for * fcntl (and thus can be counted upon from our underlying OS). * But so much else here depends on our underlying OS being * NetBSD that I can accept depending on its semantics here. */ { int v; v = fcntl(fd->fd,F_GETFL,0); e = v & 3; #define F(bit) do { if (v & bit) e |= em_##bit; } while (0) F(O_NONBLOCK); F(O_APPEND); F(O_ASYNC); F(O_SYNC); F(O_DSYNC); F(O_RSYNC); F(O_ALT_IO); #undef F } break; case em_F_CLOSEM: if (d & 0x80000000) { e = em_EBADF; break; } for (i=nfds-1;i>=d;i--) { fd = fds[i]; if (fd) { add_vfork_backout(VFB_CLOSE,i,*fd); close(fd->fd); fds[i] = 0; free(fd); } } e = 0; break; default: panic("impossible fcntl"); break; } if (e < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(e); } /* * Implement select(2). We actually implement this in terms of * poll(2), because that API makes it easier to map between emulated * descriptors and emulator descriptors. */ static SYSCALL_IMPL(sc_select) { uint32_t nfds; uint32_t rp; uint32_t wp; uint32_t xp; uint32_t tvp; struct timeval tv; static struct pollfd *pfds = 0; static int *efds = 0; static int apfds = 0; int npfds; int vx; uint32_t rval; uint32_t wval; uint32_t xval; int pev; FD *fd; int pt; int prv; int i; int j; int n; uint32_t *rm; uint32_t *wm; uint32_t *xm; uint32_t ret; nfds = scarg(args,0); rp = scarg(args,1); wp = scarg(args,2); xp = scarg(args,3); tvp = scarg(args,4); npfds = 0; vx = -1; for (i=0;i> 5)) { vx = i >> 5; rval = rp ? 
mem_get_4(rp+((i>>5)<<2)) : 0; wval = wp ? mem_get_4(wp+((i>>5)<<2)) : 0; xval = xp ? mem_get_4(xp+((i>>5)<<2)) : 0; } pev = 0; if ((rval >> (i & 31)) & 1) pev |= POLLIN | POLLRDNORM; if ((wval >> (i & 31)) & 1) pev |= POLLOUT | POLLWRNORM; if ((xval >> (i & 31)) & 1) pev |= POLLERR; if (pev) { fd = descriptor_arg(i,0,"select"); if (! fd) SYSCALL_ERR(em_EBADF); if (npfds >= apfds) { apfds = npfds + 8; pfds = realloc(pfds,apfds*sizeof(struct pollfd)); efds = realloc(efds,apfds*sizeof(int)); } pfds[npfds] = (struct pollfd) { .fd = fd->fd, .events = pev }; efds[npfds] = i; npfds ++; } } if (tvp) { tv.tv_sec = mem_get_8(tvp); tv.tv_usec = mem_get_4(tvp+8); if (tv.tv_usec > 1000000) SYSCALL_ERR(em_EINVAL); if (tv.tv_sec > 1000000) { pt = INFTIM; } else { pt = (tv.tv_sec * 1000) + ((tv.tv_usec + 999) / 1000); } } else { pt = INFTIM; } prv = poll(pfds,npfds,pt); if (prv < 0) SYSCALL_ERR(os2em_errno(errno)); n = (nfds + 31) >> 5; rm = rp ? calloc(n,sizeof(uint32_t)) : 0; wm = wp ? calloc(n,sizeof(uint32_t)) : 0; xm = xp ? calloc(n,sizeof(uint32_t)) : 0; for (i=0;i= nfds)) abort(); if (pfds[i].revents & POLLNVAL) abort(); // a can't-happen if (pfds[i].events & POLLERR) { if (pfds[i].revents & (POLLIN | POLLRDNORM | POLLHUP)) { if (rm) rm[j>>5] |= ((uint32_t)1) << (j & 31); } if (pfds[i].revents & (POLLOUT | POLLWRNORM | POLLHUP)) { if (wm) wm[j>>5] |= ((uint32_t)1) << (j & 31); } if (pfds[i].revents & (POLLERR | POLLHUP)) { if (xm) xm[j>>5] |= ((uint32_t)1) << (j & 31); } } else { if (pfds[i].revents & (POLLIN | POLLRDNORM | POLLERR | POLLHUP)) { if (rm) rm[j>>5] |= ((uint32_t)1) << (j & 31); } if (pfds[i].revents & (POLLOUT | POLLWRNORM | POLLERR | POLLHUP)) { if (wm) wm[j>>5] |= ((uint32_t)1) << (j & 31); } } } ret = 0; for (i=n-1;i>=0;i--) { ret += bitcount32( (rm ? rm[i] : 0) | (wm ? wm[i] : 0) | (xm ? xm[i] : 0) ); if (rp) mem_set_4(rp+(i*4),rm[i]); if (wp) mem_set_4(wp+(i*4),wm[i]); if (xp) mem_set_4(xp+(i*4),xm[i]); } SYSCALL_RET(ret); } /* * Implement fsync(2). 
*/ static SYSCALL_IMPL(sc_fsync) { uint32_t d; FD *fd; int e; d = scarg(args,0); fd = descriptor_arg(d,0,"fsync"); if (! fd) SYSCALL_ERR(em_EBADF); e = fsync(fd->fd); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(e); } /* * Implement setpriority(2). */ static SYSCALL_IMPL(sc_setpriority) { uint32_t which; uint32_t who; uint32_t pri; int oswhich; which = scarg(args,0); who = scarg(args,1); pri = scarg(args,2); switch (which) { case em_PRIO_PROCESS: oswhich = PRIO_PROCESS; break; case em_PRIO_PGRP: oswhich = PRIO_PGRP; break; case em_PRIO_USER: oswhich = PRIO_USER; break; default: SYSCALL_ERR(em_EINVAL); break; } if (setpriority(oswhich,(int)(int32_t)who,(int)(int32_t)pri) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement socket(2). */ static SYSCALL_IMPL(sc_socket) { uint32_t dom; uint32_t type; uint32_t proto; int osdom; int ostype; int osproto; int osfd; const char *domstr; const char *typestr; dom = scarg(args,0); type = scarg(args,1); proto = scarg(args,2); switch (dom) { case em_AF_LOCAL: osdom = AF_LOCAL; domstr = "AF_LOCAL"; break; case em_AF_INET: osdom = AF_INET; domstr = "AF_INET"; break; case em_AF_INET6: osdom = AF_INET6; domstr = "AF_INET6"; break; default: printf("socket: unimplemented AF %lu\n",(ULI)dom); top(); break; } // Fortunately, we can support the same set of types regardless of AF. switch (type) { case em_SOCK_STREAM: ostype = SOCK_STREAM; typestr = "SOCK_STREAM"; break; case em_SOCK_DGRAM: ostype = SOCK_DGRAM; typestr = "SOCK_DGRAM"; break; default: printf("socket: unimplemented type %lu\n",(ULI)type); top(); //SYSCALL_ERR(em_ESOCKTNOSUPPORT); break; } if (proto == 0) { osproto = 0; } else { // A few nonzero proto values get used.... 
if ((proto == em_IPPROTO_TCP) && (type == em_SOCK_STREAM) && ((dom == em_AF_INET) || (dom == em_AF_INET6))) { osproto = IPPROTO_TCP; } else { printf("socket: unimplemented protocol %lu for %s/%s\n",(ULI)proto,domstr,typestr); top(); //SYSCALL_ERR(em_EPROTONOSUPPORT); } } osfd = socket(osdom,ostype,osproto); if (osfd < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(new_fd(osfd,0,P_R|P_W)); } /* * Implement connect(2). */ static SYSCALL_IMPL(sc_connect) { uint32_t d; FD *fd; uint32_t addr; uint32_t alen; int osrv; GETSA sa; d = scarg(args,0); fd = descriptor_arg(d,0,"connect"); if (! fd) SYSCALL_ERR(em_EBADF); addr = scarg(args,1); alen = scarg(args,2); sa = get_sockaddr(addr,alen); if (sa.err) SYSCALL_ERR(sa.err); osrv = connect(fd->fd,sa.sa,sa.salen); free(sa.sa); if (osrv < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement getpriority(2). */ static SYSCALL_IMPL(sc_getpriority) { uint32_t which; uint32_t who; int oswhich; int p; which = scarg(args,0); who = scarg(args,1); switch (which) { case em_PRIO_PROCESS: oswhich = PRIO_PROCESS; break; case em_PRIO_PGRP: oswhich = PRIO_PGRP; break; case em_PRIO_USER: oswhich = PRIO_USER; break; default: SYSCALL_ERR(em_EINVAL); break; } errno = 0; p = getpriority(oswhich,(int)(int32_t)who); if (errno) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET((int32_t)p); } /* * Implement setsockopt(2). We don't support very many socket options. */ static SYSCALL_IMPL(sc_setsockopt) { FD *fd; uint32_t emlevel; uint32_t emoptname; uint32_t emoptvalp; uint32_t emoptlen; fd = descriptor_arg(scarg(args,0),0,"setsockopt"); emlevel = scarg(args,1); emoptname = scarg(args,2); emoptvalp = scarg(args,3); emoptlen = scarg(args,4); if ((emlevel == em_IPPROTO_TCP) && (emoptname == em_TCP_NODELAY)) { int osv; if (!emoptvalp || (emoptlen < 4)) SYSCALL_ERR(em_EINVAL); osv = mem_get_4(emoptvalp) ? 
1 : 0; trace_io_data_em("optval",emoptvalp,4); if (setsockopt(fd->fd,IPPROTO_TCP,TCP_NODELAY,&osv,sizeof(int)) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } printf("Unrecognized setsockopt: level=%lld optname=%lld (optvalp=%#08llx optlenp=%lld)\n", (LLI)(int32_t)emlevel, (LLI)(int32_t)emoptname, (ULLI)emoptvalp, (LLI)(int32_t)emoptlen); top(); } /* * Implement gettimeofday(2). */ static SYSCALL_IMPL(sc_gettimeofday) { uint32_t tvp; uint32_t tzp; struct timeval tv; tvp = scarg(args,0); tzp = scarg(args,1); trc(TRC_SYSCALL,"gettimeofday %08llx, %08lx\n",(ULLI)tvp,(ULI)tzp); gettimeofday(&tv,0); if (tvp) { mem_set_8(tvp,tv.tv_sec); mem_set_4(tvp+8,tv.tv_usec); mem_set_4(tvp+12,0); } if (tzp) { mem_set_4(tzp,0); mem_set_4(tzp+4,0); } SYSCALL_RET(0); } /* * Implement getrusage(2). */ static SYSCALL_IMPL(sc_getrusage) { uint32_t who; uint32_t buf; struct rusage ru; int oswho; who = scarg(args,0); buf = scarg(args,1); trc(TRC_SYSCALL,"getrusage %ld, %08lx\n",(LI)(int32_t)who,(ULI)buf); switch (who) { default: SYSCALL_ERR(em_EINVAL); break; case em_RUSAGE_SELF: oswho = RUSAGE_SELF; break; case em_RUSAGE_CHILDREN: oswho = RUSAGE_CHILDREN; break; } if (getrusage(oswho,&ru) < 0) panic("impossible getrusage failure"); store_rusage(buf,&ru); SYSCALL_RET(0); } /* * Implement getsockopt(2). We don't support very many socket options. */ static SYSCALL_IMPL(sc_getsockopt) { FD *fd; uint32_t emlevel; uint32_t emoptname; uint32_t emoptvalp; uint32_t emoptlenp; uint32_t emoptlen; socklen_t osoptlen; int osv_int; int emlen; uint8_t emvbuf[4]; // size of largest supported value int i; fd = descriptor_arg(scarg(args,0),0,"getsockopt"); emlevel = scarg(args,1); emoptname = scarg(args,2); emoptvalp = scarg(args,3); emoptlenp = scarg(args,4); emoptlen = emoptvalp ? 
mem_get_4(emoptlenp) : 0; if ((emlevel == em_SOL_SOCKET) && (emoptname == em_SO_ERROR)) { uint32_t v; osoptlen = sizeof(int); if (getsockopt(fd->fd,SOL_SOCKET,SO_ERROR,&osv_int,&osoptlen) < 0) SYSCALL_ERR(os2em_errno(errno)); v = (osv_int < 0) ? -(uint32_t)-osv_int : osv_int; emvbuf[0] = v >> 24; emvbuf[1] = (v >> 16) & 0xff; emvbuf[2] = (v >> 8) & 0xff; emvbuf[3] = v & 0xff; emlen = 4; } else { printf("Unrecognized getsockopt: level=%lld optname=%lld (optvalp=%#08llx optlenp=%#08llx [%lld]\n", (LLI)(int32_t)emlevel, (LLI)(int32_t)emoptname, (ULLI)emoptvalp, (ULLI)emoptlenp, (LLI)(int32_t)emoptlen); top(); } if (emoptlen < emlen) emlen = emoptlen; for (i=0;i 0\n"); SYSCALL_RET(0); } if (priv.niov > 1024) SYSCALL_ERR(em_EINVAL); priv.iov = malloc(priv.niov*sizeof(*priv.iov)); p = iovbase; for (i=0;i %d\n",n); if (io_trace_size) { int left; int nt; int x; left = n; if (left > io_trace_size) left = io_trace_size; x = 0; while (left > 0) { nt = left; if (nt > priv.iov[x][1]) nt = priv.iov[x][1]; trace_io_data_em("data",priv.iov[x][0],nt); left -= nt; x ++; } } free(priv.iov); SYSCALL_RET(n); } /* * Implement writev(2). */ static SYSCALL_IMPL(sc_writev) { uint32_t d; IO_PRIV_RWV priv; int n; int i; uint32_t p; uint32_t iovbase; d = scarg(args,0); iovbase = scarg(args,1); priv.niov = scarg(args,2); trc(TRC_SYSCALL,"writev %ld, %08lx, %ld\n",(LI)(int32_t)d,(ULI)iovbase,(LI)(int32_t)priv.niov); priv.fd = descriptor_arg(d,P_W,"writev"); if (! 
priv.fd) SYSCALL_ERR(em_EBADF); if (priv.niov < 1) { trc(TRC_SYSCALL,"writev -> 0\n"); SYSCALL_RET(0); } if (priv.niov > 1024) SYSCALL_ERR(em_EINVAL); priv.iov = malloc(priv.niov*sizeof(*priv.iov)); p = iovbase; for (i=0;i %d\n",n); if (io_trace_size) { int left; int nt; int x; left = n; if (left > io_trace_size) left = io_trace_size; x = 0; while (left > 0) { nt = left; if (nt > priv.iov[x][1]) nt = priv.iov[x][1]; trace_io_data_em("data",priv.iov[x][0],nt); left -= nt; x ++; } } free(priv.iov); SYSCALL_RET(n); } /* * Implement fchown(2). */ static SYSCALL_IMPL(sc_fchown) { uint32_t d; FD *fd; uint32_t eu; uint32_t eg; d = scarg(args,0); eu = scarg(args,1); eg = scarg(args,2); fd = descriptor_arg(d,0,"fchown"); if (! fd) SYSCALL_ERR(em_EBADF); if (fchown(fd->fd,(eu==-(uint32_t)1)?-1:eu,(eg==-(uint32_t)1)?-1:eg) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement fchmod(2). */ static SYSCALL_IMPL(sc_fchmod) { uint32_t d; FD *fd; uint32_t mode; d = scarg(args,0); mode = scarg(args,1); fd = descriptor_arg(d,0,"fchmod"); if (! fd) SYSCALL_ERR(em_EBADF); if (fchmod(fd->fd,mode) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement rename(2). */ static SYSCALL_IMPL(sc_rename) { const char *p1; const char *p2; NULTERM_STATUS nts1; NULTERM_STATUS nts2; int r; int e; p1 = nulterm_scarg(scarg(args,0),&nts1); p2 = nulterm_scarg(scarg(args,1),&nts2); r = rename(p1,p2); e = errno; nulterm_done(&nts1); nulterm_done(&nts2); if (r < 0) SYSCALL_ERR(os2em_errno(e)); else SYSCALL_RET(0); } /* * Implement flock(2). */ static SYSCALL_IMPL(sc_flock) { uint32_t d; uint32_t op; int osop; FD *fd; int e; d = scarg(args,0); op = scarg(args,1); osop = ((op & em_LOCK_EX) ? LOCK_EX : 0) | ((op & em_LOCK_SH) ? LOCK_SH : 0) | ((op & em_LOCK_NB) ? LOCK_NB : 0) | ((op & em_LOCK_UN) ? LOCK_UN : 0); fd = descriptor_arg(d,0,"flock"); if (! 
fd) SYSCALL_ERR(em_EBADF); e = flock(fd->fd,osop); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(e); } /* * Implement mkfifo(2). */ static SYSCALL_IMPL(sc_mkfifo) { const char *path; NULTERM_STATUS nts; uint32_t mode; int e; path = nulterm_scarg(scarg(args,0),&nts); mode = scarg(args,1); e = mkfifo(path,mode); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement sendto(2). * * XXX Leverage io_rw maybe? */ static SYSCALL_IMPL(sc_sendto) { uint32_t d; uint32_t msgptr; uint32_t msglen; uint32_t emflags; uint32_t toptr; uint32_t tolen; FD *fd; MEMSEG *ms; int osflags; unsigned char *tbuf; const void *osbuf; GETSA sa; int osrv; d = scarg(args,0); msgptr = scarg(args,1); msglen = scarg(args,2); emflags = scarg(args,3); toptr = scarg(args,4); tolen = scarg(args,5); fd = descriptor_arg(d,P_W,"sendto"); if (! fd) SYSCALL_ERR(em_EBADF); ms = memseg_find(msgptr,0,"sendto"); if (! (ms->prot & P_R)) { printf("%d: sendto %08lx: not accessible\n",mypid,(ULI)msgptr); trc(TRC_ERR,"sendto %08lx: not accessible\n",(ULI)msgptr); top(); } if (ms->end-msgptr >= msglen) { (*ms->ops->check)(ms,msgptr-ms->base,msglen,P_R); osbuf = ms->data + (msgptr - ms->base); tbuf = 0; } else { tbuf = malloc(msglen); if (! tbuf) SYSCALL_ERR(em_ENOBUFS); copyin(tbuf,msgptr,msglen,"sendto dtaa",&free,tbuf); osbuf = tbuf; } if (tolen > 0) { sa = get_sockaddr(toptr,tolen); if (sa.err) { free(tbuf); SYSCALL_ERR(sa.err); } } else { sa.sa = 0; sa.salen = 0; } osflags = em2os_MSG_flags(emflags); osrv = sendto(fd->fd,osbuf,msglen,osflags,sa.sa,sa.salen); if (osrv < 0) { osrv = errno; free(tbuf); free(sa.sa); SYSCALL_ERR(osrv); } free(tbuf); free(sa.sa); SYSCALL_RET(osrv); } /* * Implement socketpair(2). 
*/ static SYSCALL_IMPL(sc_socketpair) { uint32_t dom; uint32_t type; uint32_t proto; uint32_t fdsp; int osdom; int ostype; int osd[2]; uint32_t emd[2]; dom = scarg(args,0); type = scarg(args,1); proto = scarg(args,2); fdsp = scarg(args,3); switch (dom) { case em_AF_LOCAL: osdom = AF_LOCAL; break; case em_AF_INET: osdom = AF_INET; break; case em_AF_INET6: osdom = AF_INET6; break; default: printf("socketpair: unimplemented AF %lu\n",(ULI)dom); top(); //SYSCALL_ERR(em_EPROTONOSUPPORT); break; } // Fortunately, we can support the same set of types regardless of AF. switch (type) { case em_SOCK_STREAM: ostype = SOCK_STREAM; break; case em_SOCK_DGRAM: ostype = SOCK_DGRAM; break; default: printf("socketpair: unimplemented type %lu\n",(ULI)type); top(); //SYSCALL_ERR(em_ESOCKTNOSUPPORT); break; } if (proto != 0) { printf("socketpair: protocol != 0 -> EPROTONOSUPPORT\n"); SYSCALL_ERR(em_EPROTONOSUPPORT); } if (socketpair(osdom,ostype,0,&osd[0]) < 0) SYSCALL_ERR(os2em_errno(errno)); emd[0] = new_fd(osd[0],0,P_R|P_W); emd[1] = new_fd(osd[1],0,P_R|P_W); mem_set_4(fdsp,emd[0]); mem_set_4(fdsp+4,emd[1]); SYSCALL_RET(0); } /* * Implement mkdir(2). */ static SYSCALL_IMPL(sc_mkdir) { const char *path; NULTERM_STATUS nts; uint32_t mode; int e; path = nulterm_scarg(scarg(args,0),&nts); mode = scarg(args,1); if (mkdir(path,mode) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement rmdir(2). */ static SYSCALL_IMPL(sc_rmdir) { const char *path; NULTERM_STATUS nts; int e; path = nulterm_scarg(scarg(args,0),&nts); if (rmdir(path) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement utimes(2). 
*/ static SYSCALL_IMPL(sc_utimes) { const char *path; NULTERM_STATUS nts; uint32_t tp; struct timeval t[2]; struct timeval *tvp; int e; path = nulterm_scarg(scarg(args,0),&nts); tp = scarg(args,1); if (tp == 0) { tvp = 0; } else { t[0].tv_sec = mem_get_8(tp); t[0].tv_usec = mem_get_4(tp+8); t[1].tv_sec = mem_get_8(tp+16); t[1].tv_usec = mem_get_4(tp+24); tvp = &t[0]; } if (utimes(path,tvp) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement statfs(2). * * There is a complication here. For some underlying OS versions, we * have to use statvfs() instead of statfs(); see the comment near the * head of this file, where STATFS_VIA_STATVFS and * GETFSSTAT_VIA_GETVFSSTAT are potentially set. */ static SYSCALL_IMPL(sc_statfs) { #ifdef STATFS_VIA_STATVFS struct statvfs sf; #define osCALL statvfs #define osCOPY store_statvfs_as_statfs #else struct statfs sf; #define osCALL statfs #define osCOPY store_statfs #endif uint32_t bufp; int e; NULTERM_STATUS nts; const char *path; path = nulterm_scarg(scarg(args,0),&nts); bufp = scarg(args,1); e = osCALL(path,&sf); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); osCOPY(bufp,&sf,"statfs",0,0); SYSCALL_RET(0); #undef osCALL #undef osCOPY } /* * Implement fstatfs(2). * * There is a complication here. For some underlying OS versions, we * have to use fstatvfs() instead of fstatfs(); see the comment near * the head of this file, where STATFS_VIA_STATVFS and * GETFSSTAT_VIA_GETVFSSTAT are potentially set. */ static SYSCALL_IMPL(sc_fstatfs) { #ifdef STATFS_VIA_STATVFS struct statvfs sf; #define osCALL fstatvfs #define osCOPY store_statvfs_as_statfs #else struct statfs sf; #define osCALL fstatfs #define osCOPY store_statfs #endif uint32_t d; uint32_t bufp; FD *fd; int e; d = scarg(args,0); bufp = scarg(args,1); fd = descriptor_arg(d,0,"fstatfs"); if (! 
fd) SYSCALL_ERR(em_EBADF); e = osCALL(fd->fd,&sf); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); osCOPY(bufp,&sf,"statfs",0,0); SYSCALL_RET(0); #undef osCALL #undef osCOPY } /* * Implement pread(2). * * The comment in sys_pread() in vfs_syscalls.c, appearing to give the * syscall arguments, lies. The truth, in syscallargs.h, has a pad * value between nbyte and offset. */ static SYSCALL_IMPL(sc_pread) { uint32_t d; IO_PRIV_RW priv; int n; syscall_restartable = 1; d = scarg(args,0); priv.iov.base = scarg(args,1); priv.iov.len = scarg(args,2); // 3 is unused padding priv.off = (((uint64_t)scarg(args,4)) << 32) | scarg(args,5); trc(TRC_SYSCALL,"pread %ld, %08lx, %ld, %016llx\n",(LI)(int32_t)d,(ULI)priv.iov.base,(LI)(int32_t)priv.iov.len,(ULLI)priv.off); priv.fd = descriptor_arg(d,P_R,"pread"); if (! priv.fd) SYSCALL_ERR(em_EBADF); if (priv.iov.len < 1) { trc(TRC_SYSCALL,"pread -> 0\n"); SYSCALL_RET(0); } n = io_rw(1,&getiov_rw,P_W,&doio_pread,&priv,"pread"); if (n < 0) { n = os2em_errno(errno); trc(TRC_SYSCALL,"pread -> error %d (%s)\n",n,em_strerror(n)); SYSCALL_ERR(n); } trc(TRC_SYSCALL,"pread -> %d\n",n); trace_io_data_em("data",priv.iov.base,n); SYSCALL_RET(n); } /* * Implement pwrite(2). * * The comment in sys_pwrite() in vfs_syscalls.c, appearing to give the * syscall arguments, lies. The truth, in syscallargs.h, has a pad * value between nbyte and offset. */ static SYSCALL_IMPL(sc_pwrite) { uint32_t d; IO_PRIV_RW priv; int n; syscall_restartable = 1; d = scarg(args,0); priv.iov.base = scarg(args,1); priv.iov.len = scarg(args,2); // 3 is unused padding priv.off = (((uint64_t)scarg(args,4)) << 32) | scarg(args,5); trc(TRC_SYSCALL,"pwrite %ld, %08lx, %ld, %016llx\n",(LI)(int32_t)d,(ULI)priv.iov.base,(LI)(int32_t)priv.iov.len,(ULLI)priv.off); priv.fd = descriptor_arg(d,P_W,"pwrite"); if (! 
priv.fd) SYSCALL_ERR(em_EBADF); if (priv.iov.len < 1) { trc(TRC_SYSCALL,"pwrite -> 0\n"); SYSCALL_RET(0); } n = io_rw(1,&getiov_rw,P_R,&doio_pwrite,&priv,"pwrite"); if (n < 0) { n = os2em_errno(errno); trc(TRC_SYSCALL,"pwrite -> error %d (%s)\n",n,em_strerror(n)); SYSCALL_ERR(n); } trc(TRC_SYSCALL,"pwrite -> %d\n",n); trace_io_data_em("data",priv.iov.base,n); SYSCALL_RET(n); } /* * Implement setgid(2). */ static SYSCALL_IMPL(sc_setgid) { uint32_t gid; gid = scarg(args,0); if (setgid(gid) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement setegid(2). */ static SYSCALL_IMPL(sc_setegid) { uint32_t gid; gid = scarg(args,0); if (setegid(gid) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement seteuid(2). */ static SYSCALL_IMPL(sc_seteuid) { uint32_t uid; uid = scarg(args,0); if (seteuid(uid) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement getrlimit(2). * * The 1.4T getrlimit manpage lies by omission. It does not list * EINVAL, which is generated if the resource ID value is invalid. */ static SYSCALL_IMPL(sc_getrlimit) { uint32_t res; uint32_t ptr; int osres; struct rlimit rl; res = scarg(args,0); ptr = scarg(args,1); switch (res) { case em_RLIMIT_CPU: osres = RLIMIT_CPU; break; case em_RLIMIT_FSIZE: osres = RLIMIT_FSIZE; break; case em_RLIMIT_DATA: osres = RLIMIT_DATA; break; case em_RLIMIT_STACK: osres = RLIMIT_STACK; break; case em_RLIMIT_CORE: osres = RLIMIT_CORE; break; case em_RLIMIT_RSS: osres = RLIMIT_RSS; break; case em_RLIMIT_MEMLOCK: osres = RLIMIT_MEMLOCK; break; case em_RLIMIT_NPROC: osres = RLIMIT_NPROC; break; case em_RLIMIT_NOFILE: osres = RLIMIT_NOFILE; break; default: SYSCALL_ERR(em_EINVAL); break; } if (getrlimit(osres,&rl) < 0) SYSCALL_ERR(os2em_errno(errno)); store_rlimit(ptr,&rl); SYSCALL_RET(0); } /* * Implement setrlimit(2). * * The 1.4T setrlimit manpage lies by omission. It does not list * EINVAL, which is generated if the resource ID value is invalid. 
*/ static SYSCALL_IMPL(sc_setrlimit) { uint32_t res; uint32_t ptr; int osres; struct rlimit rl; res = scarg(args,0); ptr = scarg(args,1); switch (res) { case em_RLIMIT_CPU: osres = RLIMIT_CPU; break; case em_RLIMIT_FSIZE: osres = RLIMIT_FSIZE; break; case em_RLIMIT_DATA: osres = RLIMIT_DATA; break; case em_RLIMIT_STACK: osres = RLIMIT_STACK; break; case em_RLIMIT_CORE: osres = RLIMIT_CORE; break; case em_RLIMIT_RSS: osres = RLIMIT_RSS; break; case em_RLIMIT_MEMLOCK: osres = RLIMIT_MEMLOCK; break; case em_RLIMIT_NPROC: osres = RLIMIT_NPROC; break; case em_RLIMIT_NOFILE: osres = RLIMIT_NOFILE; break; default: SYSCALL_ERR(em_EINVAL); break; } load_rlimit(ptr,&rl); if (setrlimit(osres,&rl) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement mmap(2). */ static SYSCALL_IMPL(sc_mmap) { uint32_t addr; uint32_t len; uint32_t prot; uint32_t flags; uint32_t fd; uint64_t offset; unsigned int bwp; uint32_t end; void *mmrv; addr = scarg(args,0); len = scarg(args,1); prot = scarg(args,2); flags = scarg(args,3); fd = scarg(args,4); // 5 not used - padding offset = (scarg(args,6) * 0x100000000ULL) | scarg(args,7); if (flags & em_MAP_COPY) flags = (flags & ~em_MAP_COPY) | em_MAP_PRIVATE; if ((flags & (em_MAP_SHARED|em_MAP_PRIVATE)) == (em_MAP_SHARED|em_MAP_PRIVATE)) SYSCALL_ERR(em_EINVAL); if (len & 0x80000000) SYSCALL_ERR(em_EINVAL); bwp = offset & (PAGE_SIZE-1); offset -= bwp; len += bwp; len = ROUND_UP(len,PAGE_SIZE); if (len & 0x80000000) SYSCALL_ERR(em_EINVAL); if (flags & em_MAP_FIXED) { addr -= bwp; if (addr & (PAGE_SIZE-1)) SYSCALL_ERR(em_EINVAL); if (addr >= USRSTACK) SYSCALL_ERR(em_EINVAL); end = addr + len; if (end < addr) SYSCALL_ERR(em_EINVAL); } else { addr = find_space(MAXDSIZE,len,USRSTACK); } if (flags & em_MAP_ANON) { if (fd != -(uint32_t)1) SYSCALL_ERR(em_EINVAL); if (len == 0) SYSCALL_RET(addr); mmrv = mmap( 0, len, ((prot&em_PROT_READ)?PROT_READ:0) | ((prot&em_PROT_WRITE)?PROT_WRITE:0) | ((prot&em_PROT_EXEC)?PROT_EXEC:0), MAP_ANON | ((flags & 
em_MAP_SHARED) ? MAP_SHARED : 0) | ((flags & (em_MAP_PRIVATE|em_MAP_COPY)) ? MAP_PRIVATE : 0), -1, 0 ); if (mmrv == MAP_FAILED) SYSCALL_ERR(os2em_errno(errno)); } else { FD *f; if (offset % PAGE_SIZE) { printf("File mmap with offset not a multiple of %d not implemented\n",PAGE_SIZE); top(); } f = descriptor_arg(fd,0,"mmap"); if (! f) SYSCALL_ERR(em_EBADF); if ((prot & (em_PROT_READ|em_PROT_EXEC)) && !(f->prot & P_R)) SYSCALL_ERR(em_EACCES); if ((prot & em_PROT_WRITE) && (flags & em_MAP_SHARED) && !(f->prot & P_W)) SYSCALL_ERR(em_EACCES); if (len == 0) SYSCALL_RET(addr); mmrv = mmap( 0, len, ((prot&em_PROT_READ)?PROT_READ:0) | ((prot&em_PROT_WRITE)?PROT_WRITE:0) | ((prot&em_PROT_EXEC)?PROT_EXEC:0), MAP_FILE | ((flags & em_MAP_SHARED) ? MAP_SHARED : 0) | ((flags & (em_MAP_PRIVATE|em_MAP_COPY)) ? MAP_PRIVATE : 0), f->fd, offset ); if (mmrv == MAP_FAILED) SYSCALL_ERR(os2em_errno(errno)); } memseg_clear_conflict(addr,len,memseg_new_mmap(addr,len,((prot&em_PROT_READ)?P_R:0)|((prot&em_PROT_WRITE)?P_W:0)|((prot&em_PROT_EXEC)?P_X:0),flags,mmrv)); vm_changed = 1; SYSCALL_RET(addr); } /* * Implement lseek(2). */ static SYSCALL_IMPL(sc_lseek) { uint32_t d; uint64_t off; uint32_t whence; FD *fd; off_t e; int oswhence; d = scarg(args,0); // args[1] is unused padding off = (((uint64_t)scarg(args,2)) << 32) | scarg(args,3); whence = scarg(args,4); fd = descriptor_arg(d,0,"lseek"); if (! fd) SYSCALL_ERR(em_EBADF); switch (whence) { case em_SEEK_SET: oswhence = SEEK_SET; break; case em_SEEK_CUR: oswhence = SEEK_CUR; break; case em_SEEK_END: oswhence = SEEK_END; break; default: SYSCALL_ERR(em_EINVAL); break; } e = lseek(fd->fd,off,oswhence); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET2((e>>16)>>16,e); } /* * Implement ftruncate(2). */ static SYSCALL_IMPL(sc_ftruncate) { uint32_t d; uint64_t off; FD *fd; d = scarg(args,0); // args[1] is unused padding off = (((uint64_t)scarg(args,2)) << 32) | scarg(args,3); fd = descriptor_arg(d,P_W,"ftruncate"); if (! 
fd) SYSCALL_ERR(em_EBADF); if (ftruncate(fd->fd,off) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement __sysctl, the syscall behind sysctl(3). */ static SYSCALL_IMPL(sc___sysctl) { uint32_t mibp; uint32_t v; int nmib; uint32_t mib[16]; int i; // MIB length in range? v = scarg(args,1); if ((v < 2) || (v > 16)) { trc(TRC_SYSCALL,"sysctl: MIB length %lu not in range 2..16 -> EINVAL\n",(ULI)v); SYSCALL_ERR(em_EINVAL); } nmib = v; // We don't support setting (yet) if (scarg(args,4)) { trc(TRC_SYSCALL,"sysctl: setting not yet supported -> EPERM\n"); SYSCALL_ERR(em_EPERM); } // Read MIB mibp = scarg(args,0); for (i=nmib-1;i>=0;i--) mib[i] = mem_get_4(mibp+(i<<2)); trace_io_data_em("MIB",mibp,nmib*4); // Do it! switch (mib[0]) { case em_CTL_KERN: if (em_sysctl_kern(mib+1,nmib-1,scarg(args,2),scarg(args,3),rv)) return; break; case em_CTL_VM: if (em_sysctl_vm(mib+1,nmib-1,scarg(args,2),scarg(args,3),rv)) return; break; case em_CTL_HW: if (em_sysctl_hw(mib+1,nmib-1,scarg(args,2),scarg(args,3),rv)) return; break; } printf("Unsupported sysctl "); for (i=0;ifd,tvp) < 0) SYSCALL_ERR(os2em_errno(errno)); SYSCALL_RET(0); } /* * Implement poll(2). */ static SYSCALL_IMPL(sc_poll) { uint32_t epfds; uint32_t npfds; uint32_t timeout; uint32_t *fdv = 0; uint16_t *evv = 0; struct pollfd *pfds = 0; unsigned char *inval = 0; int i; uint32_t efd; FD *fd; uint16_t ev; uint32_t base; int ninval; int retv; epfds = scarg(args,0); npfds = scarg(args,1); timeout = scarg(args,2); // Would npfds*sizeof(struct pollfds) overflow? ENOMEM. 
if ( (npfds >= 0x20000000) || // emulated (npfds >= ((~(size_t)0) / sizeof(struct pollfd))) // emulator ) SYSCALL_ERR(em_ENOMEM); pfds = malloc(npfds*sizeof(struct pollfd)); inval = malloc(npfds); fdv = malloc(npfds*sizeof(uint32_t)); evv = malloc(npfds*sizeof(uint16_t)); #define FREETHINGS() do { free(pfds); free(inval); free(fdv); free(evv); } while (0) if (!pfds || !inval || !fdv || !evv) { FREETHINGS(); SYSCALL_ERR(em_ENOMEM); } ninval = 0; for (i=0;i ",i,(LI)(int32_t)efd,fd->fd,(ULI)ev); inval[i] = 0; pfds[i].fd = fd->fd; if (trc_if(TRC_SYSCALL)) { const char *pref; pref = ""; #define BIT(n) do { if (ev & em_##n) { trc(TRC_SYSCALL,"%s%s",pref,#n); pref = "|"; } } while (0) BIT(POLLIN); BIT(POLLPRI); BIT(POLLOUT); BIT(POLLRDNORM); BIT(POLLWRNORM); BIT(POLLRDBAND); BIT(POLLWRBAND); #undef BIT if (ev & ~(em_POLLIN|em_POLLPRI|em_POLLOUT|em_POLLRDNORM| em_POLLWRNORM|em_POLLRDBAND|em_POLLWRBAND)) { trc(TRC_SYSCALL,"%s0x%lx",pref, (ULI)(ev & ~(em_POLLIN|em_POLLPRI|em_POLLOUT|em_POLLRDNORM| em_POLLWRNORM|em_POLLRDBAND|em_POLLWRBAND)) ); pref = "|"; } if (! pref[0]) trc(TRC_SYSCALL,"0"); trc(TRC_SYSCALL,"\n"); } pfds[i].events = ((ev & em_POLLIN) ? POLLIN : 0) | ((ev & em_POLLPRI) ? POLLPRI : 0) | ((ev & em_POLLOUT) ? POLLOUT : 0) | ((ev & em_POLLRDNORM) ? POLLRDNORM : 0) | ((ev & em_POLLWRNORM) ? POLLWRNORM : 0) | ((ev & em_POLLRDBAND) ? POLLRDBAND : 0) | ((ev & em_POLLWRBAND) ? 
POLLWRBAND : 0); } } if (ninval) { for (i=0;ifd,osbuf,size); if (e < 0) SYSCALL_ERR(os2em_errno(errno)); oso = 0; emo = 0; left = size; while (oso < e) { if (oso+_DIRENT_MINSIZE(osde) > e) panic("getdents() partial (min)"); osde = (void *)(osbuf+oso); if (oso+osde->d_namlen > e) panic("getdents() partial (actual)"); if (osde->d_namlen > 255) { printf("getdents: d_namlen %d > 255, skipping entry\n",(int)osde->d_namlen); } else { l = 8 + ROUND_UP(osde->d_namlen+1,4); if (left < l) panic("getdents() overrun"); mem_set_4(buf+emo,osde->d_fileno); mem_set_2(buf+emo+4,l); switch (osde->d_type) { case DT_UNKNOWN: dt = em_DT_UNKNOWN; break; case DT_FIFO: dt = em_DT_FIFO; break; case DT_CHR: dt = em_DT_CHR; break; case DT_DIR: dt = em_DT_DIR; break; case DT_BLK: dt = em_DT_BLK; break; case DT_REG: dt = em_DT_REG; break; case DT_LNK: dt = em_DT_LNK; break; case DT_SOCK: dt = em_DT_SOCK; break; case DT_WHT: dt = em_DT_WHT; break; default: dt = em_DT_UNKNOWN; break; } mem_set_1(buf+emo+6,dt); mem_set_1(buf+emo+7,osde->d_namlen); copyout(&osde->d_name[0],buf+emo+8,osde->d_namlen,"getdents",&free,osbuf); copyout(&nulbuf[0],buf+emo+8+osde->d_namlen,l-(8+osde->d_namlen),"getdents",&free,osbuf); emo += l; } oso += osde->d_reclen; } SYSCALL_RET(emo); } /* * Implement lchmod(2). */ static SYSCALL_IMPL(sc_lchmod) { const char *path; NULTERM_STATUS nts; uint32_t mode; int e; path = nulterm_scarg(scarg(args,0),&nts); mode = scarg(args,1); if (lchmod(path,mode) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement lchown(2). */ static SYSCALL_IMPL(sc_lchown) { const char *path; NULTERM_STATUS nts; uint32_t eu; uint32_t eg; int e; path = nulterm_scarg(scarg(args,0),&nts); eu = scarg(args,1); eg = scarg(args,2); if (lchown(path,(eu==-(uint32_t)1)?-1:eu,(eg==-(uint32_t)1)?-1:eg) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement lutimes(2). 
*/ static SYSCALL_IMPL(sc_lutimes) { const char *path; NULTERM_STATUS nts; uint32_t tp; struct timeval t[2]; struct timeval *tvp; int e; path = nulterm_scarg(scarg(args,0),&nts); tp = scarg(args,1); if (tp == 0) { tvp = 0; } else { t[0].tv_sec = mem_get_8(tp); t[0].tv_usec = mem_get_4(tp+8); t[1].tv_sec = mem_get_8(tp+16); t[1].tv_usec = mem_get_4(tp+24); tvp = &t[0]; } if (lutimes(path,tvp) < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); SYSCALL_RET(0); } /* * Implement __stat13, versioned stat(2). */ static SYSCALL_IMPL(sc___stat13) { const char *path; uint32_t stp; struct stat stb; int e; NULTERM_STATUS nts; path = nulterm_scarg(scarg(args,0),&nts); stp = scarg(args,1); e = stat(path,&stb); if (e < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); store_stat(stp,&stb); SYSCALL_RET(0); } /* * Implement __fstat13, versioned fstat(2). */ static SYSCALL_IMPL(sc___fstat13) { uint32_t d; uint32_t stp; struct stat stb; FD *fd; d = scarg(args,0); stp = scarg(args,1); fd = descriptor_arg(d,0,"__fstat13"); if (! fd) SYSCALL_ERR(em_EBADF); if (fstat(fd->fd,&stb) < 0) panic("impossible fstat failure"); store_stat(stp,&stb); SYSCALL_RET(0); } /* * Implement __lstat13, versioned lstat(2). */ static SYSCALL_IMPL(sc___lstat13) { const char *path; uint32_t stp; struct stat stb; int e; NULTERM_STATUS nts; path = nulterm_scarg(scarg(args,0),&nts); stp = scarg(args,1); e = lstat(path,&stb); if (e < 0) { e = errno; nulterm_done(&nts); SYSCALL_ERR(os2em_errno(e)); } nulterm_done(&nts); store_stat(stp,&stb); SYSCALL_RET(0); } /* * Implement __sigaltstack14, versioned sigaltstack(2). * * We don't yet support on-signal-stack signal delivery. 
*/
static SYSCALL_IMPL(sc___sigaltstack14)
{ uint32_t ssp;
  uint32_t ossp;
  uint32_t ss_base;
  uint32_t ss_size;
  uint32_t ss_flags;
  // ssp points at the new (emulated) stack spec, or 0 to just query;
  // ossp, if nonzero, receives the previous settings.
  ssp = scarg(args,0);
  ossp = scarg(args,1);
  if (ssp)
   { // Emulated stack_t layout: ss_base, ss_size, ss_flags, four
     // bytes each.
     ss_base = mem_get_4(ssp);
     ss_size = mem_get_4(ssp+4);
     ss_flags = mem_get_4(ssp+8);
   }
  if (ossp)
   { // Report the old settings before (possibly) changing them.
     mem_set_4(ossp,s.sigstack_base);
     mem_set_4(ossp+4,s.sigstack_size);
     mem_set_4(ossp+8,(s.onsigstack?em_SS_ONSTACK:0)|(s.sigstack_enabled?0:em_SS_DISABLE));
   }
  if (! ssp) SYSCALL_RET(0);
  // Validate the new spec: reject unknown flag bits...
  if (ss_flags & ~(uint32_t)em_SS_ALLBITS) SYSCALL_ERR(em_EINVAL);
  if (ss_flags & em_SS_DISABLE)
   { // ...refuse to disable the stack while running on it...
     if (s.onsigstack) SYSCALL_ERR(em_EINVAL);
   }
  else
   { // ...and require a minimally-sized stack when enabling.
     if (ss_size < em_MINSIGSTKSZ) SYSCALL_ERR(em_ENOMEM);
   }
  s.sigstack_enabled = ! (ss_flags & em_SS_DISABLE);
  if (ss_flags & em_SS_ONSTACK)
   { // See header comment: on-signal-stack delivery is unimplemented.
     printf("Signal stack support incomplete\n");
     top();
   }
  s.sigstack_base = ss_base;
  s.sigstack_size = ss_size;
  SYSCALL_RET(0);
}
/*
 * Implement __vfork14, versioned vfork(2).
 *
 * See the comment on sc_fork for a discussion of return value
 * semantics.
 *
 * We have to help the OS a bit.  Most of the semantics of vfork are
 * taken care of by the underlying OS.  But there are some cases where
 * OS state can be changed and, under real 1.4T, would be handled
 * entirely by the kernel, but in our case has traces in userland
 * memory (which thus survive the return to the parent).  The only
 * example at this writing is file descriptors, which correspond to
 * not only OS file descriptors but small pieces of memory as well.
 * This is why VFORKBACKOUT exists.
 *
 * Furthermore, we can't return from here in a vforked child, or we'll
 * trash the parent's stack (our stack, not the emulated machine's
 * stack).  So the actual OS-level vfork has to happen all the way up
 * in run().
 *
 * We could, maybe, arrange to share emulated VM with an OS child
 * created with fork() rather than vfork(), but then we have the
 * problem of breaking that association when doing an emulated fork().
* * In order to leverage dosyscall()'s logic, though, run() arranges to * reenter us to handle syscall return. Keeping track of the control * flow is what vfork_stage is for. It is VFORK_NONE during normal * operation. We set it to VFORK_START before returning the first * time. run() then sets it to VFORK_FAIL or VFORK_SUCCESS before we * are reentered, with vfork_value set to the errno (VFORK_FAIL) or * PID (VFORK_SUCCESS) to return. */ static SYSCALL_IMPL(sc___vfork14) { trc(TRC_VFORK,"%s entered, stage = %d (%s)\n",__func__,(int)vfork_stage,vfork_stage_str(vfork_stage)); switch (vfork_stage) { case VFORK_NONE: window_flush(); vfork_stage = VFORK_START; alert_run = 1; s.npc = s.pc; s.pc = s.xa; rv->flags |= SCRV_BYPASS; return; break; case VFORK_FAIL: vfork_stage = VFORK_NONE; SYSCALL_ERR(vfork_value); break; case VFORK_SUCCESS: vfork_stage = VFORK_NONE; if (vfork_value) { trc(TRC_PROC,"vfork parent, child %lu\n",(ULI)vfork_value); } else { trc(TRC_PROC,"vfork child, parent %lu\n",(ULI)getppid()); } SYSCALL_RET2(vfork_value?:0,!vfork_value); break; default: panic("invalid vfork stage %d in %s",(int)vfork_stage,__func__); break; } } /* * Implement preadv(2). * * The comment in sys_preadv() in vfs_syscalls.c, appearing to give the * syscall arguments, lies. The truth, in syscallargs.h, has a pad * value between iovcnt and offset. */ static SYSCALL_IMPL(sc_preadv) { uint32_t d; IO_PRIV_RWV priv; int n; int i; uint32_t p; uint32_t iovbase; syscall_restartable = 1; d = scarg(args,0); iovbase = scarg(args,1); priv.niov = scarg(args,2); // 3 is unused padding priv.off = (((uint64_t)scarg(args,4)) << 32) | scarg(args,5); trc(TRC_SYSCALL,"preadv %ld, %08lx, %ld, %016llx\n",(LI)(int32_t)d,(ULI)iovbase,(LI)(int32_t)priv.niov,(ULLI)priv.off); priv.fd = descriptor_arg(d,P_R,"preadv"); if (! 
priv.fd) SYSCALL_ERR(em_EBADF); if (priv.niov < 1) { trc(TRC_SYSCALL,"preadv -> 0\n"); SYSCALL_RET(0); } if (priv.niov > 1024) SYSCALL_ERR(em_EINVAL); priv.iov = malloc(priv.niov*sizeof(*priv.iov)); p = iovbase; for (i=0;i error %d (%s)\n",n,em_strerror(n)); free(priv.iov); SYSCALL_ERR(n); } trc(TRC_SYSCALL,"preadv -> %d\n",n); if (io_trace_size) { int left; int nt; int x; left = n; if (left > io_trace_size) left = io_trace_size; x = 0; while (left > 0) { nt = left; if (nt > priv.iov[x][1]) nt = priv.iov[x][1]; trace_io_data_em("data",priv.iov[x][0],nt); left -= nt; x ++; } } free(priv.iov); SYSCALL_RET(n); } /* * Implement pwritev(2). * * The comment in sys_pwritev() in vfs_syscalls.c, appearing to give * the syscall arguments, lies. The truth, in syscallargs.h, has a * pad value between iovcnt and offset. */ static SYSCALL_IMPL(sc_pwritev) { uint32_t d; IO_PRIV_RWV priv; int n; int i; uint32_t p; uint32_t iovbase; syscall_restartable = 1; d = scarg(args,0); iovbase = scarg(args,1); priv.niov = scarg(args,2); // 3 is unused padding priv.off = (((uint64_t)scarg(args,4)) << 32) | scarg(args,5); trc(TRC_SYSCALL,"pwritev %ld, %08lx, %ld, %016llx\n",(LI)(int32_t)d,(ULI)iovbase,(LI)(int32_t)priv.niov,(ULLI)priv.off); priv.fd = descriptor_arg(d,P_W,"pwritev"); if (! priv.fd) SYSCALL_ERR(em_EBADF); if (priv.niov < 1) { trc(TRC_SYSCALL,"pwritev -> 0\n"); SYSCALL_RET(0); } if (priv.niov > 1024) SYSCALL_ERR(em_EINVAL); priv.iov = malloc(priv.niov*sizeof(*priv.iov)); p = iovbase; for (i=0;i error %d (%s)\n",n,em_strerror(n)); free(priv.iov); SYSCALL_ERR(n); } trc(TRC_SYSCALL,"pwritev -> %d\n",n); if (io_trace_size) { int left; int nt; int x; left = n; if (left > io_trace_size) left = io_trace_size; x = 0; while (left > 0) { nt = left; if (nt > priv.iov[x][1]) nt = priv.iov[x][1]; trace_io_data_em("data",priv.iov[x][0],nt); left -= nt; x ++; } } free(priv.iov); SYSCALL_RET(n); } /* * Implement __sigaction14, versioned sigaction(2). 
*/ static SYSCALL_IMPL(sc___sigaction14) { uint32_t sig; uint32_t act; uint32_t oact; uint32_t flags; uint32_t handler; int i; EMSIGSET mask; sig = scarg(args,0); act = scarg(args,1); oact = scarg(args,2); if ((sig < 1) || (sig >= em__NSIG)) SYSCALL_ERR(em_EINVAL); if (act) { handler = mem_get_4(act); for (i=0;i<4;i++) mask.bits[i] = mem_get_4(act+4+(i<<2)); flags = mem_get_4(act+20); if (flags & em_SA_ONSTACK) { printf("Signal stack support incomplete\n"); top(); } } if (oact) { mem_set_4(oact,s.sigh[sig].handler); for (i=0;i<4;i++) mem_set_4(oact+4+(i*4),s.sigh[sig].mask.bits[i]); mem_set_4(oact+20,s.sigh[sig].flags); } if (act) { switch (sig) { case em_SIGKILL: case em_SIGSTOP: if (handler != em_SIG_DFL) SYSCALL_ERR(em_EINVAL); break; } if (trc_if(TRC_SIGNAL)) { FILE *f; int j; const char *pref; const char *sn; SYM *hsym; uint32_t fv; f = trc_f(TRC_SIGNAL); fprintf(f,"installing handler %08lx",(ULI)handler); switch (handler) { case em_SIG_DFL: fprintf(f," (SIG_DFL)"); break; case em_SIG_IGN: fprintf(f," (SIG_IGN)"); break; case em_SIG_ERR: fprintf(f," (SIG_ERR)"); break; default: hsym = lookup_fxn(handler); if (hsym) fprintf(f," (%s)",hsym->name); break; } fprintf(f," for %lu",(ULI)sig); sn = em_signame(sig,0); if (sn) fprintf(f," (%s)",sn); fprintf(f,", mask "); print_em_sig_mask(f,&mask); fprintf(f," flags %08lx",(ULI)flags); pref = " ("; fv = flags; for (j=0;sigaction_flags[j].name;j++) { if (fv & sigaction_flags[j].bit) { fprintf(f,"%s%s",pref,sigaction_flags[j].name); pref = "|"; fv &= ~sigaction_flags[j].bit; } } if (pref[0] != ' ') { if (fv) fprintf(f,"|%lx",(ULI)fv); fprintf(f,")"); } else { if (fv) fprintf(f," (unrecognized)"); } fprintf(f,"\n"); } s.sigh[sig].handler = handler; s.sigh[sig].mask = mask; s.sigh[sig].flags = flags; if ( (handler == em_SIG_IGN) || ((handler == em_SIG_DFL) && (sigdef[sig] == SIGDEF_IGNORE)) ) { if (! 
(s.ignsigs & (1ULL << sig))) { s.ignsigs |= 1ULL << sig; if (handler == em_SIG_IGN) { trc(TRC_SIGNAL,"emulator now setting %s ignored\n",em_signame(sig,"(unknown)")); set_our_catcher(em2os_signal(sig),SIG_IGN); } else { trc(TRC_SIGNAL,"emulator now setting %s default\n",em_signame(sig,"(unknown)")); set_our_catcher(em2os_signal(sig),SIG_DFL); } } s.sigpend[sig] = 0; } else { if (s.ignsigs & (1ULL << sig)) { s.ignsigs &= ~(1ULL << sig); trc(TRC_SIGNAL,"emulator now catching %s\n",em_signame(sig,"(unknown)")); set_our_catcher(em2os_signal(sig),&catch_signal); } } } SYSCALL_RET(0); } /* * Implement __sigprocmask14, versioned sigprocmask(2). */ static SYSCALL_IMPL(sc___sigprocmask14) { uint32_t how; uint32_t set; uint32_t oset; uint64_t mask; how = scarg(args,0); set = scarg(args,1); oset = scarg(args,2); if (oset) { mem_set_4(oset,s.sigmask>>1); mem_set_4(oset+4,s.sigmask>>33); mem_set_4(oset+8,0); mem_set_4(oset+12,0); } if (set) { switch (how) { default: SYSCALL_ERR(em_EINVAL); break; case em_SIG_BLOCK: case em_SIG_UNBLOCK: case em_SIG_SETMASK: break; } mask = mem_get_4(set); mask |= mem_get_4(set+4) * 0x100000000ULL; mask <<= 1; mask &= SIG_ALLMASK; switch (how) { case em_SIG_BLOCK: s.sigmask |= mask; break; case em_SIG_UNBLOCK: s.sigmask &= ~mask; break; case em_SIG_SETMASK: s.sigmask = mask; break; } s.sigmask &= SIG_CANBLOCK; if (trc_if(TRC_SIGNAL)) { FILE *f; f = trc_f(TRC_SIGNAL); fprintf(f,"signal mask now "); if (s.sigmask == 0) { fprintf(f,"empty"); } else { int i; const char *pref; pref = ""; mask = s.sigmask; for (i=1;i> i) & 1U) { const char *n; n = em_signame(i,0); if (n) fprintf(f,"%s%s",pref,n); else fprintf(f,"%s?%d",pref,i); mask &= ~(((uint64_t)1) << i); pref = "|"; } } if (mask) fprintf(f,"+0x%llx",(ULLI)mask); } fprintf(f,"\n"); } alert_run = 1; } SYSCALL_RET(0); } /* * Implement __sigsuspend14, versioned sigsuspend(2). * * There is a bit of a problem here. 
On real hardware, the syscall * returns and signal delivery happens immediately after (before * userland can do much with the syscall return); the signal handler * runs between the syscall returning to userland and the libc stub * returning to its caller. * * But, here, simply doing this naïvely, calling deliver_signals() * here, does not work. When we do that, signal delivery saves - and * restores - userland state that does not include the syscall return; * syscall return then mangles the machine state set up by signal * delivery. Arguably ideal would be to return from here and let * run()'s check for signals handle delivery. But it's complicated * enough to tell whether deliver_signals() would actually deliver * anything that it's difficult to do. So, instead, we replace the * usual SYSCALL_ERR(em_EINTR) with frobbing of the first-saved signal * state, so that when the first-delivered signal's handler returns it * restores the return-to-userland state. This is why deliver_signals * takes an argument: so that we can fiddle it here. * * Also, there is a race here. If implemented naïvely, as just * while (1) * { if (anysigpend && deliver_signals(&fp) && fp) break; * sigemptyset(&osmask); * sigsuspend(&osmask); * } * then a signal could arrive between checking anysigpend and entering * sigsuspend. So we burn two more syscalls to block signals during * the critical part of that test. 
*/ static SYSCALL_IMPL(sc___sigsuspend14) { uint32_t emset; uint64_t oldmask; sigset_t osmask; uint32_t fp; sigset_t osblock; sigset_t osold; EMSIGSET emmask; FILE *f; emset = scarg(args,0); oldmask = s.sigmask; emmask.bits[0] = mem_get_4(emset); emmask.bits[1] = mem_get_4(emset+4); emmask.bits[2] = mem_get_4(emset+8); emmask.bits[3] = mem_get_4(emset+12); f = trc_f(TRC_SIGNAL); if (f) { fprintf(f,"__sigsuspend14 entry, mask = "); print_em_sig_mask(f,&emmask); fprintf(f,"\n"); } s.sigmask = ((emmask.bits[0] | (((uint64_t)emmask.bits[1]) << 32)) << 1) & SIG_CANBLOCK; trc(TRC_SIGNAL,"__sigsuspend14 entering loop\n"); sigfillset(&osblock); sigemptyset(&osold); // XXX API botch sigprocmask(SIG_BLOCK,&osblock,&osold); while (1) { if (anysigpend) { trc(TRC_SIGNAL,"__sigsuspend14 noticing anysigpend\n"); if (deliver_signals(&fp)) { trc(TRC_SIGNAL,"__sigsuspend14 deliver_signals returned true\n"); if (fp) { trc(TRC_SIGNAL,"__sigsuspend14 breaking from loop\n"); break; } } } sigemptyset(&osmask); trc(TRC_SIGNAL,"__sigsuspend14 calling underlying sigsuspend\n"); sigsuspend(&osmask); trc(TRC_SIGNAL,"__sigsuspend14 underlying sigsuspend returned\n"); } sigprocmask(SIG_SETMASK,&osold,0); trc(TRC_SIGNAL,"__sigsuspend14 out of loop\n"); /* * Error return sets CC_C and leaves the errno in %o0, and we want to * reset the signal mask to oldmask. So, fiddle the * delivered-signal saved state correspondingly. */ mem_set_4(fp+20,oldmask>>1); // sf.sf_sc.__sc_mask13 mem_set_4(fp+36,mem_get_4(fp+36)|(em_PSR_CC_C<>1); // sf.sf_sc.sc_mask, first word mem_set_4(fp+52,oldmask>>33); // sf.sf_sc.sc_mask, second word rv->flags |= SCRV_BYPASS; } /* * Implement __sigreturn14, versioned sigreturn(2). * * There are subtleties here, mostly bearing the use of this in the * implementation of longjmp. On return, real hardware reloads the * ins and locals (%i* and %l*) from the register save area based off * the restored %sp. This is important to get them right when jumping * up the stack. 
In a real kernel it happens as part of syscall
 * return; here we have to do it manually.  But, in case the longjmp
 * doesn't switch stack frames, we have to make sure the current
 * values are in the register save area.
 *
 * But first, we want to flush all windows to the stack, so we can give
 * a clean environment to the returned-to code.  We actually need to,
 * for once, push the current window to the stack too, in case that's
 * the window being restored.  The kernel gets this for free, because
 * the syscall implementation runs in a window inner to the innermost
 * user window.
 */
static SYSCALL_IMPL(sc___sigreturn14)
{ uint32_t ctx;
  uint32_t pc;
  uint32_t npc;
  uint32_t psr;
  uint32_t g1;
  uint32_t o0;
  uint32_t sp;
  uint32_t maskl;
  uint32_t maskh;
  int i;
  window_flush(); // does a save_cwindow()
  spill_window(s.cwp);
  s.iwp = cwp_r(s.cwp);
  ctx = scarg(args,0);
  trc(TRC_SYSCALL,"__sigreturn14 restoring from ctx=%08lx\n",(ULI)ctx);
  // Unpack the context the signal trampoline handed back.  The
  // offsets presumably match the emulated struct sigcontext (sp, pc,
  // npc, psr, g1, o0, then the two-word signal mask) -
  // NOTE(review): confirm against the emulated sigcontext definition.
  sp = mem_get_4(ctx+8);
  pc = mem_get_4(ctx+12);
  npc = mem_get_4(ctx+16);
  psr = mem_get_4(ctx+20);
  g1 = mem_get_4(ctx+24);
  o0 = mem_get_4(ctx+28);
  maskl = mem_get_4(ctx+32);
  maskh = mem_get_4(ctx+36);
  // Both PCs must be word-aligned.
  if ((pc | npc) & 3) SYSCALL_ERR(em_EINVAL);
  s.cc = psr_to_cc(psr);
  if (psr & em_PSR_EF) s.flags |= SF_FPU;
  s.pc = pc;
  s.npc = npc;
  s.regs[R_G1] = g1;
  s.regs[R_O0] = o0;
  s.regs[R_SP] = sp; // no sigstack; on-stack delivery not implemented
  // Rebuild the internal 64-bit mask; the <<1 matches the convention
  // used by sc___sigprocmask14 (which shifts >>1 when exporting).
  s.sigmask = (((((uint64_t)maskh) << 32) | maskl) << 1) & SIG_CANBLOCK;
  if (trc_if(TRC_SYSCALL))
   { FILE *f;
     f = trc_f(TRC_SYSCALL);
     fprintf(f,"__sigreturn14 restored pc %08lx npc %08lx g1 %08lx o0 %08lx sp %08lx cc ", (ULI)s.pc,(ULI)s.npc, (ULI)s.regs[R_G1], (ULI)s.regs[R_O0], (ULI)s.regs[R_SP]);
     print_cc(f,s.cc);
     fprintf(f,"\n");
   }
  // Reload the locals and ins from the register save area at the
  // restored %sp, as real hardware would on returning to that window
  // (see the header comment).
  for (i=0;i<8;i++) s.regs[R_L0+i] = mem_get_4(sp+(i*4));
  for (i=0;i<8;i++) s.regs[R_I0+i] = mem_get_4(sp+32+(i*4));
  rv->flags |= SCRV_BYPASS;
}
/*
 * Implement __getcwd, the syscall behind getcwd(3).
* * __getcwd's API is not documented (I got it by UTSLing); I see no * good way to implement it in terms of getcwd(3), so we call the * underlying OS's __getcwd(). Ugh. * * __getcwd has no declaration visible outside libc. So, we declare it * ourselves. Double ugh. */ extern int __getcwd(char *, size_t); static SYSCALL_IMPL(sc___getcwd) { uint32_t bufp; uint32_t len; char *osbuf; int e; bufp = scarg(args,0); len = scarg(args,1); osbuf = malloc(len?:1); if (! osbuf) { printf("Out of memory allocating __getcwd() buffer\n"); top(); } e = __getcwd(osbuf,len); if (e < 0) { e = errno; free(osbuf); SYSCALL_ERR(os2em_errno(e)); } if (e > len) panic("impossible __getcwd return"); copyout(osbuf,bufp,e,"__getcwd",&free,osbuf); free(osbuf); SYSCALL_RET(e); } #define F(n) [em_SYS_##n] = &sc_##n static void (*sysent_fn[])(SCARGS *, SCRV *) = { F(exit), // 1 F(fork), // 2 F(read), // 3 F(write), // 4 F(open), // 5 F(close), // 6 F(wait4), // 7 F(link), // 9 F(unlink), // 10 F(chdir), // 12 F(fchdir), // 13 F(chmod), // 15 F(chown), // 16 F(break), // 17 F(getfsstat), // 18 F(getpid), // 20 F(setuid), // 23 F(getuid), // 24 F(geteuid), // 25 F(recvfrom), // 29 F(access), // 33 F(fchflags), // 35 F(kill), // 37 F(getppid), // 39 F(dup), // 41 F(pipe), // 42 F(getegid), // 43 F(getgid), // 47 F(__getlogin), // 49 F(ioctl), // 54 F(revoke), // 56 F(symlink), // 57 F(readlink), // 58 F(execve), // 59 F(umask), // 60 F(munmap), // 73 F(madvise), // 75 F(getgroups), // 79 F(setgroups), // 80 F(getpgrp), // 81 F(setpgid), // 82 F(setitimer), // 83 F(dup2), // 90 F(fcntl), // 92 F(select), // 93 F(fsync), // 95 F(setpriority), // 96 F(socket), // 97 F(connect), // 98 F(getpriority), // 100 F(setsockopt), // 105 F(gettimeofday), // 116 F(getrusage), // 117 F(getsockopt), // 118 F(readv), // 120 F(writev), // 121 F(fchown), // 123 F(fchmod), // 124 F(rename), // 128 F(flock), // 131 F(mkfifo), // 132 F(sendto), // 133 F(socketpair), // 135 F(mkdir), // 136 F(rmdir), // 137 F(utimes), // 138 
F(statfs), // 157 F(fstatfs), // 158 F(pread), // 173 F(pwrite), // 174 F(setgid), // 181 F(setegid), // 182 F(seteuid), // 183 F(getrlimit), // 194 F(setrlimit), // 195 F(mmap), // 197 F(lseek), // 199 F(ftruncate), // 201 F(__sysctl), // 202 F(futimes), // 206 F(poll), // 209 F(nanosleep), // 240 F(getdents), // 272 F(lchmod), // 274 F(lchown), // 275 F(lutimes), // 276 F(__stat13), // 278 F(__fstat13), // 279 F(__lstat13), // 280 F(__sigaltstack14), // 281 F(__vfork14), // 282 F(preadv), // 289 F(pwritev), // 290 F(__sigaction14), // 291 F(__sigprocmask14), // 293 F(__sigsuspend14), // 294 F(__sigreturn14), // 295 F(__getcwd), // 296 [0] = 0 }; #undef F /* * Perform a syscall. This is called when a trap instruction specifies * the code that means "do a syscall". We set up an SCARGS for any * arguments, extract the call number, handle __syscall here (we don't * cascade __syscall references), trace the arguments, perform the * call, trace the return values or error, and return by whichever * method is appropriate. 
*/
static void dosyscall(uint32_t id)
{
 SCARGS args;
 uint32_t callno;
 SCRV rv;
 uint32_t g2;
 uint32_t g7;
 void (*fn)(SCARGS *, SCRV *);
 FILE *f;

 // Snapshot %g2/%g7 before the call can change register state; they
 // are the alternative return addresses selected by the G2R/G7R flags.
 g2 = s.regs[R_G2];
 g7 = s.regs[R_G7];
 // Arguments arrive in %o0-%o5, overflow on the stack (args.sp).
 args.nreg = 6;
 args.regs[0] = s.regs[R_O0];
 args.regs[1] = s.regs[R_O1];
 args.regs[2] = s.regs[R_O2];
 args.regs[3] = s.regs[R_O3];
 args.regs[4] = s.regs[R_O4];
 args.regs[5] = s.regs[R_O5];
 args.sp = s.regs[R_SP];
 // The call number may carry return-convention flag bits; strip them.
 callno = id & ~(em_SYSCALL_G2RFLAG | em_SYSCALL_G7RFLAG);
 trc(TRC_SYSCALL,"(%llu) syscall CALL %d (",s.instrs,callno);
 if (callno == em_SYS___syscall)
  { // __syscall(2): real call number is in the second argument slot;
    // shift the remaining register arguments down.  We do not cascade
    // __syscall-of-__syscall.
    callno = args.regs[1];
    args.regs[0] = args.regs[2];
    args.regs[1] = args.regs[3];
    args.regs[2] = args.regs[4];
    args.regs[3] = args.regs[5];
    args.nreg = 4;
    trc(TRC_SYSCALL,"__syscall -> %d (",callno);
    if ((callno >= nsysent) || !sysent[callno].name)
     { trc(TRC_SYSCALL,"?)");
     }
    else
     { trc(TRC_SYSCALL,"%s)",sysent[callno].name);
     }
  }
 else
  { if ((callno >= nsysent) || !sysent[callno].name)
     { trc(TRC_SYSCALL,"?");
     }
    else
     { trc(TRC_SYSCALL,"%s",sysent[callno].name);
     }
  }
 // Trace the argument values only when syscall tracing is live.
 f = trc_f(TRC_SYSCALL);
 if (f)
  { fprintf(f,") (");
    print_syscall_values(f,sysent[callno].args,&args);
    fprintf(f,")\n");
  }
 rv.err = 0;
 rv.flags = ((id & em_SYSCALL_G2RFLAG) ? SCRV_G2R : 0) |
            ((id & em_SYSCALL_G7RFLAG) ? SCRV_G7R : 0);
 if (callno >= nsysent)
  { printf("Unknown syscall %08lx\n",(ULI)callno);
    top();
  }
 fn = sysent_fn[callno];
 if (! fn)
  { printf("Unknown syscall %08lx\n",(ULI)callno);
    top();
  }
 // Implementations set syscall_restartable if EINTR should restart.
 syscall_restartable = 0;
 (*fn)(&args,&rv);
 if (rv.flags & SCRV_BYPASS)
  { // Implementation handled return-state itself (eg, exec, sigreturn).
    trc(TRC_SYSCALL,"(%llu) syscall BYPASS\n",s.instrs);
    return;
  }
 trc(TRC_SYSCALL,"(%llu) syscall RET ",s.instrs);
 if (rv.err == 0)
  { f = trc_f(TRC_SYSCALL);
    if (f)
     { fprintf(f,"success");
       // 'V' in the rv descriptor means void: no values to print.
       if (sysent[callno].rv[0] != 'V')
        { SCARGS a;
          a.regs[0] = rv.rv;
          a.regs[1] = rv.rv2;
          a.sp = 0;
          a.nreg = 2;
          fprintf(f," ");
          print_syscall_values(f,sysent[callno].rv,&a);
        }
       fprintf(f,", returning to ");
     }
    // Three return conventions: jump to saved %g2, jump to saved %g7,
    // or fall through to pc/npc with the carry bit cleared (success).
    if (rv.flags & SCRV_G2R)
     { s.pc = g2;
       s.npc = s.pc + 4;
       trc(TRC_SYSCALL,"%%g2\n");
     }
    else if (rv.flags & SCRV_G7R)
     { s.pc = g7;
       s.npc = s.pc + 4;
       trc(TRC_SYSCALL,"%%g7\n");
     }
    else
     { s.cc &= ~CC_C;
       trc(TRC_SYSCALL,"pc/npc\n");
     }
    if (rv.flags & SCRV_RVSET) s.regs[R_O0] = rv.rv;
    if (rv.flags & SCRV_RV2SET) s.regs[R_O1] = rv.rv2;
  }
 else
  { if ((rv.err == em_EINTR) && syscall_restartable)
     { // Re-execute the trap instruction instead of reporting EINTR.
       trc(TRC_SYSCALL,"restartable syscall showing EINTR\n");
       s.npc = s.pc;
       s.pc = s.xa;
       s.flags |= SF_SIGRESTART;
       alert_run = 1;
       return;
     }
    trc(TRC_SYSCALL,"error %lu (%s)\n",(ULI)rv.err,em_strerror(rv.err));
    // Must match no-SA_RESTART code in deliver_signals()
    s.regs[R_O0] = rv.err;
    s.cc |= CC_C;
  }
}

/*
 * Implement a trap: either an unconditional trap or a taken
 * conditional trap.
 */
static void trap(uint32_t arg)
{
 switch (arg)
  { case 0: // syscall
     dosyscall(s.regs[R_G1]);
     break;
    case 3: // flush windows
     window_flush();
     break;
    case 0x30: // get %wim stuff
#if NWINDOWS > 32
#error "Trap 0x30 assumes NWINDOWS is at most 32"
#endif
     s.regs[R_O0] = ((uint32_t)1) << s.iwp;
#if NWINDOWS == 32
     s.regs[R_O1] = ~(uint32_t)0;
#else
     // NOTE(review): the text from here to the end of the next
     // function appears mangled in this copy -- content between '<'
     // and '>' characters has been lost (likely an HTML-stripping
     // accident), eating the rest of trap() (presumably
     // "<<NWINDOWS); #endif ... break; default: ...") and the head of
     // the conditional-trap helper whose tail survives below.
     // Recover this region from a pristine copy of the source before
     // building; it is preserved verbatim here.
     s.regs[R_O1] = ~((~(uint32_t)0)<> (s.cc & 15)) & 1) trap(arg); }

/*
 * Implement mulscc.  The arguments are the DREG number, the SREG1
 * number, and the value from I, SIMM13, and/or regs[SREG2].
*/ static void mulscc(int dr, int sr1, uint32_t v) { uint32_t t; uint32_t t2; t2 = s.regs[sr1] & 1; t = s.regs[sr1] >> 1; switch (s.cc & (CC_N | CC_V)) { case 0: case CC_N | CC_V: break; case CC_N: case CC_V: t |= 0x80000000; break; } s.regs[dr] = addcc(t,(s.y&1)?v:0); s.y = (s.y >> 1) | (t2 << 31); } /* * Check that a register number is even (eg, for ldd/std). If not, * handle it as an unimplemented instruction. */ static void even_regno(int reg, uint32_t inst) { if (reg & 1) { unimp(s.xa,inst); top(); } } /* * Take a single, v, and pull it apart into an FPNUM. */ static FPNUM crack_single(uint32_t v) { int e; FPNUM n; n.raw = v; e = (v >> MANTBITS_S) & 0x1ff; n.sign = (v >> 31) & 1; n.bexp = e; n.exp = e - EXPBIAS_S; n.mant = (v & ((1ULL << MANTBITS_S) - 1)); switch (e) { case 0: n.kind = n.mant ? FPK_DENORM : FPK_ZERO; break; case MAXBEXP_S: n.kind = n.mant ? FPK_NaN : FPK_INFTY; if (0) { default: n.kind = FPK_NORMAL; } n.mant |= 1ULL << MANTBITS_S; break; } return(n); } /* * Take a double, v, and pull it apart into an FPNUM. */ static FPNUM crack_double(uint64_t v) { int e; FPNUM n; n.raw = v; e = (v >> MANTBITS_D) & 0x7ff; n.sign = (v >> 63) & 1; n.bexp = e; n.exp = e - EXPBIAS_D; n.mant = (v & ((1ULL << MANTBITS_D) - 1)); switch (e) { case 0: n.kind = n.mant ? FPK_DENORM : FPK_ZERO; break; case MAXBEXP_D: n.kind = n.mant ? FPK_NaN : FPK_INFTY; if (0) { default: n.kind = FPK_NORMAL; } n.mant |= 1ULL << MANTBITS_D; break; } return(n); } /* * Take a single represented as an FPNUM and collapse it back into a * 32-bit single. Both exp and bexp must be set correctly (this code * may use whichever it finds more convenient), but raw need not. * kind must be correct. */ static uint32_t merge_single(FPNUM n) { switch (n.kind) { default: abort(); break; case FPK_NORMAL: return( (n.sign ? 0x80000000 : 0) | (((uint32_t)n.bexp) << 23) | (n.mant & 0x007fffff) ); break; case FPK_ZERO: return(n.sign?0x80000000:0); break; case FPK_DENORM: return( (n.sign ? 
0x80000000 : 0) | n.mant ); break; case FPK_INFTY: return(n.sign?0xff800000:0x7f800000); break; case FPK_NaN: return( (n.sign ? 0xff800000 : 0x7f800000) | (n.mant & 0x007fffff) ); break; } } /* * Take a double represented as an FPNUM and collapse it back into a * 64-bit double. Both exp and bexp must be set correctly (this code * may use whichever it finds more convenient), but raw need not. * kind must be correct. */ static uint64_t merge_double(FPNUM n) { switch (n.kind) { default: abort(); break; case FPK_NORMAL: return( (n.sign ? 0x8000000000000000ULL : 0) | (((uint64_t)n.bexp) << 52) | (n.mant & 0x000fffffffffffffULL) ); break; case FPK_ZERO: return(n.sign?0x8000000000000000ULL:0); break; case FPK_DENORM: return( (n.sign ? 0x8000000000000000ULL : 0) | n.mant ); break; case FPK_INFTY: return(n.sign?0xfff0000000000000ULL:0x7ff0000000000000ULL); break; case FPK_NaN: return( (n.sign ? 0xfff0000000000000ULL : 0x7ff0000000000000ULL) | (n.mant & 0x000fffffffffffffULL) ); break; } } /* * Dump out a single FPNUM, in a form appropriate for tracing. */ static void dump_single(FILE *to, FPNUM n, int rawvalid) { uint32_t hosti; float hostf; if (rawvalid) fprintf(to,"%08llx",(ULLI)n.raw); fprintf(to,"