#define VERBOSE #define COMPARE_SIMPLE /* compare simple mirror with data */ /* * Backing implementation which makes snapshotting possible. * * Multi-byte numbers are stored native-endian. * * Bitmaps store a vector of bits bit[N] in ceil(N/8) bytes, with * bit[N] being stored in the 1<<(N&7) bit of byte N>>3 * * A backing "file" is a directory, containing a subdirectory for each * serial number. For each serial number, N the directory named N * (represented as decimal, with no leading 0s), contains: * * admin: a file containing * 8@0: (ESIZE) ESIZE * 8@8: (ASIZE) ASIZE * 8@16: (BLOCKS) number of blocks in data * 8@24: (FREEROOT) root of freelist * ESIZE is the effective size of the virtual backing * file - that is, the size in blocks of the semantically * equivalent flat file. ASIZE is the number of blocks * actually stored, that is, the number of valid bits in * present and the number of entries in map. BLOCKS is * the size of data in blocks. * * present: a file containing ceil(ASIZE/(8*512)) blocks of * bitmap. A bit here is 1 if this serial contains data * for this block, 0 if this serial's data for this block * is stored in some lower serial number. Bits beyond the * bit for block ASIZE-1, if any, MBZ. * * map: a file containing ceil((ASIZE*8)/512) blocks, holding an * array of ASIZE 8-byte entries. If the present bit for * a block is nonexistent or clear, its entry here is * indeterminate and irrelevant; if set, its entry gives * the block number for the block's data. * * data: a file containing BLOCKS blocks of data. These will * normally be file data, but, because a file can be * shrunk, some of the blocks may be free. If FREEROOT in * admin is ~0, there are no free blocks; otherwise, * FREEROOT is the block number of a free block, the first * 8 bytes of which are the block number of the next free * block, etc, until a next link is ~0. * * A backing "file" directory also contains two other files: * * lock: an empty file, lock, which serves as two things: (a) an * fstat(2) target to tell when two BACKINGs are the same * thing and (b) an flock(2) target. * * simple: a flat-file version of the most recent serial. * * An uninitialized backing "file" is represented by an empty * directory. */ #include #include #include #include #include #include #include #include #include #include #include "cmds.h" #include "pollloop.h" #include "stdio-util.h" #include "backing.h" #define FIXisspace(x) isspace((unsigned char)(x)) #define FIXisdigit(x) isdigit((unsigned char)(x)) typedef struct serial SERIAL; typedef struct vlist VLIST; typedef struct priv PRIV; typedef struct gen GEN; struct gen { SERIAL *s; unsigned int left; unsigned int blkno; int fd; int id; int lg_periter; } ; struct vlist { VLIST *link; union { int i; unsigned int u; void *v; } ; } ; struct priv { PRIV *flink; PRIV *blink; char *basedir; int basedirlen; int fd_simple; int fd_lock; int nserial; SERIAL **serials; char *detail; int detail_valid; } ; struct serial { PRIV *p; int x; /* index of this SERIAL in p->serials[] */ unsigned int serial; int fd_admin; int fd_present; int fd_map; int fd_data; unsigned int esize; unsigned int asize; unsigned int blocks; unsigned long long int freeroot; unsigned char *map_present; GEN *gen; } ; static unsigned int pagesize = 0; static const unsigned char zblk[512] = { 0 }; static PRIV *all; static unsigned int snapserial = 0; #define FREELIST_NIL (~0ULL) #ifdef VERBOSE static void vprf(const char *, ...) __attribute__((__format__(__printf__,1,2))); static void vprf(const char *fmt, ...) { va_list ap; int e; va_start(ap,fmt); e = errno; vprintf(fmt,ap); errno = e; va_end(ap); } #else #define vprf(fmt, args...) do { } while (0) #endif /* * Unsigned int divide, rouding towards positive infinity. * * The name icd coems from "integer ceiling divide". */ static unsigned int icd(unsigned int a, unsigned int b) { return((a+(b-1))/b); } /* * Verify that certain assumptions the code makes are true. * * On failure, print a message to stderr and exit(1) - these failures * are not things that can be corrected by a non-coder. * * Ideally, we wouldn't need this. But that would mean either speccing * the data file formats in terms of native data sizes or contriving * the code to (needlessly, on most machines) convert between byte * streams and native integers. It's just a choice of uglinesses; * this is the ugliness I prefer. */ static void test_assumptions(void) { if (pagesize == 0) pagesize = getpagesize(); if (sizeof(unsigned int) != 4) { fprintf(stderr,"unsigned int isn't 4 bytes\n"); exit(1); } if (sizeof(unsigned long long int) != 8) { fprintf(stderr,"unsigned long long int isn't 8 bytes\n"); exit(1); } if ((int)(unsigned int)(unsigned char)~0U != 255) { fprintf(stderr,"unsigned char isn't 8 bits\n"); exit(1); } } /* * Given a directory pathname, returns a VLIST of its contents. * Deleted entries and . and .. are filtered out. If opendir() fails, * a nil pointer is returned (this is indistinguishable from an empty * directory). */ static VLIST *read_dir_contents(const char *dp) { DIR *d; struct dirent *e; VLIST *v; VLIST *l; l = 0; d = opendir(dp); if (! d) return(0); while ((e = readdir(d))) { if ( (e->d_fileno == 0) || ( (e->d_name[0] == '.') && ( (e->d_name[1] == '\0') || ( (e->d_name[1] == '.') && (e->d_name[2] == '\0') ) ) ) ) continue; v = malloc(sizeof(VLIST)); v->v = strdup(&e->d_name[0]); v->link = l; l = v; } closedir(d); return(l); } /* * Given a list of VLISTs using the v member, filter it based on an * int-returning function, with an arbitrary handler for dropped * entries. */ static VLIST *filter_list_v_f(VLIST *l, int (*fn)(void *), void (*drop)(void *)) { VLIST **vp; VLIST *v; vp = &l; while ((v = *vp)) { if ((*fn)(v->v)) { vp = &v->link; } else { *vp = v->link; (*drop)(v->v); free(v); } } return(l); } /* * Given a list of VLISTs using the v member, filter it based on an * int-returning function, free()ing dropped entries. */ static VLIST *filter_list_v(VLIST *l, int (*fn)(void *)) { return(filter_list_v_f(l,fn,&free)); } /* * Given a list of VLISTs and a comparison function, sort the list. * * (*cmp)(a,b) is expected to return true if a should occur before b in * the resulting list. (If a and b compare equal, it doesn't matter * whether *cmp returns true or false.) */ static VLIST *sort_vlist(VLIST *l, int (*cmp)(VLIST *, VLIST *)) { VLIST *a; VLIST *b; VLIST *t; VLIST **lp; if (!l || !l->link) return(l); a = 0; b = 0; while (l) { t = l; l = l->link; t->link = a; a = b; b = t; } a = sort_vlist(a,cmp); b = sort_vlist(b,cmp); lp = &l; while (a || b) { if (a && (!b || (*cmp)(a,b))) { t = a; a = a->link; } else { t = b; b = b->link; } *lp = t; lp = &t->link; } *lp = 0; return(l); } static void gen_done(GEN *g) { remove_block_id(g->id); close(g->fd); free(g); } /* * Shut down a SERIAL, freeing up resources. This means closing open * files and munmap()ping mapped memory. It does not include freeing * the SERIAL itself. */ static void close_serial(SERIAL *s) { if (s->fd_admin >= 0) close(s->fd_admin); if (s->fd_present >= 0) close(s->fd_present); if (s->fd_map >= 0) close(s->fd_map); if (s->fd_data >= 0) close(s->fd_data); if (s->map_present) munmap(s->map_present,icd(s->asize,4096)*512); if (s->gen) gen_done(s->gen); s->fd_admin = -1; s->fd_present = -1; s->fd_map = -1; s->fd_data = -1; s->map_present = 0; s->gen = 0; } /* * Open the files for a SERIAL. Assumes the directory already exists. * Opens are performed using (*ofn); nothing is done with the files * beyond opening them. Other members of the SERIAL are initialized. * (*err)() is called on error. * * On success, 0 is returned, and s->fd_* are set; on error, (*err) is * called, any successful opens are closed and -1 is returned, with * s->fd_* all set to -1 (errno is not useful). * * If (*err) or (*ofn) throws out, this will leak memory. */ static int open_serial_with(SERIAL *s, int (*ofn)(const char *), void (*err)(const char *, ...)) { __label__ failure; int openit(const char *name) { int fd; char *p; asprintf(&p,"%s/%u/%s",s->p->basedir,s->serial,name); fd = (*ofn)(p); if (fd < 0) { (*err)("can't open %s: %s",p,strerror(errno)); free(p); goto failure; } vprf("open %s -> %d\n",p,fd); free(p); return(fd); } s->fd_admin = -1; s->fd_present = -1; s->fd_map = -1; s->fd_data = -1; s->fd_admin = openit("admin"); s->fd_present = openit("present"); s->fd_map = openit("map"); s->fd_data = openit("data"); return(0); failure:; close_serial(s); return(-1); } /* * Load data from the files for an existing SERIAL. The SERIAL must be * new, except that the files must already have been opened. * * On success, returns 0, with the SERIAL set up. On failure, calls * (*err) and returns -1, with the SERIAL in a state suitable for * passing to close_serial(). */ static int load_serial(SERIAL *s, void (*err)(const char *, ...)) { __label__ failure; struct stat stb; struct stat stb2; unsigned int ui; unsigned long long int ulli; auto void fail(void) __attribute__((__noreturn__)); void fail(void) { goto failure; } void mustread(int fd, void *buf, int len, off_t at, const char *what, const char *where) { int n; n = pread(fd,buf,len,at); if (n == len) return; if (n < 0) { (*err)("can't read %s from %s/%u/%s: %s",what,s->p->basedir,s->serial,where,strerror(errno)); } else { (*err)("can't read %s from %s/%u/%s (wanted %d, got %d)",what,s->p->basedir,s->serial,where,len,n); } fail(); } void *mustmap(int fd, off_t at, unsigned int size, const char *what) { void *mmrv; mmrv = mmap(0,size,PROT_READ|PROT_WRITE,MAP_FILE|MAP_SHARED,fd,at); if (mmrv != MAP_FAILED) return(mmrv); (*err)("can't mmap %s/%u/%s (%u at %llu): %s",s->p->basedir,s->serial,what,size,(unsigned long long int)at,strerror(errno)); fail(); } vprf("load_serial %s %u\n",s->p->basedir,s->serial); fstat(s->fd_admin,&stb); if (stb.st_size != 32) { (*err)("%s/%u: admin size wrong",s->p->basedir,s->serial); return(-1); } mustread(s->fd_admin,&ulli,8,0,"effective size","admin"); vprf(" esize %llu\n",ulli); s->esize = ulli; if (s->esize != ulli) { (*err)("%s/%u: effective size too large",s->p->basedir,s->serial); fail(); } mustread(s->fd_admin,&ulli,8,8,"actual size","admin"); vprf(" asize %llu\n",ulli); s->asize = ulli; if (s->asize != ulli) { (*err)("%s/%u: actual size too large",s->p->basedir,s->serial); fail(); } mustread(s->fd_admin,&ulli,8,16,"data block count","admin"); vprf(" blocks %llu\n",ulli); s->blocks = ulli; if (s->blocks != ulli) { (*err)("%s/%u: data block count too large",s->p->basedir,s->serial); fail(); } mustread(s->fd_admin,&ulli,8,24,"freelist root","admin"); if (ulli == FREELIST_NIL) { vprf(" freeroot (nil)\n"); } else { vprf(" freeroot %llu\n",ulli); } s->freeroot = ulli; if (s->asize > s->esize) { (*err)("%s/%u: asize (%u) > esize (%u)\n",s->asize,s->esize); fail(); } fstat(s->fd_present,&stb); fstat(s->fd_map,&stb2); ui = icd(s->asize,4096) * 512; ftruncate(s->fd_present,ui); if (s->asize > 0) s->map_present = mustmap(s->fd_present,0,ui,"present"); ulli = icd(s->asize,64) * 512ULL; if (stb2.st_size < ulli) { unsigned int o; stb2.st_size &= ~(off_t)511; ftruncate(s->fd_map,stb2.st_size); o = stb2.st_size >> (9 - 3); bzero(s->map_present+o,ui-o); } ftruncate(s->fd_map,ulli); vprf(" success\n"); return(0); failure:; vprf(" failure\n"); return(-1); } /* * Open and set up a SERIAL. Assumes the directory already exists. * All the files must also already exist. * * Returns 0 on success or -1, with a call to (*err), on failure. */ static int open_serial(SERIAL *s, void (*err)(const char *, ...)) { int ofn(const char *s) { return(open(s,O_RDWR,0)); } if (open_serial_with(s,&ofn,err) < 0) return(-1); if (load_serial(s,err) < 0) { close_serial(s); return(-1); } return(0); } /* * Open and set up a SERIAL. Assumes the directory already exists. * Files that do not exist will be created; files that do exist will * have their data destroyed. * * Returns 0 on success or -1, with a call to (*err), on failure. */ static int create_serial(SERIAL *s, void (*err)(const char *, ...)) { unsigned long long int ulli; int ofn(const char *s) { return(open(s,O_RDWR|O_CREAT|O_TRUNC,0666)); } void loaderr(const char *fmt __attribute__((__unused__)), ...) { } if (open_serial_with(s,&ofn,err) < 0) return(-1); ftruncate(s->fd_admin,32); ulli = 0; pwrite(s->fd_admin,&ulli,8,0); ulli = 0; pwrite(s->fd_admin,&ulli,8,8); ulli = 0; pwrite(s->fd_admin,&ulli,8,16); ulli = ~0ULL; pwrite(s->fd_admin,&ulli,8,24); ftruncate(s->fd_present,0); ftruncate(s->fd_map,0); ftruncate(s->fd_data,0); if (load_serial(s,&loaderr) < 0) { (*err)("impossible create load failure"); close_serial(s); return(-1); } return(0); } /* * Find the next available serial number in p's directory, starting at * s->serial. Create the relevant directory and create_serial() it. * * On success, returns 0. On failure, calls (*err) and returns -1. */ static int new_next_serial(PRIV *p, SERIAL *s, void (*err)(const char *, ...)) { char *db; db = 0; while (1) { free(db); asprintf(&db,"%s/%u",p->basedir,s->serial); if (mkdir(db,0777) < 0) { if (errno == EEXIST) { s->serial ++; continue; } (*err)("can't create new serial %u in %s: %s",p->basedir,s->serial,strerror(errno)); free(db); return(-1); } free(db); if (create_serial(s,err) == 0) return(0); return(-1); } } /* * Return a new PRIV, ie, one with all its members initialized to * suitably empty values. */ static PRIV *new_priv(void) { PRIV *p; p = malloc(sizeof(PRIV)); p->basedir = 0; p->basedirlen = 0; p->fd_simple = -1; p->fd_lock = -1; p->nserial = 0; p->serials = 0; p->detail = 0; p->detail_valid = 0; return(p); } /* * Return a new SERIAL, ie, one with all its members initialized to * suitably empty values. */ static SERIAL *new_serial(void) { SERIAL *s; s = malloc(sizeof(SERIAL)); s->p = 0; s->x = -1; s->serial = ~0U; s->fd_admin = -1; s->fd_present = -1; s->fd_map = -1; s->fd_data = -1; s->map_present = 0; s->gen = 0; return(s); } /* * Just like atoi() except for unsigned int. */ static unsigned int atou(const char *s) { return(strtoul(s,0,10)); } /* * A comparison function for sort_vlist to sort in ascending order by * the u member. */ static int vlist_cmp_u(VLIST *a, VLIST *b) { return(a->uu); } /* * Actually write a block of data (from data) to block blkno in SERIAL * s. This assumes any necessary map and present growth have already * been taken care of, but handles checking present and allocating a * new block if necessary. */ static int write_it(SERIAL *s, const void *data, unsigned int blkno) { unsigned char *bp; unsigned char bb; unsigned long long int m; int rv; vprf("write_it %s %u at %u\n",s->p->basedir,s->serial,blkno); bp = s->map_present + (blkno >> 3); bb = 1 << (blkno & 7); if (bb & *bp) { rv = pread(s->fd_map,&m,8,blkno*(off_t)8); if (rv != 8) { if (rv >= 0) errno = EIO; vprf("write_it block already present but map read got %d\n",rv); return(-1); } if (m >= s->blocks) { /* ??? How'd this happen? */ *bp &= ~bb; errno = EIO; vprf("write_it block already present but map read got %llu >= %u\n",m,s->blocks); return(-1); } } else { m = s->freeroot; if (m != FREELIST_NIL) { if (m >= s->blocks) { /* ??? How'd this happen? */ vprf("write_it freelist root %llu >= %u\n",m,s->blocks); errno = EIO; return(-1); } rv = pread(s->fd_data,&s->freeroot,8,m*512); if (rv != 8) { if (rv >= 0) errno = EIO; vprf("write_it freelist read got %d\n",rv); return(-1); } } else { m = ++ s->blocks; rv = pwrite(s->fd_admin,&m,8,16); if (rv != 8) { if (rv >= 0) errno = EIO; vprf("write_it block not present but admin write got %d\n",rv); return(-1); } m --; } rv = pwrite(s->fd_map,&m,8,blkno*8ULL); if (rv != 8) { if (rv >= 0) errno = EIO; vprf("write_it block not present but map write got %d\n",rv); return(-1); } *bp |= bb; } vprf("write_it writing at %llu\n",m*512); rv = pwrite(s->fd_data,data,512,m*512); if (rv == 512) return(512); vprf("write_it data write failed\n"); if (rv >= 0) errno = EIO; return(-1); } /* * Open a backing "file". This is responsible for loading all existing * serial subdirs and figuring out the most recent serial number. If * the directory contains no openable serials at all, this creates a * new one with the lowest available number and uses it. */ static void *snapshot_open(const char *fn, void (*err)(const char *, ...)) { char *path; PRIV *p; int lfd; int sfd; VLIST *contents; VLIST *v; VLIST **vp; int n; SERIAL *s; int x; int drop_admin(void *s) { return(strcmp(s,"lock")); } test_assumptions(); vprf("snapshot_open %s\n",fn); asprintf(&path,"%s/lock",fn); lfd = open(path,O_RDWR,0); if (lfd < 0) { if (errno != ENOENT) { (*err)("can't open %s: %s",path,strerror(errno)); return(0); } lfd = open(path,O_RDWR|O_CREAT|O_EXCL,0600); if (lfd < 0) { (*err)("can't create %s: %s",path,strerror(errno)); return(0); } } vprf(" open %s -> %d\n",path,lfd); free(path); asprintf(&path,"%s/simple",fn); sfd = open(path,O_RDWR,0); if (sfd < 0) { if (errno != ENOENT) { (*err)("can't open %s: %s",path,strerror(errno)); close(lfd); return(0); } sfd = open(path,O_RDWR|O_CREAT|O_EXCL,0600); if (sfd < 0) { (*err)("can't create %s: %s",path,strerror(errno)); close(lfd); return(0); } } vprf(" open %s -> %d\n",path,sfd); free(path); contents = filter_list_v(read_dir_contents(fn),&drop_admin); #ifdef VERBOSE vprf(" dir contents %s ->",fn); for (v=contents;v;v=v->link) vprf(" %s",(const char *)v->v); vprf("\n"); #endif n = 0; vp = &contents; while ((v = *vp)) { unsigned int s; char *ss; s = atou(v->v); asprintf(&ss,"%d",n); if (strcmp(ss,v->v) && (n > 0)) { *vp = v->link; free(v->v); free(v); } else { free(v->v); v->u = s; vp = &v->link; n ++; } free(ss); } contents = sort_vlist(contents,&vlist_cmp_u); p = new_priv(); p->basedir = strdup(fn); p->basedirlen = strlen(p->basedir); p->fd_simple = sfd; p->fd_lock = lfd; p->serials = malloc(n*sizeof(SERIAL *)); for (x=n-1;x>=0;x--) p->serials[x] = 0; p->nserial = n; x = 0; while (contents) { v = contents; contents = contents->link; s = new_serial(); s->p = p; s->x = x; s->serial = v->u; free(v); if (open_serial(s,err) < 0) { free(s); n --; } else { p->serials[x++] = s; } } if (n < 1) { free(p->serials); p->nserial = 1; p->serials = malloc(sizeof(SERIAL *)); s = new_serial(); p->serials[0] = 0; s->p = p; s->x = 0; s->serial = 1; if (new_next_serial(p,s,err) < 0) { free(s); (*backing_snapshot.done)(p); return(0); } p->serials[0] = s; n = 1; } p->nserial = n; vprf("snapshot_open returning %p\n",(void *)p); p->flink = all; p->blink = 0; if (all) all->blink = p; all = p; return(p); } /* * Close down and free up a PRIV. */ static void snapshot_done(void *pv) { int i; PRIV *p; if (! pv) return; p = pv; vprf("snapshot_done %p\n",(void *)p); if (p->flink) p->flink->blink = p->blink; if (p->blink) p->blink->flink = p->flink; else all = p->flink; for (i=p->nserial-1;i>=0;i--) { SERIAL *s; s = p->serials[i]; if (s) { close_serial(s); free(s); } } free(p->serials); close(p->fd_lock); close(p->fd_simple); free(p->basedir); free(p->detail); free(p); } /* * Lock a PRIV. Since this exists just to prevent two runs from * colliding over the same backing data, all we need to do is pick a * distinguished file and lock it. */ static int snapshot_flock(void *pv, int op) { vprf("snapshot_flock %p %d\n",pv,op); return(flock(((PRIV *)pv)->fd_lock,op)); } static void write_admin(SERIAL *s) { unsigned long long int d[4]; d[0] = s->esize; d[1] = s->asize; d[2] = s->blocks; d[3] = s->freeroot; pwrite(s->fd_admin,&d[0],32,0); } /* * Write the data pointed to by data to block number blkno in p. * On success, returns 512; on failure, returns -1 and sets errno. */ static int snapshot_write(void *pv, unsigned int blkno, const void *data) { PRIV *p; SERIAL *s; int saveadmin; p = pv; vprf("snapshot_write %p at %u\n",(void *)p,blkno); pwrite(p->fd_simple,data,512,blkno*(off_t)512); s = p->serials[p->nserial-1]; saveadmin = 0; if (blkno >= s->esize) { s->esize = blkno + 1; saveadmin = 1; } if (blkno >= s->asize) { int oldpb; int newpb; void *mmrv; oldpb = icd(s->asize,4096) * 512; newpb = icd(blkno+1,4096) * 512; if (newpb != oldpb) { msync(s->map_present,oldpb,MS_ASYNC); ftruncate(s->fd_present,newpb); mmrv = mmap(0,newpb,PROT_READ|PROT_WRITE,MAP_FILE|MAP_SHARED,s->fd_present,0); if (mmrv == MAP_FAILED) return(-1); munmap(s->map_present,oldpb); s->map_present = mmrv; } s->asize = blkno + 1; ftruncate(s->fd_map,icd(s->asize,64)*(off_t)512); saveadmin = 1; } if (saveadmin) write_admin(s); return(write_it(s,data,blkno)); } /* * Read the data for blocks starting at blkno, for a total of nblks * blocks, into data. * * On success, returns nblks*512. On failure, returns -1 with errno * set appropriately. */ static int snapshot_read(void *pv, unsigned int blkno, unsigned int nblks, void *data) { PRIV *p; unsigned char *dp; int sx; SERIAL *s; unsigned long long int m; int rv; unsigned int nb; unsigned char sblk[512]; p = pv; vprf("snapshot_read %p %u..%u\n",(void *)p,blkno,blkno+nblks-1); dp = data; for <"blocks"> (nb=nblks;nb>0;blkno++,nb--,dp+=512) { for (sx=p->nserial-1;sx>=0;sx--) { s = p->serials[sx]; if (blkno >= s->esize) continue; if ((blkno < s->asize) && (s->map_present[blkno>>3] & (1<<(blkno&7)))) { rv = pread(s->fd_map,&m,8,blkno*(off_t)8); if (rv != 8) { if (rv >= 0) errno = EIO; vprf("snapshot_read block %u in %u but map read got %d\n",blkno,s->serial,rv); return(-1); } rv = pread(s->fd_data,dp,512,m*512); if (rv != 512) { if (rv >= 0) errno = EIO; vprf("snapshot_read block %u in %u but data read got %d\n",blkno,s->serial,rv); return(-1); } vprf("snapshot_read block %u in %u\n",blkno,s->serial); continue <"blocks">; } } vprf("snapshot_read synthesizing block of 0s at %u\n",blkno); bzero(dp,512); } #ifdef COMPARE_SIMPLE blkno -= nblks; dp = data; for (nb=nblks;nb>0;blkno++,nb--,dp+=512) { rv = pread(p->fd_simple,&sblk[0],512,blkno*(off_t)512); if (rv < 0) { fprintf(stderr,"warning: read error on simple mirror for %s block %u\n",p->basedir,blkno); } else if (rv == 0) { if (bcmp(dp,&zblk[0],51)) { fprintf(stderr,"warning: simple mirror compare error for %s block %u\n",p->basedir,blkno); } else { vprf("snapshot_read compare with 0s worked at %u\n",blkno); } } else if (rv != 512) { fprintf(stderr,"warning: read %d instead of 512 on simple mirror for %s block %u\n",rv,p->basedir,blkno); } else if (bcmp(dp,&sblk[0],51)) { fprintf(stderr,"warning: simple mirror compare error for %s block %u\n",p->basedir,blkno); } else { vprf("snapshot_read compare worked at %u\n",blkno); } } #endif return(nblks*512); } /* * Set the PRIV's current size to nblks blocks. */ static void snapshot_set_size(void *pv, unsigned int nblks) { PRIV *p; SERIAL *s; unsigned long long int fl; unsigned long long int m; int rv; int saveadmin; p = pv; vprf("snapshot_set_size %p to %u\n",(void *)p,nblks); ftruncate(p->fd_simple,nblks*(off_t)512); s = p->serials[p->nserial-1]; saveadmin = 0; if (nblks > s->esize) { s->esize = nblks; saveadmin = 1; } fl = s->freeroot; while (nblks < s->esize) { s->esize --; if (s->map_present[s->esize>>3] & (1<<(s->esize&7))) { rv = pread(s->fd_map,&m,8,s->esize*(off_t)8); if (rv != 8) { if (rv >= 0) errno = EIO; vprf("snapshot_set_size shrinking lost block %u (read %d)\n",s->esize,rv); } else { rv = pwrite(s->fd_data,&fl,8,m*(off_t)512); if (rv != 8) { if (rv >= 0) errno = EIO; vprf("snapshot_set_size shrinking lost block %u (write %d)\n",s->esize,rv); } else { fl = m; } } s->map_present[s->esize>>3] &= ~(1<<(s->esize&7)); } saveadmin = 1; } if (fl != s->freeroot) { s->freeroot = fl; saveadmin = 1; } if (saveadmin) write_admin(s); } /* * Return a string giving internal data of potential value to debuggers * looking at live processes. */ static const char *snapshot_info(void *pv) { PRIV *p; p = pv; if (! p->detail_valid) { char *str; FILE *f; int sx; SERIAL *s; f = fopenalloc(&str,0); fprintf(f,"%d/%d",p->fd_lock,p->fd_simple); for (sx=p->nserial-1;sx>=0;sx--) { s = p->serials[sx]; fprintf(f,"-%u[%d/%d/%d/%d]",s->serial,s->fd_admin,s->fd_present,s->fd_map,s->fd_data); } fclose(f); free(p->detail); p->detail = str; p->detail_valid = 1; } vprf("snapshot_info %p -> %s\n",(void *)p,p->detail); return(p->detail); } /* * Print detailed debugger-useful info to the FILE *. */ static void snapshot_detail(void *pv, FILE *f) { PRIV *p; int i; p = pv; vprf("snapshot_detail %p\n",(void *)p); fprintf(f,"Lock fd: %d\n",p->fd_lock); fprintf(f,"Simple fd: %d\n",p->fd_simple); fprintf(f,"Serial count: %d\n",p->nserial); for (i=0;inserial;i++) { SERIAL *s; s = p->serials[i]; fprintf(f,"[%d]:\n",i); fprintf(f,"\tSerial: %u\n",s->serial); fprintf(f,"\tAdmin fd: %d\n",s->fd_admin); fprintf(f,"\tPresent fd: %d\n",s->fd_present); fprintf(f,"\tMap fd: %d\n",s->fd_map); fprintf(f,"\tData fd: %d\n",s->fd_data); fprintf(f,"\tEffective size: %u (%llu)\n",s->esize,512ULL*s->esize); fprintf(f,"\tActual size: %u (%llu)\n",s->asize,512ULL*s->asize); fprintf(f,"\tBlocks: %u (%llu)\n",s->blocks,512ULL*s->blocks); fprintf(f,"\tMap: %p\n",(void *)s->map_present); } } /* * Return a struct stat which is suitable for comparing whether two * backing "file"s are really the same. * * Return semantics are as for fstat(2). */ static int snapshot_fstat(void *pv, struct stat *stb) { vprf("snapshot_fstat %p\n",pv); return(fstat(((PRIV *)pv)->fd_lock,stb)); } /* * Move a PRIV from *fp to *tp. */ static void snapshot_move(void **fvp, void **tvp) { vprf("snapshot_move %p -> %p\n",*fvp,*tvp); (*backing_snapshot.done)(*tvp); *tvp = *fvp; *fvp = 0; } static const CMD cmd_snap; /* forward */ static PRIV *lookup_priv(const char *name, int namelen, FILE *f) { PRIV *p; for (p=all;p;p=p->flink) { if ((p->basedirlen == namelen) && !bcmp(p->basedir,name,namelen)) return(p); } fprintf(f,"No snapshot client named %.*s\n",namelen,name); return(0); } static void take_snap(PRIV *p, FILE *f) { SERIAL *s; auto void errmsg(const char *, ...) __attribute__((__format__(__printf__,1,2))); void errmsg(const char *fmt, ...) { va_list ap; va_start(ap,fmt); vfprintf(f,fmt,ap); va_end(ap); } p->serials = realloc(p->serials,(p->nserial+1)*sizeof(SERIAL *)); s = new_serial(); s->p = p; s->x = p->nserial; s->serial = p->serials[p->nserial-1]->serial; if (new_next_serial(p,s,&errmsg) < 0) { free(s); return; } p->serials[p->nserial++] = s; fprintf(f,"Serial %u\n",s->serial); p->detail_valid = 0; } static void list_snaps(PRIV *p, FILE *f) { int i; for (i=p->nserial-1;i>=0;i--) fprintf(f," %u",p->serials[i]->serial); fprintf(f,"\n"); } static int gen_snap_blockfn(BLOCKFN_ARGS) { GEN *g; struct timeval tv0; struct timeval tv1; unsigned int i; int sx; SERIAL *s; PRIV *p; int rv; unsigned long long int m; unsigned char blk[512]; const unsigned char *bp; g = arg; gettimeofday(&tv0,0); p = g->s->p; for (i=1<lg_periter;i>0;i--) { do <"found"> { for (sx=g->s->x;sx>=0;sx--) { s = p->serials[sx]; if ( (g->blkno < s->esize) && (s->map_present[g->blkno>>3] & (1<<(g->blkno&7))) ) { rv = pread(s->fd_map,&m,8,g->blkno*(off_t)8); if (rv != 8) { if (rv < 0) { fprintf(stderr,"warning: map read error on %s #%u generating snapshot for #%u: %s\n", p->basedir,s->serial,g->s->serial,strerror(errno)); } else { fprintf(stderr,"warning: map read error on %s #%u generating snapshot for #%u: wanted %d got %d\n", p->basedir,s->serial,g->s->serial,8,rv); } gen_done(g); return(1); } rv = pread(s->fd_data,&blk[0],512,m*512); if (rv != 512) { if (rv < 0) { fprintf(stderr,"warning: data read error on %s #%u generating snapshot for #%u: %s\n", p->basedir,s->serial,g->s->serial,strerror(errno)); } else { fprintf(stderr,"warning: data read error on %s #%u generating snapshot for #%u: wanted %d got %d\n", p->basedir,s->serial,g->s->serial,512,rv); } gen_done(g); return(1); } bp = &blk[0]; break <"found">; } } bp = &zblk[0]; } while (0); rv = pwrite(g->fd,&blk[0],512,g->blkno*(off_t)512); if (rv != 512) { if (rv < 0) { fprintf(stderr,"warning: generate write generating snapshot for %s #%u: %s\n", p->basedir,g->s->serial,strerror(errno)); } else { fprintf(stderr,"warning: generate write generating snapshot for %s #%u: wanted %d got %d\n", p->basedir,g->s->serial,512,rv); } gen_done(g); return(1); } g->left --; g->blkno ++; if (g->left < 1) { gen_done(g); return(1); } } gettimeofday(&tv1,0); i = tv1.tv_sec - tv0.tv_sec; if (i > 5) i = 5; i = (i * 1000000) + tv1.tv_usec - tv0.tv_usec; if (i >= 200000) { if (g->lg_periter > 0) g->lg_periter --; } else if (i < 100000) { if (g->lg_periter < 20) g->lg_periter ++; } return(1); } static void gen_snap(PRIV *p, unsigned int n, FILE *f) { int i; SERIAL *s; GEN *g; do <"found"> { for (i=p->nserial-1;i>=0;i--) { s = p->serials[i]; if (s->serial == n) break <"found">; } fprintf(f,"%s has no snapshot #%u\n",p->basedir,n); return; } while (0); if (s == p->serials[p->nserial-1]) { fprintf(f,"%u is the live version of %s\n",n,p->basedir); return; } if (s->gen) { fprintf(f,"A snapshot of %s #%u is already being generated\n",p->basedir,n); return; } while (1) { static char *path = 0; free(path); asprintf(&path,"%s/snap.%u",p->basedir,snapserial++); i = open(path,O_WRONLY|O_CREAT|O_EXCL,0600); if (i >= 0) { fprintf(f,"Generating to %s, size %llu\n",path,s->esize*512ULL); break; } if (errno != EEXIST) { fprintf(f,"Can't create %s: %s\n",path,strerror(errno)); return; } } g = malloc(sizeof(GEN)); g->s = s; g->left = s->esize; g->blkno = 0; g->fd = i; g->lg_periter = 0; g->id = add_block_fn(&gen_snap_blockfn,g); s->gen = g; } /* * Handler function for the snap command. */ static void cmd_snap_handle(const char *rest, int restlen, FILE *f) { int x; int x0; int r; unsigned int n; PRIV *p; void usage(void) { fprintf(f,"%s",cmd_snap.help); } for (x=0;(x= restlen) { usage(); return; } x0 = x; for (;(x=r)&&FIXisspace(rest[x]);x--) ; for (;(x>=r)&&!FIXisspace(rest[x]);x--) ; if (x < r) { fprintf(f,"Usage: snap gen \n"); return; } n = strtoul(rest+x,0,0); p = lookup_priv(rest+r,restlen-r,f); if (! p) return; gen_snap(p,n,f); } else { usage(); } } static const CMD cmd_snap = { "snap", "snap take \n Take a snapshot for the backup in \n" "snap list \n List snapshots for the backup in \n" "snap gen \n Generate a flat-file image of snapshot for ", CMD_FXNS(snap) }; /* * Initialize this module. * * All we do here is register a command. */ static void snapshot_init(void) { all = 0; register_cmd(&cmd_snap); } const BACKING backing_snapshot = BACKING_INIT(snapshot);