/* This file is in the public domain. */ #include #include #include #include #include #include #include #include #include #include "codecintf.h" #include "mp4.h" extern const char *__progname; #define NTMP 8 typedef enum { TT_UNSET = 1, TT_AUDIO, TT_VIDEO, } TRACKTYPE; typedef struct track TRACK; typedef struct stts_data STTS_DATA; typedef struct stsc_data STSC_DATA; typedef struct chunk CHUNK; typedef struct sample SAMPLE; struct sample { unsigned int cue; /* time */ unsigned int len; /* bytes */ unsigned int loc; } ; struct chunk { unsigned int pos; unsigned int id; unsigned int len; } ; struct stsc_data { unsigned int first; unsigned int space; unsigned int id; } ; struct stts_data { unsigned int n; unsigned int dur; } ; struct track { MP4 *mp4; TRACK *link; int serial; TRACKTYPE type; unsigned char codec[4]; unsigned int data; unsigned int len; unsigned int timescale; unsigned long long int length; unsigned int stsd_type_at; unsigned int stsd_at; unsigned int stsd_len; unsigned int stts_n; STTS_DATA *stts_tbl; unsigned int stss_n; unsigned int *stss_tbl; unsigned int stsc_n; STSC_DATA *stsc_tbl; unsigned int samplesize; unsigned int stsz_n; unsigned int *stsz_tbl; unsigned int stco_n; unsigned int *stco_tbl; int nchunks; CHUNK *chunks; unsigned int fixeddur; int nsamples; SAMPLE *samples; struct { unsigned int avc_config_loc; unsigned int avc_config_len; } v; struct { unsigned int sample_rate; unsigned int setup_loc; unsigned int setup_len; } a; } ; struct mp4 { const char *filename; const unsigned char *file; unsigned int flags; FILE *vf; void *filemap; unsigned int filesize; int trackserial; unsigned int moov_at; unsigned int moov_len; unsigned int mdat_at; unsigned int mdat_len; TRACK *tracks; MP4TRK *open_tracks; } ; struct mp4trk { MP4TRK *link; TRACK *t; unsigned int sx; CODEC *c; unsigned char *dbuf; int dblen; union { MP4ADESC a; MP4VDESC v; } ; } ; #define FOURCHAR(a,b,c,d) ( ((a) * 0x01000000) + \ ((b) * 0x010000) + \ ((c) * 0x0100) + \ ((d) * 0x01) ) /* * Comments below that talk about "ver 1" refer here. * * There are various places where the file contains a version number * which specifies, for example, whether another value is 32 bits or * 64 bits. In at least some of these cases, my reference specifies * that one thing happens for version 1 and something else for all * other versions. I do not know whether the spec really does specify * that, or "0 means this, 1 means that, other values reserved", or * what. (The actual spec appears to be a pay-to-play * pseudo-standard; apparently they'd rather have people implementing * based on collections of miscellanous tidbits, as I'm doing, rather * than referring to the real spec.) */ static void verbose(MP4 *, const char *, ...) __attribute__((__format__(__printf__,2,3))); static void verbose(MP4 *h, const char *fmt, ...) { va_list ap; if (! (h->flags & MP4F_VERBOSE)) return; va_start(ap,fmt); vfprintf(h->vf,fmt,ap); va_end(ap); } static const char *tmpprintf(const char *, ...) __attribute__((__format__(__printf__,1,2))); static const char *tmpprintf(const char *fmt, ...) { va_list ap; char *s; static char *rv[NTMP] = { 0 }; static int hand = 0; va_start(ap,fmt); vasprintf(&s,fmt,ap); va_end(ap); free(rv[hand]); rv[hand] = s; hand = (hand ? hand : NTMP) - 1; return(s); } static void map_file(MP4 *h, const char *name) { int fd; struct stat stb; void *mmrv; h->filename = name; fd = open(name,O_RDONLY,0); if (fd < 0) { printf("*** open %s: %s\n",name,strerror(errno)); exit(1); } fstat(fd,&stb); h->filesize = stb.st_size; if (h->filesize != stb.st_size) { printf("*** %s: too large\n",name); exit(1); } mmrv = mmap(0,stb.st_size,PROT_READ,MAP_FILE|MAP_SHARED,fd,0); if (mmrv == MAP_FAILED) { printf("*** mmap %s: %s\n",name,strerror(errno)); exit(1); } h->filemap = mmrv; h->file = mmrv; close(fd); } static void unmap_file(MP4 *h) { munmap(h->filemap,h->filesize); } static unsigned int get2be(MP4 *h, unsigned int o) { return( (h->file[o ] * 0x0100) + (h->file[o+1] * 0x01) ); } static unsigned int get3be(MP4 *h, unsigned int o) { return( (h->file[o ] * 0x010000) + (h->file[o+1] * 0x0100) + (h->file[o+2] * 0x01) ); } static unsigned int get4be(MP4 *h, unsigned int o) { return( (h->file[o ] * 0x01000000) + (h->file[o+1] * 0x010000) + (h->file[o+2] * 0x0100) + (h->file[o+3] * 0x01) ); } static unsigned long long int get8be(MP4 *h, unsigned int o) { return( (h->file[o ] * 0x0100000000000000ULL) + (h->file[o+1] * 0x01000000000000ULL) + (h->file[o+2] * 0x010000000000ULL) + (h->file[o+3] * 0x0100000000ULL) + (h->file[o+4] * 0x01000000ULL) + (h->file[o+5] * 0x010000ULL) + (h->file[o+6] * 0x0100ULL) + (h->file[o+7] * 0x01ULL) ); } static void print_str_vis(FILE *to, const unsigned char *d, int len) { int i; for (i=len;i>0;i--) { if (*d < 32) { fprintf(to,"^%c",64+*d); } else if (*d < 127) { putc(*d,to); } else if (*d == 127) { fprintf(to,"^?"); } else if (*d < 160) { fprintf(to,"^%c",64+*d); } else { putc(*d,to); } d ++; } } static const char *scan_chunks(MP4 *h, int depth, unsigned int start, unsigned int len, const char *(*chunk)(const unsigned char [4], unsigned int, unsigned int, void *), void *arg) { unsigned int o; unsigned int l; int i; const char *e; o = 0; while (1) { if (o == len) break; if (o+8 > len) return(tmpprintf("chunk overrun 1 (%u+8 > %u)",o,len)); l = get4be(h,start+o); if (l == 1) return(tmpprintf("64-bit chunk length at %u+%u",start,o)); if (l < 8) return(tmpprintf("runt chunk (%u) at %u+%u",l,start,o)); if (h->flags & MP4F_VERBOSE) { for (i=depth;i>0;i--) putc('\t',h->vf); fprintf(h->vf,"%08x+%08x: chunk len=%x type=",start,o,l); print_str_vis(h->vf,h->file+start+o+4,4); fprintf(h->vf,"\n"); } if (o+l > len) return(tmpprintf("chunk overrun 2 (%u+%u > %u)",o,l,len)); e = (*chunk)(h->file+start+o+4,start+o+8,l-8,arg); if (e) return(e); o += l; } return(0); } static const char *file_chunk(const unsigned char type[4], unsigned int data, unsigned int len, void *hv) { MP4 *h; int i; h = hv; switch (FOURCHAR(type[0],type[1],type[2],type[3])) { default: verbose(h,"skipping unknown chunk\n"); break; case FOURCHAR('f','t','y','p'): if (len < 8) return(tmpprintf("ftyp chunk too small (%u)",len)); if (len % 4) return(tmpprintf("ftyp chunk length (%u) isn't a multiple of 4",len)); if (h->flags & MP4F_VERBOSE) { fprintf(h->vf,"ftyp: major `"); print_str_vis(h->vf,h->file+data,4); fprintf(h->vf,"' minor %08x compat:",get4be(h,data+4)); for (i=8;ivf," "); print_str_vis(h->vf,h->file+data+i,4); } fprintf(h->vf,"\n"); } break; case FOURCHAR('m','o','o','v'): verbose(h,"saving moov chunk location\n"); h->moov_at = data; h->moov_len = len; break; case FOURCHAR('m','d','a','t'): verbose(h,"saving mdat chunk location\n"); h->mdat_at = data; h->mdat_len = len; break; case FOURCHAR('f','r','e','e'): verbose(h,"ignoring free chunk\n"); break; } return(0); } static void save_track(MP4 *h, unsigned int data, unsigned int len) { TRACK *t; t = malloc(sizeof(TRACK)); t->mp4 = h; t->serial = h->trackserial ++; t->type = TT_UNSET; t->data = data; t->len = len; t->stsd_at = 0; t->stts_n = 0; t->stts_tbl = 0; t->stss_n = 0; t->stss_tbl = 0; t->stsc_n = 0; t->stsc_tbl = 0; t->stsz_n = 0; t->stsz_tbl = 0; t->stco_n = 0; t->stco_tbl = 0; t->nchunks = 0; t->chunks = 0; t->nsamples = 0; t->samples = 0; t->link = h->tracks; h->tracks = t; } static const char *moov_chunk(const unsigned char type[4], unsigned int data, unsigned int len, void *hv) { MP4 *h; h = hv; switch (FOURCHAR(type[0],type[1],type[2],type[3])) { default: verbose(h,"skipping unknown chunk\n"); break; case FOURCHAR('m','v','h','d'): { unsigned char ver; unsigned long long int dur; unsigned int ts; if (len < 1) return(tmpprintf("mvhd chunk too small 1 (%u)",len)); /* XXX what are the rest of the values here? */ ver = h->file[data]; /* XXX see "ver 1" comment at file top */ if (len < ((ver==1) ? 32 : 20)) return(tmpprintf("mvhd chunk too small 2 (%u, ver=%d)",len,ver)); if (ver == 1) { ts = get4be(h,data+20); dur = get8be(h,data+24); } else { ts = get4be(h,data+12); dur = get4be(h,data+16); } verbose(h,"movie: ts=%u dur=%llu\n",ts,dur); } break; case FOURCHAR('t','r','a','k'): verbose(h,"saving trak chunk location\n"); save_track(h,data,len); break; case FOURCHAR('u','d','t','a'): verbose(h,"skipping udta chunk\n"); break; } return(0); } static const char *stbl_chunk(const unsigned char type[4], unsigned int data, unsigned int len, void *tv) { TRACK *t; t = tv; switch (FOURCHAR(type[0],type[1],type[2],type[3])) { default: verbose(t->mp4,"\t\t\tskipping unknown chunk\n"); break; case FOURCHAR('s','t','s','d'): { unsigned int n; unsigned int o; unsigned int l; if (len < 8) return(tmpprintf("stsd chunk too small (%u)",len)); /* XXX what are the first four bytes? */ n = get4be(t->mp4,data+4); verbose(t->mp4,"\t\t\tDescription list, n = %u\n",n); o = 8; for (;n>0;n--) { if (len < o+8) return(tmpprintf("stsd chunk overrun 1 (%u < %u+8)",len,o)); l = get4be(t->mp4,data+o); if (l < 8) return(tmpprintf("stsd runt (%u)",l)); if (len < o+l) return(tmpprintf("stsd chunk overrun 2 (%u < %u+%u)",len,o,l)); if (t->mp4->flags & MP4F_VERBOSE) { fprintf(t->mp4->vf,"\t\t\t\ttype = `"); print_str_vis(t->mp4->vf,t->mp4->file+data+o+4,4); fprintf(t->mp4->vf,"', len = %u\n",l); } if (t->stsd_at) return("extra stsd for this track"); t->stsd_type_at = data + o + 4; t->stsd_at = data + o + 8; t->stsd_len = l - 8; o += l; } } break; case FOURCHAR('s','t','t','s'): { unsigned int n; unsigned int o; unsigned long long int total; int i; if (t->stts_n) return("extra stts for this track"); if (len < 8) return(tmpprintf("stts chunk too small 1 (%u)",len)); /* XXX what are the first four bytes? */ n = get4be(t->mp4,data+4); verbose(t->mp4,"\t\t\tSample duration table, n = %u\n",n); if (len < 8+(n*8)) return(tmpprintf("stts chunk too small 2 (%u < %u)",len,8*(n*8))); t->stts_n = n; t->stts_tbl = n ? malloc(n*sizeof(*t->stts_tbl)) : 0; i = 0; o = 8; for (i=0;istts_tbl[i].n = get4be(t->mp4,data+o); t->stts_tbl[i].dur = get4be(t->mp4,data+o+4); o += 8; } total = 0; for (i=0;istts_n;i++) total += t->stts_tbl[i].n * 1ULL * t->stts_tbl[i].dur; if (total != t->length) { verbose(t->mp4,"*** warning: track length mismatch: %llu != %llu\n",t->length,total); } } break; case FOURCHAR('s','t','s','s'): { unsigned int n; unsigned int o; int i; if (t->stss_n) return("extra stss for this track"); if (len < 8) return(tmpprintf("stss chunk too small 1 (%u)",len)); n = get4be(t->mp4,data+4); verbose(t->mp4,"\t\t\tSyncing samples table, n = %u, ver = %u, flags = %02x %02x %02x\n", n,t->mp4->file[data+3],t->mp4->file[data],t->mp4->file[data+1],t->mp4->file[data+2]); if (len < 8+(n*4)) return(tmpprintf("stss chunk too small 2 (%u < %u)",len,8+(n*4))); t->stss_n = n; t->stss_tbl = n ? malloc(n*sizeof(*t->stss_tbl)) : 0; i = 0; o = 8; for (i=0;istss_tbl[i] = get4be(t->mp4,data+o) - 1; o += 4; } } break; case FOURCHAR('s','t','s','c'): { unsigned int n; unsigned int o; int i; if (t->stsc_n) return("extra stsc for this track"); if (len < 8) return(tmpprintf("stsc chunk too small 1 (%u)",len)); n = get4be(t->mp4,data+4); verbose(t->mp4,"\t\t\tSample->chunk table, n = %u, ver = %u, flags = %02x %02x %02x\n", n,t->mp4->file[data+3],t->mp4->file[data],t->mp4->file[data+1],t->mp4->file[data+2]); if (len < 8+(n*12)) return(tmpprintf("stsc chunk too small 2 (%u < %u)",len,8+(n*12))); t->stsc_n = n; t->stsc_tbl = n ? malloc(n*sizeof(*t->stsc_tbl)) : 0; i = 0; o = 8; for (i=0;istsc_tbl[i].first = get4be(t->mp4,data+o) - 1; t->stsc_tbl[i].space = get4be(t->mp4,data+o+4); t->stsc_tbl[i].id = get4be(t->mp4,data+o+8); o += 12; } } break; case FOURCHAR('s','t','s','z'): { unsigned int n; unsigned int o; int i; if (t->stsz_n) return("extra stsz for this track"); if (len < 12) return(tmpprintf("stsz chunk too small 1 (%u)",len)); n = get4be(t->mp4,data+8); t->samplesize = get4be(t->mp4,data+4); verbose(t->mp4,"\t\t\tSample size table, n = %u, ver = %u, flags = %02x %02x %02x", n,t->mp4->file[data+3],t->mp4->file[data],t->mp4->file[data+1],t->mp4->file[data+2]); if (t->samplesize == 0) { verbose(t->mp4,", ss variable\n"); if (len < 12+(n*4)) return(tmpprintf("stsz chunk too small 2 (%u < %u)",len,12+(n*4))); t->stsz_n = n; t->stsz_tbl = n ? malloc(n*sizeof(*t->stsz_tbl)) : 0; i = 0; o = 12; for (i=0;istsz_tbl[i] = get4be(t->mp4,data+o); o += 4; } } else { verbose(t->mp4,", ss = %u\n",t->samplesize); } } break; case FOURCHAR('s','t','c','o'): { unsigned int n; unsigned int o; int i; if (t->stco_n) return("extra stco for this track"); if (len < 8) return(tmpprintf("stco chunk too small 1 (%u)",len)); n = get4be(t->mp4,data+4); verbose(t->mp4,"\t\t\tChunk offset table, n = %u\n",n); /* * mplayer doesn't check for duplicates and does * if (n > t->stco_n) * { free and reallocate t->stco_tbl; * t->stco_n = n; * } * and then expects to find t->stco_n entries in the file, * even if that's more than n. This looks weird to me, but, * because we fall over in the presence of a second stco, I * don't have to worry about it. */ if (len < 8+(n*4)) return(tmpprintf("stco chunk too small 2 (%u < %u)",len,8+(n*4))); t->stco_n = n; t->stco_tbl = n ? malloc(n*sizeof(*t->stco_tbl)) : 0; i = 0; o = 8; for (i=0;istco_tbl[i] = get4be(t->mp4,data+o); o += 4; } } break; } return(0); } static const char *minf_chunk(const unsigned char type[4], unsigned int data, unsigned int len, void *tv) { TRACK *t; const char *e; t = tv; switch (FOURCHAR(type[0],type[1],type[2],type[3])) { default: verbose(t->mp4,"\t\tskipping unknown chunk\n"); break; case FOURCHAR('s','m','h','d'): verbose(t->mp4,"\t\tTrack is audio\n"); t->type = TT_AUDIO; t->a.setup_len = 0; break; case FOURCHAR('v','m','h','d'): verbose(t->mp4,"\t\tTrack is video\n"); t->type = TT_VIDEO; t->v.avc_config_len = 0; break; case FOURCHAR('d','i','n','f'): verbose(t->mp4,"\t\tUnknown\n"); break; case FOURCHAR('s','t','b','l'): verbose(t->mp4,"\t\tUnknown\n"); e = scan_chunks(t->mp4,3,data,len,&stbl_chunk,t); if (e) return(e); break; } return(0); } static const char *mdia_chunk(const unsigned char type[4], unsigned int data, unsigned int len, void *tv) { TRACK *t; const char *e; t = tv; switch (FOURCHAR(type[0],type[1],type[2],type[3])) { default: verbose(t->mp4,"\tskipping unknown chunk\n"); break; case FOURCHAR('m','d','h','d'): { int ver; /* XXX what are the rest of the values here? */ ver = t->mp4->file[data]; /* XXX see "ver 1" comment at file top */ if (len < ((ver==1) ? 32 : 20)) return(tmpprintf("mdhd chunk too small (%u, ver=%d)",len,ver)); if (ver == 1) { t->timescale = get4be(t->mp4,data+20); t->length = get8be(t->mp4,data+24); } else { t->timescale = get4be(t->mp4,data+12); t->length = get4be(t->mp4,data+16); } verbose(t->mp4,"\ttimescale = %u, length = %llu\n",t->timescale,t->length); } break; case FOURCHAR('h','d','l','r'): { int slen; if (len < 25) { verbose(t->mp4,"hdlr chunk too small 1 (%u)",len); } else { slen = t->mp4->file[data+24]; if (len < 25+slen) { verbose(t->mp4,"hdlr chunk too small 2 (%u, slen=%d)",len,slen); } else if (t->mp4->flags & MP4F_VERBOSE) { fprintf(t->mp4->vf,"\tHandler block: len=%u\n",len); fprintf(t->mp4->vf,"\t\tword0 %08x\n",get4be(t->mp4,data)); fprintf(t->mp4->vf,"\t\ttype "); print_str_vis(t->mp4->vf,t->mp4->file+data+4,4); fprintf(t->mp4->vf,"\n"); fprintf(t->mp4->vf,"\t\tsubtype "); print_str_vis(t->mp4->vf,t->mp4->file+data+8,4); fprintf(t->mp4->vf,"\n"); fprintf(t->mp4->vf,"\t\tmfg "); print_str_vis(t->mp4->vf,t->mp4->file+data+12,4); fprintf(t->mp4->vf,"\n"); fprintf(t->mp4->vf,"\t\tcflags %08x\n",get4be(t->mp4,data+16)); fprintf(t->mp4->vf,"\t\tcmask %08x\n",get4be(t->mp4,data+20)); fprintf(t->mp4->vf,"\t\tstring "); print_str_vis(t->mp4->vf,t->mp4->file+data+25,slen); fprintf(t->mp4->vf,"\n"); } /* type=mhlr, type=dhlr - set media and data handler types */ } } break; case FOURCHAR('m','i','n','f'): verbose(t->mp4,"\tMedia info\n"); e = scan_chunks(t->mp4,2,data,len,&minf_chunk,t); if (e) return(e); break; } return(0); } static const char *trak_chunk(const unsigned char type[4], unsigned int data, unsigned int len, void *tv) { TRACK *t; const char *e; t = tv; switch (FOURCHAR(type[0],type[1],type[2],type[3])) { default: verbose(t->mp4,"skipping unknown chunk\n"); break; case FOURCHAR('t','k','h','d'): verbose(t->mp4,"Track header: len=%u\n",len); if (len < 84) return(tmpprintf("tkhd chunk too short (%u)",len)); if (t->mp4->flags & MP4F_VERBOSE) { fprintf(t->mp4->vf,"\tver %d\n",t->mp4->file[data]); fprintf(t->mp4->vf,"\tflags %02x %02x %02x\n",t->mp4->file[data+1],t->mp4->file[data+2],t->mp4->file[data+3]); fprintf(t->mp4->vf,"\tcreate %u\n",get4be(t->mp4,data+4)); fprintf(t->mp4->vf,"\tmodify %u\n",get4be(t->mp4,data+8)); fprintf(t->mp4->vf,"\tid %08x\n",get4be(t->mp4,data+12)); fprintf(t->mp4->vf,"\t(resv) %08x\n",get4be(t->mp4,data+16)); fprintf(t->mp4->vf,"\tdur %u\n",get4be(t->mp4,data+20)); fprintf(t->mp4->vf,"\t(resv) %016llx\n",get8be(t->mp4,data+24)); fprintf(t->mp4->vf,"\tlayer %u\n",get2be(t->mp4,data+32)); fprintf(t->mp4->vf,"\taltgrp %u\n",get2be(t->mp4,data+34)); fprintf(t->mp4->vf,"\tvolume %u\n",get2be(t->mp4,data+36)); fprintf(t->mp4->vf,"\t(resv) %04x\n",get2be(t->mp4,data+38)); fprintf(t->mp4->vf,"\tmatrix %02x ... %02x\n",t->mp4->file[data+40],t->mp4->file[data+75]); fprintf(t->mp4->vf,"\twidth %u\n",get4be(t->mp4,data+76)); fprintf(t->mp4->vf,"\theight %u\n",get4be(t->mp4,data+80)); } break; case FOURCHAR('m','d','i','a'): verbose(t->mp4,"Media stream\n"); e = scan_chunks(t->mp4,1,data,len,&mdia_chunk,t); if (e) return(e); break; } return(0); } static unsigned int mp4_len(MP4 *h, unsigned int base, unsigned int *op, unsigned int maxo) { unsigned int o; unsigned int l; unsigned char b; int i; o = *op; l = 0; for (i=4;i>0;i--) /* XXX is the 4 limit correct? */ { if (o >= maxo) { printf("*** overrun in mp4_len\n"); exit(1); } b = h->file[base+o++]; l = (l << 7) | (b & 0x7f); if (! (b & 0x80)) break; } *op = o; return(l); } static void audio_parse_esds(TRACK *t, unsigned int data, unsigned int len) { unsigned int o; unsigned int l; void overrun(void) { printf("*** esds data overrun\n"); exit(1); } #define REQUIRE(n) do { if (o+(n) > len) overrun(); } while (0) o = 0; REQUIRE(5); verbose(t->mp4,"ESDS version %u, flags %06x\n",t->mp4->file[data],get3be(t->mp4,data+1)); o = 4; if (t->mp4->file[data+o++] == 0x03) /* Descriptor tag */ { l = mp4_len(t->mp4,data,&o,len); REQUIRE(3); verbose(t->mp4,"ESDS (len %u) ID %04x, priority = %d\n",l,get2be(t->mp4,data+o),t->mp4->file[data+o+2]); o += 3; } else { REQUIRE(2); verbose(t->mp4,"ESDS (len 2) ID %04x\n",get2be(t->mp4,data+o)); o += 2; } REQUIRE(1); if (t->mp4->file[data+o++] != 0x04) /* decoder config descr */ { printf("*** esds data missing decoder config descr\n"); exit(1); } l = mp4_len(t->mp4,data,&o,len); REQUIRE(13); if (t->mp4->flags & MP4F_VERBOSE) { fprintf(t->mp4->vf,"ESDS decoder config (len %d):\n",l); fprintf(t->mp4->vf," object type ID = %u\n",t->mp4->file[data+o]); fprintf(t->mp4->vf," stream type = %u\n",t->mp4->file[data+o+1]); fprintf(t->mp4->vf," buffer size db = %u\n",get3be(t->mp4,data+o+2)); fprintf(t->mp4->vf," max bitrate = %u\n",get4be(t->mp4,data+o+5)); fprintf(t->mp4->vf," avg bitrate = %u\n",get4be(t->mp4,data+o+9)); } o += 13; REQUIRE(1); if (t->mp4->file[data+o++] != 0x05) return; /* decoder-specific descr */ l = mp4_len(t->mp4,data,&o,len); t->a.setup_loc = data + o; t->a.setup_len = l; if (t->mp4->flags & MP4F_VERBOSE) { fprintf(t->mp4->vf,"ESDS decoder-specific config data len %u\n",l); fprintf(t->mp4->vf," data:"); for (;l>0;l--) { REQUIRE(1); fprintf(t->mp4->vf," %02x",t->mp4->file[data+o++]); } fprintf(t->mp4->vf,"\n"); } REQUIRE(1); if (t->mp4->file[data+o++] != 0x06) return; /* SL config descr */ l = mp4_len(t->mp4,data,&o,len); if (t->mp4->flags & MP4F_VERBOSE) { fprintf(t->mp4->vf,"ESDS SL config data len %u\n",l); fprintf(t->mp4->vf," data:"); for (;l>0;l--) { REQUIRE(1); fprintf(t->mp4->vf," %02x",t->mp4->file[data+o++]); } fprintf(t->mp4->vf,"\n"); } #undef REQUIRE } static const char *stsd_audio_chunk(const unsigned char type[4], unsigned int data, unsigned int len, void *tv) { TRACK *t; t = tv; switch (FOURCHAR(type[0],type[1],type[2],type[3])) { default: verbose(t->mp4,"skipping unknown chunk\n"); break; case FOURCHAR('e','s','d','s'): audio_parse_esds(t,data,len); break; } return(0); } static void crack_stsd_audio(TRACK *t) { if (t->mp4->flags & MP4F_VERBOSE) { fprintf(t->mp4->vf,"version = %u\n",get2be(t->mp4,t->stsd_at+8)); fprintf(t->mp4->vf,"revision = %u\n",get2be(t->mp4,t->stsd_at+10)); fprintf(t->mp4->vf,"vendor ID = %08x\n",get4be(t->mp4,t->stsd_at+12)); fprintf(t->mp4->vf,"channels = %u\n",get2be(t->mp4,t->stsd_at+16)); fprintf(t->mp4->vf,"sample size = %u\n",get2be(t->mp4,t->stsd_at+18)); fprintf(t->mp4->vf,"compression ID = %u\n",get2be(t->mp4,t->stsd_at+20)); fprintf(t->mp4->vf,"packet size = %u\n",get2be(t->mp4,t->stsd_at+22)); fprintf(t->mp4->vf,"sample rate = %u\n",get2be(t->mp4,t->stsd_at+24)); } t->a.sample_rate = get2be(t->mp4,t->stsd_at+24); if (t->stsd_len > 28) scan_chunks(t->mp4,0,t->stsd_at+28,t->stsd_len-28,&stsd_audio_chunk,t); } static const char *stsd_video_chunk(const unsigned char type[4], unsigned int data, unsigned int len, void *tv) { TRACK *t; t = tv; switch (FOURCHAR(type[0],type[1],type[2],type[3])) { default: verbose(t->mp4,"skipping unknown chunk\n"); break; case FOURCHAR('a','v','c','C'): verbose(t->mp4,"saving AVC config data\n"); t->v.avc_config_loc = data; t->v.avc_config_len = len; break; } return(0); } static const char *crack_stsd_video(TRACK *t) { if (t->mp4->flags & MP4F_VERBOSE) { fprintf(t->mp4->vf,"version = %u\n",get2be(t->mp4,t->stsd_at+8)); fprintf(t->mp4->vf,"revision = %u\n",get2be(t->mp4,t->stsd_at+10)); fprintf(t->mp4->vf,"vendor ID = %08x\n",get4be(t->mp4,t->stsd_at+12)); fprintf(t->mp4->vf,"temporal quality = %u\n",get4be(t->mp4,t->stsd_at+16)); fprintf(t->mp4->vf,"spatial quality = %u\n",get4be(t->mp4,t->stsd_at+20)); fprintf(t->mp4->vf,"size = %ux%u\n",get2be(t->mp4,t->stsd_at+24),get2be(t->mp4,t->stsd_at+26)); /* XXX what are the bytes at 30, 31, 34, and 35? */ fprintf(t->mp4->vf,"DPI = %ux%u\n",get2be(t->mp4,t->stsd_at+28),get2be(t->mp4,t->stsd_at+32)); /* XXX what are the bytes at 36...39? */ fprintf(t->mp4->vf,"frames per sample = %u\n",get2be(t->mp4,t->stsd_at+40)); fprintf(t->mp4->vf,"compressor name = "); print_str_vis(t->mp4->vf,t->mp4->file+t->stsd_at+42,32); fprintf(t->mp4->vf,"\n"); fprintf(t->mp4->vf,"depth = %u\n",get2be(t->mp4,t->stsd_at+74)); fprintf(t->mp4->vf,"CLUT ID = %04x\n",get2be(t->mp4,t->stsd_at+76)); } return( (t->stsd_len > 78) ? scan_chunks(t->mp4,0,t->stsd_at+78,t->stsd_len-78,&stsd_video_chunk,t) : 0 ); } static void post_trak(TRACK *t) { int i; int j; int k; int l; unsigned int loc; if (t->mp4->flags & MP4F_VERBOSE) { fprintf(t->mp4->vf,"track #%d: type ",t->serial); switch (t->type) { default: abort(); break; case TT_UNSET: fprintf(t->mp4->vf,"UNSET\n"); return; break; case TT_AUDIO: fprintf(t->mp4->vf,"AUDIO"); break; case TT_VIDEO: fprintf(t->mp4->vf,"VIDEO"); break; } fprintf(t->mp4->vf," chunks=%u",t->stco_n); fprintf(t->mp4->vf," samples=%u",t->stsz_n); fprintf(t->mp4->vf," length=%llu",t->length); fprintf(t->mp4->vf," timescale=%u",t->timescale); fprintf(t->mp4->vf," (duration %g)",t->length/(double)t->timescale); fprintf(t->mp4->vf,"\n"); } if (! t->stco_n) { verbose(t->mp4,"no chunks\n"); return; } t->nchunks = t->stco_n; t->chunks = malloc(t->nchunks*sizeof(CHUNK)); j = t->stsc_n - 1; for (i=t->nchunks-1;i>=0;i--) { if (i < t->stsc_tbl[j].first) { j --; if (j < 0) { printf("*** stsc table underflow\n"); exit(1); } if (i < t->stsc_tbl[j].first) /* still */ { printf("*** stsc table redundancy\n"); exit(1); } } t->chunks[i].pos = t->stco_tbl[i]; t->chunks[i].id = t->stsc_tbl[j].id; t->chunks[i].len = t->stsc_tbl[j].space; } if (t->mp4->flags & MP4F_VERBOSE) { fprintf(t->mp4->vf,"Chunk table:\n"); for (i=0;inchunks;i++) fprintf(t->mp4->vf,"[%d] pos=%08x id=%u len=%u\n",i,t->chunks[i].pos,t->chunks[i].id,t->chunks[i].len); } j = 0; for (i=0;inchunks;i++) j += t->chunks[i].len; k = 0; for (i=t->stts_n-1;i>=0;i--) k += t->stts_tbl[i].n; if (j != k) { printf("*** chunks total (%d) != duration table total (%d)\n",j,k); exit(1); } /* maybe build a fake stsz table here if samplesize != 0? mplayer does. */ if (t->samplesize) { /* * mplayer, at this point, checks to see if the stts table is size * 1 (or size 2 with n==1 in its second element) and either sets a * `duration' field (if so) or coughs and dies (if not). However, * I can't see any setting of duration in any other cases, meaning * it goes uninitialized if this code doesn't run. Fortunately, * all uses of that field run only if stsz_n is nonzero. It's a * bit confusing to read bceause their names for what we call * samplesize and stsz_n are confusingly similar (sample_size and * samplesize), but with attention to that the logic is clear, if * slightly confused because the same datum ("is the sample size * fixed?") is tested in two different ways (by testing * sample_size and samplesize). We call their "duration" field * "fixeddur" to emphasize that it has meaning only for fixed-size * samples. */ if ((t->stts_n == 1) || ((t->stts_n == 2) && (t->stts_tbl[1].n == 1))) { t->fixeddur = t->stts_tbl[0].dur; } else { printf("*** can't handle constant sample size with variable duration\n"); exit(1); } } if (t->stsz_n != k) { printf("*** chunk/duration length (%d) != sample count (%u)\n",k,t->stsz_n); exit(1); } t->nsamples = t->stsz_n; t->samples = malloc(t->nsamples*sizeof(SAMPLE)); for (i=t->nsamples-1;i>=0;i--) t->samples[i].len = t->stsz_tbl[i]; j = 0; k = 0; loc = 0; for (i=0;insamples;i++) { if (k < 1) { if (j >= t->stts_n) { printf("*** stts overrun setting sample durations\n"); exit(1); } k = t->stts_tbl[j].n; l = t->stts_tbl[j].dur; } t->samples[i].cue = loc; loc += l; k --; } if (k || (j != t->stts_n-1)) { printf("*** warning: stts underrun setting sample starts (k=%d j=%d n=%d)\n",k,j,t->stts_n); /* no exit here */ } k = 0; for (i=0;inchunks;i++) { loc = t->chunks[i].pos; for (j=t->chunks[i].len;j>0;j--) { if (k >= t->nsamples) abort(); t->samples[k].loc = loc; loc += t->samples[k].len; k ++; } } if (k != t->nsamples) abort(); if (t->mp4->flags & MP4F_VERBOSE) { fprintf(t->mp4->vf,"Sample table:\n"); for (i=0;insamples;i++) { fprintf(t->mp4->vf,"[%d] loc=%08x len=%08x cue=%u\n",i,t->samples[i].loc,t->samples[i].len,t->samples[i].cue); } fprintf(t->mp4->vf,"stsd data, type "); print_str_vis(t->mp4->vf,t->mp4->file+t->stsd_type_at,4); fprintf(t->mp4->vf," at %08x len %08x\n",t->stsd_at,t->stsd_len); for (i=0;istsd_len;i++) { switch (i & 15) { case 0: fprintf(t->mp4->vf,"%8x: ",i); break; case 8: fprintf(t->mp4->vf," "); break; } fprintf(t->mp4->vf," %02x",t->mp4->file[t->stsd_at+i]); switch (i & 15) { case 15: fprintf(t->mp4->vf,"\n"); break; } } if (t->stsd_len & 15) fprintf(t->mp4->vf,"\n"); } switch (t->type) { default: abort(); break; case TT_AUDIO: if (t->stsd_len < 26) { printf("*** stsd data too short for audio (len %u)\n",t->stsd_len); } else { crack_stsd_audio(t); } break; case TT_VIDEO: if (t->stsd_len < 78) { printf("*** stsd data too short for video (len %u)\n",t->stsd_len); } else { crack_stsd_video(t); } break; } } static TRACK *find_track_by_index(MP4 *h, int tx) { TRACK *t; if ((tx < 0) || (tx >= h->trackserial)) return(0); for (t=h->tracks;t;t=t->link) { if (t->serial == tx) return(t); } return(0); } static void audio_eof(MP4ADESC *d) { d->rate = 0; d->samples = 0; d->channels = 0; d->format = MP4_AFMT_EOF; d->data = 0; d->when = 0; } static void video_eof(MP4VDESC *d) { d->w = 0; d->h = 0; d->data = 0; d->when = 0; } static MP4TRK *new_mp4trk(void) { MP4TRK *ht; ht = malloc(sizeof(MP4TRK)); ht->sx = 0; ht->dbuf = 0; ht->dblen = 0; return(ht); } static void ensure_track_space(MP4TRK *ht, int nb) { if (nb > ht->dblen) { free(ht->dbuf); ht->dblen = nb; ht->dbuf = malloc(nb); } } static void audio_data(void *htv, const ADESC *d) { MP4TRK *ht; int nb; ht = htv; switch (d->format) { case AFMT_SLIN16N: nb = d->samples * d->channels * 2; ht->a.format = MP4_AFMT_SLIN16N; break; default: printf("*** don't know sound format %d\n",(int)d->format); exit(1); } ht->a.rate = ht->t->a.sample_rate; ht->a.samples = d->samples; ht->a.channels = d->channels; ensure_track_space(ht,nb); bcopy(d->data,ht->dbuf,nb); ht->a.data = ht->dbuf; } static void video_data(void *htv, const VDESC *d) { MP4TRK *ht; int nb; ht = htv; ht->v.w = d->w; ht->v.h = d->h; nb = d->w * d->h * 4; ensure_track_space(ht,nb); bcopy(d->data,ht->dbuf,nb); ht->v.data = (const unsigned char (*)[4])ht->dbuf; } MP4 *mp4_open(const char *fn, unsigned int flags, ...) { va_list ap; MP4 *h; const char **emp; const char *e; h = malloc(sizeof(MP4)); h->trackserial = 0; h->moov_at = 0; h->mdat_at = 0; h->tracks = 0; h->open_tracks = 0; h->flags = flags; emp = 0; va_start(ap,flags); if (flags & MP4F_VERBOSE) { h->vf = va_arg(ap,FILE *); } if (flags & MP4F_ERRMSG) { emp = va_arg(ap,const char **); } va_end(ap); map_file(h,fn); verbose(h,"scanning file\n"); #define FAIL(msg) do { \ if (emp) \ { *emp = (msg); \ mp4_close(h); \ return(0); \ } \ printf("*** %s\n",(msg)); \ exit(1); \ } while (0) e = scan_chunks(h,0,0,h->filesize,&file_chunk,h); if (e) FAIL(e); if (h->moov_at == 0) FAIL("no `moov' chunk"); if (h->mdat_at == 0) FAIL("no `mdat' chunk"); #undef FAIL return(h); } const char *mp4_scan(MP4 *h) { const char *e; verbose(h,"\nscanning moov chunk\n"); e = scan_chunks(h,0,h->moov_at,h->moov_len,&moov_chunk,h); if (e) return(e); if (h->tracks) { TRACK *t; for (t=h->tracks;t;t=t->link) { verbose(h,"\nscanning trak chunk\n"); e = scan_chunks(h,0,t->data,t->len,&trak_chunk,t); if (e) return(e); verbose(h,"trak postprocessing\n"); post_trak(t); } } return(0); } void mp4_close(MP4 *h) { while (h->open_tracks) mp4_close_track(h->open_tracks); while (h->tracks) { TRACK *t; t = h->tracks; h->tracks = t->link; free(t->stts_tbl); free(t->stss_tbl); free(t->stsc_tbl); free(t->stsz_tbl); free(t->stco_tbl); free(t->chunks); free(t->samples); free(t); } unmap_file(h); } int mp4_ntracks(MP4 *h) { return(h->trackserial); } MP4TRKTYPE mp4_track_type(MP4 *h, int tx) { TRACK *t; t = find_track_by_index(h,tx); if (! t) return(MP4_TT_NONE); switch (t->type) { case TT_AUDIO: return(MP4_TT_AUDIO); break; case TT_VIDEO: return(MP4_TT_VIDEO); break; default: abort(); break; } } MP4TRK *mp4_open_audio(MP4 *h, int tx) { TRACK *t; MP4TRK *ht; t = find_track_by_index(h,tx); if (! t) return(0); if (t->type != TT_AUDIO) return(0); ht = new_mp4trk(); ht->t = t; ht->c = codec_init_audio(t->mp4->file+t->stsd_type_at,&audio_data,ht); if (t->a.setup_len) codec_prefix(ht->c,t->mp4->file+t->a.setup_loc,t->a.setup_len); ht->link = h->open_tracks; h->open_tracks = ht; return(ht); } MP4TRK *mp4_open_video(MP4 *h, int tx) { TRACK *t; MP4TRK *ht; t = find_track_by_index(h,tx); if (! t) return(0); if (t->type != TT_VIDEO) return(0); ht = new_mp4trk(); ht->t = t; ht->c = codec_init_video(t->mp4->file+t->stsd_type_at,&video_data,ht); codec_prefix(ht->c,t->mp4->file+t->v.avc_config_loc,t->v.avc_config_len); ht->link = h->open_tracks; h->open_tracks = ht; return(ht); } const MP4ADESC *mp4_get_audio(MP4TRK *ht) { if (ht->t->type != TT_AUDIO) return(0); if (! ht->c) { audio_eof(&ht->a); return(&ht->a); } while (1) { ht->a.samples = 0; if (ht->sx < ht->t->nsamples) { SAMPLE *s; s = &ht->t->samples[ht->sx]; ht->a.when = s->cue / (double)ht->t->timescale; codec_data(ht->c,ht->t->mp4->file+s->loc,s->len); } else if (ht->sx == ht->t->nsamples) { codec_done(ht->c); ht->c = 0; } else { audio_eof(&ht->a); return(&ht->a); } ht->sx ++; if (ht->a.samples) return(&ht->a); } } const MP4VDESC *mp4_get_video(MP4TRK *ht) { if (ht->t->type != TT_VIDEO) return(0); ht->v.data = 0; if (! ht->c) { video_eof(&ht->v); return(&ht->v); } while (1) { ht->v.data = 0; if (ht->sx < ht->t->nsamples) { SAMPLE *s; s = &ht->t->samples[ht->sx]; ht->v.when = s->cue / (double)ht->t->timescale; codec_data(ht->c,ht->t->mp4->file+s->loc,s->len); } else if (ht->sx == ht->t->nsamples) { codec_done(ht->c); ht->c = 0; } else { video_eof(&ht->v); return(&ht->v); } ht->sx ++; if (ht->v.data) return(&ht->v); } } int mp4_max_index(MP4TRK *ht) { return(ht->t->nsamples-1); } double mp4_index_time(MP4TRK *ht, int sx) { if ((sx < 0) || (sx >= ht->t->nsamples)) return(-1); return(ht->t->samples[sx].cue/(double)ht->t->timescale); } int mp4_max_seekx(MP4TRK *ht) { return(ht->t->stss_tbl?ht->t->stss_n-1:ht->t->nsamples-1); } double mp4_seekx_time(MP4TRK *ht, int sx) { if (ht->t->stss_tbl) { if ((sx < 0) || (sx >= ht->t->stss_n)) return(-1); return(ht->t->samples[ht->t->stss_tbl[sx]].cue/(double)ht->t->timescale); } else { if ((sx < 0) || (sx >= ht->t->nsamples)) return(-1); return(ht->t->samples[sx].cue/(double)ht->t->timescale); } } void mp4_seek(MP4TRK *ht, int sx) { if (ht->c == 0) return; if (ht->t->stss_tbl) { if (sx < 0) sx = 0; else if (sx >= ht->t->stss_n) sx = ht->t->stss_n - 1; ht->sx = ht->t->stss_tbl[sx]; } else { if (sx < 0) sx = 0; else if (sx >= ht->t->nsamples) sx = ht->t->nsamples - 1; ht->sx = sx; } #if 1 codec_did_seek(ht->c); #endif #if 1 if (ht->t->type == TT_VIDEO) { codec_done(ht->c); ht->c = codec_init_video(ht->t->mp4->file+ht->t->stsd_type_at,&video_data,ht); codec_prefix(ht->c,ht->t->mp4->file+ht->t->v.avc_config_loc,ht->t->v.avc_config_len); } #endif } void mp4_close_track(MP4TRK *ht) { MP4TRK *tt; MP4TRK **ttp; if (ht->c) codec_done(ht->c); free(ht->dbuf); ttp = &ht->t->mp4->open_tracks; while ((tt = *ttp)) { if (tt == ht) { *ttp = tt->link; free(ht); return; } else { ttp = &tt->link; } } abort(); }