/* * Compute optimal quoting. * * Input is a string and a FILE *. Output consists of printing * characters to the FILE * to generate a quoted version of the * string. * * Inside ' ', any character represents itself except another '. * Inside " ", any character represents itself except ", \, or $, and * \ quotes any of them. Outside ' ' or " ", \ quotes any character. * This module computes optimal quoting in the sense of a * minimum-length string that represents the input string. Characters * %+,-./:=@_, letters, and digits do not need any quoting (though * they can be quoted if it is convenient); all other characters * cannot appear unquoted. * * Let the input string be S, of length L, and let C(l,e) be the * minimum-length way to write the first l characters of S, ending * with a single quote, a double quote, or no quote (or a backslashed * quote), according as e is 1, 2, or 0. Then, depending on S[i], we * can express C(i,e) in terms of C(i-1,e') for various pairs, * where the particular expressions depend on whether S[i] is * * - Letter, digit, or one of the above punctuation characters. * - ', single quote. * - ", double quote. * - \, backslash. * - $, dollar sign. * - Other. * * An optimum result can then be obtained as the minimum of C(L,e) for * all possible values of e. By attaching a little additional data, * this can be extended to yield the actual quoting methods used. * * As a special case, a zero-length string turns into '' rather than * zero characters. */ #include #include #include "quote.h" typedef struct cell CELL; struct cell { unsigned int val; unsigned char from; unsigned char how; #define HOW_NULL 1 // Nothing #define HOW_DOUBLE_S 2 // Append '' #define HOW_DOUBLE_D 3 // Append "" #define HOW_SIMPLE 4 // Just append #define HOW_INSIDE_S 5 // Append inside the trailing ' #define HOW_INSIDE_D 6 // Append inside the trailing " #define HOW_S_QUOTE 7 // Append with ' ' around it #define HOW_D_QUOTE 8 // Append with " " around it #define HOW_B_QUOTE 9 // Append \ and the char #define HOW_S_QUOTE_B 10 // Append with '\ ' around it #define HOW_D_QUOTE_B 11 // Append with "\ " around it #define HOW_INSIDE_D_B 12 // Append with \ inside trailing " #define HOW_B_S_S_S 13 // Append \''' } ; #define MK(v,f,h) ((CELL){.val=(v),.from=(f),.how=(h)}) static unsigned int how_cost[][3] = { [HOW_NULL] = { 0, 1000000, 1000000 }, [HOW_DOUBLE_S] = { 20, 1000000, 1000000 }, [HOW_DOUBLE_D] = { 20, 1000000, 1000000 }, [HOW_SIMPLE] = { 10, 11, 11 }, [HOW_INSIDE_S] = { 1000000, 10, 1000000 }, [HOW_INSIDE_D] = { 1000000, 1000000, 10 }, [HOW_S_QUOTE] = { 31, 1000000, 31 }, [HOW_D_QUOTE] = { 31, 31, 1000000 }, [HOW_B_QUOTE] = { 20, 21, 21 }, [HOW_S_QUOTE_B] = { 41, 40, 41 }, [HOW_D_QUOTE_B] = { 41, 41, 40 }, [HOW_INSIDE_D_B] = { 1000000, 1000000, 20 }, [HOW_B_S_S_S] = { 1000000, 40, 1000000 } }; static CELL best(const CELL *p, int h0, int h1, int h2) { int t0; int t1; int t2; t0 = p[0].val + how_cost[h0][0]; t1 = p[1].val + how_cost[h1][1]; t2 = p[2].val + how_cost[h2][2]; /* * There are 13 possible orders for t0, t1, t2: * * A: t0 == t1 == t2 * B: t0 == t1 < t2 * C: t0 == t2 < t1 * D: t1 == t2 < t0 * E: t0 < t1 == t2 * F: t1 < t0 == t2 * G: t2 < t0 == t1 * H: t0 < t1 < t2 * I: t0 < t2 < t1 * J: t1 < t0 < t2 * K: t1 < t2 < t0 * L: t2 < t0 < t1 * M: t2 < t1 < t0 * * However, we don't care about distinctions beyond "lowest" and * "non-lowest". For example, we don't care about the difference * between E and H. This reduces the cases to seven: * * A: t0 == t1 == t2 * B: t0 == t1 < t2 * C: t0 == t2 < t1 * D: t1 == t2 < t0 * E: t0 < ... * F: t1 < ... * G: t2 < ... * * Even though there are only seven cases, there is no way to tell * them apart in only three comparisons. Proof: first comparison * must be of the form tX==tY or tX=0;i--) { cur = c[i][k]; path[i] = cur; k = cur.from; } trail = -1; for (i=0;i<=l;i++) { cur = path[i]; switch (cur.how) { case HOW_NULL: if (i) abort(); break; case HOW_DOUBLE_S: if (i) abort(); putc('\'',f); trail = '\''; break; case HOW_DOUBLE_D: if (i) abort(); putc('"',f); trail = '"'; break; case HOW_SIMPLE: if (trail >= 0) putc(trail,f); trail = s[i-1]; break; case HOW_INSIDE_S: if (trail != '\'') abort(); putc(s[i-1],f); break; case HOW_INSIDE_D: if (trail != '"') abort(); putc(s[i-1],f); break; case HOW_S_QUOTE: if (trail >= 0) putc(trail,f); fprintf(f,"'%c",s[i-1]); trail = '\''; break; case HOW_D_QUOTE: if (trail >= 0) putc(trail,f); fprintf(f,"\"%c",s[i-1]); trail = '"'; break; case HOW_B_QUOTE: if (trail >= 0) putc(trail,f); putc('\\',f); trail = s[i-1]; break; case HOW_S_QUOTE_B: if (trail >= 0) putc(trail,f); fprintf(f,"'\\%c",s[i-1]); trail = '\''; break; case HOW_D_QUOTE_B: if (trail >= 0) putc(trail,f); fprintf(f,"\"\\%c",s[i-1]); trail = '"'; break; case HOW_INSIDE_D_B: if (trail != '"') abort(); fprintf(f,"\\%c",s[i-1]); break; case HOW_B_S_S_S: if (trail >= 0) putc(trail,f); fprintf(f,"\\''"); trail = '\''; break; default: abort(); break; } } if (trail < 0) abort(); putc(trail,f); free(c); free(path); } #ifdef TESTBED int main(void); int main(void) { char str[64]; int l; while (1) { if (! fgets(&str[0],sizeof(str),stdin)) break; l = strlen(&str[0]); if ((l > 0) && (str[l-1] == '\n')) str[--l] = '\0'; optimal_quote(stdout,&str[0]); printf("\n"); } return(0); } #endif