/*
 * csvtotab -- convert comma-separated values to tab-separated values
 *
 * RFC 4180 requires CRLF as line endings; this program also accepts
 * CR or LF. RFC 4180 requires the same number of fields in each line,
 * this program does not check for that.
 *
 * Line endings on output are LF, independent of the input.
 *
 * A tab or backslash inside a field, and a CR or LF inside a quoted
 * field, are written as the two-character escapes \t, \\, \r and \n,
 * so that every output line holds exactly one record.
 *
 * Copyright © 2014 World Wide Web Consortium
 * See http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
 *
 * Author: Bert Bos
 * Created: 3 April 2014
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <getopt.h>
#include <err.h>

#define VERSION "0.1"

/* Long options. getopt_long() requires an all-zero terminating entry. */
static struct option longopts[] = {
  {"help", no_argument, NULL, 'h'},
  {"separator", required_argument, NULL, 's'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};

/*
 * The field separator, stored as an unsigned char value so that it can
 * be compared directly with the (non-negative) result of fgetc().
 */
static int sep = ',';


/* version -- print program version and exit */
static void version(const char *prog)
{
  printf("%s %s\n", prog, VERSION);
  exit(0);
}


/* usage -- print usage message on stderr and exit */
static void usage(const char *prog)
{
  fprintf(stderr, "Usage: %s [-h] [-v] [-s char] [file [file...]]\n", prog);
  exit(1);
}


/* help -- print usage information and exit */
static void help(const char *prog)
{
  printf(
    "Usage: %s [options] [--] [csv-file [csv-file...]]\n"
    "Version: %s\n"
    "Options:\n"
    "  -s, --separator=<char>\n"
    "      The character that separates fields in the input. Default comma (,)\n"
    "  -h, --help\n"
    "      Show this help text and exit.\n"
    "  -v, --version\n"
    "      Show the program version number and exit.\n"
    "The arguments are CSV files. If no files are given or if the name\n"
    "is -, the program reads from standard input.\n",
    prog, VERSION);
  exit(0);
}


/*
 * process -- read file f as a CSV and output tab-separated to stdout
 *
 * f is the stream to read; name is only used to identify the stream in
 * the message printed when reading fails (an empty name means standard
 * input). Syntax errors in the input and read errors terminate the
 * program with exit code 1.
 *
 * States of the parser:
 *   START     at the start of a record, nothing of it seen yet
 *   COMMA     just after a separator
 *   IN_FIELD  inside an unquoted field
 *   QUOTED    inside a quoted field
 *   QUOTE     saw a quote inside a quoted field: either the closing
 *             quote or the first half of a doubled quote
 *   CR        saw a CR outside quotes; the LF for it has not been
 *             written yet, because an LF may still follow (CRLF)
 */
static void process(FILE *f, const char *name)
{
  enum {START, COMMA, IN_FIELD, QUOTE, QUOTED, CR} state = START;
  int c;

  /*
   * Remove the BOM (EF BB BF), if any. If only a prefix of the BOM is
   * present, the bytes already consumed are ordinary data: write them
   * out and continue inside an unquoted field. ungetc(EOF) is a no-op,
   * so a short file is handled by the main loop as usual.
   */
  if ((c = fgetc(f)) != 0xEF) {
    ungetc(c, f);
  } else if ((c = fgetc(f)) != 0xBB) {
    putchar(0xEF);
    state = IN_FIELD;
    ungetc(c, f);
  } else if ((c = fgetc(f)) != 0xBF) {
    putchar(0xEF);
    putchar(0xBB);
    state = IN_FIELD;
    ungetc(c, f);
  }

  /* The separator is tested first, so it may be a tab or a backslash. */
  while ((c = fgetc(f)) != EOF) {
    if (c == sep) {
      switch (state) {
      case QUOTED: putchar(c); break;
      case CR: fputs("\n\t", stdout); state = COMMA; break;
      default: putchar('\t'); state = COMMA; break;
      }
    } else if (c == '"') {
      switch (state) {
      case IN_FIELD: errx(1, "Found a quote (\") inside a field");
      case QUOTE: putchar('"'); state = QUOTED; break; /* Doubled quote */
      case QUOTED: state = QUOTE; break;
      case CR: putchar('\n'); state = QUOTED; break;
      default: state = QUOTED; break;
      }
    } else if (c == '\t') {
      switch (state) {
      case QUOTE: errx(1, "Found a lone quote (\") in a quoted field");
      case QUOTED: fputs("\\t", stdout); break;
      case CR: fputs("\n\\t", stdout); state = IN_FIELD; break;
      default: fputs("\\t", stdout); state = IN_FIELD; break;
      }
    } else if (c == '\r') {
      switch (state) {
      case QUOTED: fputs("\\r", stdout); break;
      case CR: putchar('\n'); break;	/* CR CR: end the first line */
      default: state = CR; break;
      }
    } else if (c == '\n') {
      switch (state) {
      case QUOTED: fputs("\\n", stdout); break;
      case CR: fputs("\n", stdout); state = START; break; /* CRLF */
      default: putchar('\n'); state = START; break;
      }
    } else if (c == '\\') {
      switch (state) {
      case QUOTED: fputs("\\\\", stdout); break;
      case QUOTE: errx(1, "Found a lone quote (\") in a quoted field");
      case CR: fputs("\n\\\\", stdout); state = IN_FIELD; break;
      default: fputs("\\\\", stdout); state = IN_FIELD; break;
      }
    } else {
      switch (state) {
      case QUOTE: errx(1, "Found a lone quote (\") in a quoted field");
      case QUOTED: putchar(c); break;
      case CR: putchar('\n'); putchar(c); state = IN_FIELD; break;
      default: putchar(c); state = IN_FIELD; break;
      }
    }
  }

  /* EOF is also returned on a read error; do not mistake it for the end. */
  if (ferror(f)) err(1, "%s", *name ? name : "standard input");

  /* Terminate the last record if the input did not end with a line ending. */
  switch (state) {
  case START: break;
  case QUOTED: errx(1, "End of file inside a quoted field");
  case CR:
  case QUOTE:
  case IN_FIELD:
  case COMMA: putchar('\n'); break;
  }
}


/* main -- parse the options, then convert each file (or stdin) in turn */
int main(int argc, char *argv[])
{
  FILE *f;
  int i, c;

  while ((c = getopt_long(argc, argv, "hs:v", longopts, NULL)) != -1) {
    switch (c) {
    case 'h': help(argv[0]); break;
    case 's':
      /* The separator must be exactly one character (byte). */
      if (!*optarg || optarg[1]) usage(argv[0]);
      sep = (unsigned char)*optarg;
      break;
    case 'v': version(argv[0]); break;
    default: usage(argv[0]);
    }
  }

  if (sep == '"' || sep == '\r' || sep == '\n')
    errx(1, "Cannot use this separator.");

  if (optind == argc) {
    process(stdin, "");
  } else {
    for (i = optind; i < argc; i++) {
      if (argv[i][0] == '-' && !argv[i][1]) {
        process(stdin, "");
      } else {
        if (!(f = fopen(argv[i], "r"))) err(1, "%s", argv[i]);
        process(f, argv[i]);
        fclose(f);
      }
    }
  }

  /* Output is buffered: make sure that all of it was really written. */
  if (ferror(stdout) || fflush(stdout) == EOF)
    errx(1, "Error writing to standard output");

  return 0;
}