/*
* Parse the results of HTML forms for use in CGI scripts.
* Copyright © 2000 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software-19980720.html
*
* Author: Bert Bos
* Created: 31 July 2000
*/
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
/* Size of the input buffer and of the file name buffers */
#define MAXLINE 4096

/* Characters accepted in a variable name found in a multipart header */
#define ACCEPT "abcdefghijklmnopqrstuvwxyz0123456789_-."

/* States of the scanner for URL-encoded data */
typedef enum {
  Variable,                     /* Scanning the name of a variable */
  Value,                        /* Scanning a value */
  Hex1,                         /* Expecting the 1st char after '%' */
  Hex2                          /* Expecting the 2nd char after '%' */
} State;
/* error -- print error message and exit
 *
 * Writes "parseform: <msg>" followed by a newline to stderr and then
 * terminates the process with exit status 1; it never returns.  The
 * message is only read, hence the const qualifier.
 */
static void error(const char *msg)
{
  fprintf(stderr, "parseform: %s\n", msg);
  exit(1);
}
/* parse_query_string -- parse the contents of the QUERY_STRING variable
 *
 * QUERY_STRING is scanned as a list of "name=value" fields separated by
 * '&' or ';'.  For each field a file called <prefix><name> is created
 * and the decoded value is written to it: '+' becomes a space and "%XX"
 * becomes the byte with hexadecimal code XX (a malformed escape is
 * skipped).  Only the alphanumeric characters of the name are used, and
 * the name is truncated to what fits in the file name buffer.
 *
 * Exits via error() if QUERY_STRING is not set, if prefix does not fit
 * in the file name buffer, or if a file cannot be created or written.
 */
static void parse_query_string(char *prefix)
{
  State state;
  char *query, filename[MAXLINE];
  unsigned char c;              /* Current byte, unsigned for <ctype.h> */
  int hexchar = 0, digit;
  FILE *f = NULL;
  size_t i, plen;

  if (! (query = getenv("QUERY_STRING"))) error("Missing QUERY_STRING");

  /* The prefix comes from the command line: check that it fits */
  plen = strlen(prefix);
  if (plen >= sizeof(filename)) error("File prefix too long");
  memcpy(filename, prefix, plen + 1);
  i = plen;

  for (state = Variable; *query; query++) {
    c = (unsigned char) *query;
    switch (state) {
    case Variable:              /* Scanning name of var */
      if (c == '=') {
        filename[i] = '\0';
        if (! (f = fopen(filename, "w"))) error(strerror(errno));
        state = Value;
      } else if (c == ';' || c == '&') {
        /* A field without '=': drop its name, so that it does not
           become part of the next field's file name */
        i = plen;
      } else if (i < sizeof(filename) - 1 && isalnum(c)) {
        filename[i++] = (char) c;
      }
      break;
    case Value:                 /* Scanning a value */
      if (c == ';' || c == '&') {
        if (fclose(f) == EOF) error(strerror(errno));
        f = NULL;               /* Avoid a second fclose() after the loop */
        i = plen;
        state = Variable;
      } else if (c == '%') {
        state = Hex1;
      } else {
        if (putc(c == '+' ? ' ' : c, f) == EOF) error(strerror(errno));
      }
      break;
    case Hex1:                  /* 1st char after '%' */
      state = Hex2;
      if ('0' <= c && c <= '9') hexchar = c - '0';
      else if ('A' <= c && c <= 'F') hexchar = c - 'A' + 10;
      else if ('a' <= c && c <= 'f') hexchar = c - 'a' + 10;
      else state = Value;       /* Error, skip char... */
      break;
    case Hex2:                  /* 2nd char after '%' */
      if ('0' <= c && c <= '9') digit = c - '0';
      else if ('A' <= c && c <= 'F') digit = c - 'A' + 10;
      else if ('a' <= c && c <= 'f') digit = c - 'a' + 10;
      else digit = -1;          /* Error, skip char... */
      if (digit >= 0 && putc(16 * hexchar + digit, f) == EOF)
        error(strerror(errno));
      state = Value;
      break;
    default:
      assert(!"Cannot happen");
    }
  }
  if (f && fclose(f) == EOF) error(strerror(errno));
}
/* parse_url_encoded -- parse URL-encoded data from stdin
 *
 * Reads CONTENT_LENGTH bytes from stdin and scans them as a list of
 * "name=value" fields separated by '&' or ';'.  For each field a file
 * called <prefix><name> is created and the decoded value is written to
 * it: '+' becomes a space and "%XX" becomes the byte with hexadecimal
 * code XX (a malformed escape is skipped).  Only the alphanumeric
 * characters of the name are used, and the name is truncated to what
 * fits in the file name buffer.
 *
 * Exits via error() if CONTENT_LENGTH is missing or is not a
 * non-negative number, if stdin ends or fails before that many bytes
 * were read, if prefix does not fit in the file name buffer, or if a
 * file cannot be created or written.
 */
static void parse_url_encoded(char *prefix)
{
  State state;
  char *length, *end, filename[MAXLINE];
  int c, hexchar = 0, digit;    /* c is an int, so that EOF is distinct */
  long val;
  FILE *f = NULL;
  size_t i, len, plen;

  if (! (length = getenv("CONTENT_LENGTH"))) error("Missing CONTENT_LENGTH");

  /* A negative length would turn into a huge size_t: reject it */
  errno = 0;
  val = strtol(length, &end, 10);
  if (end == length || errno == ERANGE || val < 0)
    error("Invalid CONTENT_LENGTH");
  len = (size_t) val;

  /* The prefix comes from the command line: check that it fits */
  plen = strlen(prefix);
  if (plen >= sizeof(filename)) error("File prefix too long");
  memcpy(filename, prefix, plen + 1);
  i = plen;

  for (state = Variable; len > 0; len--) {
    if ((c = getchar()) == EOF) {
      if (ferror(stdin)) error(strerror(errno));
      error("Premature EOF on input");
    }
    switch (state) {
    case Variable:              /* Scanning name of var */
      if (c == '=') {
        filename[i] = '\0';
        if (! (f = fopen(filename, "w"))) error(strerror(errno));
        state = Value;
      } else if (c == ';' || c == '&') {
        /* A field without '=': drop its name, so that it does not
           become part of the next field's file name */
        i = plen;
      } else if (i < sizeof(filename) - 1 && isalnum(c)) {
        filename[i++] = (char) c;
      }
      break;
    case Value:                 /* Scanning a value */
      if (c == ';' || c == '&') {
        if (fclose(f) == EOF) error(strerror(errno));
        f = NULL;               /* Avoid a second fclose() after the loop */
        i = plen;
        state = Variable;
      } else if (c == '%') {
        state = Hex1;
      } else {
        if (putc(c == '+' ? ' ' : c, f) == EOF) error(strerror(errno));
      }
      break;
    case Hex1:                  /* 1st char after '%' */
      state = Hex2;
      if ('0' <= c && c <= '9') hexchar = c - '0';
      else if ('A' <= c && c <= 'F') hexchar = c - 'A' + 10;
      else if ('a' <= c && c <= 'f') hexchar = c - 'a' + 10;
      else state = Value;       /* Error, skip char... */
      break;
    case Hex2:                  /* 2nd char after '%' */
      if ('0' <= c && c <= '9') digit = c - '0';
      else if ('A' <= c && c <= 'F') digit = c - 'A' + 10;
      else if ('a' <= c && c <= 'f') digit = c - 'a' + 10;
      else digit = -1;          /* Error, skip char... */
      if (digit >= 0 && putc(16 * hexchar + digit, f) == EOF)
        error(strerror(errno));
      state = Value;
      break;
    default:
      assert(!"Cannot happen");
    }
  }
  if (f && fclose(f) == EOF) error(strerror(errno));
}
/* shift_and_read_more -- remove prefix from buf and read more from stdin */
static void shift_and_read_more(size_t start, char buf[MAXLINE], size_t *len)
{
assert(start <= *len);
*len -= start;
memmove(buf, buf + start, *len);
*len += fread(buf + *len, 1, MAXLINE - *len, stdin);
if (ferror(stdin)) error(strerror(errno));
}
/* parse_multipart -- parse multipart/form-data from stdin */
static void parse_multipart(char *prefix)
{
char boundary[MAXLINE], buf[MAXLINE], filename[MAXLINE], *p;
size_t blen, n, j;
FILE *f = NULL;
/* Read first buffer full of data */
n = fread(buf, 1, sizeof(buf), stdin);
if (ferror(stdin)) error(strerror(errno));
/* At this point, buf should contain at least the first boundary */
for (j = 0; j < n && buf[j] != '\r'; j++) boundary[j] = buf[j];
if (j + 1 >= n || buf[j+1] != '\n') error("Failed to parse multipart data");
boundary[j] = '\0'; /* Not needed, just for easier debugging */
blen = j;
/* Loop over all parts of the multipart, until a boundary + "--" */
do {
/* Remove this boundary and the following newline, and read more */
shift_and_read_more(blen + 2, buf, &n);
/* At this point, buf contains the start of a part of the multipart */
/* Loop over headers until an empty line */
while (strncmp(buf, "\r\n", 2) != 0) {
/* At this point buf contains (hopefully) a line of text or more */
if (strncasecmp(buf, "Content-disposition:", 20) == 0) {
/* Create a file named after the parameter */
if (! (p = strstr(buf, "name="))) error("Missing name=");
if (*(p + 5) == '"') p += 6; else p += 5;
if ((j = strspn(p, ACCEPT)) == 0) error("Illegal variable name");
strncat(strcpy(filename, prefix), p, j);
if (f) error("Syntax error in input, duplicate headers");
if (! (f = fopen(filename, "w"))) error(strerror(errno));
} else if (strncasecmp(buf, "Content-type:", 13) == 0) {
; /* Skip this header */
} else {
error("Possible bug: unrecognized header");
}
/* Remove this line from buf and read more from stdin */
for (j = 0; j < n && buf[j] != '\r'; j++) ;
if (j + 1 >= n) error("Failed to parse multipart data");
shift_and_read_more(j + 2, buf, &n);
}
if (! f) error("Missing parameter name");
/* Remove the \r\n and read more */
shift_and_read_more(2, buf, &n);
/* Copy data up to next \r from buf to file f */
for (j = 0; buf[j] != '\r';) {
if (fputc(buf[j++], f) == EOF) error(strerror(errno));
if (j == n) {
shift_and_read_more(j, buf, &n);
if (n == 0) error("Premature EOF on input");
j = 0;
}
}
/* Remove that part from buf and read more */
shift_and_read_more(j, buf, &n);
/* At this point, buf starts with \r */
while (buf[1] != '\n' || strncmp(buf + 2, boundary, blen) != 0) {
/* Copy the \r */
if (fputc(buf[0], f) == EOF) error(strerror(errno));
/* Copy data up to next \r from buf to file f */
for (j = 1; buf[j] != '\r';) {
if (fputc(buf[j++], f) == EOF) error(strerror(errno));
if (j == n) {
shift_and_read_more(j, buf, &n);
if (n == 0) error("Premature EOF on input");
j = 0;
}
}
/* Remove that part from buf and read more */
shift_and_read_more(j, buf, &n);
}
if (fclose(f) == EOF) error(strerror(errno));
f = NULL;
/* Remove the \r\n, leave the boundary */
shift_and_read_more(2, buf, &n);
} while (buf[blen] != '-');
}
/* parse_stdin -- parse URL-encoded or multipart/form-data data from stdin
 *
 * Selects the parser on the basis of the CONTENT_TYPE variable: a value
 * that starts with "multipart/form-data" (in any mix of upper and lower
 * case, as media type names are case-insensitive) is handled by
 * parse_multipart(), everything else by parse_url_encoded().  Exits via
 * error() if CONTENT_TYPE is not set.
 */
static void parse_stdin(char *prefix)
{
  char *type;

  if (! (type = getenv("CONTENT_TYPE"))) error("Missing CONTENT_TYPE");
  setvbuf(stdin, NULL, _IONBF, 0); /* Unbuffered input */
  if (strncasecmp(type, "multipart/form-data", 19) == 0)
    parse_multipart(prefix);
  else
    parse_url_encoded(prefix);
}
/* main -- main body
 *
 * Expects exactly one argument, the prefix for the names of the files
 * to create.  Depending on REQUEST_METHOD, the form data is taken from
 * QUERY_STRING (GET and HEAD) or from stdin (POST).  Returns 0 on
 * success; all failures exit via error().
 */
int main(int argc, char *argv[])
{
  char *method, *prefix;

  /* No newline in the message: error() already appends one */
  if (argc != 2) error("Usage: parseform file-prefix");
  prefix = argv[1];

  if (! (method = getenv("REQUEST_METHOD"))) error("Missing REQUEST_METHOD");
  if (strcmp(method, "GET") == 0 || strcmp(method, "HEAD") == 0)
    parse_query_string(prefix);
  else if (strcmp(method, "POST") == 0)
    parse_stdin(prefix);
  else
    error("Unknown REQUEST_METHOD");
  return 0;
}