Retrieving documents with HTTP

This agent conforms to W3A/A. It uses some utility routines that are not shown.

TO DO: set O_NONBLOCK before reading the MIME header. (Currently, if O_NONBLOCK is requested, it is only applied after the MIME header has been read. This can still cause blocking.)

TO DO: handle content encodings.

TO DO: fix handling of old HTTP servers; currently their first line is lost.

<<*>> =
#include <config.h>
#include <fcntl.h>
#include <unistd.h>                     /* close(), getuid(), gethostname() */
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <pwd.h>                        /* To find who we are */
#include <w3a.h>
#include <tcp.h>                        /* connectTCP() */
#include <str.h>                        /* String and heap functions */
#include <url.h>                        /* URL parsing */
#include <mime.h>                       /* Read/parse MIME headers */

/* Per-connection state, indexed by the socket descriptor. */
static struct {
  W3ADocumentInfo info;
  FILE *f;
} *conn_info[FD_SETSIZE];

send_HTTP_request sends a request to an HTTP server. Arguments are: f = the socket (as a stdio stream); uri = the parsed URL of the thing that is requested; method = HTTP method to use; referer = URL of the document that contained the hyperlink (may be NULL).

An auxiliary function send_HTRQ_headers sends the MIME headers for an HTTP request. The From: header should give the E-mail address of the user. The Accept: header gives a list of accepted file formats. There may be more than one Accept: header. User-Agent: and Referer: are also sent.

<<*>> +=
#define HTTPVERSION "HTTP/1.0" /* HTTP protocol version */ #define MAXHOSTNAMELEN 256 /* Name of local machine */ static void send_HTRQ_headers(FILE *f, const char *referer) { struct passwd *pwent; /* Info about user */ struct hostent *phe; /* Info about localhost */ char host[MAXHOSTNAMELEN]; /* Name of local machine */ W3ABrowserInfo info; /* Accepted formats */ int i; if ((pwent = getpwuid(getuid())) && (gethostname(host, sizeof(host)) == 0) && (phe = gethostbyname(host))) fprintf(f, "From: %s@%s\r\n", pwent->pw_name, phe->h_name); W3AbrowserInfo(&info); for (i = 0; i < info.nformats; i++) if (info.preferences[i] == 1.0) fprintf(f, "Accept: %s\015\012", info.formats[i]); else fprintf(f, "Accept: %s; q=%f\015\012", info.formats[i], info.preferences[i]); fprintf(f, "User-Agent: %s\015\012", info.version); if (referer) fprintf(f, "Referer: %s\015\012", referer); } static Bool send_HTTP_request(FILE *f, URI uri, int method, const char *referer) { char *path, *search, *meth; path = strip2str(uri.path); search = uri.search ? strip2str(uri.search) : NULL; switch (method) { case GET_METHOD: meth = "GET"; break; case PUT_METHOD: meth = "PUT"; break; case POST_METHOD: meth = "POST"; break; case HEAD_METHOD: meth = "HEAD"; break; default: errno = EMETHOD; return FALSE; /* Illegal method */ } fprintf(f, "%s %s%s%s %s\r\n", meth, strip2str(uri.path), search ? "?" : "", search ? search : "", HTTPVERSION); send_HTRQ_headers(f, referer); fprintf(f, "\r\n"); /* End of headers */ fflush(f); /* Make ready for read */ return TRUE; }

The exported functions are: initHTTP, openHTTP, readHTTP, writeHTTP, infoHTTP, closeHTTP, and deleteHTTP. Deleting a document is not implemented yet.

<<*>> +=
Bool initHTTP() { /* Nothing to initialize */ } int openHTTP(const char *url, int method, int flags, const char *referer) { URI uri; char *host, *port; int s, i; FILE *f; char buf[BUFSIZ]; MIME_header header; if (! URL_parse(url, &uri)) { errno = EURL; /* Bad URL syntax */ return -1; } port = uri.port ? strip2str(uri.port) : "80"; host = strip2str(uri.host); if ((s = connectTCP(host, port)) == -1) return -1; /* Could not connect */ if (! (f = fdopen(s, "r+"))) return -1; /* I/O error */ if (! send_HTTP_request(f, uri, method, referer)) return -1; /* Illegal method */ if (! fgets(buf, sizeof(buf), f)) /* Read status line */ return -1; /* I/O error */ new(conn_info[s]); conn_info[s]->info.url = newstring(url); conn_info[s]->info.mime_type = NULL; conn_info[s]->info.mime_params = NULL; conn_info[s]->info.title = NULL; conn_info[s]->info.referer = newstring(referer); conn_info[s]->info.status = NULL; if (! n_eq(buf, "HTTP", 4)) { /* Old server */ conn_info[s]->info.mime_type = newstring("text/html"); /* Sorry, we loose the first line... */ } else { /* HTTP/1.0 or newer */ for (i = 4; !isspace(buf[i]); i++) ; for (; buf[i] && isspace(buf[i]); i++) ; conn_info[s]->info.status = newstring(buf + i); } read_header(f, &header, NULL); if (header.head[Title]) conn_info[s]->info.title = newstring(header.head[Title]); if (header.head[Base]) conn_info[s]->info.url = newstring(header.head[Base]); if (header.head[Content_Type]) conn_info[s]->info.mime_type = newstring(header.head[Content_Type]); else conn_info[s]->info.mime_type = newstring("text/html"); /* Something with mime_params, charset?... 
*/ if (flags & O_NONBLOCK) if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) return -1; /* I/O error */ conn_info[s]->f = f; return s; } int readHTTP(int fd, char *buf, size_t nbytes) { assert(conn_info[fd]); return fread(buf, 1, nbytes, conn_info[fd]->f); } int writeHTTP(int fd, const char *buf, size_t nbytes) { assert(conn_info[fd]); return fwrite(buf, 1, nbytes, conn_info[fd]->f); } Bool infoHTTP(int fd, W3ADocumentInfo *buf) { buf->url = newstring(conn_info[fd]->info.url); buf->mime_type = newstring(conn_info[fd]->info.mime_type); buf->mime_params = newstring(conn_info[fd]->info.mime_params); buf->title = newstring(conn_info[fd]->info.title); buf->referer = newstring(conn_info[fd]->info.referer); buf->status = newstring(conn_info[fd]->info.status); return TRUE; } Bool closeHTTP(int fd) { int status; assert(conn_info[fd]); status = fclose(conn_info[fd]->f); dispose(conn_info[fd]); return status != -1; } Bool deleteHTTP(const char *url) { errno = ENYI; /* Not yet implemented */ return FALSE; }