Minimal WWW browser

This is an example of a WWW browser that uses the W3A API. It is `minimal' in the sense that it has no GUI of its own, doesn't handle user functions (W3A/U) or printer drivers (W3A/P) and is not very configurable.

It should not be used as a good example of how to implement a W3A browser, but only as an illustration of the API. Furthermore, it can be used to test applets.

The implementation makes use of the FWF Stack widget and a few routines for parsing URLs (not shown, declared in `str.h' and `url.h').

<<*>> =
#include <config.h>
#include <stdio.h>			/* Standard I/O */
#include <stdlib.h>			/* exit() */
#include <string.h>			/* strcmp(), strncmp(), strlen() */
#include <assert.h>			/* assert() */
#include <X11/Intrinsic.h>		/* X Toolkit */
#include <X11/StringDefs.h>		/* X Toolkit */
#include <Xfwf/Stack.h>			/* Stack widget */
#include <w3a.h>			/* W3A API definition */
#include <errno.h>			/* errno variable */
#include <str.h>			/* String & heap fns */
#include <url.h>			/* URL parsing */
#include <w3alib.h>			/* new_doc() and copy_doc() */

#define MAXNAMELEN 100			/* Max. function name length */

/* prefix -- true if string b starts with string a */
#define prefix(a, b) (strncmp((b), (a), strlen(a)) == 0)

/* eq -- true if strings s and t are equal */
#define eq(s, t) (strcmp((s), (t)) == 0)

typedef struct {			/* Describes an agent */
    char **proto;			/* Protocols */
    int nrproto;
    Bool (*init)(char ***protocols, int *nrprotocols);
    int (*open)(const char *, int, int, const char *);
    int (*peek)(int fd);
    int (*read)(int, char *, size_t);
    int (*write)(int, const char *, size_t);
    Bool (*close)(int);
    Bool (*delete)(const char *);
    Bool (*info)(int, W3ADocumentInfo *);
} Agent;

typedef struct {			/* Describes a viewer */
    char **mime_types;			/* MIME types */
    int nrtypes;
    Bool (*init)(char ***mime_types, int *nrtypes);
    long (*open)(const W3ADocumentInfo, W3AWindow);
    int (*write)(long, const char *, size_t);
    Bool (*close)(long);
    void (*event)(long id, long sourceid, long eventtype, void *params);
    Bool (*info)(long id, W3ADocumentInfo *info);
} Viewer;

typedef struct {			/* Describes a filter */
    char **from, **to;			/* MIME types */
    int nrfromto;
    Bool (*init)(char ***from, char ***to, int *nrfromto);
    long (*open)(const char *, const char *, const char *, const char *);
    int (*read)(long id, char *buf, size_t bufsiz);
    int (*write)(long id, const char *buf, size_t nchars);
    Bool (*close)(long id);
} Filter;

#include <HTTP.h>
#include <Gopher.h>
#include <TELNET.h>
#include <MailTo.h>
#include <FTP.h>

/*
 * Table of agents.  The proto/nrproto fields start out empty and are
 * filled in by each agent's init function (called from main).  Every
 * agent is listed exactly once: find_agent() returns the first match,
 * so a repeated entry could never be selected and would only cause the
 * agent's init function to be run a second time.
 */
static Agent agents[] = {
    {NULL, 0, initHTTP, openHTTP, peekHTTP, readHTTP,
     writeHTTP, closeHTTP, deleteHTTP, infoHTTP},
    {NULL, 0, initGopher, openGopher, peekGopher, readGopher,
     writeGopher, closeGopher, deleteGopher, infoGopher},
    {NULL, 0, initTELNET, openTELNET, peekTELNET, readTELNET,
     writeTELNET, closeTELNET, deleteTELNET, infoTELNET},
    {NULL, 0, initMailTo, openMailTo, peekMailTo, readMailTo,
     writeMailTo, closeMailTo, deleteMailTo, infoMailTo},
    {NULL, 0, initFTP, openFTP, peekFTP, readFTP,
     writeFTP, closeFTP, deleteFTP, infoFTP}
    /* Add other agents here */
};
#define NRAGENTS XtNumber(agents)

#include <HTML.h>
#include <Plain.h>
#include <GIF.h>
#include <XBM.h>
#include <XPM.h>
#include <Telnet.h>
#include <PBM.h>
#include <Extern.h>

/*
 * Table of viewers.  The mime_types/nrtypes fields are filled in by
 * each viewer's init function (called from main).
 */
static Viewer viewers[] = {
    {NULL, 0, initHTML, openHTML, writeHTML, closeHTML,
     eventHTML, infoHTML},
    {NULL, 0, initPlain, openPlain, writePlain, closePlain,
     eventPlain, infoPlain},
    {NULL, 0, initGIF, openGIF, writeGIF, closeGIF,
     eventGIF, infoGIF},
    {NULL, 0, initPBM, openPBM, writePBM, closePBM,
     eventPBM, infoPBM},
    {NULL, 0, initXBM, openXBM, writeXBM, closeXBM,
     eventXBM, infoXBM},
    {NULL, 0, initXPM, openXPM, writeXPM, closeXPM,
     eventXPM, infoXPM},
    {NULL, 0, initTelnet, openTelnet, writeTelnet, closeTelnet,
     eventTelnet, infoTelnet},
    {NULL, 0, initExtern, openExtern, writeExtern, closeExtern,
     eventExtern, infoExtern}
    /* Add other viewers here */
};
#define NRVIEWERS XtNumber(viewers)

/*
 * Table of filters.  There are none yet; the single all-empty entry is
 * only a placeholder (C does not allow an empty array), which is why
 * NRFILTERS is hardcoded to 0 instead of being derived from the array.
 */
static Filter filters[] = {
    {NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL}
    /* Insert filters here */
};
#define NRFILTERS 0

The function W3AbrowserInfo can be called by applets and should return a struct with information about the browser. The variable browser is used to hold that information.

In this minimal browser the viewers are hardcoded and so are the acceptable formats. A more useful browser would have a configuration file or something similar.

<<*>> +=
/*
 * Description of this browser, as handed out by W3AbrowserInfo().
 * The count and the two arrays start out empty; main() fills them in
 * once the viewers and filters have been initialized.
 */
static W3ABrowserInfo browser = {
    "minimal 1.0",			/* For User-Agent: headers */
    0,
    NULL,				/* For Accept: headers */
    NULL				/* idem */
};

/* Widgets and application context shared with the rest of the browser */
Widget toplevel;
Widget workarea;
XtAppContext app_context;

long curviewerid = -1;			/* Current displayed viewer */

/* Fallback resources passed to XtVaAppInitialize() */
static char *fallback[] = {
    "Minimal.geometry: 500x400",
    "Minimal.workarea*allowResize: TRUE",
    NULL,
};

The connection array holds information about connections that were opened by applets, through a call to W3AopenDoc. In this case it only needs to remember which agent is used for the connection. The array is indexed by file descriptor. There are FD_SETSIZE file descriptors available. (Is this defined on other systems than HP-UX?)

The openviews array holds information about viewers that have been opened with W3Aprocess, W3Asubprocess or W3AopenView. It associates IDs with indices in the viewers array.

TO DO: errors while closing a viewer are not reported.

<<*>> +=
static int connection[FD_SETSIZE]; #define MAX_VIEWS 50 static struct { W3ADocumentInfo *doc; long id, filter_id; int viewer, filter; } openviews[MAX_VIEWS]; static int nropenviews = 0; /* add_view -- add a viewer/ID pair to openviews */ static Bool add_view(W3ADocumentInfo *doc, long id, int viewer, long filter_id, int filter) { int i; for (i = 0; i < nropenviews; i++) if (openviews[i].viewer == -1) break; if (i == MAX_VIEWS) { errno = ENOMEM; return FALSE; } openviews[i].doc = doc; openviews[i].id = id; openviews[i].viewer = viewer; openviews[i].filter_id = filter_id; openviews[i].filter = filter; if (i == nropenviews) nropenviews++; return TRUE; } /* remove_view -- remove a viewer/ID pair from openviews */ static void remove_view(long id) { int i; for (i = 0; i < nropenviews; i++) if (openviews[i].id == id) { viewers[openviews[i].viewer].close(id); if (openviews[i].filter >= 0) viewers[openviews[i].filter].close(openviews[i].filter_id); openviews[i].id = -1; /* Mark as free */ dispose_doc(openviews[i].doc); /* Free heap */ return; } } /* lookup_view -- find a viewer/ID pair in openviews */ static Bool lookup_view(long id, int *viewer, long *filter_id, int *filter) { int i; for (i = 0; i < nropenviews; i++) if (openviews[i].id == id) { *viewer = openviews[i].viewer; *filter_id = openviews[i].filter_id; *filter = openviews[i].filter; return TRUE; } return FALSE; }

An applet that has some data to display may ask the browser to open a viewer for it. The applet calls W3AopenView with a description of the document to view and a window to display it in. The browser will open the viewer and hand back its ID.

The algorithm to find filters when a certain type cannot be displayed directly is rather simple: it tries only one level of filters; chaining of filters is not implemented.

<<*>> +=
static Bool find_viewer(const char *mime_type, int *viewer, int *sub) { for (*viewer = 0; *viewer < NRVIEWERS; (*viewer)++) for (*sub = 0; *sub < viewers[*viewer].nrtypes; (*sub)++) if (eq(mime_type, viewers[*viewer].mime_types[*sub])) return TRUE; return FALSE; } static Bool find_filter(const char *mime_type, int from, int *filter, int *sub) { for (*filter = from + 1; *filter < NRFILTERS; (*filter)++) for (*sub = 0; *sub < filters[*filter].nrfromto; (*sub)++) if (eq(mime_type, filters[*filter].from[*sub])) return TRUE; return FALSE; } long W3AopenView(const W3ADocumentInfo info, W3AWindow area) { int subfilt, subview, viewer, filter = -1; long id, filter_id; W3ADocumentInfo *newinfo; newinfo = new_doc(); copy_doc(newinfo, info); /* Find viewer, and perhaps filter */ if (! find_viewer(info.mime_type, &viewer, &subview)) { assert(newinfo); while (TRUE) { if (! find_filter(info.mime_type, filter, &filter, &subfilt)) { errno = ETYPE; return -1; } if (find_viewer(filters[filter].to[subfilt], &viewer, &subview)) break; } /* Open filter */ if ((filter_id = filters[filter].open (info.mime_type, info.mime_params, filters[filter].to[subfilt], NULL)) == -1) return -1; /* Set new MIME type in info */ dispose(newinfo->mime_type); newinfo->mime_type = newstring(filters[filter].to[subfilt]); } assert(newinfo); /* Open viewer */ if ((id = viewers[viewer].open(*newinfo, area)) == -1) return -1; if (! add_view(newinfo, id, viewer, filter_id, filter)) return -1; return id; } int W3AwriteView(long id, const char *data, size_t nbytes) { int v, f; long filter_id; if (! 
lookup_view(id, &v, &filter_id, &f)) return -1; if (f >= 0) { /* Use filter */ char buf[BUFSIZ]; int n, n1 = nbytes; /* Send data to filter and loop while data not empty */ do { n = filters[f].write(filter_id, data, n1); if (n < 0 && errno != EAGAIN) return -1; if (n >= 0) {data += n; n1 -= n;} /* Read data from filter and loop while filter returns data */ while (1) { n = filters[f].read(filter_id, buf, sizeof(buf)); if (n < 0 && errno == EAGAIN) break; if (n < 0) return -1; if ((n = viewers[v].write(id, buf, n)) < 0) return -1; } } while (n1 > 0); return nbytes; } else /* Don't use filter */ return viewers[v].write(id, data, nbytes); } Bool W3AcloseView(long id) { remove_view(id); return TRUE; }

When an applet wants to retrieve a document, it can use the browser's services. The W3AopenDoc function in this browser just looks for the appropriate agent and then calls its open function. The returned file descriptor is passed to the applet, which uses it in calls to W3AreadDoc or W3AwriteDoc. To remember which agent is used for this connection, the browser stores the number of the agent in the connection array.

<<*>> +=
static Bool find_agent(const char *url, int *agent, int *sub) { for (*agent = 0; *agent < NRAGENTS; (*agent)++) for (*sub = 0; *sub < agents[*agent].nrproto; (*sub)++) if (prefix(agents[*agent].proto[*sub], url)) return TRUE; return FALSE; } int W3AopenDoc(const char *url, int method, int flags, const char *referer) { int i, j, fd; if (! find_agent(url, &i, &j)) {errno = ETYPE; return -1;} if ((fd = agents[i].open(url, method, flags, referer)) == -1) return -1; connection[fd] = i; /* Remember the agent */ return fd; } int W3AreadDoc(int fd, char *buf, size_t nbytes) { return agents[connection[fd]].read(fd, buf, nbytes); } int W3AwriteDoc(int fd, const char *buf, size_t nbytes) { return agents[connection[fd]].write(fd, buf, nbytes); } int W3AcloseDoc(int fd) { return agents[connection[fd]].close(fd); } int W3AinfoDoc(int fd, W3ADocumentInfo *info) { return agents[connection[fd]].info(fd, info); } Bool W3AdeleteDoc(const char *url) { int i, j; if (! find_agent(url, &i, &j)) {errno = ETYPE; return FALSE;} return agents[i].delete(url); }

The W3Aevent function sends the event to all open viewers (and all user functions, except that this minimal browser doesn't support user functions). The open viewers are all stored in the openviews array.

<<*>> +=
/*
 * W3Aevent -- pass an event on to every open viewer except the sender
 *
 * id is the ID of the sender; free slots in openviews (id -1) and the
 * sender's own slot are skipped.
 */
void W3Aevent(long id, long eventtype, void *param)
{
    int slot;

    for (slot = 0; slot < nropenviews; slot++) {
	long target = openviews[slot].id;

	if (target == -1 || target == id) continue;
	viewers[openviews[slot].viewer].event(target, id, eventtype, param);
    }
}

process does most of the work for W3Aprocess and W3Asubprocess. It finds the appropriate agent for the URL and the appropriate viewer for the retrieved document. The return code is the ID of the viewer.

<<*>> +=
/*
 * process -- retrieve a document and display it in the window area
 *
 * Expands doc->url against doc->referer if it is relative, opens the
 * agent, sends data for PUT and POST, opens a viewer for the type
 * reported by the agent and copies the document to that viewer.
 *
 * Returns the ID of the viewer, 0 after a successful DELETE (which
 * opens no viewer), or -1 with errno set.  On failure the connection
 * and the viewer, if already opened, are closed again.
 */
static long process(W3ADocumentInfo *doc, int method, const char *data,
		    size_t nbytes, W3AWindow area)
{
    char buf[BUFSIZ];
    URI uri, base;
    int fd, n, saved_errno;
    long id = -1;

    if (! URL_parse(doc->url, &uri)) {errno = EURL; return -1;}
    if (uri.tp == URI_Rel) {
	if (! doc->referer || ! URL_parse(doc->referer, &base)) {
	    errno = EURL;
	    return -1;
	}
	URL_expand(&uri, base);
	dispose(doc->url);			/* Equivalent to XtFree */
	doc->url = uri2str(uri);		/* `Unhash' a string */
    }
    if (uri.tp != URI_URL) {errno = EURL; return -1;}

    if (method == DELETE_METHOD) {
	if (! W3AdeleteDoc(doc->url)) return -1;
	return 0;				/* Success, but not an ID */
    }

    /* Open agent */
    if ((fd = W3AopenDoc(doc->url, method, 0, doc->referer)) < 0) return -1;

    if (method == PUT_METHOD || method == POST_METHOD) {
	do {
	    if ((n = W3AwriteDoc(fd, data, nbytes)) < 0) goto fail;
	    data += n;
	    nbytes -= n;
	} while (nbytes > 0);
    }

    /* Query agent for MIME type of retrieved document */
    if (! W3AinfoDoc(fd, doc)) goto fail;

    /* Find viewer for this type */
    if ((id = W3AopenView(*doc, area)) == -1) goto fail;

    /* Copy data from agent to viewer; the final, empty read is passed
     * on to the viewer as well */
    do {
	if ((n = W3AreadDoc(fd, buf, sizeof(buf))) < 0) goto fail;
	if (W3AwriteView(id, buf, n) == -1) goto fail;
    } while (n != 0);

    W3AcloseDoc(fd);
    return id;

fail:
    /* The caller only gets -1 and cannot close anything itself */
    saved_errno = errno;
    if (id != -1) W3AcloseView(id);
    W3AcloseDoc(fd);
    errno = saved_errno;
    return -1;
}

/*
 * W3Aprocess -- retrieve a document and make it the current document
 *
 * The new viewer replaces the previous one in the browser's work area
 * and all viewers are sent a NEW_DOCUMENT event.  A successful DELETE
 * opens no viewer, so the current document is left alone in that case.
 */
Bool W3Aprocess(W3ADocumentInfo *info, int method, const char *data,
		size_t nbytes)
{
    long id;

    if ((id = process(info, method, data, nbytes, workarea)) == -1)
	return FALSE;

    /* process() returns 0 for DELETE; that value is not a viewer ID */
    if (method == DELETE_METHOD) return TRUE;

    /* Close previous viewer */
    if (curviewerid != -1) remove_view(curviewerid);
    curviewerid = id;
    W3Aevent(-1, NEW_DOCUMENT, info);
    return TRUE;
}

Another possibility for viewers is to open a sub-viewer, by creating a window for it and then asking the browser to retrieve a document and start a viewer (as for W3Aprocess), except that the sub-viewer doesn't get the window from the browser, but from the parent viewer.

<<*>> +=
/*
 * W3Asubprocess -- retrieve a document and view it in a caller-supplied
 * window; returns whatever process() returns
 */
long W3Asubprocess(W3ADocumentInfo *doc, int method, const char *data,
		   size_t nbytes, W3AWindow area)
{
    long viewer_id;

    viewer_id = process(doc, method, data, nbytes, area);
    return viewer_id;
}

/* W3AresolveURN -- not implemented: always fails with errno ENYI */
int W3AresolveURN(const char *urn, char ***url_list_return)
{
    errno = ENYI;				/* Not yet implemented */
    return -1;
}

/* W3AbrowserInfo -- copy the description of this browser into *info */
void W3AbrowserInfo(W3ABrowserInfo *info)
{
    *info = browser;
}

Since this browser is an X Toolkit application, it has the W3Atoplevel function. It just returns the toplevel shell widget of the browser.

<<*>> +=
/* W3Atoplevel -- return the browser's toplevel widget */
Widget W3Atoplevel(void)
{
    return toplevel;
}

This browser should be called with exactly one argument (except for X toolkit options), viz. the URL of the first page to view.

<<*>> +=
static void usage() { fprintf(stderr, "Usage: minimal [toolkit options] first-URL\n"); exit(1); } int main(int argc, char *argv[]) { W3ADocumentInfo info; int n, i, j, nraccepted; toplevel = XtVaAppInitialize (&app_context, "Minimal", NULL, 0, &argc, argv, fallback, NULL); /* XSynchronize(XtDisplay(toplevel), TRUE); */ if (argc != 2) usage(); workarea = XtVaCreateManagedWidget ("workarea", xfwfStackWidgetClass, toplevel, XfwfNfill, TRUE, NULL); XtRealizeWidget(toplevel); /* Initialize applets */ for (i = 0; i < NRAGENTS; i++) if (! agents[i].init(&agents[i].proto, &agents[i].nrproto)) XtAppError(app_context, "Init failed"); nraccepted = 0; for (i = 0; i < NRVIEWERS; i++) { if (! viewers[i].init(&viewers[i].mime_types, &viewers[i].nrtypes)) XtAppError(app_context, "Init failed"); nraccepted += viewers[i].nrtypes; } for (i = 0; i < NRFILTERS; i++) { if (! filters[i].init(&filters[i].from, &filters[i].to, &filters[i].nrfromto)) XtAppError(app_context, "Init failed"); nraccepted += filters[i].nrfromto; } /* Initialize browser info */ browser.nformats = nraccepted; newarray(browser.formats, nraccepted); newarray(browser.preferences, nraccepted); for (n = 0, i = 0; i < NRVIEWERS; i++) for (j = 0; j < viewers[i].nrtypes; j++, n++) { browser.formats[n] = viewers[i].mime_types[j]; browser.preferences[n] = 1.0; } for (i = 0; i < NRFILTERS; i++) for (j = 0; j < filters[i].nrfromto; j++, n++) { browser.formats[n] = filters[i].from[j]; browser.preferences[n] = 0.5; } /* Open first document */ info.url = XtNewString(argv[1]); info.referer = XtNewString("file://localhost/"); info.size = -1; info.mime_type = info.mime_params = info.title = info.status = NULL; if (! W3Aprocess(&info, GET_METHOD, NULL, 0)) XtAppError(app_context, "Couldn't open document"); XtAppMainLoop(app_context); }