#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef int BOOL;
#define TRUE 1
#define FALSE 0

/* Only the first MAX_SCAN_LINES reads of the document are searched. */
#define MAX_SCAN_LINES 20

/*
 * Return TRUE when the text at `s` starts with `tag`, comparing ASCII
 * letters without regard to case (HTML tag names are case-insensitive).
 * A string shorter than `tag` never matches: its terminating NUL differs
 * from the next tag character, so the scan stops before running past it.
 */
static BOOL starts_with_tag(const char *s, const char *tag)
{
    while (*tag) {
        if (tolower((unsigned char)*s) != tolower((unsigned char)*tag))
            return FALSE;
        s++;
        tag++;
    }
    return TRUE;
}

/*
 * Print the TITLE of an HTML document on a single line of stdout.
 *
 * argv[1] is a directory (may be empty) and argv[2] a file name; the file
 * opened is "directory/filename", or just "filename" when the directory is
 * the empty string.  Line breaks inside the title are replaced by a space.
 * When no title text is found in the first MAX_SCAN_LINES reads, argv[2]
 * is printed instead.
 *
 * Exit status: 0 on success (title or fallback printed), 1 on wrong usage,
 * 2 when the path is too long or the file cannot be opened.
 */
int main(int argc, char **argv)
{
    static const char open_tag[] = "<title>";
    static const char close_tag[] = "</title>";

    FILE *fp;
    char filename[256];
    char buffer[1024];
    char *p;
    size_t len;
    int n;
    BOOL found = FALSE;   /* Opening title tag has been seen */
    BOOL tag = FALSE;     /* Currently inside some other <...> tag */
    BOOL wrote = FALSE;   /* At least one title character was printed */
    BOOL eol;             /* Current chunk ended with a newline */
    int lines = 0;

    if (argc != 3) {
        fprintf(stderr,
                "\n"
                "This program takes an HTML document and extracts to its stdout\n"
                "the TITLE of the document, all in one line.\n\n"
                "Usage:\n"
                "\t%s directory filename\n\n",
                argv[0]);
        exit(1);
    }

    /* Bounded join: the arguments are untrusted and may exceed the buffer. */
    n = snprintf(filename, sizeof filename, "%s%s%s",
                 argv[1], *argv[1] ? "/" : "", argv[2]);
    if (n < 0 || (size_t)n >= sizeof filename) {
        fprintf(stderr, "%s: Path \"%s/%s\" is too long\n",
                argv[0], argv[1], argv[2]);
        exit(2);
    }

    if (!(fp = fopen(filename, "r"))) {
        fprintf(stderr, "%s: Unable to open file \"%s\"\n", argv[0], filename);
        exit(2);
    }

    while (lines++ < MAX_SCAN_LINES &&
           (p = fgets(buffer, sizeof buffer, fp)) != NULL) {
        /*
         * Strip the line terminator only if it is really there: a chunk cut
         * short by the buffer size, or a last line without a newline, must
         * keep its final character.
         */
        len = strlen(p);
        eol = FALSE;
        if (len > 0 && p[len - 1] == '\n') {
            p[--len] = '\0';
            eol = TRUE;
        }
        if (len > 0 && p[len - 1] == '\r')
            p[--len] = '\0';

        while (*p) {
            if (tag) {
                /* Skip the remainder of a non-title tag. */
                p = strchr(p, '>');
                if (!p)
                    break;          /* Tag continues on the next line */
                p++;
                tag = FALSE;
            }

            /* Plain text up to the next tag: echo it once inside the title. */
            while (*p && *p != '<') {
                if (found) {
                    fputc(*p, stdout);
                    wrote = TRUE;
                }
                p++;
            }

            if (!*p) {
                if (found && eol)
                    fputc(' ', stdout);   /* We replace newline with space */
                break;
            }

            if (!found && starts_with_tag(p, open_tag)) {
                p += sizeof open_tag - 1;
                found = TRUE;
            } else if (found && starts_with_tag(p, close_tag)) {
                fputc('\n', stdout);
                fclose(fp);
                return 0;
            } else {
                tag = TRUE;   /* p still points at '<'; skipped next round */
            }
        } /* while stuff in buffer */
    } /* while not EOF and not very many lines read */

    /*
     * No closing title tag among the first few lines.  If part of a title
     * was already printed, just terminate that line; otherwise fall back to
     * the file name.
     */
    if (wrote)
        fputc('\n', stdout);
    else
        printf("%s\n", argv[2]);

    fclose(fp);
    return 0;
}