package w3c.pics.parser;

import java.util.*;
import java.io.*;

/**
 * Implements a filter that reads in an HTML file, and locates any labels 
 * that are contained in its header.
 **/

public class LabelFinder {

  /** Entities recognised by {@link #replaceEscaped(String)}. */
  private static final String[] ENTITIES =
    { "&nbsp;", "&quot;", "&#39;", "&amp;", "&gt;" };

  /** Replacement text for the entity at the same index in ENTITIES. */
  private static final String[] REPLACEMENTS =
    { " ", "\"", "'", "&", ">" };

  /** Name of the HTML file that is scanned for labels. */
  String fname = null;

  /** Number of labels found so far; 0 until a scan has found at least one. */
  int count = 0;

  /** The unescaped label texts (Strings), in the order they were found. */
  Vector labels = new Vector();

  /**
   * Creates a new LabelFinder for the HTML file <em>filename</em>
   * @param filename The name of the HTML file to scan for labels.
   **/

  public LabelFinder(String filename) {
    fname = filename;
  }

  /**
   * An interactive mode which takes a file name as a command line argument, 
   * counts the number of labels found, and then allows the user to request 
   * a label by number (1-based).
   * @param args The command line arguments; args[0] is the HTML file name.
   **/

  public static void main(String args[]) {
    if (args.length < 1) {
      System.out.println("Usage: java w3c.pics.parser.LabelFinder <html-file>");
      return;
    }
    LabelFinder lf = new LabelFinder(args[0]);
    System.out.println("Number of labels: "+lf.getLabels());
    System.out.println("Fetch label: ");
    try {
      BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
      String cmd = br.readLine();
      // cmd is null when standard input is already at end of file.
      if (cmd != null) {
        try {
          int c = Integer.parseInt(cmd.trim());
          // The user counts labels from 1, showLabel counts from 0.
          System.out.println(lf.showLabel(c-1));
        }
        catch (NumberFormatException ex) {
          System.out.println("Not a label number: "+cmd);
        }
      }
      br.close();
    }
    catch (IOException ex) {
      System.out.println("IOException in LabelFinder.main: "+ex.getMessage());
    }
  }

  /**
   * Returns the number of labels that were found in the HTML file
   * and stores the labels it finds in a vector.  A label is the 
   * single-quoted value following <code>CONTENT=</code> on, or after, a 
   * line that mentions <code>PICS-Label</code> (both matched without 
   * regard to case).  A label may span several lines; the lines are joined 
   * with a single space.  Once a scan has found at least one label the 
   * result is cached and the file is not read again.
   * @return An int, the number of labels found, or -1 if the file could 
   * not be read.
   **/

  public int getLabels() {
    if (count!=0)
      return count;

    // Collect into a scratch vector so that a failed scan leaves this
    // object untouched instead of caching a partial result.
    Vector found = new Vector();
    BufferedReader br = null;
    try {
      br = new BufferedReader(new FileReader(fname));
      String theline;
      while ((theline = br.readLine())!=null) {
        if (indexOfIgnoreCase(theline, "PICS-Label", 0)==-1)
          continue;
        String raw = readLabel(br, theline);
        if (raw==null)
          break;   // end of file inside an unterminated tag
        found.addElement(replaceEscaped(raw));
      }
    }
    catch (IOException ex) {
      System.out.println("IOException in LabelFinder.getLabels: "+
                         ex.getMessage());
      return -1;
    }
    finally {
      if (br!=null) {
        try {
          br.close();
        }
        catch (IOException ignored) {
          // Nothing useful can be done about a failed close of a reader.
        }
      }
    }

    for (int i = 0; i<found.size(); i++)
      labels.addElement(found.elementAt(i));
    count = labels.size();
    return count;
  }

  /**
   * Extracts the raw (still escaped) text of one label, starting at a line 
   * that is known to mention PICS-Label and reading further lines as needed.
   * @param br The reader positioned just after <em>firstLine</em>.
   * @param firstLine The line on which PICS-Label was seen.
   * @return The text between the single quotes, or null if the end of the 
   * file was reached before the label was complete.
   * @exception IOException If reading from <em>br</em> fails.
   **/

  private String readLabel(BufferedReader br, String firstLine)
    throws IOException
  {
    String line = firstLine;

    // Find the line carrying the CONTENT= attribute.
    int content;
    while ((content = indexOfIgnoreCase(line, "CONTENT=", 0))==-1) {
      line = br.readLine();
      if (line==null)
        return null;
    }

    // The opening quote must come after CONTENT=, otherwise a quoted
    // http-equiv='PICS-Label' on the same line would be taken as the label.
    int open = line.indexOf('\'', content+8);
    while (open==-1) {
      line = br.readLine();
      if (line==null)
        return null;
      open = line.indexOf('\'');
    }

    String rest = line.substring(open+1);
    int close = rest.indexOf('\'');
    if (close!=-1)
      return rest.substring(0, close);

    // The label continues on the following lines.
    StringBuffer text = new StringBuffer(rest);
    while (true) {
      line = br.readLine();
      if (line==null)
        return null;
      // A line break inside the attribute value separates tokens; keep
      // them apart rather than gluing the two lines together.
      if (text.length()>0)
        text.append(' ');
      close = line.indexOf('\'');
      if (close!=-1) {
        text.append(line.substring(0, close));
        return text.toString();
      }
      text.append(line);
    }
  }

  /**
   * Finds <em>needle</em> in <em>text</em> ignoring case.  The returned 
   * index refers to <em>text</em> itself, which is not guaranteed for an 
   * index computed on an upper-cased copy.
   * @param text The String to search in.
   * @param needle The String to look for.
   * @param from The index at which to start searching.
   * @return The index of the first match at or after <em>from</em>, or -1.
   **/

  private static int indexOfIgnoreCase(String text, String needle, int from) {
    int last = text.length()-needle.length();
    for (int i = Math.max(from, 0); i<=last; i++) {
      if (text.regionMatches(true, i, needle, 0, needle.length()))
        return i;
    }
    return -1;
  }

  /**
   * Returns a String with escaped characters replaced
   * Replaces &amp;nbsp;, &amp;quot;, &amp;#39;, &amp;amp;, &amp;gt; with 
   * a plain space, ", ', &amp;, &gt;, respectively.  The input is decoded 
   * in a single pass, so the result of one replacement is never decoded 
   * again (&amp;amp;gt; becomes &amp;gt;).  An ampersand that does not 
   * start one of these entities is copied unchanged.
   * @param original The input String to be unescaped.
   * @return A String with the escaped characters replaced, or null if 
   * <em>original</em> is null.
   **/

  public String replaceEscaped(String original)
  {
    if (original==null)
      return null;

    StringBuffer out = new StringBuffer(original.length());
    int pos = 0,ampIndex;

    while ((ampIndex = original.indexOf('&',pos))!=-1) {
      // Copy everything up to the ampersand verbatim.
      out.append(original.substring(pos,ampIndex));

      int k = 0;
      while (k<ENTITIES.length && !original.startsWith(ENTITIES[k],ampIndex))
        k++;

      if (k<ENTITIES.length) {
        out.append(REPLACEMENTS[k]);
        pos = ampIndex+ENTITIES[k].length();
      }
      else {
        // Not a known entity: keep the ampersand and move on.
        out.append('&');
        pos = ampIndex+1;
      }
    }
    out.append(original.substring(pos));
    return out.toString();
  }

  /**
   * Returns a String with the contents of an individual label.  This String 
   * contains only the PICS label text, and is suitable for passing to the 
   * LabelParser.
   * @see LabelParser
   * @param i The zero-based index of which label is to be returned.
   * @return A String containing the text of the label, or null if there is 
   * no label with that index.
   **/

  public String showLabel(int i) {
    if (i<0 || i>=labels.size())
      return null;
    return (String)labels.elementAt(i);
  }
}
