abstract_grammar.c Source File

00001 /*
00002   
00003 
00004 
00005 */
00006 
00007 static char id[] = "$Id: abstract__grammar_8c-source.html,v 1.10 2001/10/10 20:40:58 sandro Exp $";
00008 
00009 
00010 #include "config.h"
00011 
00012 #include <stdlib.h>
00013 #include <stdio.h>
00014 #include <malloc.h>
00015 #include <string.h>
00016 #include <assert.h>
00017 
00018 #include "abstract_grammar.h"
00019 
00020 Grammar *new_grammar() {
00021   Grammar *g = dltree_alloc(sizeof(Grammar));
00022   return g;
00023 }
00024 
00025 Rule* add_charset(Grammar *g, char present[256]) {
00026     Charset *other;
00027 
00028     /* Look to see if we have a matching charset already */
00029     for (other = g->first_charset; other; other=other->next_in_grammar) {
00030     int i;
00031     for (i=0; i<256; i++) {
00032         if (other->present[i] != present[i]) goto no_match;
00033     }
00034     return other->rule;
00035     }
00036 
00037  no_match:
00038     {
00039     Charset *s = calloc(1, sizeof(Charset));
00040     int i;
00041     for (i=0; i<256; i++) {
00042         if (present[i]) s->bits++;
00043     }
00044     memcpy(s->present, present, 256*sizeof(char));
00045     memcpy(s->present_only_here, present, 256);
00046     
00047     s->rule = add_anonymous_rule(g);
00048     //fprintf(stderr, "MADE A CHARSET RULE %x\n", s->rule);
00049     s->next_in_grammar = g->first_charset;
00050     g->first_charset = s;
00051     return s->rule;
00052     }
00053 }
00054 
00055 static void fill_in_rule_for_charset(Grammar *g, Charset *s) 
00056 {
00057     Rule *r1 = add_anonymous_rule(g);
00058     Term *t;
00059     Action *a;
00060     int c;
00061     Branch *b;
00062 
00063     //fprintf(stderr, "FILLED IN A CHARSET RULE %x\n", s->rule);
00064     for(c=0; c<=255; c++) {
00065     if (s->present_only_here[c]) {
00066         b = add_branch(r1);    
00067         t = add_term(b);
00068         t->type = literal;
00069         t->data.literal = (char) c;
00070         t->name = strdup("_charset_literal");
00071         
00072         a = (Action*) calloc(1, sizeof(Action));
00073         a->type = nop;
00074         t->actions = a;
00075     }
00076     }
00077 
00078     {
00079     struct charset_list_node *n;
00080     int count=0;
00081     for (n=s->inclusions; n; n=n->next) {
00082         b = add_branch(r1);
00083         t = add_term(b);
00084         t->type = rule_pointer;
00085         t->data.rule_pointer = n->charset->rule;
00086         t->name = strdup("_charset_subset");
00087         if (count++ > 32) {
00088         fprintf(stderr, "Warning, weird charset inclusions.\n");
00089         break;
00090         }
00091     }
00092     }
00093 
00094     b = add_branch(s->rule);    
00095     t = add_term(b);
00096     t->type = rule_pointer;
00097     t->data.rule_pointer = r1;
00098     t->name = strdup("_charset_action");
00099         
00100     a = (Action*) calloc(1, sizeof(Action));
00101     a->type = onechar;
00102     t->actions = a;
00103 }
00104 
00105 void post_process_charsets(Grammar *g) 
00106 {
00107     /*
00108       for each charset, look for any other "sub" charsets, which have
00109       only bits that we have.  If we find one, remove its bits from
00110       present_only_here and add it to our inclusions list.
00111 
00112     */
00113 
00114     Charset *big;
00115     Charset *small;
00116 
00117     for (big = g->first_charset; big; big=big->next_in_grammar) {
00118     fprintf(stderr, "Does %s include anythig?\n", big->rule->name);
00119     for (small = g->first_charset; small; small=small->next_in_grammar) {
00120         int i;
00121         //if (big->bits <= small->bits) continue;
00122         if (big == small) continue;
00123 
00124         fprintf(stderr, "  Like %s?\n", small->rule->name);
00125         for (i=0; i<256; i++) {
00126         if (small->present[i] && !big->present[i]) goto next_small;
00127         }
00128         for (i=0; i<256; i++) {
00129         if (small->present[i]) big->present_only_here[i] = 0;
00130         }
00131         fprintf(stderr, "  yes!\n");
00132         {
00133         struct charset_list_node *n = 
00134             malloc(sizeof(struct charset_list_node));
00135         fprintf(stderr, "Including %s in %s\n", small->rule->name,
00136             big->rule->name);
00137         n->next = big->inclusions;
00138         big->inclusions = n;
00139         n->charset = small;
00140         }
00141         continue;
00142     next_small:
00143         fprintf(stderr, "   no\n");
00144     }
00145     }
00146 
00147     /*
00148       Write the appropriate rules for each one now 
00149     */
00150     for (big = g->first_charset; big; big=big->next_in_grammar){
00151     fill_in_rule_for_charset(g, big);
00152     }
00153 }
00154 
00155 Rule* add_anonymous_rule(Grammar *g) {
00156   static char buf[80];
00157   sprintf(buf, "_%d", ++(g->anons));
00158   return add_rule(g, buf);
00159 }
00160 
00161 Rule* add_rule(Grammar *g, char *name) {
00162   Rule *r = (Rule *) dltree_alloc(sizeof(Rule));
00163   dltree_append_child(g, r);
00164   r->name=strdup(name);
00165   return r;
00166 }
00167 
00168 Rule *obtain_rule(Grammar *g, char *name); /* add or lookup */
00169 
00170 Branch* add_branch(Rule *r) {
00171   Branch *b = (Branch *) dltree_alloc(sizeof(Branch));
00172   dltree_append_child(r, b);
00173   return b;
00174 }
00175 
00176 Term *add_term(Branch *b) {
00177   Term *t = (Term *) dltree_alloc(sizeof(Term));
00178   dltree_append_child(b, t);
00179   t->type = unused;
00180   return t;
00181 }
00182 
00183 void grammar_copy_term(Term *t, Term *dest) {
00184   dest->type = t->type;
00185   switch(t->type) {
00186   case unused:
00187     break;
00188   case literal:
00189     dest->data.literal = t->data.literal;
00190     break;
00191   case rule_name: 
00192     dest->data.rule_name = strdup(t->data.rule_name);
00193     break;
00194   case rule_pointer: 
00195     dest->data.rule_pointer = t->data.rule_pointer;
00196     break;
00197   }
00198   dest->name = strdup(t->name);
00199   dest->actions = 0;   /*  XXX   for now, we're assuming we're not supposed to copy the actions */
00200 }
00201 
00202 void print_value(FILE *out, Value* value, int in_rule) {
00203   switch (value->type) {
00204   case unused: abort();
00205   case text_buffer:
00206       fprintf(out, "symbolFor(text_buffer(), sink->getSink())");
00207       break;
00208   case literal_string: 
00209       fprintf(out, "symbolFor(\"%s\", sink->getSink())", value->as.literal_string.string);
00210       break;
00211   case tuple: 
00212   { 
00213       Value *v = value->as.tuple.first;
00214       for ( ; v; v=v->next) {
00215       print_value(out, v, in_rule);
00216       if (v->next) fprintf(out, ", ");
00217       }
00218   }
00219   break;
00220   case rdfid:
00221       fprintf(out, "Symbol(space, \"%s\")", value->as.rdfid.id);
00222       break;
00223   case local_name:
00224       fprintf(out, "obtain_local(\"%s\")", value->as.local_name.name);
00225       break;
00226   case current_content:
00227   {
00228       if (in_rule) {
00229       fprintf(out, "$$.content");
00230       } else {
00231       fprintf(out, "result.content");
00232       }
00233       break;
00234   }
00235   case subst:
00236   {
00237       char var[16];
00238 
00239       if (in_rule) {
00240       if (value->as.subst.position == 0) {
00241           sprintf(var, "$$");
00242       } else {
00243           sprintf(var, "$%d", value->as.subst.position);
00244       }
00245       } else {
00246       sprintf(var, "result");
00247       }
00248 
00249       switch (value->as.subst.part) {
00250       case value_part:  
00251       fprintf(out, "%s.value", var); 
00252       break;
00253       case content_part:  
00254       fprintf(out, "%s.content", var); 
00255       break;
00256       case text_part:  
00257       fprintf(out, 
00258           "symbolFor(%s.text, %s.text_end, sink->getSink())",
00259           var, var); 
00260       break;
00261       }
00262       break;
00263   }
00264   }
00265 }
00266 
00267 void print_value_text(FILE *out, Value* value) {
00268     switch (value->type) {
00269     case unused: abort();
00270     case text_buffer:
00271     fprintf(out, "text_buffer()");
00272     break;
00273     case literal_string: 
00274     /* escaping for embedded quotes? */
00275     fprintf(out, "\"%s\"", value->as.literal_string.string);
00276     break;
00277     case tuple: 
00278     die("type mismatch, can't convert tuple to string");
00279     break;
00280     case current_content:
00281     die("type mismatch, can't convert current_content to string");
00282     break;
00283     case rdfid:
00284     die("type mismatch, can't convert rdfid to string");
00285     break;
00286     case local_name:
00287     die("type mismatch, can't convert local name to string");
00288     break;
00289     case subst:
00290     if (value->as.subst.position == 0) {
00291         fprintf(out, "$$");
00292     } else {
00293         fprintf(out, "$%d", value->as.subst.position);
00294     }
00295     switch (value->as.subst.part) {
00296     case value_part:
00297         die("type mismatch, can't convert value symbol to string"); break;
00298     case content_part:
00299         die("type mismatch, can't convert content symbol to string"); break;
00300     case text_part:  
00301         fprintf(out, ".text"); break;
00302     }
00303     break;
00304     }
00305 }
00306 
00307 void print_actions(FILE *out, Action* action, int last, Branch *b) {
00308   Action *a = action;
00309   if (a == 0 && !last) {
00310       return;
00311   }
00312 
00313   if (b) fprintf(out, "[YYVALID;]\n");    /* we don't really want backtracking, just a guarantee of the order in which things will get parsed */
00314 
00315   if (a && a->type == nop) {
00316       if (b) fprintf(out, "   { }");
00317       return;
00318   }
00319 
00320   fprintf(out, "\n      {\n");
00321 
00322   if (b) {
00323       fprintf(out, "        $$.content = symbolForAnonymous(\"content\", sink->getSink());\n");
00324       fprintf(out, "        $$.value = symbolForAnonymous(\"value\", sink->getSink());\n");
00325   }
00326 
00327   if (b) { 
00328       /* count back the number of character literals from the 
00329      first rule, or here if none */
00330       int bytes=0;
00331       int term=0;
00332       int i;
00333       DLTreeNode *n = b->tree.first;
00334       for (i=1; n; i++, n=n->next) {
00335       Term *t = (Term *)n;
00336       if ( t->type == literal ) {
00337           bytes += 1;  /*  strlen(t->data.literal)   WIDE?  */
00338       } else {
00339           term = i;
00340           break;
00341       }
00342       }
00343 
00344       if (a && a->type == onechar) { bytes=1; term=0; }
00345 
00346       if (term == 0) {
00347       fprintf(out, "        $$.text = yylex_buffer - %d;\n", bytes);
00348       } else {
00349       fprintf(out, "        $$.text = $%d.text - %d;\n", term, bytes);
00350       }
00351 
00352       fprintf(out, "        $$.text_end = yylex_buffer;\n");
00353   } 
00354 
00355   while (a) {
00356     fprintf(out, "        ");
00357     switch (a->type) {
00358     case unused: abort();
00359     case nop: break;
00360     case onechar: break;
00361     case addto: 
00362       fprintf(out, "sink->addTo(");
00363       print_value(out, a->as.addto.set, b!=0);
00364       fprintf(out, ", ");
00365       print_value(out, a->as.addto.object, b!=0);
00366       fprintf(out, ")");
00367       break;
00368     case includein: 
00369       fprintf(out, "sink->includeIn(");
00370       print_value(out, a->as.includein.outer, b!=0);
00371       fprintf(out, ", ");
00372       print_value(out, a->as.includein.inner, b!=0);
00373       fprintf(out, ")");
00374       break;
00375     case appendtotext:
00376       fprintf(out, "append_to_text_buffer(");
00377       print_value_text(out, a->as.appendtotext.text);
00378       fprintf(out, ")");
00379       break;
00380     case cleartext:
00381       fprintf(out, "clear_text_buffer()");
00382       break;
00383     }
00384       
00385     fprintf(out, ";\n");
00386     a = a->next;
00387   }
00388   fprintf(out, "      }\n");
00389 
00390 }
00391 void print_term(FILE *out, Grammar *g, Term *t) {
00392   switch(t->type) {
00393   case unused: fprintf(out, "<unused>"); 
00394     break;
00395   case literal: {
00396       char c = t->data.literal;
00397       /* HACK short term solution - what's generally portable for yaccs? */
00398       if (c == '\\') {
00399       fprintf(out, "'\\\\'");
00400       } else if (c == '\'') {
00401       fprintf(out, "'\\''");
00402       } else if (c == '\n') {
00403       fprintf(out, "'\\n'");
00404       } else if (c == '\r') {
00405       fprintf(out, "'\\r'");
00406       } else {
00407       fprintf(out, "'%c'", c);
00408       }
00409     }
00410     break;
00411   case rule_name: 
00412     /* we could resolve it, or just let yacc do it.... */
00413     fprintf(out, "%s", t->data.rule_name);
00414     break;
00415   case rule_pointer: 
00416     /* we could resolve it, or just let yacc do it.... */
00417     fprintf(out, "%s", t->data.rule_pointer->name);
00418     break;
00419   }
00420   assert(t->name);
00421 #if COMMENT_YACC_RULES
00422   if (t->name[0] != '_') fprintf(out, " /*=\"%s\"*/  ", t->name);
00423 #endif
00424 }
00425 
00426 void print_branch(FILE *out, Grammar *g, Branch *b) {
00427   DLTreeNode *n = b->tree.first;
00428   while (n) {
00429     print_term(out, g, (Term *)n);
00430     print_actions(out, ((Term *)n)->actions, (n->next == 0), b);
00431     n=n->next;
00432     if (n) fprintf(out, " ");
00433   }
00434 }
00435 
00436 void print_rule(FILE *out, Grammar *g, Rule *r) {
00437   DLTreeNode *n = r->tree.first;
00438 
00439   assert(n);  /* rule with no branches....?    nah.  */
00440 
00441   fprintf(out, "%s\n  : ", r->name);
00442   while (n) {
00443     print_branch(out, g, (Branch *)n);
00444     n=n->next;
00445     if (n) fprintf(out, "\n  | ");
00446   }
00447   fprintf(out, "\n");
00448   fprintf(out, "\n");
00449 }
00450 
00451 void print_grammar(FILE *out, Grammar *g) {
00452   DLTreeNode *n = g->tree.first;
00453 
00454   post_process_charsets(g);
00455   fprintf(out, "%%{\n\n");
00456   fprintf(out, "static char id[] = \"machine generated by blindfold (should have various version infos)\";\n");
00457   fprintf(out, "#include <parser_common_head.h>\n");
00458   fprintf(out, "\n%%}\n\n");
00459   fprintf(out, "%%%%\n");
00460 
00461   while (n) {
00462     print_rule(out, g, (Rule *)n);
00463     n=n->next;
00464   }
00465 
00466   fprintf(out, "%%%%\n");
00467 
00468   fprintf(out, "\n\nvoid top_level_actions()\n");
00469   print_actions(out, g->actions, 1, 0);
00470   fprintf(out, "\n");
00471 
00472   fprintf(out, "#include <parser_common_foot.h>\n");
00473 }
00474   
00475