00001
00002
00003
00004
00005
00006
/* Version-control "$Id$" keyword string for this file; never modified at run time. */
static const char id[] = "$Id: abstract__grammar_8c-source.html,v 1.10 2001/10/10 20:40:58 sandro Exp $";
00008
00009
00010 #include "config.h"
00011
00012 #include <stdlib.h>
00013 #include <stdio.h>
00014 #include <malloc.h>
00015 #include <string.h>
00016 #include <assert.h>
00017
00018 #include "abstract_grammar.h"
00019
00020 Grammar *new_grammar() {
00021 Grammar *g = dltree_alloc(sizeof(Grammar));
00022 return g;
00023 }
00024
00025 Rule* add_charset(Grammar *g, char present[256]) {
00026 Charset *other;
00027
00028
00029 for (other = g->first_charset; other; other=other->next_in_grammar) {
00030 int i;
00031 for (i=0; i<256; i++) {
00032 if (other->present[i] != present[i]) goto no_match;
00033 }
00034 return other->rule;
00035 }
00036
00037 no_match:
00038 {
00039 Charset *s = calloc(1, sizeof(Charset));
00040 int i;
00041 for (i=0; i<256; i++) {
00042 if (present[i]) s->bits++;
00043 }
00044 memcpy(s->present, present, 256*sizeof(char));
00045 memcpy(s->present_only_here, present, 256);
00046
00047 s->rule = add_anonymous_rule(g);
00048
00049 s->next_in_grammar = g->first_charset;
00050 g->first_charset = s;
00051 return s->rule;
00052 }
00053 }
00054
00055 static void fill_in_rule_for_charset(Grammar *g, Charset *s)
00056 {
00057 Rule *r1 = add_anonymous_rule(g);
00058 Term *t;
00059 Action *a;
00060 int c;
00061 Branch *b;
00062
00063
00064 for(c=0; c<=255; c++) {
00065 if (s->present_only_here[c]) {
00066 b = add_branch(r1);
00067 t = add_term(b);
00068 t->type = literal;
00069 t->data.literal = (char) c;
00070 t->name = strdup("_charset_literal");
00071
00072 a = (Action*) calloc(1, sizeof(Action));
00073 a->type = nop;
00074 t->actions = a;
00075 }
00076 }
00077
00078 {
00079 struct charset_list_node *n;
00080 int count=0;
00081 for (n=s->inclusions; n; n=n->next) {
00082 b = add_branch(r1);
00083 t = add_term(b);
00084 t->type = rule_pointer;
00085 t->data.rule_pointer = n->charset->rule;
00086 t->name = strdup("_charset_subset");
00087 if (count++ > 32) {
00088 fprintf(stderr, "Warning, weird charset inclusions.\n");
00089 break;
00090 }
00091 }
00092 }
00093
00094 b = add_branch(s->rule);
00095 t = add_term(b);
00096 t->type = rule_pointer;
00097 t->data.rule_pointer = r1;
00098 t->name = strdup("_charset_action");
00099
00100 a = (Action*) calloc(1, sizeof(Action));
00101 a->type = onechar;
00102 t->actions = a;
00103 }
00104
00105 void post_process_charsets(Grammar *g)
00106 {
00107
00108
00109
00110
00111
00112
00113
00114 Charset *big;
00115 Charset *small;
00116
00117 for (big = g->first_charset; big; big=big->next_in_grammar) {
00118 fprintf(stderr, "Does %s include anythig?\n", big->rule->name);
00119 for (small = g->first_charset; small; small=small->next_in_grammar) {
00120 int i;
00121
00122 if (big == small) continue;
00123
00124 fprintf(stderr, " Like %s?\n", small->rule->name);
00125 for (i=0; i<256; i++) {
00126 if (small->present[i] && !big->present[i]) goto next_small;
00127 }
00128 for (i=0; i<256; i++) {
00129 if (small->present[i]) big->present_only_here[i] = 0;
00130 }
00131 fprintf(stderr, " yes!\n");
00132 {
00133 struct charset_list_node *n =
00134 malloc(sizeof(struct charset_list_node));
00135 fprintf(stderr, "Including %s in %s\n", small->rule->name,
00136 big->rule->name);
00137 n->next = big->inclusions;
00138 big->inclusions = n;
00139 n->charset = small;
00140 }
00141 continue;
00142 next_small:
00143 fprintf(stderr, " no\n");
00144 }
00145 }
00146
00147
00148
00149
00150 for (big = g->first_charset; big; big=big->next_in_grammar){
00151 fill_in_rule_for_charset(g, big);
00152 }
00153 }
00154
00155 Rule* add_anonymous_rule(Grammar *g) {
00156 static char buf[80];
00157 sprintf(buf, "_%d", ++(g->anons));
00158 return add_rule(g, buf);
00159 }
00160
00161 Rule* add_rule(Grammar *g, char *name) {
00162 Rule *r = (Rule *) dltree_alloc(sizeof(Rule));
00163 dltree_append_child(g, r);
00164 r->name=strdup(name);
00165 return r;
00166 }
00167
00168 Rule *obtain_rule(Grammar *g, char *name);
00169
00170 Branch* add_branch(Rule *r) {
00171 Branch *b = (Branch *) dltree_alloc(sizeof(Branch));
00172 dltree_append_child(r, b);
00173 return b;
00174 }
00175
00176 Term *add_term(Branch *b) {
00177 Term *t = (Term *) dltree_alloc(sizeof(Term));
00178 dltree_append_child(b, t);
00179 t->type = unused;
00180 return t;
00181 }
00182
00183 void grammar_copy_term(Term *t, Term *dest) {
00184 dest->type = t->type;
00185 switch(t->type) {
00186 case unused:
00187 break;
00188 case literal:
00189 dest->data.literal = t->data.literal;
00190 break;
00191 case rule_name:
00192 dest->data.rule_name = strdup(t->data.rule_name);
00193 break;
00194 case rule_pointer:
00195 dest->data.rule_pointer = t->data.rule_pointer;
00196 break;
00197 }
00198 dest->name = strdup(t->name);
00199 dest->actions = 0;
00200 }
00201
00202 void print_value(FILE *out, Value* value, int in_rule) {
00203 switch (value->type) {
00204 case unused: abort();
00205 case text_buffer:
00206 fprintf(out, "symbolFor(text_buffer(), sink->getSink())");
00207 break;
00208 case literal_string:
00209 fprintf(out, "symbolFor(\"%s\", sink->getSink())", value->as.literal_string.string);
00210 break;
00211 case tuple:
00212 {
00213 Value *v = value->as.tuple.first;
00214 for ( ; v; v=v->next) {
00215 print_value(out, v, in_rule);
00216 if (v->next) fprintf(out, ", ");
00217 }
00218 }
00219 break;
00220 case rdfid:
00221 fprintf(out, "Symbol(space, \"%s\")", value->as.rdfid.id);
00222 break;
00223 case local_name:
00224 fprintf(out, "obtain_local(\"%s\")", value->as.local_name.name);
00225 break;
00226 case current_content:
00227 {
00228 if (in_rule) {
00229 fprintf(out, "$$.content");
00230 } else {
00231 fprintf(out, "result.content");
00232 }
00233 break;
00234 }
00235 case subst:
00236 {
00237 char var[16];
00238
00239 if (in_rule) {
00240 if (value->as.subst.position == 0) {
00241 sprintf(var, "$$");
00242 } else {
00243 sprintf(var, "$%d", value->as.subst.position);
00244 }
00245 } else {
00246 sprintf(var, "result");
00247 }
00248
00249 switch (value->as.subst.part) {
00250 case value_part:
00251 fprintf(out, "%s.value", var);
00252 break;
00253 case content_part:
00254 fprintf(out, "%s.content", var);
00255 break;
00256 case text_part:
00257 fprintf(out,
00258 "symbolFor(%s.text, %s.text_end, sink->getSink())",
00259 var, var);
00260 break;
00261 }
00262 break;
00263 }
00264 }
00265 }
00266
00267 void print_value_text(FILE *out, Value* value) {
00268 switch (value->type) {
00269 case unused: abort();
00270 case text_buffer:
00271 fprintf(out, "text_buffer()");
00272 break;
00273 case literal_string:
00274
00275 fprintf(out, "\"%s\"", value->as.literal_string.string);
00276 break;
00277 case tuple:
00278 die("type mismatch, can't convert tuple to string");
00279 break;
00280 case current_content:
00281 die("type mismatch, can't convert current_content to string");
00282 break;
00283 case rdfid:
00284 die("type mismatch, can't convert rdfid to string");
00285 break;
00286 case local_name:
00287 die("type mismatch, can't convert local name to string");
00288 break;
00289 case subst:
00290 if (value->as.subst.position == 0) {
00291 fprintf(out, "$$");
00292 } else {
00293 fprintf(out, "$%d", value->as.subst.position);
00294 }
00295 switch (value->as.subst.part) {
00296 case value_part:
00297 die("type mismatch, can't convert value symbol to string"); break;
00298 case content_part:
00299 die("type mismatch, can't convert content symbol to string"); break;
00300 case text_part:
00301 fprintf(out, ".text"); break;
00302 }
00303 break;
00304 }
00305 }
00306
00307 void print_actions(FILE *out, Action* action, int last, Branch *b) {
00308 Action *a = action;
00309 if (a == 0 && !last) {
00310 return;
00311 }
00312
00313 if (b) fprintf(out, "[YYVALID;]\n");
00314
00315 if (a && a->type == nop) {
00316 if (b) fprintf(out, " { }");
00317 return;
00318 }
00319
00320 fprintf(out, "\n {\n");
00321
00322 if (b) {
00323 fprintf(out, " $$.content = symbolForAnonymous(\"content\", sink->getSink());\n");
00324 fprintf(out, " $$.value = symbolForAnonymous(\"value\", sink->getSink());\n");
00325 }
00326
00327 if (b) {
00328
00329
00330 int bytes=0;
00331 int term=0;
00332 int i;
00333 DLTreeNode *n = b->tree.first;
00334 for (i=1; n; i++, n=n->next) {
00335 Term *t = (Term *)n;
00336 if ( t->type == literal ) {
00337 bytes += 1;
00338 } else {
00339 term = i;
00340 break;
00341 }
00342 }
00343
00344 if (a && a->type == onechar) { bytes=1; term=0; }
00345
00346 if (term == 0) {
00347 fprintf(out, " $$.text = yylex_buffer - %d;\n", bytes);
00348 } else {
00349 fprintf(out, " $$.text = $%d.text - %d;\n", term, bytes);
00350 }
00351
00352 fprintf(out, " $$.text_end = yylex_buffer;\n");
00353 }
00354
00355 while (a) {
00356 fprintf(out, " ");
00357 switch (a->type) {
00358 case unused: abort();
00359 case nop: break;
00360 case onechar: break;
00361 case addto:
00362 fprintf(out, "sink->addTo(");
00363 print_value(out, a->as.addto.set, b!=0);
00364 fprintf(out, ", ");
00365 print_value(out, a->as.addto.object, b!=0);
00366 fprintf(out, ")");
00367 break;
00368 case includein:
00369 fprintf(out, "sink->includeIn(");
00370 print_value(out, a->as.includein.outer, b!=0);
00371 fprintf(out, ", ");
00372 print_value(out, a->as.includein.inner, b!=0);
00373 fprintf(out, ")");
00374 break;
00375 case appendtotext:
00376 fprintf(out, "append_to_text_buffer(");
00377 print_value_text(out, a->as.appendtotext.text);
00378 fprintf(out, ")");
00379 break;
00380 case cleartext:
00381 fprintf(out, "clear_text_buffer()");
00382 break;
00383 }
00384
00385 fprintf(out, ";\n");
00386 a = a->next;
00387 }
00388 fprintf(out, " }\n");
00389
00390 }
00391 void print_term(FILE *out, Grammar *g, Term *t) {
00392 switch(t->type) {
00393 case unused: fprintf(out, "<unused>");
00394 break;
00395 case literal: {
00396 char c = t->data.literal;
00397
00398 if (c == '\\') {
00399 fprintf(out, "'\\\\'");
00400 } else if (c == '\'') {
00401 fprintf(out, "'\\''");
00402 } else if (c == '\n') {
00403 fprintf(out, "'\\n'");
00404 } else if (c == '\r') {
00405 fprintf(out, "'\\r'");
00406 } else {
00407 fprintf(out, "'%c'", c);
00408 }
00409 }
00410 break;
00411 case rule_name:
00412
00413 fprintf(out, "%s", t->data.rule_name);
00414 break;
00415 case rule_pointer:
00416
00417 fprintf(out, "%s", t->data.rule_pointer->name);
00418 break;
00419 }
00420 assert(t->name);
00421 #if COMMENT_YACC_RULES
00422 if (t->name[0] != '_') fprintf(out, " /*=\"%s\"*/ ", t->name);
00423 #endif
00424 }
00425
00426 void print_branch(FILE *out, Grammar *g, Branch *b) {
00427 DLTreeNode *n = b->tree.first;
00428 while (n) {
00429 print_term(out, g, (Term *)n);
00430 print_actions(out, ((Term *)n)->actions, (n->next == 0), b);
00431 n=n->next;
00432 if (n) fprintf(out, " ");
00433 }
00434 }
00435
00436 void print_rule(FILE *out, Grammar *g, Rule *r) {
00437 DLTreeNode *n = r->tree.first;
00438
00439 assert(n);
00440
00441 fprintf(out, "%s\n : ", r->name);
00442 while (n) {
00443 print_branch(out, g, (Branch *)n);
00444 n=n->next;
00445 if (n) fprintf(out, "\n | ");
00446 }
00447 fprintf(out, "\n");
00448 fprintf(out, "\n");
00449 }
00450
00451 void print_grammar(FILE *out, Grammar *g) {
00452 DLTreeNode *n = g->tree.first;
00453
00454 post_process_charsets(g);
00455 fprintf(out, "%%{\n\n");
00456 fprintf(out, "static char id[] = \"machine generated by blindfold (should have various version infos)\";\n");
00457 fprintf(out, "#include <parser_common_head.h>\n");
00458 fprintf(out, "\n%%}\n\n");
00459 fprintf(out, "%%%%\n");
00460
00461 while (n) {
00462 print_rule(out, g, (Rule *)n);
00463 n=n->next;
00464 }
00465
00466 fprintf(out, "%%%%\n");
00467
00468 fprintf(out, "\n\nvoid top_level_actions()\n");
00469 print_actions(out, g->actions, 1, 0);
00470 fprintf(out, "\n");
00471
00472 fprintf(out, "#include <parser_common_foot.h>\n");
00473 }
00474
00475