// CVS keyword-expanded identification string, kept in the binary.
static const char cvsid[] = "$Id: main_8cpp-source.html,v 1.11 2001/10/10 20:40:59 sandro Exp $";

// Banner printed by the --version option.
#define VERSION "Blindfold Version 0.1.1 (main $Revision: 1.11 $ $Date: 2001/10/10 20:40:59 $)"

// Module name used by the TRACE facility for this translation unit.
#define TRACE_NAME "main"
00005
00010 #include <getopt.h>
00011 #include <sstream>
00012 #include <cstdio>
00013
00014 #include "Parser.h"
00015 #include "GrammarManager.h"
00016 #include "TripleSink.h"
00017 #include "LoggingTripleSink.h"
00018 #include "XSBAgent.h"
00019 #include "InMemoryPool.h"
00020 #include "Query.h"
00021 #include "XSBQuery.h"
00022 #include "XSBPool.h"
00023 #include "NTriplesParser.h"
00024 #include "NTriplesGenerator.h"
00025 #include "FlowManager.h"
00026 #include "ForeignVariableScope.h"
00027 #include "Trace.h"
00028 #include "Rule.h"
00029
// Name this program was invoked as (argv[0]); set at the start of main().
char* programName = 0;

// True once a command word has been read from the command line.
bool gotCommand = false;

// The command word itself; only meaningful when gotCommand is true.
std::string command;
00033
00034 void help() {
00035 std::ostream& o = std::cerr;
00036
00037 if (gotCommand) {
00038 if (command == "conjoin") {
00039 o << "The conjoin operation returns all triples in any of the input datasets, modifying\nthe names of existential variables so they do not collide." << endl;
00040 } else {
00041 o << "No documentation for \"" << command << "\"" << endl;
00042 }
00043 return;
00044 }
00045
00046 o <<
00047
00048 "\nusage: " << programName << " [general options] command [dataset...]\n"
00049 "\n"
00050 "GENERAL OPTIONS:\n"
00051 " -? --help [command] Give this help message, or help on the command\n";
00052 if (DO_TRACE) {
00053 o << " -t --trace module-name Turn tracing on for named module\n";
00054 } else {
00055 o << "(-t --trace module-name) (Tracing turned off at compile-time)\n";
00056 }
00057 o <<
00058
00059 " -v --version Report software version\n"
00060 "\n"
00061 "COMMANDS:\n"
00062 " report Report number of triples in conjoin, common, posdiff, negdiff\n"
00063 " conjoin Combine the datasets, keeping bNodes distincts\n"
00064 " common Find the greatest common subgraph of two datasets\n"
00065 " diff Give posdiff & negdiff, unix style\n"
00066 " posdiff What would you need to add to dataset B to equal dataset A?\n"
00067 " negdiff What would you need to add to dataset A to equal dataset B?\n"
00068 "\n"
00069 "DATASET SYNTAXES:\n"
00070 " default N-Triples file name, relative to current directory\n"
00071 "\n"
00072 "For more information see http://www.w3.org/2001/06/blindfold\n\n";
00073 }
00074
00075
00076 static void loadFile(Pool& pool, const char* filename)
00077 {
00078 NTriplesParser parser;
00079
00080 ForeignVariableScope tmp(pool.getScope());
00081 parser.setSink(&pool, &tmp);
00082 FlowManager::main.streamIn(filename, parser, true);
00083 FlowManager::main.run();
00084
00085
00086 }
00087
00088 static Pool* compare(Agent& agent,
00089 TripleSource *lefts, TripleSource *rights,
00090 bool intersect, int max)
00091 {
00092 TripleSource* result = 0;
00093 SimpleVariableScope leftScope;
00094 SimpleVariableScope rightScope;
00095
00096 QueryablePool* left = agent.newQueryablePool(&leftScope);
00097 QueryablePool* right = agent.newQueryablePool(&rightScope);
00098
00099 left->mUnion(*lefts);
00100 right->mUnion(*rights);
00101
00102
00103 TRACE "compare left: " << *left << endl;
00104
00105 TRACE "compare right: " << *right << endl;
00106
00107 if (intersect) {
00108 result = left->intersect(*right, max);
00109 } else {
00110 result = left->diff(*right, max);
00111 }
00112
00113 TRACE "compare RESULT: " << *result << endl;
00114
00115
00116 InMemoryPool* returnable = new InMemoryPool(*result);
00117
00118 delete left;
00119 delete right;
00120
00121 return returnable;
00122 }
00123
00124 int main(int argc, char** argv)
00125 {
00126 programName = argv[0];
00127 bool wantsHelp = false;
00128 int c;
00129 int option_index = 0;
00130 std::vector<TripleSource*> sources;
00131 int max=0x7fffffff;
00132 static struct option long_options[] =
00133 {
00134 {"help", 0, 0, '?'},
00135 {"trace", 1, 0, 't'},
00136 {"output", 1, 0, 'o'},
00137 {"max", 1, 0, 'm'},
00138 {"version", 0, 0, 'v'},
00139 {"rule", 0, 0, 'r'},
00140 {"universal-variable-pattern", 1, 0, 'u'},
00141 {0, 0, 0, 0}
00142 };
00143 static char* short_options =
00144 "-"
00145 "t:"
00146 "o:"
00147 "m:"
00148 "v"
00149 "r"
00150 "u:"
00151 "?";
00152 char *varPattern = ".*#(_|[A-Z]).*";
00153 std::vector<Rule*> rules;
00154 int gettingRule = 0;
00155 const TripleSource* rulePremise;
00156
00157 while (1) {
00158
00159 c = getopt_long(argc, argv, short_options, long_options, 0);
00160
00161 if (c == -1) break;
00162
00163 switch (c) {
00164
00165 case 1:
00166 if (gotCommand || gettingRule) {
00167 Pool* s = new InMemoryPool();
00168 loadFile(*s, optarg);
00169 TRACE "Loaded pool from " << optarg << *s << endl;
00170 if (gettingRule == 2) {
00171 TRACE "... and saved it as rule premise" << endl;
00172 rulePremise = s;
00173 gettingRule--;
00174 } else if (gettingRule == 1) {
00175 TRACE "... and using it as rule conclusion" << endl;
00176 rules.push_back(new Rule(*rulePremise, *s, varPattern));
00177 delete rulePremise;
00178 delete s;
00179 gettingRule--;
00180 } else {
00181 TRACE "... and saved it as source #" << sources.size() << endl;
00182 sources.push_back(s);
00183 }
00184 } else {
00185 command = optarg;
00186 gotCommand = true;
00187 }
00188 break;
00189
00190 case 'r':
00191 gettingRule = 2;
00192 break;
00193
00194 case 'u':
00195 varPattern = optarg;
00196 break;
00197
00198 case 't':
00199 Trace::set(optarg, true);
00200 break;
00201
00202 case 'm':
00203 max = atoi(optarg);
00204 break;
00205
00206 case 'o':
00207 NOT_IMPLEMENTED;
00208
00209 case 'v':
00210 std::cout << VERSION << endl;
00211 exit(0);
00212
00213 case ':':
00214 fprintf(stderr, "missing option arguments\n");
00215 wantsHelp = true;
00216 break;
00217
00218 case '?':
00219 if (optopt == '?') {
00220 wantsHelp = true;
00221 } else {
00222 std::cerr << "Try --help for a summary of options.\n";
00223 exit(2);
00224 }
00225 break;
00226
00227 default:
00228 die("getopt returned character code " << c << endl);
00229
00230 }
00231 }
00232
00233 if (wantsHelp || gotCommand==false) {
00234 help();
00235 exit(2);
00236 }
00237
00238 XSBAgent agent;
00239 int exitStatus = 0;
00240
00241 if (command == "conjoin") {
00242 InMemoryPool pool;
00243
00244 for (std::vector<TripleSource*>::iterator i=sources.begin(); i!=sources.end(); ++i) {
00245 pool.mUnion(**i);
00246 }
00247 pool.printNTriples(std::cout);
00248 } else if (command == "common") {
00249 if (sources.size() != 2) {
00250 std::cerr << "the 'common' command uses two datasets\n";
00251 exit(2);
00252 }
00253 TripleSource* p = compare(agent, sources[0], sources[1], true, max);
00254 p->printNTriples(std::cout);
00255 delete p;
00256 } else if (command == "posdiff") {
00257 if (sources.size() != 2) {
00258 std::cerr << "the 'posdiff' command uses two datasets\n";
00259 exit(2);
00260 }
00261 TripleSource* p = compare(agent, sources[0], sources[1], false, max);
00262 TRACE "Diff results: " << *p;
00263 p->printNTriples(std::cout);
00264 if (p->count() > 0) exitStatus=1;
00265 delete p;
00266 } else if (command == "negdiff") {
00267 if (sources.size() != 2) {
00268 std::cerr << "the 'negdiff' command uses two datasets\n";
00269 exit(2);
00270 }
00271 TripleSource* p = compare(agent, sources[1], sources[0], false, max);
00272 p->printNTriples(std::cout);
00273 if (p->count() > 0) exitStatus=1;
00274 delete p;
00275 } else if (command == "diff") {
00276 if (sources.size() != 2) {
00277 std::cerr << "the 'diff' command uses two datasets\n";
00278 exit(2);
00279 }
00280 Pool* diff = compare(agent, sources[0], sources[1], false, max);
00281 size_t diffCount = diff->count();
00282
00283 Pool* rdiff = compare(agent, sources[1], sources[0], false, max);
00284 size_t rdiffCount = rdiff->count();
00285
00286 if (diffCount + rdiffCount > 0) exitStatus = 1;
00287 rdiff->printNTriples(std::cout, "< ");
00288 diff->printNTriples(std::cout, "> ");
00289
00290 delete diff;
00291 delete rdiff;
00292 } else if (command == "report") {
00293 if (sources.size() != 2) {
00294 std::cerr << "the 'report' command uses two datasets\n";
00295 exit(2);
00296 }
00297 bool different = false;
00298
00299 std::cout << sources[0]->count() << " triples in dataset A" << endl;
00300 std::cout << sources[1]->count() << " triples in dataset B" << endl;
00301
00302 InMemoryPool u(*sources[0]);
00303 u.mUnion(*sources[1]);
00304
00305 std::cout << u.count() << " triples in conjoin A B" << endl;
00306
00307 TripleSource* inter = compare(agent, sources[0], sources[1], true, max);
00308 size_t interCount = inter->count();
00309 std::cout << interCount << " triples in common A B" << endl;
00310
00311 Pool* diff = compare(agent, sources[0], sources[1], false, max);
00312 size_t diffCount = diff->count();
00313 std::cout << diffCount << " triples in posdiff A B (A-common(A,B))" << endl;
00314
00315 Pool* rdiff = compare(agent, sources[1], sources[0], false, max);
00316 size_t rdiffCount = rdiff->count();
00317 std::cout << rdiffCount << " triples in negdiff A B (B-common(A,B))" << endl;
00318
00319 if (diffCount + rdiffCount > 0) exitStatus = 1;
00320 } else {
00321 std::cout << "Unknown command \"" << command << "\"\n.";
00322 exit(2);
00323 }
00324 agent.shutdown();
00325 exit(exitStatus);
00326 }
00327
00328 #undef TRACE_NAME