// compare.cc

#include "compare.h"
#include "ntc.h"
#include <iostream>
#include <set>
#include <vector>
#include <map>
#include <algorithm>
using namespace std;

struct signature {

	int sc;
	int pc;
	int oc;

	signature() : sc(0), pc(0), oc(0) {}

	signature(int ss, int pp, int oo) :
		sc(ss), pc(pp), oc(oo)
	{}

	bool operator==(const signature& t) const {
		return sc==t.sc && pc==t.pc && oc==t.oc;
	}

	bool operator<(const signature& t) const {
		return	sc < t.sc ||
				sc == t.sc && pc < t.pc ||
				sc == t.sc && pc == t.pc && oc < t.oc;
	}

};

inline ostream& operator<<(ostream& o, const signature& s) {
	return o << "(" << s.sc << ", " << s.pc << ", " << s.oc << ")";
}

typedef map<signature,set<int> > SIGMAP;
typedef map<int,set<signature> > SIGMAPSIZE;
typedef vector<signature> SIGVEC;
typedef map<signature,vector<int> > PERMMAP;

void DEBUG_dump_sigset(SIGMAP& s, map<int,string>& namemap);
void DEBUG_dump_sigmapsize(SIGMAPSIZE& sms);
void DEBUG_dump_sigorder(SIGVEC& so);

void compute_signature(map<int,signature>& sm, set<triple>& atriples);
void collate_sigmap_size(SIGMAPSIZE& sms, SIGMAP& sm);
void produce_sigorder(SIGVEC& so, SIGMAPSIZE& sms);
void load_initial_permutation(signature& cursig,
	PERMMAP& permutations,
	SIGMAP& sigmap);

bool compare(model& m1, model& m2) {

	/*
		We start by checking that the two static statement
		sets are the same.
	*/

	if (m1.striples.size() != m2.striples.size()) {
		if (NOISY) {
			cerr << "Non-anonymous statement sets of different size" << endl;
		} // if (NOISY)
		return false;
	}

	/*
		Check that the set of nonanonymous nodes are of the same size
	*/

	if (m1.nextpos != m2.nextpos) {
		if (NOISY) {
			cerr << "Non-anonymous node sets of different size" << endl;
		} // if (NOISY)
		return false;
	}

	/*
		Perform the same test on anonymous nodes
	*/

	if (m1.nextneg != m2.nextneg) {
		if (NOISY) {
			cerr << "The number of distinct anonymous nodes differs between models"
				<< endl;
		} // if (NOISY)
		return false;
	}
	int nextneg = m1.nextneg;

	/*
		Throughout this test, we'll be using a mapping function
		from the ids used by m1 to the ids used by m2.
		The nonanonymous part of this is fixed, and we compute this
		now.
		We use MAP[id]->id. We do this using a cheeky offset into an array
		of the appropriate size.
	*/

	int mm[m1.nextpos - nextneg];
	int * const MAP = mm + -nextneg;

	for (int i=1; i<m1.nextpos; i++) {
		map<string,int>::const_iterator nodei = m2.id.find(m1.name[i]);
		if (nodei == m2.id.end()) {
			if (NOISY) {
					cerr << "Node " << m1.name[i] <<
						" doesn't occur in second model" << endl;
			} // if (NOISY)
			return false;
		}

		MAP[i] = nodei->second;
	}

	/*
		Once we get this far, we know that the mapping of nonanonymous
		nodes is ok; we check the nonanonymous statements of m1 against
		those of m2 under this mapping
	*/

	set<triple>::const_iterator i = m1.striples.begin();
	while (i != m1.striples.end()) {

		if (m2.striples.end() ==
			m2.striples.find(triple(MAP[(*i).s], MAP[(*i).p], MAP[(*i).o]))
		) {
			if (NOISY) {
				cerr << "Nonanonymous statement not in second model:" << endl;
				cerr <<
					m1.name[(*i).s] << " " <<
					m1.name[(*i).p] << " " <<
					m1.name[(*i).o] << " ." << endl;
			} // if (NOISY)
			return false;
		}

		++ i;
	}

	/*
		By now, we know that the non-anonymous statement sets are the same.
		What remains? Well, we need to find if there exists a bijective
		mapping from the anonymous resources of the first model to those
		of the second model under which the atriples of the two models
		are congruent.

		We start with the simple counting tests. After that, what unfortunately
		remains is to test permutations of the anonymous nodes. This is,
		in the pathological case, O(n!). Now, this might be plausible for
		small test csaes - we nevertheless adopt a heuristic, namely to
		classify the anonymous nodes in each model into sets with the same
		'signature'.

		Once the compartmentalisation is done, we first check that the
		sizes of compartments match per signature, then generate
		permutations over the signature sets.

		The signatures can be generated in any manner that does not rely
		on the names of anonymous nodes. In this case, we simply use the
		triple (sc, pc, oc) of occurrence counts: how many atriples
		a particular node appears as the S, P or O of.
	*/

	if (m1.atriples.size() != m2.atriples.size()) {
		if (NOISY) {
			cerr << "The number of statements containing anonymous nodes differs"
				<< endl;
		} // if (NOISY)
		return false;
	}
	if (m1.atriples.size() == 0) return true;

	/*
		Compartmentalise the anonymous nodes.
	*/

	SIGMAP sigmap1, sigmap2;

	/*
		Calculate signature for each node and store it.
	*/

	map<int,signature> sm1, sm2;
	compute_signature(sm1, m1.atriples);
	compute_signature(sm2, m2.atriples);

	/*
		Collate anonymous nodes by signature into sets
	*/

	for (int an = -1; an > nextneg; an --) {
		sigmap1[sm1[an]].insert(an);
		sigmap2[sm2[an]].insert(an);
	}

	/*
		DEBUG: output by signatures
	*/

	// DEBUG_dump_sigset(sigmap1, m1.name);
	// DEBUG_dump_sigset(sigmap2, m2.name);

	/*
		Collate size of signature collection together with signature
	*/

	SIGMAPSIZE smsize1, smsize2;
	collate_sigmap_size(smsize1, sigmap1);
	collate_sigmap_size(smsize2, sigmap2);

	// DEBUG_dump_sigmapsize(smsize1);
	// DEBUG_dump_sigmapsize(smsize2);

	/*
		As another quick check, we ensure that the set of (signatures
		with _n_ anonymous nodes generating them) is the same
		for each value of _n_
	*/

	if (smsize1 != smsize2) {
		if (NOISY) {
			cerr << "the singature sets do not match by size" << endl;
		} // if (NOISY)
		return false;
	}

	/*
		Now we do the setup for permutation generation.
		First, we extract into a vector the signatures in
		descending order of correspondence set size.
	*/

	SIGVEC sigorder;
	produce_sigorder(sigorder, smsize1);
	// DEBUG_dump_sigorder(sigorder);

	SIGVEC::iterator cursig=sigorder.begin();
	PERMMAP permutations;

	/*
		Set up the permutation for this signature
	*/

	load_initial_permutation(*cursig, permutations, sigmap2);

	while (true) {

		/*
			Is this a signature or at the end?
			If the latter, we've finally got a finished potential
			permutation set up. Test the atriples against that
			mapping and stop if it works.
		*/

		if (cursig == sigorder.end()) {

		if (NOISY) {
			// cerr << "DEBUG: trying permutation" << endl;
		} // if (NOISY)
			set<triple>::const_iterator i = m1.atriples.begin();
			bool ok = true;
			while (i != m1.atriples.end()) {

				if (m2.atriples.end() ==
					m2.atriples.find(triple(MAP[(*i).s],
											MAP[(*i).p],
											MAP[(*i).o]))
				) {
					ok = false;
		if (NOISY) {
					// cerr << "DEBUG: ... permutation fails" << endl;
		} // if (NOISY)
					break;
				}

				++ i;
			}

			if (ok) {

				/*
					We have an answer.
				*/

				if (NOISY) {
						cout << "m1 node\tm2 node" << endl;
						for (int n = -1; n > nextneg; n--) {
							cout << m1.name[n] << "\t" << m2.name[MAP[n]] <<endl;
						}
				} // if (NOISY)
				return true;

			}

			/*
				Back up to the previous signature.
			*/

		backup:
			if (cursig == sigorder.begin()) break;
			if (NOISY) {
				// cerr << "DEBUG: backing up." << endl;
			} // if (NOISY)
			-- cursig;

			/*
				Try the next permutation. If none left, back up again.
			*/
			if (! next_permutation(permutations[*cursig].begin(),
				permutations[*cursig].end()) )
				goto backup;

			continue;

		}

		if (NOISY) {
			// cerr << "DEBUG: mooching signature " << *cursig << endl;
		} // if (NOISY)

		/*
			Copy the current permutation into MAP
		*/

		set<int>::const_iterator i1 = sigmap1[*cursig].begin();
		vector<int>::const_iterator i2 = permutations[*cursig].begin();
		
		for (; i1 != sigmap1[*cursig].end(); ++i1, ++i2) {
			MAP[*i1] = *i2;
		}

		/*
			Move on to the next signature - set up the first
			permutation and reloop to try it.
		*/

		++ cursig;
		load_initial_permutation(*cursig, permutations, sigmap2);

	}

	if (NOISY) {
		cerr << "Exhaustive search fails" << endl;
	} // if (NOISY)
	return false;

}

void compute_signature(map<int,signature>& sm, set<triple>& atriples) {
	for (set<triple>::const_iterator i = atriples.begin();
		i != atriples.end(); ++ i)
	{
		if ((*i).s < 0) sm[(*i).s].sc ++;
		if ((*i).p < 0) sm[(*i).p].pc ++;
		if ((*i).o < 0) sm[(*i).o].oc ++;
	}
}

void DEBUG_dump_sigset(SIGMAP& s, map<int,string>& namemap) {
	if (NOISY) {
		cerr << "DEBUG: m's signatures with anon nodes" << endl;
		for (SIGMAP::const_iterator ddi = s.begin(); ddi != s.end(); ++ ddi)
		{
			cerr << "Signature: " << (ddi->first);
			for (set<int>::const_iterator dai = (ddi->second).begin();
				dai != (ddi->second).end(); ++dai)
			{
				cerr << " " << namemap[*dai];
			}
			cerr << endl;
		}
	} // if (NOISY)
}

void collate_sigmap_size(SIGMAPSIZE& sms, SIGMAP& sm) {
	for (SIGMAP::const_iterator i = sm.begin(); i != sm.end(); ++i) {
		sms[(i->second).size()].insert(i->first);
	}
}

void DEBUG_dump_sigmapsize(SIGMAPSIZE& sms) {
	if (NOISY) {
		cerr << "DEBUG: m's signatures by size of set" << endl;
		for (SIGMAPSIZE::const_iterator i = sms.begin(); i != sms.end(); ++ i)
		{
			cerr << "Size of set: " << (i->first) << " - ";
			for (set<signature>::const_iterator ii = (i->second).begin();
				ii != (i->second).end(); ++ ii)
			{
				cerr << " " << (*ii);
			}
			cerr << endl;
		}
	} // if (NOISY)

}

void produce_sigorder(SIGVEC& so, SIGMAPSIZE& sms) {
	for (SIGMAPSIZE::reverse_iterator i = sms.rbegin();
		i != sms.rend(); ++i)
	{
		for (set<signature>::const_iterator j = (i->second).begin();
			j != (i->second).end(); ++j)
		{
			so.push_back(*j);
		}
	}
}

void DEBUG_dump_sigorder(SIGVEC& so) {
	if (NOISY) {
		cerr << "DEBUG: dump of signature order" << endl;
		for (SIGVEC::const_iterator i = so.begin(); i != so.end(); ++i) {
			cerr << " " << *i;
		}
		cerr << endl;
	} // if (NOISY)
}

void load_initial_permutation(signature& cursig,
	map<signature,vector<int> >& permutations,
	SIGMAP& sigmap)
{
	copy(sigmap[cursig].begin(), sigmap[cursig].end(),
		back_inserter(permutations[cursig]));
}

