#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Merge the Yahoo and Sindice results.

The number of results expected from each source is configured in the ``tops`` table. The process is simple:

- take the top elements of both lists
- walk the two lists in parallel: if the current Sindice entry is also in the Yahoo list, add it to the final list; then, if the current Yahoo entry is also in the Sindice list, add that to the final list, too (a URI is never added twice)
- go on until the top elements of both lists have been processed

"""
import sys

# Identifiers of the two sources. Each one is a key of ``tops`` and is also used by
# take_top() as the name of the directory that holds the source's result.csv.
SINDICE, YAHOO = 'sindice', 'yahoo'

# Cut-off per source: take_top() reads up to twice this many entries, and merge()
# walks the lists up to the larger of the two values.
tops = {SINDICE: 12, YAHOO: 10}

# Output files written by dump_data().
f_csv, f_xml = "result.csv", "result.xml"

def take_top(origin):
	"""
	Return the leading entries of one source's ranked result set.

	Reads ``<origin>/result.csv``, whose lines have the form ``uri,rank``, and
	collects at most ``2 * tops[origin]`` entries in file order. Twice the
	cut-off is read so that merge() can look entries up beyond the cut-off.

	origin: key into ``tops``; also the directory that holds ``result.csv``.

	Returns a list of ``(uri, rank)`` tuples; both items are the strings found
	in the file (the rank is not converted to a number).

	Raises ValueError for a non-blank line that contains no comma.
	"""
	limit = 2 * tops[origin]
	retval = []
	with open(origin + '/result.csv') as f:
		for line in f:
			line = line.strip()
			if not line:
				# Blank lines (typically a trailing one) carry no entry.
				continue
			# Split on the last comma only: the URI itself may contain commas.
			uri, rank = line.rsplit(',', 1)
			retval.append((uri, rank))
			if len(retval) >= limit:
				break
	return retval

def merge(sindice, yahoo):
	"""
	Interleave the two ranked lists, keeping only URIs that occur in both.

	The lists are walked in parallel, position by position. At each position the
	Sindice entry is considered first, then the Yahoo entry; an entry is emitted
	when its URI occurs anywhere in the other list and has not been emitted yet.

	sindice, yahoo: lists of ``(uri, rank)`` tuples, e.g. as returned by take_top().

	Returns a list of ``(uri, sindice_rank, yahoo_rank)`` tuples. When a URI
	occurs several times in a list, the rank of its first occurrence is used
	for the lookup side.
	"""
	def first_rank_by_uri(entries):
		# Map each URI to the rank of its first occurrence, which is what a
		# front-to-back scan of the list would find.
		ranks = {}
		for entry in entries:
			ranks.setdefault(entry[0], entry[1])
		return ranks

	sindice_rank = first_rank_by_uri(sindice)
	yahoo_rank = first_rank_by_uri(yahoo)

	retval = []
	emitted = set()
	# NOTE(review): both lists are walked up to the larger of the two cut-offs,
	# so the source with the smaller cut-off is walked past its own cut-off.
	# Kept as is; confirm whether a per-source limit was intended.
	for i in range(max(tops[SINDICE], tops[YAHOO])):
		if i < len(sindice):
			uri, rank = sindice[i][0], sindice[i][1]
			if uri in yahoo_rank and uri not in emitted:
				emitted.add(uri)
				retval.append((uri, rank, yahoo_rank[uri]))
		if i < len(yahoo):
			uri, rank = yahoo[i][0], yahoo[i][1]
			if uri in sindice_rank and uri not in emitted:
				emitted.add(uri)
				retval.append((uri, sindice_rank[uri], rank))
	return retval

def dump_data(retval):
	"""
	Write the merged results to the files named by ``f_csv`` and ``f_xml``.

	retval: sequence of ``(uri, sindice_rank, yahoo_rank)`` triples, as returned
	by merge(). It is iterated twice, so it must not be a one-shot iterator.

	The CSV file gets one ``uri,sindice_rank,yahoo_rank`` line per triple, with
	the values written verbatim. The XML file gets one ``<tr>`` row per triple,
	numbered from 1, with the URI both as link target and as link text; the
	values are escaped there so that characters such as ``&`` (common in URI
	query strings) do not break the markup.
	"""
	# Imported locally so the function carries its own dependency.
	from xml.sax.saxutils import escape

	def markup(value):
		# escape() handles & < >; the double quote is added because the URI is
		# also placed inside the href="..." attribute.
		return escape('%s' % (value,), {'"': '&quot;'})

	with open(f_csv, 'w') as d_out:
		for t in retval:
			d_out.write("%s,%s,%s\n" % t)

	with open(f_xml, 'w') as d_out:
		for (i, (uri, s, y)) in enumerate(retval, 1):
			uri = markup(uri)
			d_out.write('                <tr><td>%s.</td><td><a href="%s">%s</a></td><td>%s</td><td>%s</td></tr>\n' % (i, uri, uri, markup(s), markup(y)))

	

if __name__ == '__main__':
	# Read the Sindice list, then the Yahoo list, keep the URIs they share and
	# write the outcome to the files named by f_csv and f_xml.
	dump_data(merge(take_top(SINDICE), take_top(YAHOO)))
	