#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import mte_xtrakt, conv_table

def get_seqs(fname, minlen = 2):
	"""Yield tag-sequence strings for the runs of items found in *fname*.

	Items come from ``mte_xtrakt.items_kipi(fname)`` and are unpacked as
	``(sp, orth, lts)``, where ``lts`` is an iterable of ``(lemma, tag)``
	pairs.  Items are accumulated into a run, and the run is closed when:

	* an item whose first tag is ``'interp'`` arrives (the pending run is
	  closed and that item then forms a run of its own), or
	* an item with a truthy ``sp`` arrives (it starts the next run), or
	* the input ends.

	A closed run with at least *minlen* items is rendered as one string:
	for every item the distinct tags returned by
	``conv.convert_single_tag`` are joined with ``'|'`` (in sorted order),
	and the items are joined with a single space.  Shorter runs and empty
	renderings are not yielded.

	An empty input simply yields nothing.
	"""
	conv = conv_table.Converter()

	def is_interp(item):
		# Only the first (lemma, tag) pair of the item is inspected.
		return item[2][0][1] == 'interp'

	def flush(stack):
		"""Render *stack* if it is long enough (else None); always empty it."""
		out = None
		if len(stack) >= minlen:
			rendered = []
			for sp, orth, lts in stack:
				mtags = set()
				for klem, ktag in lts:
					converted, _base = conv.convert_single_tag(ktag, klem, orth)
					mtags.add(converted)
				# Sorted so that equal tag sets always give the same text;
				# set iteration order is arbitrary and would otherwise let
				# duplicates of one sequence differ only in tag order.
				rendered.append('|'.join(sorted(mtags)))
			out = ' '.join(rendered)
		# Cleared in place: the caller keeps using the same list object.
		del stack[:]
		return out

	# Bound before any item is read, so the final flush is safe on empty input.
	stack = []
	for last in mte_xtrakt.items_kipi(fname):
		if is_interp(last):
			# Close the pending run, then treat the 'interp' item as a
			# one-element run (only emitted when minlen allows it).
			item = flush(stack)
			if item: yield item
			stack.append(last)
			item = flush(stack)
			if item: yield item
			continue
		if last[0]:
			# A truthy first field starts a new run; a falsy one is glued
			# onto the run that is currently being collected.
			item = flush(stack)
			if item: yield item
		stack.append(last)

	# Whatever is still pending at end of input forms the last run.
	item = flush(stack)
	if item: yield item


def now(fname):
	"""Print each distinct sequence yielded by get_seqs(fname), one per line.

	Duplicates are collapsed through a set, so the order of the printed
	lines is whatever order the set iterates in.
	"""
	for sequence in set(get_seqs(fname)):
		print(sequence)

if __name__ == '__main__':
	# The input file path is taken from the first command-line argument.
	if len(sys.argv) < 2:
		# Exit with a usage line (non-zero status) rather than an
		# IndexError traceback when the argument is missing.
		sys.exit('usage: %s FILE' % sys.argv[0])
	now(sys.argv[1])
