#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright 2005-2007 Harri Pitkänen (hatapitk@iki.fi)
# This program inflects Finnish words.
# This program requires Python version 2.4 or newer.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

# You may get the words and inflection classes for this command from a
# database dump of Joukahainen with the following query:
#  \pset fieldsep ;
#  \a
#  \t
#  \o sanat.txt
#  select wstruct, case when wclass in (1,2) then 'subst-' when wclass=3 then 'verbi-' end || infclass as infclass
#  from view_voikko_inflection where infclass is not null order by wstruct, infclass;

# Such a list of words can be processed by running
# cat sanat.txt | voikko-inflect-word > taivutukset.txt

import voikkoinfl
import sys
import locale
import voikkoutils

# Locations of the noun and verb affix files, built from the 'corevoikko'
# preference returned by voikkoutils.get_preference.
NOUN_AFFIX_FILE = voikkoutils.get_preference('corevoikko') + '/data/subst.aff'
VERB_AFFIX_FILE = voikkoutils.get_preference('corevoikko') + '/data/verb.aff'
# Character encoding used in this script to decode command-line arguments and
# standard input, and to encode the printed output.
PARAM_ENCODING = voikkoutils.get_preference('encoding')

# Inflection types for nouns and verbs, read from the affix files once when
# the script starts. They are later passed to voikkoinfl.inflectWord.
noun_types = voikkoinfl.readInflectionTypes(NOUN_AFFIX_FILE)
verb_types = voikkoinfl.readInflectionTypes(VERB_AFFIX_FILE)

def word_and_infl_class(fullclass):
	"""Split a combined class string into (word class, inflection class).

	fullclass has the form 'wordclass-infclass', where the word class is
	'subst' or 'verbi' and the inflection class may itself contain one
	hyphen (for example 'verbi-a-b' gives ('verbi', 'a-b')).

	Returns a tuple (wordclass, infclass).

	If the string does not contain exactly one or two hyphens, or if the
	word class is not recognised, the program is terminated with exit
	status 1. The error message is written to standard error so that it
	does not end up in redirected output.
	"""
	# Exactly two or three hyphen-separated parts are accepted.
	if fullclass.count('-') not in (1, 2):
		# sys.exit with a string prints it to stderr and exits with status 1.
		sys.exit('Incorrect inflection class')
	# Split only at the first hyphen; the rest is the inflection class.
	(wordclass, infclass) = fullclass.split('-', 1)
	if wordclass not in (u'subst', u'verbi'):
		sys.exit('Incorrect word class')
	return (wordclass, infclass)

def print_inflected_word(word):
	"""Print one inflected form as a single output line.

	The line consists of the form name, prefixed with '!' when
	word.isCharacteristic is true, left-justified to 20 characters and
	followed by the inflected word. The line is encoded with
	PARAM_ENCODING before printing. Forms whose priority is greater
	than 2 are not printed.
	"""
	if word.priority > 2:
		return
	marker = ""
	if word.isCharacteristic:
		marker = "!"
	label = (marker + word.formName).ljust(20)
	encoded_line = (label + word.inflectedWord).encode(PARAM_ENCODING)
	print(encoded_line)

def inflect_word(word, classes):
	"""Inflect a word and print its inflected forms.

	word is the word to inflect and classes is the combined class string
	accepted by word_and_infl_class (for example 'subst-...' or
	'verbi-...'). The verb inflection types are used when the word class
	is 'verbi', otherwise the noun inflection types. Each form produced
	by voikkoinfl.inflectWord is passed to print_inflected_word.
	"""
	(word_class, infl_class) = word_and_infl_class(classes)
	# The module-level tables are only read here, never rebound.
	if word_class == u'verbi':
		inflection_types = verb_types
	else:
		inflection_types = noun_types
	for inflected_form in voikkoinfl.inflectWord(word, infl_class, inflection_types):
		print_inflected_word(inflected_form)

# Command-line entry point. The word and its inflection class are taken from
# two arguments, from one 'word;inflection_class' argument, or, when no
# arguments are given, from 'word;inflection_class' lines on standard input.
if len(sys.argv) == 3:
	inflect_word(unicode(sys.argv[1], PARAM_ENCODING), unicode(sys.argv[2], PARAM_ENCODING))
elif len(sys.argv) == 2 and ";" in sys.argv[1]:
	# Split at the first ';' only; the rest belongs to the inflection class.
	(arg_word, arg_classes) = sys.argv[1].split(";", 1)
	inflect_word(unicode(arg_word, PARAM_ENCODING), unicode(arg_classes, PARAM_ENCODING))
elif len(sys.argv) == 1:
	# readline() returns '' only at end of input, so a final line that lacks
	# a trailing newline is still processed instead of being dropped.
	for raw_line in iter(sys.stdin.readline, ''):
		line = unicode(raw_line, PARAM_ENCODING).strip()
		# An empty (or whitespace-only) line ends the input.
		if len(line) == 0: break
		ind = line.find(u";")
		if ind == -1:
			# Without this check the slices below would silently use a
			# truncated word and the whole line as the inflection class.
			sys.exit('Incorrect input line, expected word;inflection_class')
		inflect_word(line[:ind], line[ind+1:])
else:
	print('Usage: voikko-inflect-word')
	print('       voikko-inflect-word word inflection_class')
	print('       voikko-inflect-word word;inflection_class')
	print('If no arguments are given, the program expects as input lines in format')
	print('     word;inflection_class')
	sys.exit(1)
