#!/usr/bin/env python3

# Copyright 2006 - 2013 Harri Pitkänen (hatapitk@iki.fi)
# Test program for Voikko.
# This program requires Python and Python interface to libvoikko.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import sys
import locale
import codecs
import os
import subprocess
import getopt
import voikkoutils
from libvoikko import Voikko
from libvoikko import SuggestionStrategy
from libvoikko import VoikkoException

# Settings looked up by name through voikkoutils.get_preference().
COREVOIKKO = voikkoutils.get_preference('corevoikko')
WORKSPACE = voikkoutils.get_preference('voikkotest_dir')
# Formatted with %i into the "-j" option of the voikkospell command lines.
THREADS = voikkoutils.get_preference('voikkospell_threads')
# Passed to codecs.open() when reading test data and config files.
ENCODING = voikkoutils.get_preference('encoding')
SPELLCMD = voikkoutils.get_preference('libvoikko_bin') + '/voikkospell'
# Used as a %-format template that receives two file paths (see --compare).
DIFFVIEWCMD = voikkoutils.get_preference('diffviewcmd')
# NOTE(review): SM_DESTDIR is not referenced anywhere else in this file.
SM_DESTDIR = voikkoutils.get_preference('voikkotest_sm_destdir')

# Paths derived from the preferences above.
# Directory whose subdirectories are scanned for test suites (config.txt).
TESTFILES = COREVOIKKO + "/tests/voikkotest"
# BUILDDIR, INSTALLDIR and BASE are exposed to config.txt expressions;
# INSTALLDIR is also the dictionary path given to Voikko for the test suites.
BUILDDIR = WORKSPACE + "/build"
INSTALLDIR = WORKSPACE + "/inst"
BASE = WORKSPACE + "/base"
# Gzipped input that is piped through voikkospell by --base and --current.
WORDLIST = WORKSPACE + "/wordlist.txt.gz"
# Output of --base and --current respectively (the "C:" lines of voikkospell).
BASE_CORRECT = WORKSPACE + "/base-correct.txt"
# NOTE(review): BASE_INFO and SMV are not referenced anywhere else in this file.
BASE_INFO = WORKSPACE + "/base-info.txt"
CURRENT_CORRECT = WORKSPACE + "/current-correct.txt"
# Directory holding assertions.txt and the "mor-*" subdirectories.
COMPATIBILITY_DIR = WORKSPACE + "/compatibility"
SMV = COREVOIKKO + "/voikko-fi"
# Dictionary/language identifier used by the --base and --current modes.
CURRENT_TESTMOR = "fi-x-vfstd"

def _check_hyphens(section, expectedResult, result):
	failcount = 0
	if expectedResult != result:
		print('Hyphenation test failed in section "%s":' % section)
		print('  Expected "%s", got "%s"' % (expectedResult, result))
		return 1
	return 0

def _check_gcerrors(inputpara, expresults, results):
	failcount = 0
	for i in range(0, len(expresults)):
		if len(results) - 1 < i:
			print('Grammar test failed: less results than expected.')
			print('The first paragraph with missing results:')
			print(' ' + inputpara[i])
			return failcount + 1
		if expresults[i] != results[i]:
			print('Grammar test failed. Input:')
			print(' ' + inputpara[i])
			print('Expected results:')
			for r in expresults[i]: print(' ' + r)
			print('Actual results:')
			for r in results[i]: print(' ' + r)
			failcount = failcount + 1
	return failcount

# Run some tests with old morphologies to make sure that they work with current libvoikko
def runCompatibilityTests():
	"""Evaluate the compatibility assertions against every "mor-*" subdirectory.

	Reads COMPATIBILITY_DIR/assertions.txt and, for each subdirectory of
	COMPATIBILITY_DIR whose name starts with "mor-", creates a Voikko
	instance from the single dictionary found there and evaluates every
	non-empty line of the assertions file as a Python expression.

	Prints a warning and returns if the assertions file is missing. Exits
	the process with status 1 if a subdirectory does not contain exactly one
	matching dictionary or if any assertion evaluates to a false value.
	"""
	assertionFileName = COMPATIBILITY_DIR + "/assertions.txt"
	if not os.path.isfile(assertionFileName):
		print("Warning: compatibility assertions file was not found, not performing any compatibility tests")
		return
	# No encoding is passed here, unlike the other codecs.open() calls in this file.
	assertionFile = codecs.open(assertionFileName)
	index = assertionFile.read()
	assertionFile.close()
	compatTestCount = 0
	for subdir in os.listdir(COMPATIBILITY_DIR):
		if not subdir.startswith("mor-"):
			continue
		dictPath = COMPATIBILITY_DIR + "/" + subdir
		# Select the dictionaries whose variant contains the directory name
		# without its "mor-" prefix.
		def dictTestFunct(d):
			return subdir[4:] in d.variant
		dicts = list(filter(dictTestFunct, Voikko.listDicts(dictPath)))
		if len(dicts) != 1:
			sys.stderr.write("Found " + repr(len(dicts)) + " matching dicts when looking from " + dictPath + "\n")
			sys.exit(1)
		d = dicts[0]
		v = Voikko(language = d.language + "-x-" + d.variant, path = dictPath)
		for line in index.split("\n"):
			# eval() is called without explicit namespaces, so each assertion
			# is resolved against this function's local names and the module
			# globals. Presumably the assertions refer to `v`; renaming locals
			# here could break the assertions file -- verify before refactoring.
			# NOTE(review): eval executes arbitrary code from assertions.txt;
			# the file must be trusted.
			if len(line) > 0 and not eval(line):
				print("Compatibility test failed: subdir='" + subdir + "', line='" + line + "'")
				sys.exit(1)
		v.terminate()
		compatTestCount = compatTestCount + 1
	if compatTestCount == 0:
		print("Warning: compatibility assertions were present but no morphologies were found")
	else:
		print("All " + repr(compatTestCount) + " compatibility tests were successful.")
	

# Returns a list of grammar errors.
def _check_grammar(paragraphlist, params, voikko):
	"""Collect grammar checker output for a list of paragraphs.

	The options described by params are applied with setOptions() before
	checking, and setDefaults() is called afterwards.

	Returns a list with one entry per paragraph, each entry being the list
	of error strings produced by _grammar_error_repr() for that paragraph.
	"""
	setOptions(voikko, params)
	collected = [
		[_grammar_error_repr(found) for found in voikko.grammarErrors(text, "fi")]
		for text in paragraphlist
	]
	setDefaults(voikko)
	return collected

def _grammar_error_repr(grammar_error):
	return '[code=%i, level=0, stpos=%i, len=%i, suggs={%s}]' % (
	    grammar_error.errorCode,
	    grammar_error.startPos,
	    grammar_error.errorLen,
	    ','.join('"%s"' % suggestion for suggestion in grammar_error.suggestions)
	)

def shell(cwd, command):
	"""Run a command line through the shell in the given working directory.

	Returns True when the command exits with status 0. Otherwise a message
	naming the command and the directory is written to stderr and False is
	returned.
	"""
	exit_status = subprocess.call(command, cwd = cwd, shell = True)
	if exit_status == 0:
		return True
	sys.stderr.write("Shell command '%s' failed, working directory was '%s'\n" % (command, cwd))
	return False

def build(configMap):
	"""Run the build command and the optional install command of a test suite.

	configMap must contain "SOURCEDIR" and "BUILDCMD"; "INSTALLCMD" is run
	as well when present. Both commands are executed in SOURCEDIR. If a
	command fails, a message is written to stderr and the process exits
	with status 1.
	"""
	def _run_or_exit(commandKey, complaint):
		# Stop the whole test run as soon as one step fails.
		if not shell(configMap["SOURCEDIR"], configMap[commandKey]):
			sys.stderr.write(complaint)
			sys.exit(1)

	_run_or_exit("BUILDCMD", "Failed to build the dictionary.\n")
	if "INSTALLCMD" in configMap:
		_run_or_exit("INSTALLCMD", "Failed to install the dictionary.\n")

# Run the spelling tests
def spell_tests(voikko, testFile):
	"""Run the spelling tests listed in testFile.

	File format, after whitespace and comments have been stripped:
	  [options]  starts a section; defaults are restored and the text
	             between the brackets is applied with setOptions()
	  word       the word must be accepted by voikko.spell()
	  !word      the word must be rejected by voikko.spell()

	Returns True if testFile is not readable or if all tests pass, and
	False if at least one test fails. An exception raised by voikko.spell()
	is re-raised after the offending word has been written to stderr.
	"""
	if not os.access(testFile, os.R_OK):
		return True
	testcount = 0
	failcount = 0
	section = "(none)"
	# The with-statement closes the file even when spell() raises.
	with codecs.open(testFile, "r", ENCODING) as inputfile:
		for linecount, line_orig in enumerate(inputfile, 1):
			line = stripWhitespaceAndComments(line_orig)
			if line.startswith('[') and line.endswith(']'):
				section = line[1:-1]
				setDefaults(voikko)
				setOptions(voikko, section)
				continue
			negativeTest = line.startswith('!')
			word = line[1:] if negativeTest else line
			if len(word) == 0:
				continue
			testcount = testcount + 1
			try:
				wordOk = voikko.spell(word)
			except Exception:
				sys.stderr.write("Exception while trying to spell word '%s'\n" % word)
				raise
			# A test fails when acceptance and the "!" marker agree: an
			# accepted negative word or a rejected positive word.
			if bool(wordOk) == negativeTest:
				print('Spelling test failed at line %i, section "%s":\n  %s' \
					% (linecount, section, line_orig))
				failcount = failcount + 1
	if failcount == 0:
		print(" All %i spelling tests were successful." % testcount)
		return True
	print(" %i out of %i spelling tests failed." % (failcount, testcount))
	return False

# Run the hyphenator tests
def hyphen_tests(voikko, testFile):
	"""Run the hyphenation tests listed in testFile.

	File format, after whitespace and comments have been stripped:
	  [options]          starts a section whose text is applied with
	                     setOptions() before every test in the section
	  word expectation   voikko.hyphenate(word) must equal expectation

	A test line that lacks the expectation is reported and counted as a
	failure instead of aborting the run with an IndexError.

	Returns True if testFile is not readable or if all tests pass, and
	False if at least one test fails.
	"""
	if not os.access(testFile, os.R_OK):
		return True
	testcount = 0
	failcount = 0
	section = ""
	# The with-statement closes the file even when a test raises.
	with codecs.open(testFile, "r", ENCODING) as inputfile:
		for line_orig in inputfile:
			line = stripWhitespaceAndComments(line_orig)
			if line.startswith('[') and line.endswith(']'):
				section = line[1:-1]
				continue
			parts = line.split()
			if len(parts) == 0:
				continue
			testcount = testcount + 1
			if len(parts) < 2:
				print('Hyphenation test failed in section "%s":' % section)
				print('  Malformed test line, expected hyphenation is missing: "%s"' % line)
				failcount = failcount + 1
				continue
			# Options are re-applied for every test so that each one starts
			# from the defaults plus the options of the current section.
			setDefaults(voikko)
			setOptions(voikko, section)
			inputword = parts[0]
			expresult = parts[1]
			result = voikko.hyphenate(inputword)
			failcount = failcount + _check_hyphens(section, expresult, result)
	if failcount == 0:
		print(" All %i hyphenation tests were successful." % testcount)
		return True
	print(" %i out of %i hyphenation tests failed." % (failcount, testcount))
	return False

def read_nonempty(inputfile):
	"""Return the next line that still has content after stripping.

	Lines are read with inputfile.readline() and passed through
	stripWhitespaceAndComments(); lines that become empty are skipped.
	Returns "" when the end of the file is reached.
	"""
	raw = inputfile.readline()
	while raw != "":
		cleaned = stripWhitespaceAndComments(raw)
		if cleaned:
			return cleaned
		raw = inputfile.readline()
	return ""

def stripWhitespaceAndComments(line):
	"""Remove a comment and the surrounding whitespace from a line.

	A line whose first character is "#" is a comment as a whole and yields
	"". Elsewhere a comment starts at the first occurrence of " #" (a space
	followed by "#"); a "#" that is not preceded by a space is kept.
	"""
	if line[:1] == "#":
		return ""
	content, _, _ = line.partition(" #")
	return content.strip()

# Run grammar tests
def grammar_tests(voikko, testFile):
	"""Run the grammar checker tests listed in testFile.

	File format, using only the non-empty lines returned by read_nonempty():
	  {options}    starts a section; the paragraphs collected so far are
	               checked with the options of the previous section
	  paragraph    any other line is a paragraph to be checked
	  [ ... ]      lines starting with "[" directly after a paragraph are
	               the expected error strings for that paragraph

	Returns True if testFile is not readable or if all tests pass, and
	False if at least one test fails.
	"""
	if not os.access(testFile, os.R_OK):
		return True
	testcount = 0
	failcount = 0
	section = ""
	inputpara = []
	expresults = []
	# The with-statement closes the file even when the grammar checker raises.
	with codecs.open(testFile, "r", ENCODING) as inputfile:
		line = read_nonempty(inputfile)
		# read_nonempty() returns "" only at the end of the file.
		while line != '':
			if line.startswith('{') and line.endswith('}'):
				if len(inputpara) > 0:
					results = _check_grammar(inputpara, section, voikko)
					failcount = failcount + _check_gcerrors(inputpara, expresults, results)
					inputpara = []
					expresults = []
				section = line[1:-1]
				line = read_nonempty(inputfile)
				continue
			testcount = testcount + 1
			inputpara.append(line)
			expresult = []
			# Collect the expectation lines that follow the paragraph. The
			# first line that is not an expectation is left in `line` for
			# the next round of the outer loop.
			line = read_nonempty(inputfile)
			while line.startswith('['):
				expresult.append(line)
				line = read_nonempty(inputfile)
			expresults.append(expresult)
	# Check the paragraphs of the last section.
	if len(inputpara) > 0:
		results = _check_grammar(inputpara, section, voikko)
		failcount = failcount + _check_gcerrors(inputpara, expresults, results)
	if failcount == 0:
		print(" All %i grammar tests were successful." % testcount)
		return True
	print(" %i out of %i grammar tests failed." % (failcount, testcount))
	return False

def setDefaults(voikko):
	"""Set the options used by this test program to their baseline values.

	The setters are called in the order listed in the table below.
	"""
	baseline = (
		("setIgnoreDot", False),
		("setIgnoreNumbers", False),
		("setSuggestionStrategy", SuggestionStrategy.TYPO),
		("setAcceptTitlesInGc", False),
		("setAcceptUnfinishedParagraphsInGc", False),
		("setAcceptBulletedListsInGc", False),
		("setNoUglyHyphenation", False),
		("setHyphenateUnknownWords", True),
		("setMinHyphenatedWordLength", 2),
	)
	for setterName, value in baseline:
		getattr(voikko, setterName)(value)

def setOptions(voikko, section):
	"""Apply the options written in a section header of a test file.

	section is split at single spaces and every non-empty piece is evaluated
	as a Python expression. The expressions are resolved against this
	function's local names, so they can refer to the `voikko` parameter.
	"""
	# NOTE(review): eval executes arbitrary code taken from the test files;
	# those files must be trusted. eval has to stay directly in this function
	# body so that `voikko` remains visible to the expressions.
	for expression in filter(None, section.split(" ")):
		eval(expression)

# Run suggestion tests
def suggestion_tests(voikko, testFile):
	"""Run the suggestion tests listed in testFile.

	File format, after whitespace and comments have been stripped:
	  [options]   starts a section whose text is applied with setOptions()
	              for every test in the section
	  word<TAB>expectation[<TAB>expectation...]
	              an expectation starting with "!" must not be among the
	              suggestions for word; any other expectation must be among
	              them, and such expectations must occur in the listed order

	Lines without a tab-separated expectation are skipped.

	Returns True if testFile is not readable or if no expectation fails,
	and False otherwise.
	"""
	if not os.access(testFile, os.R_OK):
		return True
	testcount = 0
	failcount = 0
	section = ""
	# The with-statement closes the file even when a test raises.
	with codecs.open(testFile, "r", ENCODING) as inputfile:
		for line_orig in inputfile:
			line = stripWhitespaceAndComments(line_orig)
			if line.startswith('[') and line.endswith(']'):
				section = line[1:-1]
				continue
			parts = line.split('\t')
			if len(parts) < 2:
				continue
			testcount = testcount + 1
			setOptions(voikko, section)
			suggestions = voikko.suggest(parts[0])
			setDefaults(voikko)
			for expresult in parts[1:]:
				if expresult.startswith('!'):
					if expresult[1:] in suggestions:
						print('Suggestion test failed for "%s" in section [%s]: ' \
						'"%s" not expected.' % (parts[0], section, expresult[1:]))
						failcount = failcount + 1
				elif expresult not in suggestions:
					print('Suggestion test failed for "%s" in section [%s]: ' \
					'"%s" expected.' % (parts[0], section, expresult))
					failcount = failcount + 1
				else:
					# Keep only the suggestions after the one just matched,
					# so later expectations must appear later in the list.
					i = suggestions.index(expresult)
					suggestions = suggestions[i+1:]
	if failcount == 0:
		print(" All %i suggestion tests were successful." % testcount)
		return True
	print(" %i failures in %i suggestion tests." % (failcount, testcount))
	return False

def valuesMatch(expected, actual):
	"""Tell whether an actual attribute value satisfies an expectation.

	If expected is a tuple, it is treated as an inclusive (lower, upper)
	range and actual is converted with float() before the comparison.
	Any other expected value must be equal to actual.
	"""
	if isinstance(expected, tuple):
		return expected[0] <= float(actual) <= expected[1]
	return expected == actual

def containsMorphoAnalysis(expectedAnalysis, actualAnalyses):
	"""Check that some actual analysis satisfies an expected analysis.

	An actual analysis satisfies the expectation when it contains every
	expected key and valuesMatch() accepts the value stored under it.

	Returns None if such an analysis exists, otherwise a message describing
	the expected analysis and the actual analyses.
	"""
	def satisfies(candidate):
		return all(
			name in candidate and valuesMatch(wanted, candidate[name])
			for (name, wanted) in expectedAnalysis.items()
		)

	if any(satisfies(candidate) for candidate in actualAnalyses):
		return None
	return "Did not produce expected analysis:\n expected=%s\n actual=%s" \
	       % (expectedAnalysis, actualAnalyses)

def diffMorphoAnalyses(expectedAnalyses, expectedAnalysesCount,
                       forbiddenKeys, forbiddenValues, actualAnalyses):
	"""Compare the actual analyses of a word with the expectations.

	The checks run in this order and the first violation is reported:
	the number of analyses (skipped when expectedAnalysesCount is None),
	the expected analyses, the forbidden keys and the forbidden values.

	Returns None when every check passes, otherwise a message describing
	the first violation.
	"""
	if expectedAnalysesCount is not None:
		if len(actualAnalyses) != expectedAnalysesCount:
			return "Expected %i analyses, got %i." % \
			       (expectedAnalysesCount, len(actualAnalyses))
	for wanted in expectedAnalyses:
		problem = containsMorphoAnalysis(wanted, actualAnalyses)
		if problem is not None:
			return problem
	for banned in forbiddenKeys:
		if any(banned in analysis for analysis in actualAnalyses):
			return "Analysis contained forbidden key %s." % banned
	for banned in forbiddenValues:
		if any(banned in analysis.values() for analysis in actualAnalyses):
			return "Analysis contained forbidden value %s." % banned
	return None

# Run morphological analysis tests
def morpho_tests(voikko, testFile):
	"""Run the morphological analysis tests listed in testFile.

	File format, after whitespace and comments have been stripped:
	  word:W        starts a new test; W is analysed with voikko.analyze()
	  expected:N    the word must have exactly N analyses
	  !=VALUE       no analysis may contain VALUE as a value
	  !KEY          no analysis may contain KEY
	  analysis...   starts the description of another expected analysis
	  KEY=VALUE     the expected analysis must have VALUE under KEY
	  KEY∈LOW,HIGH  the value under KEY must lie in the given range

	The expectations of a word are checked when the next "word:" line or
	the end of the file is reached.

	Returns True if testFile is not readable or if all tests pass. Returns
	False if at least one test fails or, immediately, when a line matches
	none of the forms above.
	"""
	if not os.access(testFile, os.R_OK):
		return True
	testcount = 0
	failcount = 0
	word = None
	expectedAnalysesCount = None
	expectedAnalyses = []
	forbiddenKeys = []
	forbiddenValues = []
	currentExpected = {}
	actualAnalyses = []
	# The with-statement closes the file on every exit path, including the
	# early return for an invalid line and exceptions raised by a test.
	with codecs.open(testFile, "r", ENCODING) as inputfile:
		for line_orig in inputfile:
			line = stripWhitespaceAndComments(line_orig)
			if line == '':
				continue
			elif line.startswith('word:'):
				testcount = testcount + 1
				# Check the expectations collected for the previous word.
				# Before the first word nothing has been collected and the
				# check passes trivially.
				if currentExpected != {}:
					expectedAnalyses.append(currentExpected)
				error = diffMorphoAnalyses(expectedAnalyses,
				        expectedAnalysesCount, forbiddenKeys, forbiddenValues, actualAnalyses)
				if error is not None:
					print('Morphological test failed for word "%s":\n%s\n' \
						% (word, error))
					failcount = failcount + 1
				word = line[5:]
				actualAnalyses = voikko.analyze(word)
				expectedAnalyses = []
				forbiddenKeys = []
				forbiddenValues = []
				expectedAnalysesCount = None
				currentExpected = {}
			elif line.startswith('expected:'):
				expectedAnalysesCount = int(line[9:])
			elif line.startswith('!='):
				# Must be tested before the plain "!" prefix below.
				forbiddenValues.append(line[2:])
			elif line.startswith('!'):
				forbiddenKeys.append(line[1:])
			elif line.startswith('analysis'):
				expectedAnalyses.append(currentExpected)
				currentExpected = {}
			elif '=' in line:
				key, _, value = line.partition('=')
				currentExpected[key] = value
			elif '∈' in line:
				key, _, rangeText = line.partition('∈')
				rangeStr = rangeText.split(',')
				# Stored as a tuple, which valuesMatch() treats as a range.
				currentExpected[key] = (float(rangeStr[0]), float(rangeStr[1]))
			else:
				print("Invalid expectation " + line)
				return False
	# Check the expectations of the last word in the file.
	if currentExpected != {}:
		expectedAnalyses.append(currentExpected)
	error = diffMorphoAnalyses(expectedAnalyses,
	        expectedAnalysesCount, forbiddenKeys, forbiddenValues, actualAnalyses)
	if error is not None:
		print('Morphological test failed for word "%s":\n%s\n' \
			% (word, error))
		failcount = failcount + 1
	if failcount == 0:
		print(" All %i morphological tests were successful." % testcount)
		return True
	print(" %i out of %i morphological tests failed." % (failcount, testcount))
	return False

# Run tokenizer tests
def tokenizer_tests(voikko, testFile):
	"""Run the tokenizer tests listed in testFile.

	The non-empty lines of the file (after whitespace and comments have been
	stripped) are read in pairs: an input string followed by the expected
	repr() of voikko.tokens(input string).

	Returns True if testFile is not readable or if all tests pass, and
	False if at least one test fails.
	"""
	if not os.access(testFile, os.R_OK):
		return True
	testcount = 0
	failcount = 0
	inputString = None
	# The with-statement closes the file even when the tokenizer raises.
	with codecs.open(testFile, "r", ENCODING) as inputfile:
		for line_orig in inputfile:
			line = stripWhitespaceAndComments(line_orig)
			if line == '':
				continue
			if inputString is None:
				# First line of a pair: remember the input.
				# NOTE(review): an input line that is not followed by an
				# expectation line before the end of the file is counted as
				# a test but never checked.
				inputString = line
				testcount = testcount + 1
				continue
			# line should contain tokenization results
			expectedTokens = line
			actualTokens = repr(voikko.tokens(inputString))
			inputString = None
			if expectedTokens != actualTokens:
				print("Tokenizer test failed:")
				print(" Expected " + expectedTokens)
				print(" Actual   " + actualTokens)
				failcount = failcount + 1
	if failcount == 0:
		print(" All %i tokenizer tests were successful." % testcount)
		return True
	print(" %i out of %i tokenizer tests failed." % (failcount, testcount))
	return False

# Run sentence tests
def sentence_tests(voikko, testFile):
	"""Run the sentence splitting tests listed in testFile.

	The non-empty lines of the file (after whitespace and comments have been
	stripped) are read in pairs: an input string followed by the expected
	repr() of voikko.sentences(input string).

	Returns True if testFile is not readable or if all tests pass, and
	False if at least one test fails.
	"""
	if not os.access(testFile, os.R_OK):
		return True
	testcount = 0
	failcount = 0
	inputString = None
	# The with-statement closes the file even when sentence splitting raises.
	with codecs.open(testFile, "r", ENCODING) as inputfile:
		for line_orig in inputfile:
			line = stripWhitespaceAndComments(line_orig)
			if line == '':
				continue
			if inputString is None:
				# First line of a pair: remember the input.
				# NOTE(review): an input line that is not followed by an
				# expectation line before the end of the file is counted as
				# a test but never checked.
				inputString = line
				testcount = testcount + 1
				continue
			# line should contain sentence splitting results
			expectedSentences = line
			actualSentences = repr(voikko.sentences(inputString))
			inputString = None
			if expectedSentences != actualSentences:
				print("Sentence splitting test failed:")
				print(" Expected " + expectedSentences)
				print(" Actual   " + actualSentences)
				failcount = failcount + 1
	if failcount == 0:
		print(" All %i sentence splitting tests were successful." % testcount)
		return True
	print(" %i out of %i sentence splitting tests failed." % (failcount, testcount))
	return False

# START OF MAIN PROGRAM

# Read command line options
(opts, args) = ([], [])
try:
	# Only long options are accepted; an unknown option prints the usage text.
	(opts, args) = getopt.getopt(sys.argv[1:], "", ["base", "current", "compare"])
except getopt.GetoptError:
	sys.stderr.write("Usage: voikkotest           (run only developer tests)\n")
	sys.stderr.write("Usage: voikkotest --base    (create baseline from current version)\n")
	sys.stderr.write("Usage: voikkotest --current (create new 'current' for comparison)\n")
	sys.stderr.write("Usage: voikkotest --compare (compare current version against the baseline)\n")
	sys.exit(1)

# --base: pipe the word list through voikkospell, using BASE as the "-p"
# argument, and store the output lines starting with "C:" in BASE_CORRECT.
if "--base" in [opt[0] for opt in opts]:
	sys.stderr.write("Running voikkospell on full wordlist...\n")
	os.system('bash -c \'time zcat "%s" | "%s" -d %s -j %i -p "%s" | grep "^C:" > "%s"\'' % \
	          (WORDLIST, SPELLCMD, CURRENT_TESTMOR, THREADS, BASE, BASE_CORRECT))
	sys.exit(0)

# --current: run the spelling tests first, then pipe the word list through
# voikkospell (without "-p"), store the "C:" lines in CURRENT_CORRECT and
# report how many lines differ from BASE_CORRECT.
if "--current" in [opt[0] for opt in opts]:
	voikko = Voikko(CURRENT_TESTMOR)
	if not spell_tests(voikko, COREVOIKKO + "/tests/voikkotest/" + CURRENT_TESTMOR + "/spell.txt"):
		sys.stderr.write("Some of the basic tests failed, fix this before using --current again!\n")
		sys.exit(1)
	voikko.terminate()
	sys.stderr.write("Running voikkospell on full wordlist...\n")
	os.system('bash -c \'time zcat "%s" | "%s" -d %s -j %i | grep "^C:" > "%s"\'' % \
	          (WORDLIST, SPELLCMD, CURRENT_TESTMOR, THREADS, CURRENT_CORRECT))
	# Count the added ("+C:") and removed ("-C:") lines of the unified diff.
	diffcmd = 'diff -U0 "%s" "%s" | ' % (BASE_CORRECT, CURRENT_CORRECT)
	new_accept = int(os.popen('%s grep ^+C: | wc -l' % diffcmd).read())
	old_accept = int(os.popen('%s grep ^-C: | wc -l' % diffcmd).read())
	if new_accept == 0 and old_accept == 0:
		sys.stderr.write("No changes\n")
		sys.exit(0)
	sys.stderr.write("%s previously rejected strings are now accepted.\n" % new_accept)
	sys.stderr.write("%s previously accepted strings are now rejected.\n" % old_accept)
	# NOTE(review): there is no exit here, so when --current found changes
	# execution continues with the statement below and, unless --compare was
	# also given, runs the developer tests in its else branch. Confirm
	# whether this is intended.

# --compare: run the diff viewer on the two result files.
if "--compare" in [opt[0] for opt in opts]:
	os.system(DIFFVIEWCMD % (BASE_CORRECT, CURRENT_CORRECT))

# Otherwise: run the developer tests of every suite under TESTFILES and then
# the compatibility tests.
else:
	# Names that the expressions in config.txt may refer to.
	globalsMap = {}
	globalsMap["COREVOIKKO"] = COREVOIKKO
	globalsMap["BUILDDIR"] = BUILDDIR
	globalsMap["INSTALLDIR"] = INSTALLDIR
	globalsMap["BASEINSTALLDIR"] = BASE
	
	for subdir in os.listdir(TESTFILES):
		testdir = TESTFILES + "/" + subdir
		# A suite needs a readable config.txt and is skipped when a readable
		# file named skip.this exists in its directory.
		if not os.access(testdir + "/config.txt", os.R_OK):
			continue
		if os.access(testdir + "/skip.this", os.R_OK):
			continue
		print("Starting test suite under", testdir)
		configFile = codecs.open(testdir + "/config.txt", "r", ENCODING)
		configMap = {}
		while True:
			line = configFile.readline()
			if line == '':
				break
			line = stripWhitespaceAndComments(line)
			if line == '':
				continue
			# Each remaining line has the form NAME = EXPRESSION.
			# NOTE(review): for a line without "=", find() returns -1, so the
			# name becomes the line without its last character and the whole
			# line is evaluated as the expression.
			separatorPos = line.find("=")
			varName = line[:separatorPos].strip()
			varExpression = line[separatorPos+1:].strip()
			# The expression is evaluated as Python code with globalsMap as
			# its global namespace.
			# NOTE(review): eval executes arbitrary code from config.txt; the
			# file must be trusted.
			varValue = eval(varExpression, globalsMap)
			configMap[varName] = varValue
		configFile.close()
		build(configMap)
		print(" Build complete, starting tests")
		languageTag = configMap["LANGUAGE"]
		try:
			voikko = Voikko(language=languageTag, path=INSTALLDIR)
		except VoikkoException as e:
			print(("ERROR initializing libvoikko for language '%s' in '%s':" % (languageTag, INSTALLDIR)), e.args)
			sys.exit(1)
		# The test call is the left operand of "and", so every test set runs
		# even after an earlier one has failed.
		ok = True
		ok = spell_tests(voikko, testdir + "/spell.txt") and ok
		ok = suggestion_tests(voikko, testdir + "/suggest.txt") and ok
		ok = morpho_tests(voikko, testdir + "/morpho.txt") and ok
		ok = hyphen_tests(voikko, testdir + "/hyphen.txt") and ok
		ok = grammar_tests(voikko, testdir + "/grammar.txt") and ok
		ok = tokenizer_tests(voikko, testdir + "/tokenizer.txt") and ok
		ok = sentence_tests(voikko, testdir + "/sentence.txt") and ok
		voikko.terminate()
		# Stop at the first suite with a failure; later suites are not run.
		if not ok:
			print("FAILED!")
			sys.exit(1)
	
	runCompatibilityTests()
