#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright 2011 Harri Pitkänen (hatapitk@iki.fi)
# Program for analyzing readability of text.
# This program requires Python and the Python module of libvoikko
# (libvoikko 3.0 or later).

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

# References:
# http://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_test
# http://media.tkk.fi/GTTS/Suomi/dt&raportit/DI_J_Haataja.pdf

import sys
from locale import getpreferredencoding
from libvoikko import Voikko
from voikkostatistics import getStatistics

# Command-line entry point: read text from stdin, analyze it with Voikko and
# print readability statistics to stdout.
#
# Every print call below takes exactly one pre-formatted string. That form
# produces the same output whether it is parsed as a Python 2 print statement
# or as a Python 3 print function call.

if '--help' in sys.argv:
	print("Usage:")
	print("%s [language]" % sys.argv[0])
	print("Calculate readability statistics for text read from stdin.")
	print(" language: BCP 47 language tag of text language, default is fi")
	sys.exit(0)

# Language of the analyzed text; the first command-line argument overrides
# the default.
language = u"fi"
if len(sys.argv) > 1:
	language = sys.argv[1]
	# On Python 2 sys.argv holds byte strings; decode so that the language is
	# text no matter whether it came from the default or the command line.
	if isinstance(language, bytes):
		language = language.decode(getpreferredencoding())

voikko = Voikko(language)
try:
	voikko.setNoUglyHyphenation(False)
	voikko.setHyphenateUnknownWords(True)

	# Python 2 returns raw bytes from stdin, Python 3 returns already decoded
	# text; only the former needs decoding.
	allText = sys.stdin.read()
	if isinstance(allText, bytes):
		allText = allText.decode(getpreferredencoding())

	stats = getStatistics(allText, voikko)
finally:
	# Release the Voikko instance even if reading the input or computing the
	# statistics fails.
	voikko.terminate()

# (label, value) pairs in output order; each pair is printed on its own line
# as "label value".
report = [
	(u"Language used for processing:", language),
	(u"Number of sentences:", stats[u"SENTENCES"]),
	(u"Number of words:", stats[u"WORDS"]),
	(u"Number of syllables:", stats[u"SYLLABLES"]),
	(u"Number of characters (without punctuation):", stats[u"LETTERS"]),
	(u"Number of characters (with punctuation):",
		stats[u"LETTERS"] + stats[u"PUNCTUATION"]),
	(u"Flesch Reading Ease:", stats[u"FLESCH_READING_EASE"]),
	(u"Flesch-Kincaid Grade Level:", stats[u"FLESCH_KINCAID_GRADE_LEVEL"]),
	(u"Wiion yksinkertainen luokkataso (1-12):", stats[u"WIIO_SIMPLE"]),
]
for label, value in report:
	print(u"%s %s" % (label, value))
