#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Copyright 2008 - 2015 Harri Pitkänen (hatapitk@iki.fi)
# Script for automating the build of multiple dictionary variants for
# Voikko. This script should be run in the main directory of the
# voikko-fi source package.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

import codecs
import html
from datetime import datetime
from os import access, F_OK
from os import makedirs
from os import waitpid
from shutil import copyfile, move
from subprocess import getoutput
from subprocess import Popen, PIPE

def runCmd(cmd):
	"""Run a shell command and abort the script if it fails.

	cmd is a single string handed to the shell (shell=True), so pipes,
	globs and "&&" chains in it are interpreted by the shell.

	Returns None when the command exits with status 0. On any other exit
	status an error message is printed and SystemExit(1) is raised.
	"""
	p = Popen(cmd, shell=True)
	# Wait through the Popen object itself so that it records the child's
	# exit; reaping the pid with os.waitpid() would leave the object
	# believing the child is still running.
	if p.wait() != 0:
		print("Error while executing command: " + cmd)
		# Raise SystemExit directly instead of calling exit(), which is a
		# helper injected by the site module and not always available.
		raise SystemExit(1)

def indexHeader(indexFile):
	"""Write the opening part of the HTML index page to indexFile.

	Emits the <html>/<head> section with the inline stylesheet, opens
	<body>, and writes the page heading and the introductory paragraph.
	indexFile only needs a write() method accepting str.
	"""
	fragments = (
		# Document head and stylesheet
		"<html><head><title>Voikon sanastoja</title>",
		"<style type='text/css'>",
		".small {font-size:0.8em; font-style:italic;}",
		"table {border-collapse:collapse;}",
		"table td {border:1px solid black;}",
		"</style></head>",
		# Heading and introduction
		"<body><h1>Voikon sanastoja (sanastoformaatin versio 5)</h1>",
		"<p>Tämä sivu ja sivulla olevat Voikon sanaston versiot ",
		"on generoitu automaattisesti ohjelmalla <kbd>voikko-build-dicts</kbd>. ",
		"<strong>Sanastoformaatti 5 toimii libvoikon version 4.0 ja uudempien kanssa.</strong></p>",
	)
	for fragment in fragments:
		indexFile.write(fragment)

def indexDict(indexFile, dict):
	"""Write one HTML table row describing a dictionary variant.

	indexFile needs a write() method accepting str. dict is an object
	with the attributes variant, nameFi, wordRecords and idSuffix. The
	row has two cells: the variant's identifier, Finnish name and word
	record count, and a link to the archive "dict<idSuffix>.zip".
	"""
	dictFile = "dict" + dict.idSuffix + ".zip"
	indexFile.write("<tr><td>Tunniste: <kbd>" + dict.variant + "</kbd><br />")
	# Line break must be "<br />"; "</br />" is not a valid tag.
	indexFile.write("Sisältö: <kbd>" + dict.nameFi + "</kbd><br />")
	indexFile.write("Sanatietueita Joukahaisesta: <kbd>" + repr(dict.wordRecords) + "</kbd></td>")
	indexFile.write("<td><a href='" + dictFile + "'>" + dictFile + "</a></td></tr>")

def indexFooter(indexFile):
	"""Write the closing </body> and </html> tags to indexFile."""
	closingTags = "</body>" + "</html>"
	indexFile.write(closingTags)

# === Define dictionary properties ===

class Voikkodict:
	"""Properties of one dictionary variant built by this script.

	The class attributes below are only defaults; each instance is
	expected to overwrite them with its own values.
	"""
	# Text properties, all empty by default:
	#   idSuffix  - appended to "dict" in directory and archive names
	#   variant   - passed to make as VOIKKO_VARIANT
	#   nameFi    - Finnish display name (also used as VOIKKO_DESCRIPTION)
	#   nameEn    - English display name
	#   smOptions - extra arguments appended to the make command line
	idSuffix = variant = nameFi = nameEn = smOptions = ""
	# Number of word records counted after the variant has been built
	wordRecords = 0

# All dictionary variants to build, in build order
allDicts = []

def _registerDict(idSuffix, variant, nameFi, nameEn, smOptions):
	"""Create a Voikkodict with the given properties, append it to allDicts and return it."""
	entry = Voikkodict()
	entry.idSuffix = idSuffix
	entry.variant = variant
	entry.nameFi = nameFi
	entry.nameEn = nameEn
	entry.smOptions = smOptions
	allDicts.append(entry)
	return entry

# Basic vocabulary
d = _registerDict(
	idSuffix="",
	variant="standard",
	nameFi="suomi (perussanasto)",
	nameEn="Finnish (basic vocabulary)",
	smOptions="",
)

# Scientific vocabulary
d = _registerDict(
	idSuffix="-erityis",
	variant="science",
	nameFi="suomi (matematiikan, fysiikan, kemian, biologian, maantieteen, geologian, lääketieteen, kasvatustieteen ja tietotekniikan sanastot)",
	nameEn="Finnish (scientific vocabulary)",
	smOptions='GENLEX_OPTS="--extra-usage=it,medicine,science,nature,education"',
)

# Dialects, old and rare words
d = _registerDict(
	idSuffix="-murre",
	variant="dialects",
	nameFi="suomi (murteellisten, vanhojen ja harvinaisten sanojen sanasto)",
	nameEn="Finnish (dialects)",
	smOptions='GENLEX_OPTS="--style=dialect,old,international,foreign,inappropriate --min-frequency=10" VANHAT_MUODOT=yes',
)

# Education vocabulary
d = _registerDict(
	idSuffix="-kasvatus",
	variant="education",
	nameFi="suomi (kasvatustieteen sanasto)",
	nameEn="Finnish (education vocabulary)",
	smOptions='GENLEX_OPTS="--extra-usage=education"',
)

# Medical vocabulary
d = _registerDict(
	idSuffix="-laaketiede",
	variant="medicine",
	nameFi="suomi (matematiikan, fysiikan, kemian, biologian, maantieteen, geologian ja lääketieteen sanastot)",
	nameEn="Finnish (medical vocabulary)",
	smOptions='GENLEX_OPTS="--extra-usage=medicine,science,nature"',
)

# Vocabulary for spell checking po files
d = _registerDict(
	idSuffix="-po",
	variant="po",
	nameFi="suomi (po-tiedostojen oikolukusanasto)",
	nameEn="Finnish (po file spelling vocabulary)",
	smOptions='GENLEX_OPTS="--extra-usage=it,science"',
)

# Very extensive vocabulary for morphological analysis
d = _registerDict(
	idSuffix="-morpho",
	variant="morpho",
	nameFi="suomi, erittäin laaja sanasto (mukana myös morfologisessa analyysissä tarvittava lisäinformaatio)",
	nameEn="Finnish, very extensive vocabulary (with additional information needed in morphological analysis)",
	smOptions='GENLEX_OPTS="--extra-usage=it,science,nature,medicine,education,orgname --style=dialect,old,international,foreign,inappropriate --min-frequency=10" VANHAT_MUODOT=yes VOIKKO_DEBUG=yes',
)

# Same as above, with links to Joukahainen (--sourceid)
d = _registerDict(
	idSuffix="-morphoid",
	variant="morphoid",
	nameFi="suomi, erittäin laaja sanasto (mukana myös morfologisessa analyysissä tarvittava lisäinformaatio ja linkitys Joukahaiseen)",
	nameEn="Finnish, very extensive vocabulary (with additional information needed in morphological analysis and links to Joukahainen)",
	smOptions='GENLEX_OPTS="--extra-usage=it,science,nature,medicine,education,orgname --style=dialect,old,international,foreign,inappropriate --min-frequency=10 --sourceid" VANHAT_MUODOT=yes VOIKKO_DEBUG=yes',
)

# Directory that receives every build product of this script
WORK_DIR = "build-v5"

# Path of the generated HTML index page inside WORK_DIR
INDEX_FILE_NAME = "/".join((WORK_DIR, "index.html"))

# === Initialization ===

# Bring the working copy up to date before building anything.
runCmd("git pull -r")
# Reference point for the elapsed time reported at the end of the index page.
startTime = datetime.now()
# The index page is the first file written into WORK_DIR, so make sure the
# directory exists instead of failing on a fresh checkout.
makedirs(WORK_DIR, exist_ok=True)
# Stays open for the whole run; the later phases append to it and the
# finalisation step closes it.
indexFile = codecs.open(INDEX_FILE_NAME, "w", "UTF-8")
indexHeader(indexFile)

# === Phase 1: build the dictionaries ===

# Each variant is installed into WORK_DIR/<DICT_DIR_PREFIX><idSuffix>
DICT_DIR_PREFIX = "dict"

for voikkoDict in allDicts:
	# Start every variant from a clean tree.
	runCmd('make clean')
	installCmd = (
		'make vvfst-install DESTDIR="%s/%s%s" %s VOIKKO_VARIANT=%s'
		' VOIKKO_DESCRIPTION="%s" SM_PATCHINFO="Development snapshot"'
		% (WORK_DIR, DICT_DIR_PREFIX, voikkoDict.idSuffix,
		   voikkoDict.smOptions, voikkoDict.variant, voikkoDict.nameFi)
	)
	runCmd(installCmd)
	# Count the lines starting with "[" in the generated
	# vvfst/joukahainen-*.lexc files; that count is stored as the number
	# of word records for this variant.
	counter = Popen("cat vvfst/joukahainen-*.lexc | grep '^[[]' | wc -l",
	                shell=True, stdout=PIPE, close_fds=True)
	countOutput = counter.communicate()[0]
	voikkoDict.wordRecords = int(countOutput)

# === Phase 2: build oxt packages ===

OXT_TEMPLATE_DIR = "oxt-template"
OXT_PACKAGE_DIR = "oxt"
# Files copied from each built dictionary into the oxt template.
# Every entry needs its own comma: adjacent string literals are silently
# concatenated into a single (non-existent) file name.
DICT_FILES_TO_COPY = [
	'autocorr.vfst',
	'index.txt',
	'mor.vfst',
]
OXT_DESCRIPTION = "description.xml"

oxtdir = WORK_DIR + '/' + OXT_TEMPLATE_DIR + '/'
# oxt packages are only built when the template's description file is
# present in the work directory.
if access(oxtdir + OXT_DESCRIPTION, F_OK):
	descPath = oxtdir + OXT_DESCRIPTION
	for voikkoDict in allDicts:
		sourceDir = WORK_DIR + '/' + DICT_DIR_PREFIX + voikkoDict.idSuffix + '/'
		for dictFile in DICT_FILES_TO_COPY:
			copyfile(sourceDir + dictFile, oxtdir + dictFile)
		# Rewrite description.xml into a ".new" file: every line is copied,
		# except that the two lines following a line containing
		# "<display-name>" are replaced with this variant's names.
		# The with blocks close both files even if rewriting fails.
		with codecs.open(descPath, "r", "UTF-8") as descIn:
			with codecs.open(descPath + ".new", "w", "UTF-8") as descOut:
				line = descIn.readline()
				while line != "":
					descOut.write(line)
					if "<display-name>" in line:
						# Drop the template's English name line
						descIn.readline()
						descOut.write('    <name lang="en-US">Voikko - %s</name>\n' % voikkoDict.nameEn)
						# Drop the template's Finnish name line
						descIn.readline()
						descOut.write('    <name lang="fi">Voikko - %s</name>\n' % voikkoDict.nameFi)
					line = descIn.readline()
		move(descPath + ".new", descPath)
		# Pack the whole template directory as voikko<idSuffix>.oxt next to it.
		runCmd('cd ' + oxtdir + ' && zip -r ../voikko' + voikkoDict.idSuffix + '.oxt *')

# === Phase 3: build zip packages ===

# Table of downloadable dictionaries: one zip archive and one row per variant
indexFile.write("<h2>Sanastot</h2>")
indexFile.write("<table>")
for voikkoDict in allDicts:
	baseName = DICT_DIR_PREFIX + voikkoDict.idSuffix
	# Zip the contents of the variant's directory into WORK_DIR/<baseName>.zip
	runCmd('cd %s/%s && zip -r ../%s.zip *' % (WORK_DIR, baseName, baseName))
	indexDict(indexFile, voikkoDict)
indexFile.write("</table>")

# === Phase 4: build source package ===

# Build the snapshot source tarball from a clean tree, copy it into
# WORK_DIR and clean up again; then link it from the index page.
snapshotTarball = 'voikko-fi-snapshot.tar.gz'
for command in (
	'make clean',
	'make dist-gzip SM_VERSION=snapshot',
	'cp ' + snapshotTarball + ' ' + WORK_DIR,
	'make clean SM_VERSION=snapshot',
):
	runCmd(command)
indexFile.write("<h2>Lähdekoodi</h2>")
indexFile.write("<p><a href='%s'>%s</a></p>" % (snapshotTarball, snapshotTarball))

# === Finalisation ===

# Version information of the source tree: the latest commit, shown verbatim.
indexFile.write("<h2>Lähdekoodin versiotiedot</h2><pre>")
out = getoutput("git log --format=short HEAD^1..HEAD")
# The log is plain text, not markup: git's short format prints the author
# as "Name <email>" and commit titles are arbitrary, so "<", ">" and "&"
# must be escaped or the browser treats them as HTML.
indexFile.write(html.escape(out))
indexFile.write("</pre><p>Viimeisin päivitys Joukahaisesta")
# Take the "Time of generation:" line from the vocabulary file and strip
# the label, leaving only the timestamp text.
out = getoutput("grep 'Time of generation:' vocabulary/joukahainen.xml | sed -e 's/T.*on://'")
indexFile.write(out)
indexFile.write("</p>")
endTime = datetime.now()
indexFile.write("<p class='small'>Generointi valmis " + endTime.isoformat() + "<br />")
# total_seconds() covers the whole duration; timedelta.seconds alone
# would drop any full days.
elapsedSeconds = int((endTime - startTime).total_seconds())
indexFile.write("Aikaa kului " + repr(elapsedSeconds) + " sekuntia</p>")
indexFooter(indexFile)
indexFile.close()
