Example #1
0
 /// <summary>
 /// Writes a single timestamped, level-tagged log line to the console.
 /// </summary>
 /// <remarks>
 /// The line has the form <c>[MPAligner] [LEVEL] yyyy-MM-dd HH:mm:ss message</c>.
 /// Entries of level ERROR go to standard error; every other level goes to standard output.
 /// </remarks>
 /// <param name="message">Text of the log entry.</param>
 /// <param name="level">Severity of the entry; it is compared against the active threshold.</param>
 /// <param name="conf">
 /// Optional configuration. When supplied, its <c>logLevel</c> is the threshold and a value of
 /// <c>LogLevelType.NONE</c> suppresses all output; when <c>null</c>, <c>confLogLevel</c> is the threshold.
 /// </param>
 public static void Write(string message, LogLevelType level, MPAlignerConfiguration conf=null)
 {
     bool enabled = conf == null
         ? level >= confLogLevel
         : level >= conf.logLevel && conf.logLevel != LogLevelType.NONE;
     if (!enabled)
     {
         return;
     }

     // The invariant culture keeps the timestamp in Gregorian, ASCII-digit form regardless of the
     // machine's regional settings, so log lines stay uniform and parseable.
     string dateStr = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss", System.Globalization.CultureInfo.InvariantCulture);

     // Assemble the whole line first and emit it with ONE call: each call on the console streams is
     // synchronized on its own, so several partial writes could interleave with output from other threads.
     string line = "[MPAligner] [" + level.ToString() + "] " + dateStr + " " + message;
     if (level != LogLevelType.ERROR)
     {
         Console.WriteLine(line);
     }
     else
     {
         Console.Error.WriteLine(line);
     }
 }
Example #2
0
 /// <summary>
 /// Emits one log line of the form <c>[MPAligner] [LEVEL] yyyy-MM-dd HH:mm:ss message</c>.
 /// </summary>
 /// <remarks>
 /// ERROR entries are written to standard error, all other levels to standard output.
 /// </remarks>
 /// <param name="message">Text of the log entry.</param>
 /// <param name="level">Severity of the entry.</param>
 /// <param name="conf">
 /// Optional configuration providing the threshold (<c>logLevel</c>); a threshold of
 /// <c>LogLevelType.NONE</c> disables output. When <c>null</c>, <c>confLogLevel</c> is used instead.
 /// </param>
 public static void Write(string message, LogLevelType level, MPAlignerConfiguration conf = null)
 {
     // Bail out early when the entry is below the active threshold.
     if (conf == null)
     {
         if (level < confLogLevel)
         {
             return;
         }
     }
     else if (conf.logLevel == LogLevelType.NONE || level < conf.logLevel)
     {
         return;
     }

     // Culture-invariant formatting: the timestamp must not change calendar or digits with the host locale.
     string timestamp = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss", System.Globalization.CultureInfo.InvariantCulture);
     string line      = "[MPAligner] [" + level.ToString() + "] " + timestamp + " " + message;

     // A single WriteLine keeps the line intact when several threads log at once; a sequence of
     // partial Write calls could be interleaved with another thread's output.
     var target = level == LogLevelType.ERROR ? Console.Error : Console.Out;
     target.WriteLine(line);
 }
Example #3
0
 /// <summary>
 /// Returns the keys of the configuration's stopword-list dictionary as a new list.
 /// </summary>
 /// <param name="configuration">Configuration whose <c>stopWordListEntryDict</c> keys are copied.</param>
 /// <returns>A fresh list containing every key of <c>stopWordListEntryDict</c>, in enumeration order.</returns>
 public static List<string> GetLangsFromConf(MPAlignerConfiguration configuration)
 {
     // The list constructor enumerates the key collection once and copies each key.
     return new List<string>(configuration.stopWordListEntryDict.Keys);
 }
Example #4
0
        /// <summary>
        /// Reads a serialized configuration from <c>inputFile</c> and copies its settings into this instance.
        /// </summary>
        /// <param name='inputFile'>
        /// Path of the file to read; its content is decoded as UTF-8 and deserialized.
        /// </param>
        public void Load(string inputFile)
        {
            MPAlignerConfiguration loaded = MPFrameworkFunctions.DeserializeString <MPAlignerConfiguration>(
                File.ReadAllText(inputFile, Encoding.UTF8));

            // Take over every setting of the deserialized object, member by member.
            dictConfEntryDict = loaded.dictConfEntryDict;
            mosesPath = loaded.mosesPath;
            translConfEntryDict = loaded.translConfEntryDict;
            keepTrackOfFiles = loaded.keepTrackOfFiles;
            logLevel = loaded.logLevel;
            forceEnDictInterlingua = loaded.forceEnDictInterlingua;
            forceEnTranslitInterlingua = loaded.forceEnTranslitInterlingua;
            outputFormat = loaded.outputFormat;
            excDictEntryDict = loaded.excDictEntryDict;
            allowTrimmedAlignments = loaded.allowTrimmedAlignments;
            stopWordListEntryDict = loaded.stopWordListEntryDict;
            langPairEntryDict = loaded.langPairEntryDict;
            alignmentThreads = loaded.alignmentThreads;
            useMultiThreadedExecution = loaded.useMultiThreadedExecution;
            printTopTrgForSrc = loaded.printTopTrgForSrc;
            concLen = loaded.concLen;
        }
Example #5
0
        /// <summary>
        /// Picks the transliteration configuration entries to use for a source/target language pair.
        /// </summary>
        /// <remarks>
        /// Lookup order:
        /// 1. If <c>forceEnTranslitInterlingua</c> is set and both the "src_en" and "trg_en" entries exist and are
        ///    enabled, those two are selected.
        /// 2. Otherwise the direct "src_trg" and "trg_src" entries are tried independently.
        /// 3. If neither direct entry is usable, whichever of the "src_en" / "trg_en" entries are enabled are selected.
        /// Every step is reported through <c>Log.Write</c>.
        /// </remarks>
        /// <param name="configuration">Configuration holding <c>translConfEntryDict</c> and the interlingua flag.</param>
        /// <param name="srcLang">Source language identifier used to build the dictionary keys.</param>
        /// <param name="trgLang">Target language identifier used to build the dictionary keys.</param>
        /// <param name="srcTranslitConf">Receives the "src_en" entry when interlingua is selected; otherwise <c>null</c>.</param>
        /// <param name="trgTranslitConf">Receives the "trg_en" entry when interlingua is selected; otherwise <c>null</c>.</param>
        /// <param name="srcToTrgTranslitConf">Receives the direct "src_trg" entry when it is enabled; otherwise <c>null</c>.</param>
        /// <param name="trgToSrcTranslitConf">Receives the direct "trg_src" entry when it is enabled; otherwise <c>null</c>.</param>
        /// <returns>
        /// <c>true</c> when EN interlingua entries were selected; <c>false</c> when direct entries were selected
        /// or no usable entry was found.
        /// </returns>
        public static bool GetTranslitConfig(MPAlignerConfiguration configuration, string srcLang, string trgLang, out MPAlignerConfigurationTranslEntry srcTranslitConf, out MPAlignerConfigurationTranslEntry trgTranslitConf, out MPAlignerConfigurationTranslEntry srcToTrgTranslitConf, out MPAlignerConfigurationTranslEntry trgToSrcTranslitConf)
        {
            Log.Write ("Searching for transliteration configurations.",LogLevelType.LIMITED_OUTPUT,configuration);
            srcTranslitConf = null;
            trgTranslitConf = null;
            srcToTrgTranslitConf = null;
            trgToSrcTranslitConf = null;

            string srcEnKey = srcLang+"_en";
            string trgEnKey = trgLang+"_en";
            string directKey = srcLang+"_"+trgLang;
            string reverseKey = trgLang+"_"+srcLang;
            var entries = configuration.translConfEntryDict;

            // Step 1: forced EN interlingua needs BOTH language-to-EN entries to be present and enabled.
            if (configuration.forceEnTranslitInterlingua)
            {
                bool bothEnEntriesUsable = entries.ContainsKey(srcEnKey) && entries.ContainsKey(trgEnKey) && entries[srcEnKey].use && entries[trgEnKey].use;
                if (bothEnEntriesUsable)
                {
                    srcTranslitConf = entries[srcEnKey];
                    trgTranslitConf = entries[trgEnKey];
                    Log.Write ("EN interlingua transliteration loaded for language "+srcLang+": "+ srcTranslitConf.mosesIniPath, LogLevelType.LIMITED_OUTPUT,configuration);
                    Log.Write ("EN interlingua transliteration loaded for language "+trgLang+": "+ trgTranslitConf.mosesIniPath, LogLevelType.LIMITED_OUTPUT,configuration);
                    return true;
                }
                Log.Write ("Cannot force EN interlingua transliteration for the pair "+directKey+" as at least one of the interlingua transliteration configurations is disabled or missing!",LogLevelType.WARNING,configuration);
                Log.Write ("Will try falling back to direct transliteration without the EN interlingua.",LogLevelType.WARNING,configuration);
            }

            // Step 2: direct transliteration, each direction checked on its own.
            bool directFound = false;
            if (!entries.ContainsKey(directKey) || !entries[directKey].use)
            {
                Log.Write ("Direct transliteration for the pair "+directKey+" was not found.",LogLevelType.WARNING,configuration);
            }
            else
            {
                srcToTrgTranslitConf = entries[directKey];
                Log.Write ("Transliteration loaded for language "+srcLang+" into language "+trgLang+": "+ srcToTrgTranslitConf.mosesIniPath, LogLevelType.LIMITED_OUTPUT,configuration);
                directFound = true;
            }

            if (!entries.ContainsKey(reverseKey) || !entries[reverseKey].use)
            {
                Log.Write ("Direct transliteration for the pair "+reverseKey+" was not found.",LogLevelType.WARNING,configuration);
            }
            else
            {
                trgToSrcTranslitConf = entries[reverseKey];
                Log.Write ("Transliteration loaded for language "+trgLang+" into language "+srcLang+": "+ trgToSrcTranslitConf.mosesIniPath, LogLevelType.LIMITED_OUTPUT,configuration);
                directFound = true;
            }

            // At least one direct direction is available: interlingua is not used.
            if (directFound)
            {
                return false;
            }

            // Step 3: no direct entry at all, so accept whichever language-to-EN entries are enabled.
            Log.Write ("Direct transliteration for the pairs "+directKey+" nor "+reverseKey+" were not found. Will try falling back to interlingua transliteration.",LogLevelType.WARNING,configuration);
            bool interlinguaFound = false;
            if (entries.ContainsKey(srcEnKey) && entries[srcEnKey].use)
            {
                srcTranslitConf = entries[srcEnKey];
                interlinguaFound = true;
                Log.Write ("EN interlingua transliteration loaded for language "+srcLang+": "+ srcTranslitConf.mosesIniPath,LogLevelType.LIMITED_OUTPUT,configuration);
            }
            if (entries.ContainsKey(trgEnKey) && entries[trgEnKey].use)
            {
                trgTranslitConf = entries[trgEnKey];
                interlinguaFound = true;
                Log.Write ("EN interlingua transliteration loaded for language "+trgLang+": "+ trgTranslitConf.mosesIniPath,LogLevelType.LIMITED_OUTPUT,configuration);
            }
            if (interlinguaFound)
            {
                return true;
            }

            if (configuration.forceEnTranslitInterlingua)
            {
                Log.Write ("Cannot force EN interlingua transliteration for the pair "+directKey+" as at least one of the interlingua transliteration configurations is disabled or missing!",LogLevelType.WARNING,configuration);
                Log.Write ("The system will be executed without transliteration.",LogLevelType.WARNING,configuration);
            }
            return false;
        }
Example #6
0
 /// <summary>
 /// Reads the language pair specific configuration - term alignment thresholds.
 /// </summary>
 /// <remarks>
 /// When the pair has no entry yet, a default entry is created, stored in
 /// <c>configuration.langPairEntryDict</c> under the key "src_trg", and returned.
 /// </remarks>
 /// <returns>The configured entry for the pair, or the newly registered default entry.</returns>
 /// <param name="srcLang">Source language.</param>
 /// <param name="trgLang">Target language.</param>
 /// <param name="configuration">Configuration whose <c>langPairEntryDict</c> is queried and, on a miss, extended.</param>
 static MPAlignerConfigurationLangPairEntry ReadLangPairConfig(string srcLang, string trgLang, MPAlignerConfiguration configuration)
 {
     string langKey = srcLang + "_" + trgLang;
     if (configuration.langPairEntryDict.ContainsKey (langKey)) {
         // Already configured: return it without allocating a default entry that would be thrown away.
         return configuration.langPairEntryDict [langKey];
     }

     // A default value of 0.6 is usually the lowest value that is still reasonable for the cognate-based
     // overlaps, therefore the thresholds default to 0.6. For different applications the threshold could
     // be raised even higher.
     const double defaultThreshold = 0.6;

     MPAlignerConfigurationLangPairEntry lpeConf = new MPAlignerConfigurationLangPairEntry ();
     lpeConf.srcLang = srcLang;
     lpeConf.trgLang = trgLang;
     lpeConf.finalAlignmentThr = defaultThreshold;
     lpeConf.printThr = defaultThreshold;
     configuration.langPairEntryDict.Add (langKey, lpeConf);
     return lpeConf;
 }
Example #7
0
 /// <summary>
 /// Loads the stopword list configured for a language.
 /// </summary>
 /// <remarks>
 /// Loading is best effort: when no enabled entry exists for the language, or parsing fails, a warning
 /// is logged and <paramref name="stopwordDict"/> is left as an empty dictionary.
 /// </remarks>
 /// <param name="configuration">Configuration whose <c>stopWordListEntryDict</c> is consulted.</param>
 /// <param name="lang">Language key of the stopword list to load.</param>
 /// <param name="stopwordDict">Receives the parsed stopword list, or an empty dictionary when none could be loaded.</param>
 public static void ReadStopwordList(MPAlignerConfiguration configuration, string lang, out Dictionary<string, bool> stopwordDict)
 {
     Log.Write ("Searching for a stopword list for language "+lang+".",LogLevelType.LIMITED_OUTPUT,configuration);
     stopwordDict = new Dictionary<string, bool> ();
     if (!configuration.stopWordListEntryDict.ContainsKey (lang) || !configuration.stopWordListEntryDict[lang].use) {
         Log.Write ("Stopword list for language "+lang+" was not found or is disabled.",LogLevelType.WARNING,configuration);
         return;
     }

     try{
         stopwordDict = StopwordListParser.ParseStopwordList(configuration.stopWordListEntryDict[lang]);
         Log.Write("Stopword list for language "+lang+" loaded: " + configuration.stopWordListEntryDict[lang].path, LogLevelType.LIMITED_OUTPUT,configuration);
     }
     catch (System.Exception ex){
         // If parsing throws, the assignment above never happens, so stopwordDict is still the empty
         // dictionary created earlier. Report the actual cause instead of discarding it.
         Log.Write ("Stopword list for language "+lang+" was not found or is corrupted. Reason: "+ex.Message,LogLevelType.WARNING,configuration);
     }
 }
Example #8
0
 /// <summary>
 /// Loads the exception dictionary configured for a source/target language pair.
 /// </summary>
 /// <remarks>
 /// Loading is best effort: when no enabled entry exists for the key "src_trg", or parsing fails, a
 /// warning is logged and <paramref name="srcToTrgExcDict"/> is left as an empty dictionary.
 /// </remarks>
 /// <param name="configuration">Configuration whose <c>excDictEntryDict</c> is consulted.</param>
 /// <param name="srcLang">Source language; first part of the lookup key.</param>
 /// <param name="trgLang">Target language; second part of the lookup key.</param>
 /// <param name="srcToTrgExcDict">Receives the parsed exception dictionary, or an empty dictionary when none could be loaded.</param>
 public static void ReadExceptionDictionary(MPAlignerConfiguration configuration, string srcLang, string trgLang, out Dictionary<string, Dictionary<string, bool>> srcToTrgExcDict)
 {
     srcToTrgExcDict = new Dictionary<string, Dictionary<string, bool>> ();
     string langKey = srcLang + "_" + trgLang;
     Log.Write ("Searching for an exception dictionary for the language pair "+langKey+".",LogLevelType.LIMITED_OUTPUT,configuration);
     if (!configuration.excDictEntryDict.ContainsKey (langKey) || !configuration.excDictEntryDict[langKey].use) {
         Log.Write ("The exception dictionary for the language pair "+langKey+" was not found or is disabled.",LogLevelType.WARNING,configuration);
         return;
     }

     try
     {
         srcToTrgExcDict = ExceptionDictionaryParser.ParseExceptionDictionary(configuration.excDictEntryDict[langKey]);
         Log.Write("Exception dictionary for the language pair "+langKey+" loaded: " + configuration.excDictEntryDict[langKey].path, LogLevelType.LIMITED_OUTPUT,configuration);
     }
     catch (System.Exception ex){
         // If parsing throws, the assignment above never happens, so srcToTrgExcDict is still the empty
         // dictionary created earlier. Report the actual cause instead of discarding it.
         Log.Write ("The exception dictionary for the language pair "+langKey+" was not found or is corrupted. Reason: "+ex.Message,LogLevelType.WARNING,configuration);
     }
 }
Example #9
0
        /// <summary>
        /// Finds and loads the probabilistic dictionaries to use for a source/target language pair.
        /// </summary>
        /// <remarks>
        /// Lookup order (each failure is logged as a warning and the next option is tried):
        /// 1. If <c>forceEnDictInterlingua</c> is set and both "src_en" and "trg_en" entries exist and are
        ///    enabled, both EN interlingua dictionaries are loaded.
        /// 2. The direct "src_trg" dictionary; the reverse direction comes from "trg_src" when it is enabled
        ///    and readable, otherwise from inverting "src_trg".
        /// 3. The "trg_src" dictionary alone, with the forward direction obtained by inverting it.
        /// 4. Whichever of the "src_en" / "trg_en" dictionaries are enabled and readable.
        /// 5. Nothing: all four outputs are <c>null</c>.
        /// </remarks>
        /// <param name="configuration">Configuration holding <c>dictConfEntryDict</c> and the interlingua flag.</param>
        /// <param name="srcLang">Source language identifier used to build the dictionary keys.</param>
        /// <param name="trgLang">Target language identifier used to build the dictionary keys.</param>
        /// <param name="srcDict">Receives the "src_en" dictionary when interlingua is used; otherwise <c>null</c>.</param>
        /// <param name="trgDict">Receives the "trg_en" dictionary when interlingua is used; otherwise <c>null</c>.</param>
        /// <param name="srcToTrgDict">Receives the source-to-target dictionary when direct dictionaries are used; otherwise <c>null</c>.</param>
        /// <param name="trgToSrcDict">Receives the target-to-source dictionary when direct dictionaries are used; otherwise <c>null</c>.</param>
        /// <returns>
        /// <c>true</c> when EN interlingua dictionaries are used (in the final fallback only one of
        /// <paramref name="srcDict"/> / <paramref name="trgDict"/> may be non-null); <c>false</c> when direct
        /// dictionaries are used or no dictionary could be loaded.
        /// </returns>
        public static bool ReadDictionaries(MPAlignerConfiguration configuration, string srcLang, string trgLang,out Dictionary<string, Dictionary<string, double>> srcDict, out Dictionary<string, Dictionary<string, double>> trgDict, out Dictionary<string, Dictionary<string, double>> srcToTrgDict, out Dictionary<string, Dictionary<string, double>> trgToSrcDict)
        {
            Log.Write ("Searching for and reading dictionaries.",LogLevelType.LIMITED_OUTPUT,configuration);
            srcDict = null;
            trgDict = null;
            srcToTrgDict = null;
            trgToSrcDict = null;
            string srcLangKey = srcLang+"_en";
            string trgLangKey = trgLang+"_en";
            string langKey = srcLang+"_"+trgLang;
            string langKey2 = trgLang+"_"+srcLang;
            //Read dictionaries. If reading fails, log a warning and continue.
            //At first we check if the EN interlingua should be used.
            if (configuration.forceEnDictInterlingua && configuration.dictConfEntryDict.ContainsKey(srcLangKey)&&configuration.dictConfEntryDict.ContainsKey(trgLangKey))
            {
                if (configuration.dictConfEntryDict[srcLangKey].use && configuration.dictConfEntryDict[trgLangKey].use)
                {
                    try{
                        srcDict = ProbabilisticDictionaryParser.ParseDictionary(configuration.dictConfEntryDict[srcLangKey]);
                        trgDict = ProbabilisticDictionaryParser.ParseDictionary(configuration.dictConfEntryDict[trgLangKey]);
                        ProbabilisticDictionaryParser.FilterTopEquivalents(configuration.dictConfEntryDict[srcLangKey],srcDict);
                        ProbabilisticDictionaryParser.FilterTopEquivalents(configuration.dictConfEntryDict[trgLangKey],trgDict);
                        Log.Write (srcLangKey+" dictionary with "+srcDict.Count.ToString()+" "+srcLang+" entries loaded: "+configuration.dictConfEntryDict[srcLangKey].path, LogLevelType.LIMITED_OUTPUT,configuration);
                        Log.Write (trgLangKey+" dictionary with "+trgDict.Count.ToString()+" "+trgLang+" entries loaded: "+configuration.dictConfEntryDict[trgLangKey].path, LogLevelType.LIMITED_OUTPUT,configuration);
                        return true;//Interlingua dictionary used.
                    }
                    catch{
                        //Discard anything that was loaded before the failure so no half-loaded pair leaks out.
                        srcDict = null;
                        trgDict = null;
                        Log.Write ("Cannot force EN interlingua dictionary usage for the pair "+langKey+" as one of the interlingua dictionaries may be missing or corrupt!",LogLevelType.WARNING,configuration);
                        Log.Write ("Will try fallback to direct dictionary without the EN interlingua.",LogLevelType.WARNING,configuration);
                    }
                }
                else
                {
                    Log.Write ("Cannot force EN interlingua dictionary usage for the pair "+langKey+" as at least one of the interlingua dictionaries is disabled!",LogLevelType.WARNING,configuration);
                    Log.Write ("Will try fallback to direct dictionary without the EN interlingua.",LogLevelType.WARNING,configuration);
                }
            }
            //If EN interlingua should not be used or one of the interlingua dictionaries is missing, try loading the direct dictionary.
            if (configuration.dictConfEntryDict.ContainsKey(langKey)&&configuration.dictConfEntryDict[langKey].use)
            {
                srcDict = null;
                trgDict = null;
                try{
                    srcToTrgDict = ProbabilisticDictionaryParser.ParseDictionary(configuration.dictConfEntryDict[langKey]);
                    ProbabilisticDictionaryParser.FilterTopEquivalents(configuration.dictConfEntryDict[langKey],srcToTrgDict);
                    if (configuration.dictConfEntryDict.ContainsKey(langKey2)&&configuration.dictConfEntryDict[langKey2].use)
                    {
                        try
                        {
                            trgToSrcDict = ProbabilisticDictionaryParser.ParseDictionary(configuration.dictConfEntryDict[langKey2]);
                            ProbabilisticDictionaryParser.FilterTopEquivalents(configuration.dictConfEntryDict[langKey2],trgToSrcDict);
                            Log.Write (langKey + " dictionary with "+srcToTrgDict.Count.ToString()+" "+srcLang+" entries loaded: "+ configuration.dictConfEntryDict[langKey].path,LogLevelType.LIMITED_OUTPUT,configuration);
                            Log.Write (langKey2 + " dictionary with "+trgToSrcDict.Count.ToString()+" "+trgLang+" entries loaded: "+ configuration.dictConfEntryDict[langKey2].path,LogLevelType.LIMITED_OUTPUT,configuration);
                            return false;
                        }
                        catch
                        {
                            //The reverse dictionary is unreadable: derive it from the forward dictionary instead.
                            Log.Write ("Cannot read the dictionary for the pair "+langKey2+"! The dictionary may be missing or corrupt! The "+langKey+" dictionary will be inverted.",LogLevelType.WARNING,configuration);
                            trgToSrcDict = GetInverseDictionary(srcToTrgDict, configuration.dictConfEntryDict[langKey]);
                            ProbabilisticDictionaryParser.FilterTopEquivalents(configuration.dictConfEntryDict[langKey],trgToSrcDict);
                            Log.Write (langKey + " dictionary with "+srcToTrgDict.Count.ToString()+" "+srcLang+" entries loaded: "+ configuration.dictConfEntryDict[langKey].path,LogLevelType.LIMITED_OUTPUT,configuration);
                            Log.Write (langKey2 + " dictionary with "+trgToSrcDict.Count.ToString()+" "+trgLang+" entries loaded (inverse of): "+ configuration.dictConfEntryDict[langKey].path,LogLevelType.LIMITED_OUTPUT,configuration);
                            return false;
                        }
                    }
                    else
                    {
                        Log.Write ("For the pair "+langKey2+" the inverted "+langKey+" dictionary will be used.",LogLevelType.WARNING,configuration);
                        trgToSrcDict = GetInverseDictionary(srcToTrgDict, configuration.dictConfEntryDict[langKey]);
                        ProbabilisticDictionaryParser.FilterTopEquivalents(configuration.dictConfEntryDict[langKey],trgToSrcDict);
                        Log.Write (langKey + " dictionary with "+srcToTrgDict.Count.ToString()+" "+srcLang+" entries loaded: "+ configuration.dictConfEntryDict[langKey].path,LogLevelType.LIMITED_OUTPUT,configuration);
                        Log.Write (langKey2 + " dictionary with "+trgToSrcDict.Count.ToString()+" "+trgLang+" entries loaded (inverse of): "+ configuration.dictConfEntryDict[langKey].path,LogLevelType.LIMITED_OUTPUT,configuration);
                        return false;
                    }
                }
                catch{ //The direct dictionary could not be loaded; drop partial results and try the inverse dictionary next.
                    srcToTrgDict = null;
                    trgToSrcDict = null;
                    Log.Write ("Cannot read the dictionary for the pair "+langKey+"! The dictionary may be missing or corrupt! The system will try to fall back to the inverse dictionary!",LogLevelType.WARNING,configuration);
                }
            }
            //Only the reverse dictionary is available: load it and invert it to obtain the forward direction.
            if (configuration.dictConfEntryDict.ContainsKey(langKey2)&&configuration.dictConfEntryDict[langKey2].use)
            {
                Log.Write ("Direct dictionary for "+langKey+" missing or disabled. The "+langKey2+" will be used instead.",LogLevelType.WARNING,configuration);
                srcDict = null;
                trgDict = null;
                try
                {
                    trgToSrcDict = ProbabilisticDictionaryParser.ParseDictionary(configuration.dictConfEntryDict[langKey2]);
                    srcToTrgDict = GetInverseDictionary(trgToSrcDict, configuration.dictConfEntryDict[langKey2]);
                    ProbabilisticDictionaryParser.FilterTopEquivalents(configuration.dictConfEntryDict[langKey2],srcToTrgDict);
                    ProbabilisticDictionaryParser.FilterTopEquivalents(configuration.dictConfEntryDict[langKey2],trgToSrcDict);
                    //Here the FORWARD dictionary is the derived one, so it is the line that carries the "(inverse of)" label.
                    Log.Write (langKey + " dictionary with "+srcToTrgDict.Count.ToString()+" "+srcLang+" entries loaded (inverse of): " + configuration.dictConfEntryDict[langKey2].path, LogLevelType.LIMITED_OUTPUT,configuration);
                    Log.Write (langKey2 + " dictionary with "+trgToSrcDict.Count.ToString()+" "+trgLang+" entries loaded: " + configuration.dictConfEntryDict[langKey2].path, LogLevelType.LIMITED_OUTPUT,configuration);
                    return false;
                }
                catch
                {
                    //Drop partial results: without this, a dictionary parsed before the failure would be
                    //returned alongside the interlingua dictionaries loaded below.
                    srcToTrgDict = null;
                    trgToSrcDict = null;
                    Log.Write ("Cannot read the dictionary for the pair "+langKey2+"! The dictionary may be disabled, missing or corrupt. The system will try to fall back to interlingua dictionaries!",LogLevelType.WARNING,configuration);
                }
            }

            //Last resort: use whichever EN interlingua dictionaries are enabled and readable.
            bool usingInterlingua = false;
            if (configuration.dictConfEntryDict.ContainsKey(srcLangKey)&&configuration.dictConfEntryDict[srcLangKey].use)
            {
                try{
                    srcDict = ProbabilisticDictionaryParser.ParseDictionary(configuration.dictConfEntryDict[srcLangKey]);
                    ProbabilisticDictionaryParser.FilterTopEquivalents(configuration.dictConfEntryDict[srcLangKey],srcDict);
                    Log.Write (srcLangKey+" dictionary with "+srcDict.Count.ToString()+" "+srcLang+" entries loaded: " + configuration.dictConfEntryDict[srcLangKey].path, LogLevelType.LIMITED_OUTPUT,configuration);
                    //Interlingua dictionary used.
                    usingInterlingua=true;
                }
                catch{
                    srcDict = null;
                    Log.Write ("Source-to-EN dictionary is missing or corrupt!",LogLevelType.WARNING,configuration);
                }
            }

            if (configuration.dictConfEntryDict.ContainsKey(trgLangKey)&&configuration.dictConfEntryDict[trgLangKey].use)
            {
                try{
                    trgDict = ProbabilisticDictionaryParser.ParseDictionary(configuration.dictConfEntryDict[trgLangKey]);
                    ProbabilisticDictionaryParser.FilterTopEquivalents(configuration.dictConfEntryDict[trgLangKey],trgDict);
                    Log.Write (trgLangKey+" dictionary with "+trgDict.Count.ToString()+" "+trgLang+" entries loaded: " + configuration.dictConfEntryDict[trgLangKey].path, LogLevelType.LIMITED_OUTPUT,configuration);
                    //Interlingua dictionary used.
                    usingInterlingua=true;
                }
                catch{
                    trgDict = null;
                    Log.Write ("Target-to-EN dictionary is missing or corrupt!",LogLevelType.WARNING,configuration);
                }
            }

            if (usingInterlingua)
            {
                return true;
            }

            else //Neither EN interlingua dictionary could be loaded; log a warning and continue.
            {
                Log.Write ("At least one of the EN interlingua dictionaries is missing!",LogLevelType.WARNING,configuration);
            }
            Log.Write ("Dictionaries for the pair "+langKey+" were not found or loaded. The system will be executed without a dictionary!",LogLevelType.WARNING,configuration);
            srcDict = null;
            trgDict = null;
            srcToTrgDict = null;
            trgToSrcDict = null;
            return false;//Interlingua dictionary not used.
        }
Example #10
0
        public static void Main(string[] args)
        {
            string configFile = null;
            string method = null;
            string inputFile = null;
            string inputFormat = "tagged_plaintext";//Allowed values: tagged_plaintext, preprocessed_terms, term_list
            string srcInputFile = null;
            string trgInputFile = null;
            string srcLang = null;
            string trgLang = null;
            string outputFile = null;
            string consolidatedOutputFile = null;
            string outputFormat = "";//"tabsep";//Allowed values: ref_tabsep, tabsep, xml
            string preProcessedTermOutputFile = null;//"/home/marcis/Dropbox/MonoProjects/MPAligner/MPAligner/bin/Debug/testTermData.xml";//null;
            string tempTranslitFile = null;
            bool consolidateResults = false;
            double consolidationThreshold = 0;
            //bool logPrepData = false;
            string domainId = "";
            string collectionId = "";
            //The skipping parameters are just for debugging. Use them only manually!
            string skipSrc = "";
            string skipTrg = "";
            MPAlignerConfiguration configuration = null;
            //Read all configuration parameters from the command line.
            for (int i=0; i<args.Length; i++) {
                if ((args [i] == "-c" || args [i] == "--configuration") && args.Length > i + 1) {
                    configFile = args [i + 1];
                    configuration = new MPAlignerConfiguration ();
                    configuration.Load (configFile);
                } else if ((args [i] == "-m" || args [i] == "--method") && args.Length > i + 1) {
                    method = args [i + 1];
                } else if ((args [i] == "-i" || args [i] == "--input-file") && args.Length > i + 1) {
                    inputFile = args [i + 1];
                    //} else if (args [i] == "-lp" || args [i] == "--log-pre-processed") {
                    //	logPrepData = true;
                } else if ((args [i] == "-if" || args [i] == "--input-format") && args.Length > i + 1) {
                    inputFormat = args [i + 1];
                } else if ((args [i] == "-si" || args [i] == "--source-input") && args.Length > i + 1) {
                    srcInputFile = args [i + 1];
                } else if ((args [i] == "-ti" || args [i] == "--target-input") && args.Length > i + 1) {
                    trgInputFile = args [i + 1];
                } else if ((args [i] == "-sl" || args [i] == "--source-language") && args.Length > i + 1) {
                    srcLang = MPFramework.MPFrameworkFunctions.GetValidLangString (args [i + 1]);
                } else if ((args [i] == "-tl" || args [i] == "--target-language") && args.Length > i + 1) {
                    trgLang = MPFramework.MPFrameworkFunctions.GetValidLangString (args [i + 1]);
                } else if ((args [i] == "-o" || args [i] == "--output-file") && args.Length > i + 1) {
                    outputFile = args [i + 1];
                } else if ((args [i] == "-of" || args [i] == "--output-format") && args.Length > i + 1) {
                    outputFormat = args [i + 1];
                } else if ((args [i] == "-pto" || args [i] == "--pre-processed-term-output-file") && args.Length > i + 1) {
                    preProcessedTermOutputFile = args [i + 1];
                } else if ((args [i] == "-ttf" || args [i] == "--temp-translit-file") && args.Length > i + 1) {
                    tempTranslitFile = args [i + 1];
                } else if ((args [i] == "-ss" || args [i] == "--skip-source-file") && args.Length > i + 1) {
                    skipSrc = args [i + 1];
                } else if ((args [i] == "-st" || args [i] == "--skip-target-file") && args.Length > i + 1) {
                    skipTrg = args [i + 1];
                } else if ((args [i] == "-d_id" || args [i] == "--domain-id") && args.Length > i + 1) {
                    domainId = args [i + 1];
                } else if ((args [i] == "-c_id" || args [i] == "--collection-id") && args.Length > i + 1) {
                    collectionId = args [i + 1];
                } else if ((args [i] == "-ct" || args [i] == "--consolidation-threshold") && args.Length > i + 1) {
                    //Consolidation works only if the ref_tabsep output format is specified!
                    NumberFormatInfo nfi = new NumberFormatInfo ();
                    nfi.CurrencyDecimalSeparator = ".";
                    nfi.NumberDecimalSeparator = ".";
                    nfi.PercentDecimalSeparator = ".";
                    consolidationThreshold = Convert.ToDouble (args [i + 1], nfi);
                    consolidateResults = true;
                }
            }
            //Break if a method is not defined.
            if (string.IsNullOrWhiteSpace (method)) {
                Log.Write ("Method not specified!",LogLevelType.ERROR,configuration);
                PrintUsage ();
                return;
            }
            //Write a configuration file to the output file if the config method is specified.
            //Every entry below is hard-coded (lv-en and lt-en language pairs with absolute paths),
            //so the written file is presumably a sample to be edited by hand afterwards - TODO confirm.
            //NOTE(review): method is dereferenced here without a null check, although the parameter
            //logging further below guards against method being null - confirm it is always set here.
            if (method.ToLower () == "config") {
                //The configuration cannot be written without a target path.
                if (string.IsNullOrWhiteSpace (outputFile)) {
                    Log.Write("Output file not specified!",LogLevelType.ERROR,configuration);
                    PrintUsage ();
                    return;
                }
                MPAlignerConfiguration conf = new MPAlignerConfiguration ();
                //Dictionary entries, keyed by "<src>_<trg>".
                MPAlignerConfigurationDictEntry cde = new MPAlignerConfigurationDictEntry ();
                cde.srcLang = "lv";
                cde.trgLang = "en";
                cde.path = "/home/marcis/TILDE/RESOURCES/DICT/lv_en_noisy";
                conf.dictConfEntryDict.Add ("lv_en", cde);
                cde = new MPAlignerConfigurationDictEntry ();
                cde.srcLang = "lt";
                cde.trgLang = "en";
                cde.path = "/home/marcis/TILDE/RESOURCES/DICT/lt_en";
                conf.dictConfEntryDict.Add ("lt_en", cde);
                //Transliteration entries (Moses .ini paths), keyed by "<src>_<trg>".
                MPAlignerConfigurationTranslEntry cte = new MPAlignerConfigurationTranslEntry ();
                cte.mosesIniPath = "/home/marcis/TILDE/RESOURCES/TRANSLIT_WORKING_DIR/LV-EN/lv-en-binarised-model.moses.ini";
                cte.srcLang = "lv";
                cte.trgLang = "en";
                conf.translConfEntryDict.Add ("lv_en", cte);
                cte = new MPAlignerConfigurationTranslEntry ();
                //NOTE(review): the lt-en model path below sits under the LV-EN directory - confirm whether this is intended.
                cte.mosesIniPath = "/home/marcis/TILDE/RESOURCES/TRANSLIT_WORKING_DIR/LV-EN/lt-en-binarised-model.moses.ini";
                cte.srcLang = "lt";
                cte.trgLang = "en";
                conf.translConfEntryDict.Add ("lt_en", cte);
                //Language pair entries; only the language codes are set here, all other fields keep their defaults.
                MPAlignerConfigurationLangPairEntry lpe = new MPAlignerConfigurationLangPairEntry ();
                lpe.srcLang = "lv";
                lpe.trgLang = "en";
                conf.langPairEntryDict.Add ("lv_en", lpe);
                lpe = new MPAlignerConfigurationLangPairEntry ();
                lpe.srcLang = "lt";
                lpe.trgLang = "en";
                conf.langPairEntryDict.Add ("lt_en", lpe);
                //Exception dictionary entries, keyed by "<src>_<trg>".
                MPAlignerConfigurationExceptionEntry cee = new MPAlignerConfigurationExceptionEntry ();
                cee.srcLang = "lv";
                cee.trgLang = "en";
                cee.path = "/home/marcis/TILDE/RESOURCES/EXC_DICT/lv_en_exc";
                conf.excDictEntryDict.Add ("lv_en", cee);
                cee = new MPAlignerConfigurationExceptionEntry ();
                cee.srcLang = "lt";
                cee.trgLang = "en";
                cee.path = "/home/marcis/TILDE/RESOURCES/EXC_DICT/lt_en_exc";
                conf.excDictEntryDict.Add ("lt_en", cee);
                //Stop word list entries, keyed by the language code (one list per language, not per pair).
                MPAlignerConfigurationStopWordListEntry cswle = new MPAlignerConfigurationStopWordListEntry ();
                cswle.lang = "lv";
                cswle.path = "/home/marcis/TILDE/RESOURCES/STOP_WORD/lv_stop";
                conf.stopWordListEntryDict.Add ("lv", cswle);
                cswle = new MPAlignerConfigurationStopWordListEntry ();
                cswle.lang = "lt";
                cswle.path = "/home/marcis/TILDE/RESOURCES/STOP_WORD/lt_stop";
                conf.stopWordListEntryDict.Add ("lt", cswle);
                cswle = new MPAlignerConfigurationStopWordListEntry ();
                cswle.lang = "en";
                cswle.path = "/home/marcis/TILDE/RESOURCES/STOP_WORD/en_stop";
                conf.stopWordListEntryDict.Add ("en", cswle);
                //Save the configuration to the requested output file and stop; no alignment is performed for this method.
                conf.Save (outputFile);
                return;
            }

            //If no configuration file is passed, try the default configuration file
            //("MPAlignerConfig.xml", which File.Exists resolves relative to the current directory);
            //if the default configuration can not be found either, report the problem and stop.
            if (string.IsNullOrWhiteSpace (configFile) && File.Exists ("MPAlignerConfig.xml")) {
                configuration = new MPAlignerConfiguration ();
                //Load the default file whose existence was just checked. configFile is null or
                //blank on this path, so it must not be used as the path to load.
                configuration.Load ("MPAlignerConfig.xml");
            } else if (string.IsNullOrWhiteSpace (configFile)) {
                Log.Write("Configuration file missing in application directory and a substitution runtime configuration file is not specified!",LogLevelType.ERROR,configuration);
                PrintUsage ();
                return;
            }

            //In the case if an output format is not defined in the command line, read it from the configuration file.
            if (string.IsNullOrWhiteSpace (outputFormat))
                outputFormat = configuration.outputFormat;

            //In the case if the configuration does not specify an output format, use the default output format.
            if (string.IsNullOrWhiteSpace (outputFormat)) {
                outputFormat = "ref_tabsep";
            }

            Log.confLogLevel = configuration.logLevel;

            if (string.IsNullOrWhiteSpace (tempTranslitFile)) {
                tempTranslitFile = outputFile+".tmp";
            }

            //Echo the effective run-time parameters to the log; values that are not set are printed as empty strings.
            Log.Write ("configFile: " + (configFile ?? ""), LogLevelType.LIMITED_OUTPUT, configuration);
            Log.Write ("method: " + (method ?? ""), LogLevelType.LIMITED_OUTPUT, configuration);
            Log.Write ("inputFile: " + (inputFile ?? ""), LogLevelType.LIMITED_OUTPUT, configuration);
            Log.Write ("inputFormat: " + (inputFormat ?? ""), LogLevelType.LIMITED_OUTPUT, configuration);
            Log.Write ("srcInputFile: " + (srcInputFile ?? ""), LogLevelType.LIMITED_OUTPUT, configuration);
            Log.Write ("trgInputFile: " + (trgInputFile ?? ""), LogLevelType.LIMITED_OUTPUT, configuration);
            Log.Write ("srcLang: " + (srcLang ?? ""), LogLevelType.LIMITED_OUTPUT, configuration);
            Log.Write ("trgLang: " + (trgLang ?? ""), LogLevelType.LIMITED_OUTPUT, configuration);
            Log.Write ("outputFile: " + (outputFile ?? ""), LogLevelType.LIMITED_OUTPUT, configuration);
            Log.Write ("outputFormat: " + (outputFormat ?? ""), LogLevelType.LIMITED_OUTPUT, configuration);
            Log.Write ("preProcessedTermOutputFile: " + (preProcessedTermOutputFile ?? ""), LogLevelType.LIMITED_OUTPUT, configuration);
            Log.Write ("tempTranslitFile: " + (tempTranslitFile ?? ""), LogLevelType.LIMITED_OUTPUT, configuration);
            //The threshold is only printed when result consolidation is switched on.
            Log.Write ("consolidation threshold: " + (consolidateResults ? consolidationThreshold.ToString() : ""), LogLevelType.LIMITED_OUTPUT, configuration);

            if (outputFormat == "ref_tabsep" && consolidateResults) {
                consolidatedOutputFile = outputFile;
                outputFile += ".raw";
            }

            //For document pair-based alignment.
            if (method.ToLower () == "taggedfilepairs") {
                char[] sep = {'\t'};
                if (string.IsNullOrWhiteSpace(inputFile)||!File.Exists(inputFile))
                {
                    Log.Write("Input file list file not specified or cannot be found!",LogLevelType.ERROR,configuration);
                    PrintUsage();
                    return;
                }
                if (string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
                {
                    Log.Write("Source and/or target languages not specified!",LogLevelType.ERROR,configuration);
                    PrintUsage();
                    return;
                }
                //Read the alignment thresholds and other language pair specific numerical/single-value parameters.
                MPAlignerConfigurationLangPairEntry lpeConf = ReadLangPairConfig (srcLang, trgLang, configuration);
                //The size of the cache may affect the performance of the alignment!
                Dictionary<string, ProcessedTermEntry> srcTermCache = new Dictionary<string, ProcessedTermEntry>();
                Dictionary<string, ProcessedTermEntry> trgTermCache = new Dictionary<string, ProcessedTermEntry>();

                bool interlinguaDictUsed = false;
                bool interlinguaTranslitUsed = false;

                //Define dictionaries for pre-processing.
                Dictionary<string, Dictionary<string, double>> srcDict = null;
                Dictionary<string, Dictionary<string, double>> trgDict = null;
                Dictionary<string, Dictionary<string, double>> srcToTrgDict = null;
                Dictionary<string, Dictionary<string, double>> trgToSrcDict = null;

                //Define transliteration configurations for pre-processing.
                MPAlignerConfigurationTranslEntry srcTranslitConf = null;
                MPAlignerConfigurationTranslEntry trgTranslitConf = null;
                MPAlignerConfigurationTranslEntry srcToTrgTranslitConf = null;
                MPAlignerConfigurationTranslEntry trgToSrcTranslitConf = null;

                //Read dictionaries and transliterations.
                interlinguaDictUsed = ReadDictionaries(configuration,srcLang,trgLang, out srcDict, out trgDict, out srcToTrgDict, out trgToSrcDict);
                interlinguaTranslitUsed = GetTranslitConfig(configuration,srcLang,trgLang,out srcTranslitConf,out trgTranslitConf,out srcToTrgTranslitConf, out trgToSrcTranslitConf);

                //Define the alignments (the variable holding alignment results)
                Dictionary<string,Dictionary<string, AlignmentInfoElement>> alignments = new Dictionary<string, Dictionary<string, AlignmentInfoElement>>();

                //Define and read exception dictionaries.
                Dictionary<string, Dictionary<string, bool>> excDict = null;
                ReadExceptionDictionary(configuration,srcLang, trgLang,out excDict);

                //Define and read stopword lists.
                Dictionary<string,bool> srcStopWords = null;
                ReadStopwordList(configuration,srcLang,out srcStopWords);
                Dictionary<string,bool> trgStopWords = null;
                ReadStopwordList(configuration,trgLang,out trgStopWords);

                StreamReader sr = new StreamReader(inputFile,Encoding.UTF8);
                int pairCounter = 0;
                bool skip = !string.IsNullOrWhiteSpace(skipSrc)&&!string.IsNullOrWhiteSpace(skipTrg)?true:false;

                //Read input document alignment file and process file pairs.
                while(!sr.EndOfStream)
                {
                    pairCounter++;
                    string line = sr.ReadLine().Trim();
                    if (string.IsNullOrWhiteSpace(line)) continue;
                    string[] arr = line.Split(sep, StringSplitOptions.RemoveEmptyEntries);
                    if (arr.Length<2)
                    {
                        continue; //If the alignment line does not contain at least two entries, the document alignment is not valid.
                    }
                    string srcFile = arr[0];
                    string trgFile = arr[1];
                    if (!File.Exists(srcFile))
                    {
                        Log.Write("Input file \""+srcFile+"\" cannot be found!",LogLevelType.WARNING,configuration);
                        continue;
                    }
                    if (!File.Exists(trgFile))
                    {
                        Log.Write("Input file \""+trgFile+"\" cannot be found!",LogLevelType.WARNING,configuration);
                        continue;
                    }
                    string srcFileName = Path.GetFileName(srcFile);
                    string trgFileName = Path.GetFileName(trgFile);

                    //The skipping condition is for debugging - if the system crashes due to insufficient memory...
                    if (skip)
                    {
                        if (srcFileName==skipSrc&&trgFileName == skipTrg)
                        {
                            skip = false;
                        }
                        else
                        {
                            Log.Write("Skipping file pair "+srcFileName+" and " + trgFileName+".",LogLevelType.WARNING,configuration);
                            continue;
                        }
                    }

                    Log.Write("Processing file pair "+srcFileName+" and " + trgFileName+".",LogLevelType.LIMITED_OUTPUT,configuration);

                    //Define term entry data variables (used for storing terms in pre-pre-processed and pre-processed states).
                    Dictionary<string,SimpleTermEntry> srcInitialList = new Dictionary<string, SimpleTermEntry>();
                    Dictionary<string,SimpleTermEntry> trgInitialList = new Dictionary<string, SimpleTermEntry>();
                    Dictionary<string,SimpleTermEntry> srcInitialTempList = new Dictionary<string, SimpleTermEntry>();
                    Dictionary<string,SimpleTermEntry> trgInitialTempList = new Dictionary<string, SimpleTermEntry>();

                    Dictionary<string, ProcessedTermEntry> srcTermList = new Dictionary<string, ProcessedTermEntry>();
                    Dictionary<string, ProcessedTermEntry> trgTermList = new Dictionary<string, ProcessedTermEntry>();
                    Dictionary<string, ProcessedTermEntry> srcTermTempList = new Dictionary<string, ProcessedTermEntry>();
                    Dictionary<string, ProcessedTermEntry> trgTermTempList = new Dictionary<string, ProcessedTermEntry>();

                    //Two input formats are currently supported - term-tagged plaintext files and term list (one term per line) files.
                    if (inputFormat=="tagged_plaintext")
                    {
                        //Read terms from the term-tagged documents.
                        srcInitialTempList = TermTaggedFileParser.ParseTermTaggedFile(srcFile,Encoding.UTF8, configuration.concLen);
                        trgInitialTempList = TermTaggedFileParser.ParseTermTaggedFile(trgFile,Encoding.UTF8, configuration.concLen);
                    }
                    else
                    {
                        //Read terms from the term list files.
                        srcInitialTempList = ListFileParser.Parse(srcFile,Encoding.UTF8);
                        trgInitialTempList = ListFileParser.Parse(trgFile,Encoding.UTF8);
                    }

                    //Split the parsed source terms: terms whose lowercased form is already in the cache are
                    //taken over as pre-processed entries, all other terms are queued for pre-processing.
                    foreach(KeyValuePair<string, SimpleTermEntry> srcPair in srcInitialTempList)
                    {
                        string lower = srcPair.Key.ToLower();
                        ProcessedTermEntry cached;
                        if (!srcTermCache.TryGetValue(lower, out cached))
                        {
                            //Not cached - keep the term under its original (non-lowercased) key.
                            srcInitialList.Add(srcPair.Key, srcPair.Value);
                            continue;
                        }
                        //Several surface forms may share one lowercased form; only the first one is added.
                        if (!srcTermList.ContainsKey(lower))
                        {
                            srcTermList.Add(lower, cached);
                        }
                    }

                    //The same split for the parsed target terms.
                    foreach(KeyValuePair<string, SimpleTermEntry> trgPair in trgInitialTempList)
                    {
                        string lower = trgPair.Key.ToLower();
                        ProcessedTermEntry cached;
                        if (!trgTermCache.TryGetValue(lower, out cached))
                        {
                            trgInitialList.Add(trgPair.Key, trgPair.Value);
                            continue;
                        }
                        if (!trgTermList.ContainsKey(lower))
                        {
                            trgTermList.Add(lower, cached);
                        }
                    }

                    //Now pre-process the terms that were not found in the cache (i.e., that have not been pre-processed yet).
                    if (srcDict!=null||trgDict!=null)
                    {
                        if (srcTranslitConf!=null && trgTranslitConf!=null)
                        {
                            srcTermTempList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermTempList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                        else
                        {
                            srcTermTempList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermTempList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                    }
                    else
                    {
                        if (srcTranslitConf!=null && trgTranslitConf!=null)
                        {
                            srcTermTempList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcToTrgDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermTempList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgToSrcDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                        else
                        {
                            srcTermTempList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcToTrgDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermTempList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgToSrcDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                    }

                    //Update the pre-processed term list for alignment.
                    foreach(string srcTerm in srcTermTempList.Keys)
                    {
                        if (!srcTermList.ContainsKey(srcTerm)) srcTermList.Add(srcTerm,srcTermTempList[srcTerm]);
                        if (!srcTermCache.ContainsKey(srcTerm)) srcTermCache.Add(srcTerm, srcTermTempList[srcTerm]);
                    }

                    foreach(string trgTerm in trgTermTempList.Keys)
                    {
                        if (!trgTermList.ContainsKey(trgTerm)) trgTermList.Add(trgTerm,trgTermTempList[trgTerm]);
                        if (!trgTermCache.ContainsKey(trgTerm)) trgTermCache.Add(trgTerm, trgTermTempList[trgTerm]);
                    }

                    //Execute alignment for one file pair.
                    List<AlignmentInfoElement> alignment = new List<AlignmentInfoElement>();
                    //The execution may be multi-threaded or single-threaded. Multi-threaded execution may be unstable, so be careful when using it.
                    if (configuration.useMultiThreadedExecution)
                    {
                        alignment = Alignment.AlignPairsMultiThreaded(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcFile, trgFile, excDict, srcStopWords, trgStopWords);
                    }
                    else
                    {
                        alignment = Alignment.AlignPairs(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcFile, trgFile, excDict, srcStopWords, trgStopWords);
                    }
                    if (alignment!=null)
                    {
                        foreach(AlignmentInfoElement aie in alignment)
                        {
                            if (!alignments.ContainsKey(aie.srcEntry.lowercaceForm))
                            {
                                alignments.Add(aie.srcEntry.lowercaceForm, new Dictionary<string, AlignmentInfoElement>());
                            }
                            if (!alignments[aie.srcEntry.lowercaceForm].ContainsKey(aie.trgEntry.lowercaceForm))
                            {
                                alignments[aie.srcEntry.lowercaceForm].Add(aie.trgEntry.lowercaceForm, aie);
                            }
                        }
                    }
                    //If the pre-processed term cache is full, empty it (this could perhaps be improved with some sort of rolling cache that is always circulating).
                    if (srcTermCache.Count>50000)
                    {
                        srcTermCache.Clear();
                        srcTermCache = new Dictionary<string, ProcessedTermEntry>();
                        GC.Collect();
                        GC.WaitForPendingFinalizers();
                    }
                    if (trgTermCache.Count>50000)
                    {
                        trgTermCache.Clear();
                        trgTermCache = new Dictionary<string, ProcessedTermEntry>();
                        GC.Collect();
                        GC.WaitForPendingFinalizers();
                    }
                    //Print intermediate results when the pair counter is a multiple of 50 or when more than
                    //50000 source entries have been collected, then start over with an empty result set.
                    if (alignments.Count>50000 || pairCounter%50==0)
                    {
                        Log.Write("Printing intermediate results after "+pairCounter.ToString()+" file pairs",LogLevelType.LIMITED_OUTPUT,configuration);
                        //Flatten the source -> target -> alignment map into a single list.
                        List<AlignmentInfoElement> resAlignment = new List<AlignmentInfoElement>();
                        foreach(Dictionary<string, AlignmentInfoElement> trgAlignments in alignments.Values)
                        {
                            resAlignment.AddRange(trgAlignments.Values);
                        }
                        AlignmentInfoElement.AppendList(outputFormat,outputFile,resAlignment,lpeConf,srcLang,trgLang,collectionId,domainId);
                        //Drop everything that has been written and force a collection to release the memory.
                        alignments.Clear();
                        alignments = new Dictionary<string, Dictionary<string, AlignmentInfoElement>>();
                        GC.Collect();
                        GC.WaitForPendingFinalizers();
                    }
                }
                sr.Close();
                //Append whatever is still held in memory to the output file (only if an output file is given).
                if (!string.IsNullOrWhiteSpace(outputFile))
                {
                    Log.Write("Printing final results after "+pairCounter.ToString()+" file pairs",LogLevelType.LIMITED_OUTPUT,configuration);
                    //Flatten the source -> target -> alignment map into a single list.
                    List<AlignmentInfoElement> resAlignment = new List<AlignmentInfoElement>();
                    foreach(Dictionary<string, AlignmentInfoElement> trgAlignments in alignments.Values)
                    {
                        resAlignment.AddRange(trgAlignments.Values);
                    }
                    AlignmentInfoElement.AppendList(outputFormat,outputFile,resAlignment,lpeConf,srcLang,trgLang,collectionId,domainId);
                }
            }
            else if (method.ToLower () == "singletaggedpair") //TODO: REFACTOR (the file pair list processing could be handled (wisely) through a single file pair processing method!!!
            {
                //Define the instances of source and target processed term lists.
                Dictionary<string, ProcessedTermEntry> srcTermList = new Dictionary<string, ProcessedTermEntry>();
                Dictionary<string, ProcessedTermEntry> trgTermList = new Dictionary<string, ProcessedTermEntry>();
                bool interlinguaDictUsed = false;
                bool interlinguaTranslitUsed = false;

                if (inputFormat=="preprocessed_terms")
                {
                    if (string.IsNullOrWhiteSpace(inputFile)||!File.Exists(inputFile))
                    {
                        Log.Write("Pre-processed term input file not specified or cannot be found!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }
                    if (string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
                    {
                        Log.Write("Source and/or target languages not specified!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }
                    PreprocessedTermData ptd = PreprocessedTermData.ReadFromFile(inputFile);
                    foreach(ProcessedTermEntry pte in ptd.srcTerms)
                    {
                        if(!srcTermList.ContainsKey(pte.lowercaceForm))
                        {
                            srcTermList.Add(pte.lowercaceForm,pte);
                        }
                    }
                    foreach(ProcessedTermEntry pte in ptd.trgTerms)
                    {
                        if(!trgTermList.ContainsKey(pte.lowercaceForm))
                        {
                            trgTermList.Add(pte.lowercaceForm,pte);
                        }
                    }
                    srcLang = ptd.srcLang;
                    trgLang = ptd.trgLang;
                    interlinguaDictUsed = ptd.interlinguaDictUsed;
                    interlinguaTranslitUsed = ptd.interlinguaTranslitUsed;

                    Dictionary<string, Dictionary<string, bool>> excDict = null;
                    ReadExceptionDictionary(configuration,srcLang, trgLang,out excDict);

                    Dictionary<string,bool> srcStopWords = null;
                    ReadStopwordList(configuration,srcLang,out srcStopWords);

                    Dictionary<string,bool> trgStopWords = null;
                    ReadStopwordList(configuration,trgLang,out trgStopWords);

                    if (!string.IsNullOrWhiteSpace(outputFile))
                    {
                        List<AlignmentInfoElement> alignment = new List<AlignmentInfoElement>();
                        if (configuration.useMultiThreadedExecution)
                        {
                            alignment = Alignment.AlignPairsMultiThreaded(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                        }
                        else
                        {
                            alignment = Alignment.AlignPairs(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                        }
                        AlignmentInfoElement.PrintList(outputFormat,outputFile,alignment, configuration.printTopTrgForSrc,null,srcLang,trgLang,collectionId,domainId);
                    }
                }
                else if (inputFormat=="term_list"||inputFormat=="tagged_plaintext")
                {
                    if (string.IsNullOrWhiteSpace(srcInputFile)||!File.Exists(srcInputFile)||string.IsNullOrWhiteSpace(trgInputFile)||!File.Exists(trgInputFile))
                    {
                        Log.Write("Source and/or target files not specified or cannot be found!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }
                    if (string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
                    {
                        Log.Write("Source and/or target languages not specified!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }

                    Dictionary<string,SimpleTermEntry> srcInitialList = new Dictionary<string, SimpleTermEntry>();
                    Dictionary<string,SimpleTermEntry> trgInitialList = new Dictionary<string, SimpleTermEntry>();

                    if (inputFormat=="tagged_plaintext")
                    {
                        srcInitialList = TermTaggedFileParser.ParseTermTaggedFile(srcInputFile,Encoding.UTF8, configuration.concLen);
                        trgInitialList = TermTaggedFileParser.ParseTermTaggedFile(trgInputFile,Encoding.UTF8, configuration.concLen);
                    }
                    else
                    {
                        srcInitialList = ListFileParser.Parse(srcInputFile,Encoding.UTF8);
                        trgInitialList = ListFileParser.Parse(trgInputFile,Encoding.UTF8);
                    }
                    Log.Write ("Unprocessed source terms: "+srcInitialList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                    Log.Write ("Unprocessed target terms: "+trgInitialList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                    Dictionary<string, Dictionary<string, double>> srcDict = null;
                    Dictionary<string, Dictionary<string, double>> trgDict = null;
                    Dictionary<string, Dictionary<string, double>> srcToTrgDict = null;
                    Dictionary<string, Dictionary<string, double>> trgToSrcDict = null;

                    MPAlignerConfigurationTranslEntry srcTranslitConf = null;
                    MPAlignerConfigurationTranslEntry trgTranslitConf = null;
                    MPAlignerConfigurationTranslEntry srcToTrgTranslitConf = null;
                    MPAlignerConfigurationTranslEntry trgToSrcTranslitConf = null;

                    interlinguaDictUsed = ReadDictionaries(configuration,srcLang,trgLang, out srcDict, out trgDict, out srcToTrgDict, out trgToSrcDict);
                    interlinguaTranslitUsed = GetTranslitConfig(configuration,srcLang,trgLang,out srcTranslitConf,out trgTranslitConf,out srcToTrgTranslitConf, out trgToSrcTranslitConf);

                    if (srcDict!=null||trgDict!=null)
                    {
                        if (srcTranslitConf!=null && trgTranslitConf!=null)
                        {
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                        else
                        {
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                    }
                    else
                    {
                        if (srcTranslitConf!=null && trgTranslitConf!=null)
                        {
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcToTrgDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgToSrcDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                        else
                        {
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcToTrgDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgToSrcDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                    }
                    Log.Write ("Pre-processed source terms: "+srcTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                    Log.Write ("Pre-processed target terms: "+trgTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);

                    //If pre-processed terms should be saved for future use, they are serialized and written to the given output file.
                    //This functionality is not available for the file pair list-based processing.
                    if (!string.IsNullOrWhiteSpace(preProcessedTermOutputFile))
                    {
                        List<ProcessedTermEntry> srcTerms = new List<ProcessedTermEntry>(srcTermList.Values);
                        List<ProcessedTermEntry> trgTerms = new List<ProcessedTermEntry>(trgTermList.Values);
                        PreprocessedTermData ptd = new PreprocessedTermData();
                        ptd.interlinguaDictUsed = interlinguaDictUsed;
                        ptd.interlinguaTranslitUsed = interlinguaTranslitUsed;
                        ptd.srcTerms = srcTerms.ToArray();
                        ptd.trgTerms = trgTerms.ToArray();
                        ptd.srcLang = srcLang;
                        ptd.trgLang = trgLang;
                        string outStr = MPFramework.MPFrameworkFunctions.SerializeObjectInstance<PreprocessedTermData>(ptd);
                        File.WriteAllText(preProcessedTermOutputFile,outStr);
                    }

                    Dictionary<string, Dictionary<string, bool>> excDict = null;
                    ReadExceptionDictionary(configuration,srcLang, trgLang,out excDict);

                    Dictionary<string,bool> srcStopWords = null;
                    ReadStopwordList(configuration,srcLang,out srcStopWords);

                    Dictionary<string,bool> trgStopWords = null;
                    ReadStopwordList(configuration,trgLang,out trgStopWords);

                    if (!string.IsNullOrWhiteSpace(outputFile))
                    {
                        List<AlignmentInfoElement> alignment = new List<AlignmentInfoElement>();
                        if (configuration.useMultiThreadedExecution)
                        {
                            alignment = Alignment.AlignPairsMultiThreaded(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                        }
                        else
                        {
                            alignment = Alignment.AlignPairs(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                        }
                        AlignmentInfoElement.PrintList(outputFormat,outputFile,alignment, configuration.printTopTrgForSrc,null,srcLang,trgLang,collectionId,domainId);
                    }
                }
                else
                {
                    Log.Write ("Input format UNKNOWN or UNDEFINED.",LogLevelType.ERROR,configuration);
                    return;
                }
            }
            else if (method.ToLower () == "singletermpairlist") //Use this method only if filtering of term pairs or some sort of evaluation is necessary!
            {
                //Define the instances of source and target processed term lists.
                List<ProcessedTermEntry> srcTermList = new List<ProcessedTermEntry>();
                List<ProcessedTermEntry> trgTermList = new List<ProcessedTermEntry>();
                bool interlinguaDictUsed = false;
                bool interlinguaTranslitUsed = false;
                if (inputFormat=="preprocessed_terms")
                {
                    if (string.IsNullOrWhiteSpace(inputFile)||!File.Exists(inputFile))
                    {
                        Log.Write("Pre-processed term input file not specified or cannot be found!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }
                    if (string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
                    {
                        Log.Write("Source and/or target languages not specified!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }
                    PreprocessedTermData ptd = PreprocessedTermData.ReadFromFile(inputFile);
                    srcTermList.AddRange(ptd.srcTerms);
                    trgTermList.AddRange(ptd.trgTerms);
                    Log.Write ("Pre-processed source terms: "+srcTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                    Log.Write ("Pre-processed target terms: "+trgTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);

                    srcLang = ptd.srcLang;
                    trgLang = ptd.trgLang;
                    interlinguaDictUsed = ptd.interlinguaDictUsed;
                    interlinguaTranslitUsed = ptd.interlinguaTranslitUsed;

                    Dictionary<string, Dictionary<string, bool>> excDict = null;
                    ReadExceptionDictionary(configuration,srcLang, trgLang,out excDict);
                    Log.Write ("Exception dictionary entries: "+excDict.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);

                    Dictionary<string,bool> srcStopWords = null;
                    ReadStopwordList(configuration,srcLang,out srcStopWords);
                    Log.Write ("Source language stopwords: "+srcStopWords.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);

                    Dictionary<string,bool> trgStopWords = null;
                    ReadStopwordList(configuration,trgLang,out trgStopWords);
                    Log.Write ("Target language stopwords: "+trgStopWords.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);

                    if (!string.IsNullOrWhiteSpace(outputFile))
                    {
                        List<AlignmentInfoElement> alignment = Alignment.AlignListPairs(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                        Log.Write ("Alignment elements after alignment: "+alignment.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                        AlignmentInfoElement.PrintList(outputFormat,outputFile,alignment, configuration.printTopTrgForSrc,null,srcLang,trgLang,collectionId,domainId);
                    }
                }
                else
                {

                    if (string.IsNullOrWhiteSpace(srcInputFile)||!File.Exists(srcInputFile)||string.IsNullOrWhiteSpace(trgInputFile)||!File.Exists(trgInputFile))
                    {
                        Log.Write("Source and/or target files not specified or cannot be found!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }
                    if (string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
                    {
                        Log.Write("Source and/or target languages not specified!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }

                    List<string> srcInitialList = new List<string>();
                    List<string> trgInitialList = new List<string>();
                    srcInitialList = ListFileParser.ParseList(srcInputFile,Encoding.UTF8);
                    trgInitialList = ListFileParser.ParseList(trgInputFile,Encoding.UTF8);
                    if (srcInitialList.Count!=trgInitialList.Count)
                    {
                        Log.Write("Source and target term lists are with different lengths",LogLevelType.ERROR,configuration);
                        throw new ArgumentException("Source and target term lists are with different lengths");
                    }
                    Log.Write ("Unprocessed source terms: "+srcInitialList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                    Log.Write ("Unprocessed target terms: "+trgInitialList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                    Dictionary<string, Dictionary<string, double>> srcDict = null;
                    Dictionary<string, Dictionary<string, double>> trgDict = null;
                    Dictionary<string, Dictionary<string, double>> srcToTrgDict = null;
                    Dictionary<string, Dictionary<string, double>> trgToSrcDict = null;

                    MPAlignerConfigurationTranslEntry srcTranslitConf = null;
                    MPAlignerConfigurationTranslEntry trgTranslitConf = null;
                    MPAlignerConfigurationTranslEntry srcToTrgTranslitConf = null;
                    MPAlignerConfigurationTranslEntry trgToSrcTranslitConf = null;

                    interlinguaDictUsed = ReadDictionaries(configuration,srcLang,trgLang, out srcDict, out trgDict, out srcToTrgDict, out trgToSrcDict);
                    interlinguaTranslitUsed = GetTranslitConfig(configuration,srcLang,trgLang,out srcTranslitConf,out trgTranslitConf,out srcToTrgTranslitConf, out trgToSrcTranslitConf);

                    if (srcDict!=null||trgDict!=null)
                    {
                        if (srcTranslitConf!=null && trgTranslitConf!=null)
                        {
                            srcTermList = ProcessedTermEntry.ProcessTermsList(srcInitialList,srcDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTermsList(trgInitialList,trgDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                        else
                        {
                            srcTermList = ProcessedTermEntry.ProcessTermsList(srcInitialList,srcDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTermsList(trgInitialList,trgDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                    }
                    else
                    {
                        if (srcTranslitConf!=null && trgTranslitConf!=null)
                        {
                            srcTermList = ProcessedTermEntry.ProcessTermsList(srcInitialList,srcToTrgDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTermsList(trgInitialList,trgToSrcDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                        else
                        {
                            srcTermList = ProcessedTermEntry.ProcessTermsList(srcInitialList,srcToTrgDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTermsList(trgInitialList,trgToSrcDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                    }
                    Log.Write ("Pre-processed source terms: "+srcTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                    Log.Write ("Pre-processed target terms: "+trgTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);

                    ///If pre-processed terms should be saved for future use an output format is created.
                    if (!string.IsNullOrWhiteSpace(preProcessedTermOutputFile))
                    {
                        PreprocessedTermData ptd = new PreprocessedTermData();
                        ptd.interlinguaDictUsed = interlinguaDictUsed;
                        ptd.interlinguaTranslitUsed = interlinguaTranslitUsed;
                        ptd.srcTerms = srcTermList.ToArray();
                        ptd.trgTerms = trgTermList.ToArray();
                        ptd.srcLang = srcLang;
                        ptd.trgLang = trgLang;

                        string outStr = MPFramework.MPFrameworkFunctions.SerializeObjectInstance<PreprocessedTermData>(ptd);
                        File.WriteAllText(preProcessedTermOutputFile,outStr);
                    }

                    Dictionary<string, Dictionary<string, bool>> excDict = null;
                    ReadExceptionDictionary(configuration,srcLang, trgLang,out excDict);

                    Dictionary<string,bool> srcStopWords = null;
                    ReadStopwordList(configuration,srcLang,out srcStopWords);

                    Dictionary<string,bool> trgStopWords = null;
                    ReadStopwordList(configuration,trgLang,out trgStopWords);

                    if (!string.IsNullOrWhiteSpace(outputFile))
                    {
                        List<AlignmentInfoElement> alignment = Alignment.AlignListPairs(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                        AlignmentInfoElement.PrintList(outputFormat,outputFile,alignment, configuration.printTopTrgForSrc,null,srcLang,trgLang,collectionId,domainId);
                    }
                }
            }
            else if (method.ToLower () == "eurovoceval")
            {
                if (string.IsNullOrWhiteSpace(inputFile)||!File.Exists(inputFile))
                {
                    Log.Write("Eurovoc input file not specified or cannot be found!",LogLevelType.ERROR,configuration);
                    PrintUsage();
                    return;
                }
                if (string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
                {
                    Log.Write("Source or target language not specified!",LogLevelType.ERROR,configuration);
                    PrintUsage();
                    return;
                }

                configuration.allowTrimmedAlignments = false;
                //configuration.useMultiThreadedExecution = false;
                configuration.printTopTrgForSrc = true;

                string logFile = outputFile+".res.log";
                StreamWriter sw = new StreamWriter(logFile, true, Encoding.UTF8);

                Dictionary<string,List<string>> eurovocDict = ReadEurovocDict(inputFile);;
                //List<string> langList = GetLangsFromConf(configuration);

                //for(int i = 0;i<langList.Count;i++)
                //{
                    //for(int j = 0;j<langList.Count;j++)
                    //{
                        //if (i==j) continue;
                        //srcLang = langList[i];
                        //trgLang = langList[j];
                Log.Write("Processing pair "+srcLang+"_"+trgLang,LogLevelType.LIMITED_OUTPUT,configuration);
                if (Char.IsDigit(outputFile[outputFile.Length-1])) outputFile = outputFile.Substring(0,outputFile.Length-1);
                string alignmentOutputFile = outputFile+"."+srcLang+"_"+trgLang+".align.txt";
                if (File.Exists(alignmentOutputFile))
                {
                    Log.Write("Pair "+srcLang+"_"+trgLang+" already processed! Evaluating...",LogLevelType.LIMITED_OUTPUT,configuration);
                    List<StringComparisonElement> terms = new List<StringComparisonElement>();
                    StreamReader sr = new StreamReader(alignmentOutputFile,Encoding.UTF8);
                    char[] sep = {'\t'};
                    NumberFormatInfo nfi = new NumberFormatInfo();
                    nfi.CurrencyDecimalSeparator=".";
                    nfi.NumberDecimalSeparator=".";
                    nfi.PercentDecimalSeparator=".";
                    while(!sr.EndOfStream)
                    {
                        string line = sr.ReadLine().Trim();
                        string[] arr = line.Split(sep,StringSplitOptions.None);
                        if (arr.Length>=3)
                        {
                            StringComparisonElement sce = new StringComparisonElement();
                            sce.src = arr[0];
                            sce.trg = arr[1];
                            sce.similarity = Convert.ToDouble(arr[2],nfi);
                            terms.Add(sce);
                        }
                    }
                    sr.Close();
                    terms.Sort();

                    List<double> scores = new List<double>();
                    double tmp = 0;
                    while (tmp<=1)
                    {
                        scores.Add(tmp);
                        tmp+=0.01;
                    }
                    List<double> correct = new List<double>();
                    for(int t=0;t<scores.Count;t++)
                    {
                        correct.Add(0);
                    }
                    List<double> total = new List<double>();
                    for(int t=0;t<scores.Count;t++)
                    {
                        total.Add(0);
                    }

                    int totalForRec = 0;
                    Dictionary<string,Dictionary<string,bool>> goldList = new Dictionary<string, Dictionary<string, bool>>();
                    for (int s = 0;s<eurovocDict[srcLang].Count;s++)
                    {
                        if (!eurovocDict[srcLang][s].Contains("(under translation)")&&!eurovocDict[trgLang][s].Contains("(under translation)"))
                        {
                            totalForRec++;
                            if (!goldList.ContainsKey(eurovocDict[srcLang][s].ToLower())) goldList.Add(eurovocDict[srcLang][s].ToLower(), new Dictionary<string,bool>());
                            if (!goldList[eurovocDict[srcLang][s].ToLower()].ContainsKey(eurovocDict[trgLang][s].ToLower())) goldList[eurovocDict[srcLang][s].ToLower()].Add(eurovocDict[trgLang][s].ToLower(),true);
                        }
                    }

                    string previousSrc = null;
                    foreach(StringComparisonElement sce in terms)
                    {
                        string currSrc = sce.src;
                        if (previousSrc!=currSrc.ToLower())
                        {
                            string src = sce.src.ToLower();
                            string trg = sce.trg.ToLower();
                            double alignScore = sce.similarity;
                            bool corr = false;
                            if (goldList.ContainsKey(src)&&goldList[src].ContainsKey(trg)) corr = true;
                            for (int s =0;s<scores.Count;s++)
                            {
                                if (scores[s]<=alignScore)
                                {
                                    if (corr) correct[s]++;
                                    total[s]++;
                                }
                            }
                            previousSrc = currSrc.ToLower();
                        }
                    }

                    for(int s=0;s<scores.Count;s++)
                    {
                        double corr = correct[s];
                        double tot = total[s];
                        double totCorr = totalForRec;
                        double prec = corr/tot*100;
                        double rec = corr/totCorr*100;
                        double f1 = prec*rec*2/(prec+rec);
                        sw.WriteLine(srcLang+"\t"+trgLang+"\t"+scores[s].ToString()+"\t"+corr.ToString()+"\t"+tot.ToString()+"\t"+totCorr.ToString()+"\t"+prec.ToString()+"\t"+rec.ToString()+"\t"+f1.ToString());
                    }
                    sw.Flush();
                    //}
                    //}
                    sw.Close();
                    //continue;
                    return;
                }
                string preprocessedOutputFile = outputFile+"."+srcLang+"_"+trgLang+".prep.txt";
                Dictionary<string,SimpleTermEntry> srcInitialList = StringListToDict(eurovocDict[srcLang]);
                Dictionary<string,SimpleTermEntry> trgInitialList = StringListToDict(eurovocDict[trgLang]);

                Log.Write ("Unprocessed source terms: "+srcInitialList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                Log.Write ("Unprocessed target terms: "+trgInitialList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                Dictionary<string, Dictionary<string, double>> srcDict = null;
                Dictionary<string, Dictionary<string, double>> trgDict = null;
                Dictionary<string, Dictionary<string, double>> srcToTrgDict = null;
                Dictionary<string, Dictionary<string, double>> trgToSrcDict = null;

                MPAlignerConfigurationTranslEntry srcTranslitConf = null;
                MPAlignerConfigurationTranslEntry trgTranslitConf = null;
                MPAlignerConfigurationTranslEntry srcToTrgTranslitConf = null;
                MPAlignerConfigurationTranslEntry trgToSrcTranslitConf = null;

                bool interlinguaDictUsed = ReadDictionaries(configuration,srcLang,trgLang, out srcDict, out trgDict, out srcToTrgDict, out trgToSrcDict);
                bool interlinguaTranslitUsed = GetTranslitConfig(configuration,srcLang,trgLang,out srcTranslitConf,out trgTranslitConf,out srcToTrgTranslitConf, out trgToSrcTranslitConf);

                Dictionary<string,ProcessedTermEntry> srcTermList = new Dictionary<string,ProcessedTermEntry>();
                Dictionary<string,ProcessedTermEntry> trgTermList = new Dictionary<string,ProcessedTermEntry>();

                if (File.Exists(preprocessedOutputFile))
                {
                    Log.Write("Preprocessed term data found! Reading pre-processed data to save time!", LogLevelType.WARNING,configuration);
                    PreprocessedTermData ptd1 = PreprocessedTermData.ReadFromFile(preprocessedOutputFile);
                    interlinguaDictUsed = ptd1.interlinguaDictUsed;
                    interlinguaTranslitUsed = ptd1.interlinguaTranslitUsed;
                    foreach(ProcessedTermEntry pte in ptd1.srcTerms)
                    {
                        if (!srcTermList.ContainsKey(pte.lowercaceForm))
                        {
                            srcTermList.Add(pte.lowercaceForm,pte);
                        }
                    }
                    foreach(ProcessedTermEntry pte in ptd1.trgTerms)
                    {
                        if (!trgTermList.ContainsKey(pte.lowercaceForm))
                        {
                            trgTermList.Add(pte.lowercaceForm,pte);
                        }
                    }
                }
                else if (interlinguaDictUsed&&interlinguaTranslitUsed)
                {
                    string dir = Path.GetDirectoryName(preprocessedOutputFile);
                    if (!dir.EndsWith(Path.DirectorySeparatorChar.ToString())) dir+=Path.DirectorySeparatorChar.ToString();
                    string prepSrcToTrgFile = dir+"eurovoc_preprocessed_"+srcLang+"_en.xml";
                    string prepTrgToSrcFile = dir+"eurovoc_preprocessed_"+trgLang+"_en.xml";
                    if (File.Exists(prepSrcToTrgFile))
                    {
                        Log.Write ("Reading processed term list: eurovoc_preprocessed_"+srcLang+"_en.xml",LogLevelType.LIMITED_OUTPUT,configuration);
                        srcTermList = ProcessedTermEntry.ReadFromFile(prepSrcToTrgFile);
                    }
                    if (File.Exists(prepTrgToSrcFile))
                    {
                        Log.Write ("Reading processed term list: eurovoc_preprocessed_"+trgLang+"_en.xml",LogLevelType.LIMITED_OUTPUT,configuration);
                        trgTermList = ProcessedTermEntry.ReadFromFile(prepTrgToSrcFile);
                    }
                }
                else if (!interlinguaDictUsed&&!interlinguaTranslitUsed)
                {
                    string dir = Path.GetDirectoryName(preprocessedOutputFile);
                    if (!dir.EndsWith(Path.DirectorySeparatorChar.ToString())) dir+=Path.DirectorySeparatorChar.ToString();
                    string prepSrcToTrgFile = dir+"eurovoc_preprocessed_"+srcLang+"_"+trgLang+".xml";
                    string prepTrgToSrcFile = dir+"eurovoc_preprocessed_"+trgLang+"_"+srcLang+".xml";
                    if (File.Exists(prepSrcToTrgFile))
                    {
                        Log.Write ("Reading processed term list: eurovoc_preprocessed_"+srcLang+"_"+trgLang+".xml",LogLevelType.LIMITED_OUTPUT,configuration);
                        srcTermList = ProcessedTermEntry.ReadFromFile(prepSrcToTrgFile);
                    }
                    if (File.Exists(prepTrgToSrcFile))
                    {
                        Log.Write ("Reading processed term list: eurovoc_preprocessed_"+trgLang+"_"+srcLang+".xml",LogLevelType.LIMITED_OUTPUT,configuration);
                        trgTermList = ProcessedTermEntry.ReadFromFile(prepTrgToSrcFile);
                    }
                }

                if (srcDict!=null||trgDict!=null)
                {
                    if (srcTranslitConf!=null && trgTranslitConf!=null)
                    {
                        if (srcTermList.Count<1)
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        if (trgTermList.Count<1)
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                    }
                    else
                    {
                        if (srcTermList.Count<1)
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        if (trgTermList.Count<1)
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                    }
                }
                else
                {
                    if (srcTranslitConf!=null && trgTranslitConf!=null)
                    {
                        if (srcTermList.Count<1)
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcToTrgDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        if (trgTermList.Count<1)
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgToSrcDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                    }
                    else
                    {
                        if (srcTermList.Count<1)
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcToTrgDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        if (trgTermList.Count<1)
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgToSrcDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                    }
                }
                Log.Write ("Pre-processed source terms: "+srcTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                Log.Write ("Pre-processed target terms: "+trgTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);

                ///The pre-processed terms are always saved to the pair-specific file here; it is read back above when it already exists.

                List<ProcessedTermEntry> srcTerms = new List<ProcessedTermEntry>(srcTermList.Values);
                List<ProcessedTermEntry> trgTerms = new List<ProcessedTermEntry>(trgTermList.Values);
                PreprocessedTermData ptd = new PreprocessedTermData();
                ptd.interlinguaDictUsed = interlinguaDictUsed;
                ptd.interlinguaTranslitUsed = interlinguaTranslitUsed;
                ptd.srcTerms = srcTerms.ToArray();
                ptd.trgTerms = trgTerms.ToArray();
                ptd.srcLang = srcLang;
                ptd.trgLang = trgLang;

                string outStr = MPFramework.MPFrameworkFunctions.SerializeObjectInstance<PreprocessedTermData>(ptd);
                File.WriteAllText(preprocessedOutputFile,outStr);

                Dictionary<string, Dictionary<string, bool>> excDict = null;
                ReadExceptionDictionary(configuration,srcLang, trgLang,out excDict);

                Dictionary<string,bool> srcStopWords = null;
                ReadStopwordList(configuration,srcLang,out srcStopWords);

                Dictionary<string,bool> trgStopWords = null;
                ReadStopwordList(configuration,trgLang,out trgStopWords);

                //Need to pre-set the alignment thresholds, otherwise these will be overridden by defaults.
                MPAlignerConfigurationLangPairEntry lpeConf = ReadLangPairConfig (srcLang, trgLang, configuration);

                List<AlignmentInfoElement> alignment = new List<AlignmentInfoElement>();
                if (configuration.useMultiThreadedExecution)
                {
                    alignment = Alignment.AlignPairsMultiThreaded(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                }
                else
                {
                    alignment = Alignment.AlignPairs(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                }
                //Multi-threaded execution is not stable at the moment...
                //List<AlignmentInfoElement> alignment = Alignment.AlignPairsMultiThreaded(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                AlignmentInfoElement.PrintList(outputFormat, alignmentOutputFile, alignment, configuration.printTopTrgForSrc, lpeConf,srcLang,trgLang,collectionId,domainId);
                {
                    List<double> scores = new List<double>();
                    double tmp = 0;
                    while (tmp<=1)
                    {
                        scores.Add(tmp);
                        tmp+=0.01;
                    }
                    List<double> correct = new List<double>();
                    for(int t=0;t<scores.Count;t++)
                    {
                        correct.Add(0);
                    }
                    List<double> total = new List<double>();
                    for(int t=0;t<scores.Count;t++)
                    {
                        total.Add(0);
                    }

                    int totalForRec = 0;
                    Dictionary<string,Dictionary<string,bool>> goldList = new Dictionary<string, Dictionary<string, bool>>();
                    for (int s = 0;s<eurovocDict[srcLang].Count;s++)
                    {
                            if (!eurovocDict[srcLang][s].ToLower().Contains("(under translation)")&&!eurovocDict[trgLang][s].ToLower().Contains("(under translation)"))
                        {
                            totalForRec++;
                                if (!goldList.ContainsKey(eurovocDict[srcLang][s].ToLower())) goldList.Add(eurovocDict[srcLang][s].ToLower(), new Dictionary<string,bool>());
                                if (!goldList[eurovocDict[srcLang][s].ToLower()].ContainsKey(eurovocDict[trgLang][s].ToLower())) goldList[eurovocDict[srcLang][s].ToLower()].Add(eurovocDict[trgLang][s].ToLower(),true);
                        }
                    }

                    string previousSrc = null;
                    alignment.Sort();
                    foreach(AlignmentInfoElement aie in alignment)
                    {
                        string currSrc = AlignmentInfoElement.GetStrFromEntry(aie.srcEntry.surfaceFormWords, aie.minSrcId, aie.maxSrcId);
                        if (previousSrc!=currSrc.ToLower())
                        {
                            string src = aie.srcEntry.surfaceForm.ToLower();
                            string trg = aie.trgEntry.surfaceForm.ToLower();
                            double alignScore = aie.alignmentScore;
                            bool corr = false;
                            if (goldList.ContainsKey(src)&&goldList[src].ContainsKey(trg)) corr = true;
                            for (int s =0;s<scores.Count;s++)
                            {
                                if (scores[s]<=alignScore)
                                {
                                    if (corr) correct[s]++;
                                    total[s]++;
                                }
                            }
                            previousSrc = currSrc.ToLower();
                        }
                    }

                    for(int s=0;s<scores.Count;s++)
                    {
                        double corr = correct[s];
                        double tot = total[s];
                        double totCorr = totalForRec;
                        double prec = corr/tot*100;
                        double rec = corr/totCorr*100;
                        double f1 = prec*rec*2/(prec+rec);
                        sw.WriteLine(srcLang+"\t"+trgLang+"\t"+scores[s].ToString()+"\t"+corr.ToString()+"\t"+tot.ToString()+"\t"+totCorr.ToString()+"\t"+prec.ToString()+"\t"+rec.ToString()+"\t"+f1.ToString());
                    }
                    sw.Flush();
                    //}
                    //}
                    sw.Close();
                }
            }
            if (File.Exists(tempTranslitFile)) File.Delete(tempTranslitFile);
            if (consolidateResults) {
                Log.Write ("Consolidating aligned term pairs with a threshold of: "+consolidationThreshold.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                //In the case if -ct (consolidation threshold) was defined and the output format has been ref_tabsep, the consolidation of results is perfomed.
                ConsolidationElement.ConsolidateRefTabsep(outputFile, consolidatedOutputFile,consolidationThreshold);
            }
        }
Example #11
0
        /// <summary>
        /// Aligns two parallel term lists position by position: the i-th source entry is aligned
        /// only with the i-th target entry, and one <see cref="AlignmentInfoElement"/> is produced
        /// for every position where both entries are non-null.
        /// </summary>
        /// <param name="configuration">Aligner configuration; its <c>langPairEntryDict</c> is searched for the "<c>srcLang_trgLang</c>" entry.</param>
        /// <param name="srcTermList">Source-side processed terms.</param>
        /// <param name="trgTermList">Target-side processed terms, paired by index with <paramref name="srcTermList"/>.</param>
        /// <param name="interlinguaDictUsed">Selects (together with <paramref name="interlinguaTranslitUsed"/>) which combination of word-level alignment passes is run.</param>
        /// <param name="interlinguaTranslitUsed">See <paramref name="interlinguaDictUsed"/>.</param>
        /// <param name="srcLang">Source language code (first half of the language-pair key).</param>
        /// <param name="trgLang">Target language code (second half of the language-pair key).</param>
        /// <param name="srcFile">Stored on every produced element as <c>srcFile</c>.</param>
        /// <param name="trgFile">Stored on every produced element as <c>trgFile</c>.</param>
        /// <param name="excDict">Passed through to <c>ConsolidateOverlaps</c>.</param>
        /// <param name="srcStopWords">Passed through to <c>CreateStrListsForEval</c>.</param>
        /// <param name="trgStopWords">Passed through to <c>CreateStrListsForEval</c>.</param>
        /// <returns>
        /// The produced alignment elements, or <c>null</c> when the configuration, its language-pair
        /// dictionary, either term list, or either language code is missing.
        /// </returns>
        public static List<AlignmentInfoElement> AlignListPairs(MPAlignerConfiguration configuration, List<ProcessedTermEntry> srcTermList, List<ProcessedTermEntry> trgTermList, bool interlinguaDictUsed, bool interlinguaTranslitUsed, string srcLang, string trgLang, string srcFile, string trgFile, Dictionary<string, Dictionary<string, bool>> excDict, Dictionary<string, bool> srcStopWords, Dictionary<string, bool> trgStopWords)
        {
            if (configuration == null||configuration.langPairEntryDict==null||srcTermList==null||trgTermList==null||string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
            {
                return null;
            }

            string langKey = srcLang+"_"+trgLang;

            // Use the configured language-pair entry when there is one; otherwise fall back to a
            // default entry that only carries the two language codes.
            MPAlignerConfigurationLangPairEntry lpeConf;
            if (!configuration.langPairEntryDict.TryGetValue(langKey, out lpeConf))
            {
                lpeConf = new MPAlignerConfigurationLangPairEntry();
                lpeConf.srcLang = srcLang;
                lpeConf.trgLang = trgLang;
            }

            // The lists are paired by index, so only positions present in both can be aligned.
            // Previously a shorter target list caused an out-of-range exception mid-way.
            int pairCount = srcTermList.Count;
            if (trgTermList.Count != srcTermList.Count)
            {
                if (trgTermList.Count < pairCount)
                {
                    pairCount = trgTermList.Count;
                }
                Log.Write("Source and target term lists differ in length ("+srcTermList.Count.ToString()+" vs "+trgTermList.Count.ToString()+"); only the first "+pairCount.ToString()+" pairs will be aligned.",LogLevelType.WARNING,configuration);
            }

            List<AlignmentInfoElement> res = new List<AlignmentInfoElement>();
            for(int i=0;i<pairCount;i++)
            {
                ProcessedTermEntry srcPte = srcTermList[i];
                ProcessedTermEntry trgPte = trgTermList[i];
                if (srcPte==null || trgPte==null)
                {
                    continue;
                }

                AlignmentInfoElement aie = new AlignmentInfoElement();
                List<WordAlignmentElement> srcToTrg = new List<WordAlignmentElement>();
                List<WordAlignmentElement> trgToSrc = new List<WordAlignmentElement>();
                // Reset the shared maxStrLen field before the alignment passes for this pair.
                maxStrLen = 0;

                // The last two integer arguments of every Align* call are the source-side and
                // target-side type codes:
                //   0 - dictionary,
                //   1 - simple translit,
                //   2 - target or source,
                //   3 - translit
                if (interlinguaDictUsed && interlinguaTranslitUsed)
                {
                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLATION
                    AlignStringProbabEntryListLists (lpeConf, srcPte.translationList, trgPte.translationList, srcToTrg, trgToSrc, 0, 0);

                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 0, 1);

                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLITERATION
                    AlignStringProbabEntryListLists (lpeConf, srcPte.translationList, trgPte.transliterationList, srcToTrg, trgToSrc, 0, 3);

                    //Translation is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.translationList, srcToTrg, trgToSrc, 1, 0);

                    //Translation is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLATION
                    AlignStringProbabEntryListLists (lpeConf, srcPte.transliterationList, trgPte.translationList, srcToTrg, trgToSrc, 3, 0);

                    //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 3, 1);

                    //Transliteration is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLITERATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.transliterationList, srcToTrg, trgToSrc, 1, 3);

                    //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLITERATION
                    AlignStringProbabEntryListLists (lpeConf, srcPte.transliterationList, trgPte.transliterationList, srcToTrg, trgToSrc, 3, 3);

                    //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);
                }
                else if (interlinguaTranslitUsed)
                {
                    //Translation is in target language; SOURCE TRANSLATION vs TARGET
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 0, 2);

                    //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 3, 1);

                    //Transliteration is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLITERATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.transliterationList, srcToTrg, trgToSrc, 1, 3);

                    //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLITERATION
                    // NOTE(review): the target type code here is 2, while the same call in the branch
                    // above uses 3 (translit) - confirm whether 2 is intentional before changing it.
                    AlignStringProbabEntryListLists (lpeConf, srcPte.transliterationList, trgPte.transliterationList, srcToTrg, trgToSrc, 3, 2);

                    //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);

                    //Translation is in target language; SOURCE vs TARGET TRANSLATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.translationList, srcToTrg, trgToSrc, 2, 0);
                }
                else if (interlinguaDictUsed)
                {
                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLATION
                    AlignStringProbabEntryListLists (lpeConf, srcPte.translationList, trgPte.translationList, srcToTrg, trgToSrc, 0, 0);

                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 0, 1);

                    //Translation is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.translationList, srcToTrg, trgToSrc, 1, 0);

                    //Transliteration is in target language; SOURCE vs TARGET TRANSLITERATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.transliterationList, srcToTrg, trgToSrc, 2, 3);

                    //Transliteration is in target language; SOURCE TRANSLITERATION vs TARGET
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 3, 2);

                    //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);
                }
                else
                {
                    //Translation is in target language; SOURCE TRANSLATION vs TARGET
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 0, 2);

                    //Transliteration is in target language; SOURCE TRANSLITERATION vs TARGET
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 3, 2);

                    //Translation is in target language; SOURCE vs TARGET TRANSLATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.translationList, srcToTrg, trgToSrc, 2, 0);

                    //Transliteration is in target language; SOURCE vs TARGET TRANSLITERATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.transliterationList, srcToTrg, trgToSrc, 2, 3);

                    //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);
                }
                aie.srcToTrgAlignments = srcToTrg;
                aie.trgToSrcAlignments = trgToSrc;

                aie.srcEntry = srcPte;
                aie.trgEntry = trgPte;

                ConsolidateOverlaps(lpeConf,aie, excDict);
                // The boolean returned by CreateStrListsForEval is deliberately ignored here: every
                // pair of the list is kept in the result regardless of it.
                CreateStrListsForEval(configuration,aie,srcStopWords,trgStopWords,false);
                aie.alignmentScore = EvaluateAlignmentScore(lpeConf,aie);
                //If you wish to debug the process, comment the lines below that clear the alignments...
                aie.srcToTrgAlignments.Clear();
                aie.trgToSrcAlignments.Clear();
                aie.consolidatedAlignment.Clear();
                aie.srcFile = srcFile;
                aie.trgFile = trgFile;
                res.Add(aie);
            }
            return res;
        }
Example #12
0
        /// <summary>
        /// Aligns every source term against every target term on a SmartThreadPool and returns the
        /// de-duplicated alignment elements produced by the workers.
        /// </summary>
        /// <remarks>
        /// Each (source, target) pair is queued as a work item running <c>AlignSingleTermPair</c>.
        /// All other inputs are published through the static fields assigned before queuing starts
        /// (presumably read by <c>AlignSingleTermPair</c> - verify against that method), so two
        /// concurrent calls of this method would overwrite each other's settings.
        /// Pending work items are harvested whenever 100000 of them have accumulated and once more at
        /// the end. The pool is always shut down, even when harvesting throws.
        /// </remarks>
        /// <param name="configuration">Aligner configuration; supplies <c>alignmentThreads</c> and the language-pair dictionary.</param>
        /// <param name="srcTerms">Source terms keyed by term string.</param>
        /// <param name="trgTerms">Target terms keyed by term string.</param>
        /// <param name="interlinguaDictUsed">Stored in <c>_interlinguaDictUsed</c> for the workers.</param>
        /// <param name="interlinguaTranslitUsed">Stored in <c>_interlinguaTranslitUsed</c> for the workers.</param>
        /// <param name="srcLang">Source language code (first half of the language-pair key).</param>
        /// <param name="trgLang">Target language code (second half of the language-pair key).</param>
        /// <param name="srcFile">Stored in <c>_srcFile</c> for the workers.</param>
        /// <param name="trgFile">Stored in <c>_trgFile</c> for the workers.</param>
        /// <param name="excDict">Stored in <c>_excDict</c> for the workers.</param>
        /// <param name="srcStopWords">Stored in <c>_srcStopWords</c> for the workers.</param>
        /// <param name="trgStopWords">Stored in <c>_trgStopWords</c> for the workers.</param>
        /// <returns>
        /// The collected alignment elements, or <c>null</c> when the configuration, its language-pair
        /// dictionary, either term dictionary, or either language code is missing.
        /// </returns>
        public static List<AlignmentInfoElement> AlignPairsMultiThreaded(MPAlignerConfiguration configuration, Dictionary<string, ProcessedTermEntry> srcTerms, Dictionary<string, ProcessedTermEntry> trgTerms, bool interlinguaDictUsed, bool interlinguaTranslitUsed, string srcLang, string trgLang, string srcFile, string trgFile, Dictionary<string, Dictionary<string, bool>> excDict, Dictionary<string, bool> srcStopWords, Dictionary<string, bool> trgStopWords)
        {
            if (configuration == null||configuration.langPairEntryDict==null||srcTerms==null||trgTerms==null||string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
            {
                return null;
            }
            Log.Write ("Starting alignmet of "+ srcTerms.Count.ToString()+" "+srcLang+" and "+ trgTerms.Count.ToString()+" "+trgLang+" terms.",LogLevelType.LIMITED_OUTPUT);

            // Number of queued work items after which results are harvested, so that the list of
            // pending results does not grow without bound.
            const int maxPendingWorkItems = 100000;

            int threadCount = configuration.alignmentThreads;

            STPStartInfo stpStartInfo = new STPStartInfo();
            stpStartInfo.IdleTimeout = 100*1000;
            stpStartInfo.MaxWorkerThreads = 5*threadCount;
            stpStartInfo.MinWorkerThreads = threadCount;
            stpStartInfo.EnableLocalPerformanceCounters = true;

            string langKey = srcLang+"_"+trgLang;

            // Use the configured language-pair entry when there is one; otherwise fall back to a
            // default entry that only carries the two language codes.
            MPAlignerConfigurationLangPairEntry lpeConf;
            if (!configuration.langPairEntryDict.TryGetValue(langKey, out lpeConf))
            {
                lpeConf = new MPAlignerConfigurationLangPairEntry();
                lpeConf.srcLang = srcLang;
                lpeConf.trgLang = trgLang;
            }

            List<AlignmentInfoElement> res = new List<AlignmentInfoElement>();
            // alignedList[src][trg] marks (alignedLowSrcStr, alignedLowTrgStr) pairs already in res.
            Dictionary<string,Dictionary<string,bool>> alignedList = new Dictionary<string, Dictionary<string, bool>>();
            List<IWorkItemResult<AlignmentInfoElement>> wirList = new List<IWorkItemResult<AlignmentInfoElement>>(1000);

            // Publish the shared inputs before any work item is queued.
            _configuration = configuration;
            _interlinguaDictUsed=interlinguaDictUsed;
            _interlinguaTranslitUsed=interlinguaTranslitUsed;
            _srcFile=srcFile;
            _trgFile=trgFile;
            _excDict=excDict;
            _srcStopWords=srcStopWords;
            _trgStopWords=trgStopWords;
            _lpeConf=lpeConf;

            SmartThreadPool smartThreadPool = new SmartThreadPool(stpStartInfo);
            try
            {
                int counter = 0;
                foreach(KeyValuePair<string, ProcessedTermEntry> srcPair in srcTerms)
                {
                    // Console progress: a dot every 50 source terms, the running count every 1000.
                    counter++;
                    if (counter%50==0)
                    {
                        Console.Write(".");
                        if (counter%1000==0)
                        {
                            Console.WriteLine(" - "+counter.ToString());
                        }
                    }
                    ProcessedTermEntry srcPte = srcPair.Value;
                    foreach(KeyValuePair<string, ProcessedTermEntry> trgPair in trgTerms)
                    {
                        if (wirList.Count>=maxPendingWorkItems)
                        {
                            HarvestAlignmentWorkItems(smartThreadPool, wirList, alignedList, res);
                        }
                        try
                        {
                            IWorkItemResult<AlignmentInfoElement> wir = smartThreadPool.QueueWorkItem(
                                new Amib.Threading.Func<ProcessedTermEntry, ProcessedTermEntry, AlignmentInfoElement>(AlignSingleTermPair), srcPte, trgPair.Value);
                            if (wir!=null)
                            {
                                wirList.Add(wir);
                            }
                        }
                        catch
                        {
                            // Best effort: a pair that cannot be queued is skipped, not fatal.
                            Log.Write("Thread exception catched - cannot create a new thread within term alignment!", LogLevelType.WARNING);
                        }
                    }
                }
                if (wirList.Count>0)
                {
                    HarvestAlignmentWorkItems(smartThreadPool, wirList, alignedList, res);
                }
            }
            finally
            {
                // Best-effort teardown with a single retry. Failures are logged instead of being
                // swallowed, and the pool reference is only dropped after both attempts, so the retry
                // can no longer dereference a null pool.
                for(int attempt=0;attempt<2;attempt++)
                {
                    try
                    {
                        smartThreadPool.Shutdown(true,100);
                        smartThreadPool.Dispose();
                        break;
                    }
                    catch (Exception ex)
                    {
                        Log.Write("Shutting down the alignment thread pool failed: "+ex.Message, LogLevelType.WARNING);
                    }
                }
                smartThreadPool = null;
                GC.Collect();
                GC.WaitForPendingFinalizers();
            }
            Log.Write ("Alignmet finished - "+ res.Count.ToString()+" term pairs aligned over the alignment threshold " +lpeConf.finalAlignmentThr.ToString()+".\n",LogLevelType.LIMITED_OUTPUT);
            return res;
        }

        /// <summary>
        /// Waits for the pool to become idle, moves the results of all pending work items into
        /// <paramref name="res"/> (skipping null results and pairs already recorded in
        /// <paramref name="alignedList"/>), then clears <paramref name="wirList"/>.
        /// </summary>
        /// <param name="smartThreadPool">Pool the work items were queued on.</param>
        /// <param name="wirList">Pending work item handles; emptied on return.</param>
        /// <param name="alignedList">Already collected (alignedLowSrcStr, alignedLowTrgStr) pairs; updated in place.</param>
        /// <param name="res">Receives every newly accepted alignment element.</param>
        private static void HarvestAlignmentWorkItems(SmartThreadPool smartThreadPool, List<IWorkItemResult<AlignmentInfoElement>> wirList, Dictionary<string, Dictionary<string, bool>> alignedList, List<AlignmentInfoElement> res)
        {
            // A work item still running after WaitForIdle gets up to 100 x 100 ms to finish.
            const int maxCompletionPolls = 100;
            const int completionPollMilliseconds = 100;

            smartThreadPool.WaitForIdle();

            int skipped = 0;
            for(int i=0;i<wirList.Count;i++)
            {
                IWorkItemResult<AlignmentInfoElement> wir = wirList[i];

                int times = maxCompletionPolls;
                while(!wir.IsCompleted && times>0)
                {
                    times--;
                    System.Threading.Thread.Sleep(completionPollMilliseconds);
                }

                // Items that never completed or that failed with an exception yield no result.
                if (!wir.IsCompleted || wir.Exception!=null)
                {
                    skipped++;
                    continue;
                }

                AlignmentInfoElement aie = (AlignmentInfoElement)wir.Result;
                if (aie==null)
                {
                    continue;
                }

                Dictionary<string, bool> seenTargets;
                if (!alignedList.TryGetValue(aie.alignedLowSrcStr, out seenTargets))
                {
                    seenTargets = new Dictionary<string, bool>();
                    alignedList.Add(aie.alignedLowSrcStr, seenTargets);
                }
                if (!seenTargets.ContainsKey(aie.alignedLowTrgStr))
                {
                    seenTargets.Add(aie.alignedLowTrgStr, true);
                    res.Add(aie);
                }
            }
            wirList.Clear();

            if (skipped>0)
            {
                Log.Write(skipped.ToString()+" term pair alignment work items failed or did not complete in time and were skipped.", LogLevelType.WARNING);
            }
        }
Example #13
0
        public static List<AlignmentInfoElement> AlignPairs(MPAlignerConfiguration configuration, Dictionary<string, ProcessedTermEntry> srcTerms, Dictionary<string, ProcessedTermEntry> trgTerms, bool interlinguaDictUsed, bool interlinguaTranslitUsed, string srcLang, string trgLang, string srcFile, string trgFile, Dictionary<string, Dictionary<string, bool>> excDict, Dictionary<string, bool> srcStopWords, Dictionary<string, bool> trgStopWords)
        {
            if (configuration == null||configuration.langPairEntryDict==null||string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
            {
                return null;
            }
            Log.Write ("Starting alignmet of "+ srcTerms.Count.ToString()+" "+srcLang+" and "+ trgTerms.Count.ToString()+" "+trgLang+" terms.",LogLevelType.LIMITED_OUTPUT);

            string langKey = srcLang+"_"+trgLang;

            MPAlignerConfigurationLangPairEntry lpeConf = new MPAlignerConfigurationLangPairEntry();
            if (configuration.langPairEntryDict.ContainsKey(langKey))
            {
                lpeConf = configuration.langPairEntryDict[langKey];
            }
            else
            {
                lpeConf = new MPAlignerConfigurationLangPairEntry();
                lpeConf.srcLang = srcLang;
                lpeConf.trgLang = trgLang;
            }
            int counter = 0;

            Dictionary<string,Dictionary<string,bool>> alignedList = new Dictionary<string, Dictionary<string, bool>>();
            List<AlignmentInfoElement> res = new List<AlignmentInfoElement>();
            foreach(string srcTerm in srcTerms.Keys)
            {
                counter++;
                if (counter%50==0)
                {
                    Console.Write(".");
                    if (counter%1000==0)
                    {
                        Console.WriteLine(" - "+counter.ToString());
                    }
                }
                ProcessedTermEntry srcPte = srcTerms[srcTerm];
                foreach(string trgTerm in trgTerms.Keys)
                {
                    ProcessedTermEntry trgPte = trgTerms[trgTerm];
                    if (srcPte!=null && trgPte!=null)
                    {
                        AlignmentInfoElement aie = new AlignmentInfoElement();
                        List<WordAlignmentElement> srcToTrg = new List<WordAlignmentElement>();
                        List<WordAlignmentElement> trgToSrc = new List<WordAlignmentElement>();
                        maxStrLen = 0;

                        if (interlinguaDictUsed && interlinguaTranslitUsed)
                        {

                            ///Types:
                            /// 0 - dictionary,
                            /// 1 - simple translit,
                            /// 2 - target,
                            /// 3 - translit

                            //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLATION
                            AlignStringProbabEntryListLists (lpeConf, srcPte.translationList, trgPte.translationList, srcToTrg, trgToSrc, 0, 0);

                            //Translation is in EN language; SOURCE TRANSLATION vs TARGET SIMPLE TRANSLITERATION
                            AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 0, 1);

                            //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLITERATION
                            AlignStringProbabEntryListLists (lpeConf, srcPte.translationList, trgPte.transliterationList, srcToTrg, trgToSrc, 0, 3);

                            //Translation is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLATION
                            AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.translationList, srcToTrg, trgToSrc, 1, 0);

                            //Translation is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLATION
                            AlignStringProbabEntryListLists (lpeConf, srcPte.transliterationList, trgPte.translationList, srcToTrg, trgToSrc, 3, 0);

                            //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                            AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 3, 1);

                            //Transliteration is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLITERATION
                            AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.transliterationList, srcToTrg, trgToSrc, 1, 3);

                            //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLITERATION
                            AlignStringProbabEntryListLists (lpeConf, srcPte.transliterationList, trgPte.transliterationList, srcToTrg, trgToSrc, 3, 3);

                            //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                            AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);

                        }
                        else if (interlinguaTranslitUsed)
                        {
                            //Translation is in target language; SOURCE TRANSLATION vs TARGET
                            AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 0, 2);

                            //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                            AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 3, 1);

                            //Transliteration is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLITERATION
                            AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.transliterationList, srcToTrg, trgToSrc, 1, 3);

                            //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLITERATION
                            AlignStringProbabEntryListLists (lpeConf, srcPte.transliterationList, trgPte.transliterationList, srcToTrg, trgToSrc, 3, 2);

                            //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                            AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);

                            //Translation is in target language; SOURCE vs TARGET TRANSLATION
                            AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.translationList, srcToTrg, trgToSrc, 2, 0);
                        }
                        else if (interlinguaDictUsed)
                        {
                            //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLATION
                            AlignStringProbabEntryListLists (lpeConf, srcPte.translationList, trgPte.translationList, srcToTrg, trgToSrc, 0, 0);

                            //Translation is in EN language; SOURCE TRANSLATION vs TARGET SIMPLE TRANSLITERATION
                            AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 0, 1);

                            //Translation is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLATION
                            AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.translationList, srcToTrg, trgToSrc, 1, 0);

                            //Transliteration is in target language; SOURCE TRANSLITERATION vs TARGET
                            AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 3, 2);

                            //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                            AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);

                            //Transliteration is in target language; SOURCE vs TARGET TRANSLITERATION
                            AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.transliterationList, srcToTrg, trgToSrc, 2, 3);

                        }
                        else
                        {
                            //Translation is in target language; SOURCE TRANSLATION vs TARGET
                            AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 0, 2);

                            //Transliteration is in target language; SOURCE TRANSLITERATION vs TARGET
                            AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 3, 2);

                            //Translation is in target language; SOURCE vs TARGET TRANSLATION
                            AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.translationList, srcToTrg, trgToSrc, 2, 0);

                            //Transliteration is in target language; SOURCE vs TARGET TRANSLITERATION
                            AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.transliterationList, srcToTrg, trgToSrc, 2, 3);

                            //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                            AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);

                        }
                        aie.srcToTrgAlignments = srcToTrg;
                        aie.trgToSrcAlignments = trgToSrc;
                        aie.srcToTrgAlignments.Sort(
                            delegate(WordAlignmentElement w1, WordAlignmentElement w2)
                            {
                                double avgW1Overlap = (w1.fromOverlap+w1.toOverlap)/2;
                                double avgW2Overlap = (w2.fromOverlap+w2.toOverlap)/2;
                                // Descending sort of toOverlap's if the
                                if (avgW1Overlap!=avgW2Overlap)
                                {
                                    return avgW2Overlap.CompareTo(avgW1Overlap);
                                }
                                if (w2.fromLen == w1.fromLen)
                                {
                                    if (w2.toOverlap==w1.toOverlap)
                                    {
                                        return w1.fromId.CompareTo(w2.fromId);
                                    }
                                    return w2.toOverlap.CompareTo(w1.toOverlap);
                                }
                                return w2.fromLen.CompareTo(w1.fromLen);
                            }
                        );
                        aie.trgToSrcAlignments.Sort(
                            delegate(WordAlignmentElement w1, WordAlignmentElement w2)
                            {
                                double avgW1Overlap = (w1.fromOverlap+w1.toOverlap)/2;
                                double avgW2Overlap = (w2.fromOverlap+w2.toOverlap)/2;
                                // Descending sort of toOverlap's if the
                                if (avgW1Overlap!=avgW2Overlap)
                                {
                                    return avgW2Overlap.CompareTo(avgW1Overlap);
                                }
                                // Descending sort of toOverlap's if the
                                if (w2.toLen == w1.toLen)
                                {
                                    if (w2.fromOverlap==w1.fromOverlap)
                                    {
                                        return w1.toId.CompareTo(w2.toId);
                                    }
                                    return w2.fromOverlap.CompareTo(w1.fromOverlap);
                                }
                                return w2.toLen.CompareTo(w1.toLen);
                            }
                        );

                        aie.srcEntry = srcPte;
                        aie.trgEntry = trgPte;

                        ConsolidateOverlaps(lpeConf,aie, excDict);
                        if(CreateStrListsForEval(configuration,aie,srcStopWords,trgStopWords))
                        {
                            if (!alignedList.ContainsKey(aie.alignedLowSrcStr)||!alignedList[aie.alignedLowSrcStr].ContainsKey(aie.alignedLowTrgStr))
                            {
                                if (!alignedList.ContainsKey(aie.alignedLowSrcStr)) alignedList.Add(aie.alignedLowSrcStr,new Dictionary<string, bool>());
                                if (!alignedList[aie.alignedLowSrcStr].ContainsKey(aie.alignedLowTrgStr)) alignedList[aie.alignedLowSrcStr].Add(aie.alignedLowTrgStr,true);

                                aie.alignmentScore = EvaluateAlignmentScore(lpeConf,aie);
                                if (aie.alignmentScore>=lpeConf.finalAlignmentThr)
                                {
                                    //If you wish to debug the process, comment the lines below that clear the alignments...
                                    aie.srcToTrgAlignments.Clear();
                                    aie.trgToSrcAlignments.Clear();
                                    aie.consolidatedAlignment.Clear();
                                    aie.srcFile = srcFile;
                                    aie.trgFile = trgFile;
                                    res.Add(aie);
                                }
                            }
                        }
                    }
                }
            }
            Console.WriteLine(" - "+counter.ToString());
            Log.Write ("Alignmet finished - "+ res.Count.ToString()+" term pairs aligned over the alignment threshold " +lpeConf.finalAlignmentThr.ToString()+".\n",LogLevelType.LIMITED_OUTPUT);
            return res;
        }
Example #14
0
        /// <summary>
        /// Prepares an alignment for scoring: walks <c>aie.consolidatedAlignment</c>, builds the source and
        /// target comparison strings, accumulates the source/target probability multipliers and determines
        /// the range of word IDs covered by the alignment.
        /// </summary>
        /// <param name="conf">Configuration; only <c>allowTrimmedAlignments</c> is read here.</param>
        /// <param name="aie">Alignment to process. Its ID range, multipliers and string fields are overwritten.</param>
        /// <param name="srcStopWords">Source stop words (only the keys are consulted).</param>
        /// <param name="trgStopWords">Target stop words (only the keys are consulted).</param>
        /// <param name="stripListsOnError">
        /// When true, the four string fields of <paramref name="aie"/> are set to empty strings whenever the
        /// alignment is rejected after processing has started.
        /// </param>
        /// <returns>
        /// True when the alignment is usable and <c>alignedLowSrcStr</c>/<c>alignedLowTrgStr</c> have been set.
        /// False when a required object is null, a word ID is outside its entry, the alignment does not cover
        /// the whole entries while trimming is disallowed, or either side contains only stop words.
        /// </returns>
        public static bool CreateStrListsForEval(MPAlignerConfiguration conf, AlignmentInfoElement aie, Dictionary<string,bool> srcStopWords, Dictionary<string,bool> trgStopWords, bool stripListsOnError = true)
        {
            if (aie == null || aie.consolidatedAlignment == null || aie.srcEntry == null || aie.trgEntry == null) {
                return false;
            }

            var srcWords = aie.srcEntry.lowercaseWords;
            var trgWords = aie.trgEntry.lowercaseWords;

            aie.minSrcId = Int32.MaxValue;
            aie.minTrgId = Int32.MaxValue;
            aie.maxSrcId = Int32.MinValue;
            aie.maxTrgId = Int32.MinValue;

            double srcMultiplier = 1;
            double trgMultiplier = 1;

            int prevFromId = -1;
            int prevToId = -1;

            // Range of aligned word IDs when stop words are ignored.
            int minSrcNonStop = Int32.MaxValue;
            int maxSrcNonStop = Int32.MinValue;
            int minTrgNonStop = Int32.MaxValue;
            int maxTrgNonStop = Int32.MinValue;

            StringBuilder src = new StringBuilder ();
            StringBuilder trg = new StringBuilder ();

            // Word IDs that have contributed to the comparison strings.
            Dictionary<int,bool> srcIds = new Dictionary<int, bool> ();
            Dictionary<int,bool> trgIds = new Dictionary<int, bool> ();

            bool onlyStopSrc = true;
            bool onlyStopTrg = true;

            foreach (WordAlignmentElement wae in aie.consolidatedAlignment) {
                // Validate the IDs BEFORE they are used as indexes; otherwise a corrupt alignment
                // throws here and the boundary check further below can never reject it.
                if (wae.fromId < 0 || wae.fromId >= srcWords.Count || wae.toId < 0 || wae.toId >= trgWords.Count) {
                    ResetEvalIdsToFullRange (aie);
                    return StripEvalStringsAndFail (aie, stripListsOnError);
                }

                string srcWord = srcWords [wae.fromId];
                string trgWord = trgWords [wae.toId];
                double srcLen = srcWord.Length;
                double trgLen = trgWord.Length;
                bool srcIsStop = srcStopWords.ContainsKey (srcWord);
                bool trgIsStop = trgStopWords.ContainsKey (trgWord);

                aie.minSrcId = Math.Min (aie.minSrcId, wae.fromId);
                aie.maxSrcId = Math.Max (aie.maxSrcId, wae.fromId);
                aie.minTrgId = Math.Min (aie.minTrgId, wae.toId);
                aie.maxTrgId = Math.Max (aie.maxTrgId, wae.toId);
                if (!srcIsStop) {
                    minSrcNonStop = Math.Min (minSrcNonStop, wae.fromId);
                    maxSrcNonStop = Math.Max (maxSrcNonStop, wae.fromId);
                }
                if (!trgIsStop) {
                    minTrgNonStop = Math.Min (minTrgNonStop, wae.toId);
                    maxTrgNonStop = Math.Max (maxTrgNonStop, wae.toId);
                }

                //TODO: For the future - there is a limitation that you cannot evaluate alignments where for one token you have acquired overlaps in different languages (f.e., term is in [EN][LV] aligned segments, but the final string can only be in EN - it cannot be split in two parts!).
                // With trimming enabled, an element only contributes when both of its IDs lie inside
                // the non-stop-word ranges seen SO FAR (the ranges grow as the loop advances).
                bool insideNonStopRange = minSrcNonStop <= wae.fromId && maxSrcNonStop >= wae.fromId
                    && minTrgNonStop <= wae.toId && maxTrgNonStop >= wae.toId;
                if (conf.allowTrimmedAlignments && !insideNonStopRange) {
                    continue;
                }

                // A word is appended only when its ID differs from the one handled by the previous
                // contributing element, so consecutive one-to-many alignments add the shared word once.
                bool newSrcWord = wae.fromId != prevFromId;
                bool newTrgWord = wae.toId != prevToId;

                if (newSrcWord) {
                    prevFromId = wae.fromId;
                    if (!srcIsStop) {
                        onlyStopSrc = false;
                    }
                    src.Append (GetCorrectString (aie.srcEntry, wae.fromId, wae.fromType, wae.fromTypeId));
                    srcMultiplier *= (((aie.srcEntry.len - srcLen) / aie.srcEntry.len) + (srcLen / aie.srcEntry.len) * GetProbab (aie.srcEntry, wae.fromId, wae.fromType, wae.fromTypeId));
                }
                if (newTrgWord) {
                    prevToId = wae.toId;
                    if (!trgIsStop) {
                        onlyStopTrg = false;
                    }
                    trg.Append (GetCorrectString (aie.trgEntry, wae.toId, wae.toType, wae.toTypeId));
                    trgMultiplier *= (((aie.trgEntry.len - trgLen) / aie.trgEntry.len) + (trgLen / aie.trgEntry.len) * GetProbab (aie.trgEntry, wae.toId, wae.toType, wae.toTypeId));
                }

                srcIds [wae.fromId] = true;
                trgIds [wae.toId] = true;
            }

            //Try to find words in the middle of the current alignment that have not been aligned.
            //If such are found, penalise the source and target alignments by the length of the wrong alignment:
            //the unaligned word is appended to its own side and the same number of spaces to the other side.
            for (int i = Math.Max (0, aie.minSrcId); i <= aie.maxSrcId && i < srcWords.Count; i++) {
                if (srcIds.ContainsKey (i)) {
                    continue;
                }
                string str = srcWords [i];
                if (!srcStopWords.ContainsKey (str)) {
                    onlyStopSrc = false;
                }
                src.Append (str);
                trg.Append (new String (' ', str.Length));
            }

            for (int i = Math.Max (0, aie.minTrgId); i <= aie.maxTrgId && i < trgWords.Count; i++) {
                if (trgIds.ContainsKey (i)) {
                    continue;
                }
                string str = trgWords [i];
                if (!trgStopWords.ContainsKey (str)) {
                    onlyStopTrg = false;
                }
                trg.Append (str);
                src.Append (new String (' ', str.Length));
            }

            if (src.Length > 0) {
                aie.srcStrForAlignment = src.ToString ();
                aie.trgStrForAlignment = trg.ToString ();
            }
            aie.srcMultiplier = srcMultiplier;
            aie.trgMultiplier = trgMultiplier;

            bool wasBad = false;
            if (!conf.allowTrimmedAlignments) {
                // Without trimming the alignment has to span both entries from the first to the last word.
                wasBad = aie.minSrcId > 0 || aie.minTrgId > 0
                    || aie.maxSrcId + 1 < srcWords.Count || aie.maxTrgId + 1 < trgWords.Count;
                ResetEvalIdsToFullRange (aie);
            } else {
                bool nonStopRangeValid = minSrcNonStop >= 0 && minSrcNonStop < srcWords.Count
                    && maxSrcNonStop >= 0 && maxSrcNonStop < srcWords.Count
                    && minTrgNonStop >= 0 && minTrgNonStop < trgWords.Count
                    && maxTrgNonStop >= 0 && maxTrgNonStop < trgWords.Count;
                if (nonStopRangeValid) {
                    // Trim the reported range to the outermost aligned non-stop words.
                    aie.minSrcId = minSrcNonStop;
                    aie.minTrgId = minTrgNonStop;
                    aie.maxSrcId = maxSrcNonStop;
                    aie.maxTrgId = maxTrgNonStop;
                } else if (stripListsOnError) {
                    return StripEvalStringsAndFail (aie, true);
                }
            }

            //Just to be on the safe side - check whether the ID's are in index boundaries:
            if (aie.minSrcId < 0 || aie.minTrgId < 0 || aie.maxSrcId + 1 > srcWords.Count || aie.maxTrgId + 1 > trgWords.Count) {
                ResetEvalIdsToFullRange (aie);
                return StripEvalStringsAndFail (aie, stripListsOnError);
            }

            if (wasBad || onlyStopSrc || onlyStopTrg) {
                return StripEvalStringsAndFail (aie, stripListsOnError);
            }

            aie.alignedLowSrcStr = AlignmentInfoElement.GetStrFromEntry(srcWords,aie.minSrcId,aie.maxSrcId);
            aie.alignedLowTrgStr = AlignmentInfoElement.GetStrFromEntry(trgWords,aie.minTrgId,aie.maxTrgId);
            return true;
        }

        /// <summary>
        /// Rejection path of <see cref="CreateStrListsForEval"/>: optionally empties the comparison and
        /// aligned strings of <paramref name="aie"/> and always returns false.
        /// </summary>
        private static bool StripEvalStringsAndFail (AlignmentInfoElement aie, bool stripLists)
        {
            if (stripLists) {
                aie.srcStrForAlignment = "";
                aie.trgStrForAlignment = "";
                aie.alignedLowSrcStr = "";
                aie.alignedLowTrgStr = "";
            }
            return false;
        }

        /// <summary>
        /// Sets the min/max word IDs of <paramref name="aie"/> so that they span every word of the source
        /// and target entries.
        /// </summary>
        private static void ResetEvalIdsToFullRange (AlignmentInfoElement aie)
        {
            aie.minSrcId = 0;
            aie.minTrgId = 0;
            aie.maxSrcId = aie.srcEntry.lowercaseWords.Count - 1;
            aie.maxTrgId = aie.trgEntry.lowercaseWords.Count - 1;
        }