Esempio n. 1
0
        /// <summary>
        /// Merges the flags of <paramref name="mapTwo"/> (and, optionally, of
        /// <paramref name="formerMap"/>) into <paramref name="mapOne"/>.
        /// </summary>
        /// <remarks>
        /// The maps may have different lengths. Position <c>i</c> of <paramref name="mapOne"/> is
        /// projected onto the other maps as <c>i * otherLength / mapOne.Length</c> (integer division).
        /// For every position:
        /// the flag is set when the projected <paramref name="mapTwo"/> flag is set;
        /// otherwise it is cleared when the projected <paramref name="formerMap"/> flag is set;
        /// otherwise it is left unchanged.
        /// The update is done in place: the returned array is the same instance as
        /// <paramref name="mapOne"/>.
        /// An empty <paramref name="mapTwo"/> or <paramref name="formerMap"/> is treated as a map
        /// with no flag set.
        /// </remarks>
        /// <param name="lpeConf">Language pair configuration; not read by this method.</param>
        /// <param name="mapOne">The map that is updated and returned. Must not be null.</param>
        /// <param name="mapTwo">The map whose set flags are copied into the result. Must not be null.</param>
        /// <param name="formerMap">Optional earlier map; may be null.</param>
        /// <returns>The updated <paramref name="mapOne"/> instance.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="mapOne"/> or <paramref name="mapTwo"/> is null.
        /// </exception>
        public static bool[] AdjustAlignmentMap(MPAlignerConfigurationLangPairEntry lpeConf,bool[] mapOne, bool[] mapTwo, bool[] formerMap)
        {
            //The first constructor argument is the parameter name, the second one is the message.
            if (mapOne == null)
            {
                throw new ArgumentNullException("mapOne", "Either mapOne or mapTwo are empty (null)!");
            }
            if (mapTwo == null)
            {
                throw new ArgumentNullException("mapTwo", "Either mapOne or mapTwo are empty (null)!");
            }

            bool[] res = mapOne;

            //The lengths are kept as long values so that i * length cannot overflow for large maps.
            long l1 = mapOne.Length;
            long l2 = mapTwo.Length;
            long l3 = formerMap != null ? formerMap.Length : 0;

            for (int i = 0; i < mapOne.Length; i++)
            {
                if (l2 > 0 && mapTwo[(i * l2) / l1])
                {
                    res[i] = true;
                }
                else if (l3 > 0 && formerMap[(i * l3) / l1])
                {
                    res[i] = false; //If we changed the alignments, we need to remove alignments from the former alignment map.
                }
            }
            return res;
        }
Esempio n. 2
0
        /// <summary>
        /// Appends the given alignments to <paramref name="outputFile"/> by delegating to
        /// <c>PrintTabsep</c> (with its top-source flag fixed to <c>false</c>).
        /// </summary>
        /// <remarks>
        /// The file is opened in append mode and written as UTF-8 without a byte order mark.
        /// Nothing is written (and the file is not opened) when <paramref name="list"/> is null.
        /// </remarks>
        /// <param name="outputFormat">Output format identifier, passed through to <c>PrintTabsep</c>.</param>
        /// <param name="outputFile">Path of the file to append to.</param>
        /// <param name="list">Alignments to write; may be null.</param>
        /// <param name="lpeConf">Language pair configuration; a default instance is used when null.</param>
        /// <param name="srcLang">Source language, passed through to <c>PrintTabsep</c>.</param>
        /// <param name="trgLang">Target language, passed through to <c>PrintTabsep</c>.</param>
        /// <param name="collectionId">Collection identifier, passed through to <c>PrintTabsep</c>.</param>
        /// <param name="domain">Domain identifier, passed through to <c>PrintTabsep</c>.</param>
        public static void AppendList(string outputFormat, string outputFile, List <AlignmentInfoElement> list, MPAlignerConfigurationLangPairEntry lpeConf = null, string srcLang = "", string trgLang = "", string collectionId = "", string domain = "")
        {
            if (list == null)
            {
                return;
            }

            MPAlignerConfigurationLangPairEntry lpeConfig = lpeConf != null ? lpeConf : new MPAlignerConfigurationLangPairEntry();
            Encoding utf8WithoutBom = new UTF8Encoding(false);

            //The using block guarantees that the writer is flushed and closed even if printing throws.
            using (StreamWriter sw = new StreamWriter(outputFile, true, utf8WithoutBom))
            {
                PrintTabsep(sw, outputFormat, list, false, lpeConfig, srcLang, trgLang, collectionId, domain);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Writes the given alignments to <paramref name="outputFile"/>, overwriting existing content.
        /// </summary>
        /// <remarks>
        /// When <paramref name="outputFormat"/> equals "xml" (case-insensitively), the list is sorted
        /// in place (the caller's list is reordered), filtered and serialized as a whole. An entry is kept when
        /// its source and target strings are both non-blank, its alignment score is at least
        /// <c>printThr</c> of the language pair configuration and, if <paramref name="printTopSrc"/>
        /// is set, its lower-cased source string differs from that of the previously kept entry.
        /// For every other format the list is handed to <c>PrintTabsep</c>.
        /// The output is UTF-8 without a byte order mark. Nothing is written when
        /// <paramref name="list"/> is null.
        /// </remarks>
        /// <param name="outputFormat">Output format identifier; must not be null.</param>
        /// <param name="outputFile">Path of the file to write.</param>
        /// <param name="list">Alignments to write; may be null.</param>
        /// <param name="printTopSrc">Whether to keep only the first of consecutive entries that share a source string.</param>
        /// <param name="lpeConf">Language pair configuration; a default instance is used when null.</param>
        /// <param name="srcLang">Source language, passed through to <c>PrintTabsep</c>.</param>
        /// <param name="trgLang">Target language, passed through to <c>PrintTabsep</c>.</param>
        /// <param name="collectionId">Collection identifier, passed through to <c>PrintTabsep</c>.</param>
        /// <param name="domain">Domain identifier, passed through to <c>PrintTabsep</c>.</param>
        public static void PrintList(string outputFormat, string outputFile, List <AlignmentInfoElement> list, bool printTopSrc, MPAlignerConfigurationLangPairEntry lpeConf = null, string srcLang = "", string trgLang = "", string collectionId = "", string domain = "")
        {
            if (list == null)
            {
                return;
            }

            MPAlignerConfigurationLangPairEntry lpeConfig = lpeConf != null ? lpeConf : new MPAlignerConfigurationLangPairEntry();
            Encoding utf8WithoutBom = new UTF8Encoding(false);

            if (outputFormat.ToLower() == "xml")
            {
                list.Sort();
                List <AlignmentInfoElement> newList = new List <AlignmentInfoElement>();
                string previousSrc = null;
                foreach (AlignmentInfoElement aie in list)
                {
                    string currSrc = GetStrFromEntry(aie.srcEntry.surfaceFormWords, aie.minSrcId, aie.maxSrcId, aie.srcEntry.lemmaSeq).Trim();
                    string currTrg = GetStrFromEntry(aie.trgEntry.surfaceFormWords, aie.minTrgId, aie.maxTrgId, aie.trgEntry.lemmaSeq).Trim();
                    if (string.IsNullOrWhiteSpace(currTrg) || string.IsNullOrWhiteSpace(currSrc))
                    {
                        continue;
                    }

                    //Lower-case once; the value is used both for the comparison and as the new "previous" marker.
                    string currSrcLower = currSrc.ToLower();
                    if ((!printTopSrc || previousSrc != currSrcLower) && aie.alignmentScore >= lpeConfig.printThr)
                    {
                        newList.Add(aie);
                        previousSrc = currSrcLower;
                    }
                }
                string outStr = MPFramework.MPFrameworkFunctions.SerializeObjectInstance <List <AlignmentInfoElement> >(newList);
                File.WriteAllText(outputFile, outStr, utf8WithoutBom);
            }
            else
            {
                //The using block guarantees that the writer is flushed and closed even if printing throws.
                using (StreamWriter sw = new StreamWriter(outputFile, false, utf8WithoutBom))
                {
                    PrintTabsep(sw, outputFormat, list, printTopSrc, lpeConfig, srcLang, trgLang, collectionId, domain);
                }
            }
        }
Esempio n. 4
0
 /// <summary>
 /// Reads the language pair specific configuration - term alignment thresholds.
 /// If the configuration has no entry for the language pair, a default entry is created,
 /// registered in the configuration under the key "srcLang_trgLang" and returned.
 /// </summary>
 /// <returns>The language pair configuration.</returns>
 /// <param name="srcLang">Source language.</param>
 /// <param name="trgLang">Target language.</param>
 /// <param name="configuration">Configuration; its language pair dictionary is extended when the pair is missing.</param>
 static MPAlignerConfigurationLangPairEntry ReadLangPairConfig(string srcLang, string trgLang, MPAlignerConfiguration configuration)
 {
     string langKey = srcLang + "_" + trgLang;
     MPAlignerConfigurationLangPairEntry lpeConf;
     //A single lookup both tests for the key and fetches the entry.
     if (configuration.langPairEntryDict.TryGetValue (langKey, out lpeConf)) {
         return lpeConf;
     }

     //No entry for this language pair - create one with default thresholds and remember it.
     lpeConf = new MPAlignerConfigurationLangPairEntry ();
     lpeConf.srcLang = srcLang;
     lpeConf.trgLang = trgLang;
     lpeConf.finalAlignmentThr = 0.6;
     lpeConf.printThr = 0.6;//A default value of 0.6 is usually the lowest value that is still reasonable for the cognate-based overlaps, therefore, we default to 0.6. However, for different applications the threshold could be raised even higher.
     configuration.langPairEntryDict.Add (langKey, lpeConf);
     return lpeConf;
 }
Esempio n. 5
0
        public static void Main(string[] args)
        {
            string configFile = null;
            string method = null;
            string inputFile = null;
            string inputFormat = "tagged_plaintext";//Allowed values: tagged_plaintext, preprocessed_terms, term_list
            string srcInputFile = null;
            string trgInputFile = null;
            string srcLang = null;
            string trgLang = null;
            string outputFile = null;
            string consolidatedOutputFile = null;
            string outputFormat = "";//"tabsep";//Allowed values: ref_tabsep, tabsep, xml
            string preProcessedTermOutputFile = null;//"/home/marcis/Dropbox/MonoProjects/MPAligner/MPAligner/bin/Debug/testTermData.xml";//null;
            string tempTranslitFile = null;
            bool consolidateResults = false;
            double consolidationThreshold = 0;
            //bool logPrepData = false;
            string domainId = "";
            string collectionId = "";
            //The skipping parameters are just for debugging. Use them only manually!
            string skipSrc = "";
            string skipTrg = "";
            MPAlignerConfiguration configuration = null;
            //Read all configuration parameters from the command line.
            for (int i=0; i<args.Length; i++) {
                if ((args [i] == "-c" || args [i] == "--configuration") && args.Length > i + 1) {
                    configFile = args [i + 1];
                    configuration = new MPAlignerConfiguration ();
                    configuration.Load (configFile);
                } else if ((args [i] == "-m" || args [i] == "--method") && args.Length > i + 1) {
                    method = args [i + 1];
                } else if ((args [i] == "-i" || args [i] == "--input-file") && args.Length > i + 1) {
                    inputFile = args [i + 1];
                    //} else if (args [i] == "-lp" || args [i] == "--log-pre-processed") {
                    //	logPrepData = true;
                } else if ((args [i] == "-if" || args [i] == "--input-format") && args.Length > i + 1) {
                    inputFormat = args [i + 1];
                } else if ((args [i] == "-si" || args [i] == "--source-input") && args.Length > i + 1) {
                    srcInputFile = args [i + 1];
                } else if ((args [i] == "-ti" || args [i] == "--target-input") && args.Length > i + 1) {
                    trgInputFile = args [i + 1];
                } else if ((args [i] == "-sl" || args [i] == "--source-language") && args.Length > i + 1) {
                    srcLang = MPFramework.MPFrameworkFunctions.GetValidLangString (args [i + 1]);
                } else if ((args [i] == "-tl" || args [i] == "--target-language") && args.Length > i + 1) {
                    trgLang = MPFramework.MPFrameworkFunctions.GetValidLangString (args [i + 1]);
                } else if ((args [i] == "-o" || args [i] == "--output-file") && args.Length > i + 1) {
                    outputFile = args [i + 1];
                } else if ((args [i] == "-of" || args [i] == "--output-format") && args.Length > i + 1) {
                    outputFormat = args [i + 1];
                } else if ((args [i] == "-pto" || args [i] == "--pre-processed-term-output-file") && args.Length > i + 1) {
                    preProcessedTermOutputFile = args [i + 1];
                } else if ((args [i] == "-ttf" || args [i] == "--temp-translit-file") && args.Length > i + 1) {
                    tempTranslitFile = args [i + 1];
                } else if ((args [i] == "-ss" || args [i] == "--skip-source-file") && args.Length > i + 1) {
                    skipSrc = args [i + 1];
                } else if ((args [i] == "-st" || args [i] == "--skip-target-file") && args.Length > i + 1) {
                    skipTrg = args [i + 1];
                } else if ((args [i] == "-d_id" || args [i] == "--domain-id") && args.Length > i + 1) {
                    domainId = args [i + 1];
                } else if ((args [i] == "-c_id" || args [i] == "--collection-id") && args.Length > i + 1) {
                    collectionId = args [i + 1];
                } else if ((args [i] == "-ct" || args [i] == "--consolidation-threshold") && args.Length > i + 1) {
                    //Consolidation works only if the ref_tabsep output format is specified!
                    NumberFormatInfo nfi = new NumberFormatInfo ();
                    nfi.CurrencyDecimalSeparator = ".";
                    nfi.NumberDecimalSeparator = ".";
                    nfi.PercentDecimalSeparator = ".";
                    consolidationThreshold = Convert.ToDouble (args [i + 1], nfi);
                    consolidateResults = true;
                }
            }
            //Break if a method is not defined.
            if (string.IsNullOrWhiteSpace (method)) {
                Log.Write ("Method not specified!",LogLevelType.ERROR,configuration);
                PrintUsage ();
                return;
            }
            //Write a configuration file to the output file if the config method is specified.
            if (method.ToLower () == "config") {
                if (string.IsNullOrWhiteSpace (outputFile)) {
                    Log.Write("Output file not specified!",LogLevelType.ERROR,configuration);
                    PrintUsage ();
                    return;
                }
                MPAlignerConfiguration conf = new MPAlignerConfiguration ();
                MPAlignerConfigurationDictEntry cde = new MPAlignerConfigurationDictEntry ();
                cde.srcLang = "lv";
                cde.trgLang = "en";
                cde.path = "/home/marcis/TILDE/RESOURCES/DICT/lv_en_noisy";
                conf.dictConfEntryDict.Add ("lv_en", cde);
                cde = new MPAlignerConfigurationDictEntry ();
                cde.srcLang = "lt";
                cde.trgLang = "en";
                cde.path = "/home/marcis/TILDE/RESOURCES/DICT/lt_en";
                conf.dictConfEntryDict.Add ("lt_en", cde);
                MPAlignerConfigurationTranslEntry cte = new MPAlignerConfigurationTranslEntry ();
                cte.mosesIniPath = "/home/marcis/TILDE/RESOURCES/TRANSLIT_WORKING_DIR/LV-EN/lv-en-binarised-model.moses.ini";
                cte.srcLang = "lv";
                cte.trgLang = "en";
                conf.translConfEntryDict.Add ("lv_en", cte);
                cte = new MPAlignerConfigurationTranslEntry ();
                cte.mosesIniPath = "/home/marcis/TILDE/RESOURCES/TRANSLIT_WORKING_DIR/LV-EN/lt-en-binarised-model.moses.ini";
                cte.srcLang = "lt";
                cte.trgLang = "en";
                conf.translConfEntryDict.Add ("lt_en", cte);
                MPAlignerConfigurationLangPairEntry lpe = new MPAlignerConfigurationLangPairEntry ();
                lpe.srcLang = "lv";
                lpe.trgLang = "en";
                conf.langPairEntryDict.Add ("lv_en", lpe);
                lpe = new MPAlignerConfigurationLangPairEntry ();
                lpe.srcLang = "lt";
                lpe.trgLang = "en";
                conf.langPairEntryDict.Add ("lt_en", lpe);
                MPAlignerConfigurationExceptionEntry cee = new MPAlignerConfigurationExceptionEntry ();
                cee.srcLang = "lv";
                cee.trgLang = "en";
                cee.path = "/home/marcis/TILDE/RESOURCES/EXC_DICT/lv_en_exc";
                conf.excDictEntryDict.Add ("lv_en", cee);
                cee = new MPAlignerConfigurationExceptionEntry ();
                cee.srcLang = "lt";
                cee.trgLang = "en";
                cee.path = "/home/marcis/TILDE/RESOURCES/EXC_DICT/lt_en_exc";
                conf.excDictEntryDict.Add ("lt_en", cee);
                MPAlignerConfigurationStopWordListEntry cswle = new MPAlignerConfigurationStopWordListEntry ();
                cswle.lang = "lv";
                cswle.path = "/home/marcis/TILDE/RESOURCES/STOP_WORD/lv_stop";
                conf.stopWordListEntryDict.Add ("lv", cswle);
                cswle = new MPAlignerConfigurationStopWordListEntry ();
                cswle.lang = "lt";
                cswle.path = "/home/marcis/TILDE/RESOURCES/STOP_WORD/lt_stop";
                conf.stopWordListEntryDict.Add ("lt", cswle);
                cswle = new MPAlignerConfigurationStopWordListEntry ();
                cswle.lang = "en";
                cswle.path = "/home/marcis/TILDE/RESOURCES/STOP_WORD/en_stop";
                conf.stopWordListEntryDict.Add ("en", cswle);
                conf.Save (outputFile);
                return;
            }

            //Try reading the default configuration if none is passed, but if the default configuration can not be found, break.
            if (string.IsNullOrWhiteSpace (configFile) && File.Exists ("MPAlignerConfig.xml")) {
                configuration = new MPAlignerConfiguration ();
                configuration.Load (configFile);
            } else if (string.IsNullOrWhiteSpace (configFile)) {
                Log.Write("Configuration file missing in application directory and a substitution runtime configuration file is not specified!",LogLevelType.ERROR,configuration);
                PrintUsage ();
                return;
            }

            //In the case if an output format is not defined in the command line, read it from the configuration file.
            if (string.IsNullOrWhiteSpace (outputFormat))
                outputFormat = configuration.outputFormat;

            //In the case if the configuration does not specify an output format, use the default output format.
            if (string.IsNullOrWhiteSpace (outputFormat)) {
                outputFormat = "ref_tabsep";
            }

            Log.confLogLevel = configuration.logLevel;

            if (string.IsNullOrWhiteSpace (tempTranslitFile)) {
                tempTranslitFile = outputFile+".tmp";
            }

            Log.Write ("configFile: "+(configFile!=null?configFile:""),LogLevelType.LIMITED_OUTPUT,configuration);
            Log.Write ("method: "+(method!=null?method:""),LogLevelType.LIMITED_OUTPUT,configuration);
            Log.Write ("inputFile: "+(inputFile!=null?inputFile:""),LogLevelType.LIMITED_OUTPUT,configuration);
            Log.Write ("inputFormat: "+(inputFormat!=null?inputFormat:""),LogLevelType.LIMITED_OUTPUT,configuration);
            Log.Write ("srcInputFile: "+(srcInputFile!=null?srcInputFile:""),LogLevelType.LIMITED_OUTPUT,configuration);
            Log.Write ("trgInputFile: "+(trgInputFile!=null?trgInputFile:""),LogLevelType.LIMITED_OUTPUT,configuration);
            Log.Write ("srcLang: "+(srcLang!=null?srcLang:""),LogLevelType.LIMITED_OUTPUT,configuration);
            Log.Write ("trgLang: "+(trgLang!=null?trgLang:""),LogLevelType.LIMITED_OUTPUT,configuration);
            Log.Write ("outputFile: "+(outputFile!=null?outputFile:""),LogLevelType.LIMITED_OUTPUT,configuration);
            Log.Write ("outputFormat: "+(outputFormat!=null?outputFormat:""),LogLevelType.LIMITED_OUTPUT,configuration);
            Log.Write ("preProcessedTermOutputFile: "+(preProcessedTermOutputFile!=null?preProcessedTermOutputFile:""),LogLevelType.LIMITED_OUTPUT,configuration);
            Log.Write ("tempTranslitFile: "+(tempTranslitFile!=null?tempTranslitFile:""),LogLevelType.LIMITED_OUTPUT,configuration);
            Log.Write ("consolidation threshold: "+(consolidateResults?consolidationThreshold.ToString():""),LogLevelType.LIMITED_OUTPUT,configuration);

            if (outputFormat == "ref_tabsep" && consolidateResults) {
                consolidatedOutputFile = outputFile;
                outputFile += ".raw";
            }

            //For document pair-based alignment.
            if (method.ToLower () == "taggedfilepairs") {
                char[] sep = {'\t'};
                if (string.IsNullOrWhiteSpace(inputFile)||!File.Exists(inputFile))
                {
                    Log.Write("Input file list file not specified or cannot be found!",LogLevelType.ERROR,configuration);
                    PrintUsage();
                    return;
                }
                if (string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
                {
                    Log.Write("Source and/or target languages not specified!",LogLevelType.ERROR,configuration);
                    PrintUsage();
                    return;
                }
                //Read the alignment thresholds and other language pair specific numerical/single-value parameters.
                MPAlignerConfigurationLangPairEntry lpeConf = ReadLangPairConfig (srcLang, trgLang, configuration);
                //The size of the cache may affect the performance of the alignment!
                Dictionary<string, ProcessedTermEntry> srcTermCache = new Dictionary<string, ProcessedTermEntry>();
                Dictionary<string, ProcessedTermEntry> trgTermCache = new Dictionary<string, ProcessedTermEntry>();

                bool interlinguaDictUsed = false;
                bool interlinguaTranslitUsed = false;

                //Define dictionaries for pre-processing.
                Dictionary<string, Dictionary<string, double>> srcDict = null;
                Dictionary<string, Dictionary<string, double>> trgDict = null;
                Dictionary<string, Dictionary<string, double>> srcToTrgDict = null;
                Dictionary<string, Dictionary<string, double>> trgToSrcDict = null;

                //Define transliteration configurations for pre-processing.
                MPAlignerConfigurationTranslEntry srcTranslitConf = null;
                MPAlignerConfigurationTranslEntry trgTranslitConf = null;
                MPAlignerConfigurationTranslEntry srcToTrgTranslitConf = null;
                MPAlignerConfigurationTranslEntry trgToSrcTranslitConf = null;

                //Read dictionaries and transliterations.
                interlinguaDictUsed = ReadDictionaries(configuration,srcLang,trgLang, out srcDict, out trgDict, out srcToTrgDict, out trgToSrcDict);
                interlinguaTranslitUsed = GetTranslitConfig(configuration,srcLang,trgLang,out srcTranslitConf,out trgTranslitConf,out srcToTrgTranslitConf, out trgToSrcTranslitConf);

                //Define the alignments (the variable holding alignment results)
                Dictionary<string,Dictionary<string, AlignmentInfoElement>> alignments = new Dictionary<string, Dictionary<string, AlignmentInfoElement>>();

                //Define and read exception dictionaries.
                Dictionary<string, Dictionary<string, bool>> excDict = null;
                ReadExceptionDictionary(configuration,srcLang, trgLang,out excDict);

                //Define and read stopword lists.
                Dictionary<string,bool> srcStopWords = null;
                ReadStopwordList(configuration,srcLang,out srcStopWords);
                Dictionary<string,bool> trgStopWords = null;
                ReadStopwordList(configuration,trgLang,out trgStopWords);

                StreamReader sr = new StreamReader(inputFile,Encoding.UTF8);
                int pairCounter = 0;
                bool skip = !string.IsNullOrWhiteSpace(skipSrc)&&!string.IsNullOrWhiteSpace(skipTrg)?true:false;

                //Read input document alignment file and process file pairs.
                while(!sr.EndOfStream)
                {
                    pairCounter++;
                    string line = sr.ReadLine().Trim();
                    if (string.IsNullOrWhiteSpace(line)) continue;
                    string[] arr = line.Split(sep, StringSplitOptions.RemoveEmptyEntries);
                    if (arr.Length<2)
                    {
                        continue; //If the alignment line does not contain at least two entries, the document alignment is not valid.
                    }
                    string srcFile = arr[0];
                    string trgFile = arr[1];
                    if (!File.Exists(srcFile))
                    {
                        Log.Write("Input file \""+srcFile+"\" cannot be found!",LogLevelType.WARNING,configuration);
                        continue;
                    }
                    if (!File.Exists(trgFile))
                    {
                        Log.Write("Input file \""+trgFile+"\" cannot be found!",LogLevelType.WARNING,configuration);
                        continue;
                    }
                    string srcFileName = Path.GetFileName(srcFile);
                    string trgFileName = Path.GetFileName(trgFile);

                    //The skipping condition is for debugging - if the system crashes due to insufficient memory...
                    if (skip)
                    {
                        if (srcFileName==skipSrc&&trgFileName == skipTrg)
                        {
                            skip = false;
                        }
                        else
                        {
                            Log.Write("Skipping file pair "+srcFileName+" and " + trgFileName+".",LogLevelType.WARNING,configuration);
                            continue;
                        }
                    }

                    Log.Write("Processing file pair "+srcFileName+" and " + trgFileName+".",LogLevelType.LIMITED_OUTPUT,configuration);

                    //Define term entry data variables (used for sotring terms in pre-pre-processed and pre-processed states).
                    Dictionary<string,SimpleTermEntry> srcInitialList = new Dictionary<string, SimpleTermEntry>();
                    Dictionary<string,SimpleTermEntry> trgInitialList = new Dictionary<string, SimpleTermEntry>();
                    Dictionary<string,SimpleTermEntry> srcInitialTempList = new Dictionary<string, SimpleTermEntry>();
                    Dictionary<string,SimpleTermEntry> trgInitialTempList = new Dictionary<string, SimpleTermEntry>();

                    Dictionary<string, ProcessedTermEntry> srcTermList = new Dictionary<string, ProcessedTermEntry>();
                    Dictionary<string, ProcessedTermEntry> trgTermList = new Dictionary<string, ProcessedTermEntry>();
                    Dictionary<string, ProcessedTermEntry> srcTermTempList = new Dictionary<string, ProcessedTermEntry>();
                    Dictionary<string, ProcessedTermEntry> trgTermTempList = new Dictionary<string, ProcessedTermEntry>();

                    //Two input formats are currently supported - term-tagged plaintext files and term list (one term per line) files.
                    if (inputFormat=="tagged_plaintext")
                    {
                        //Read terms from the term-tagged documents.
                        srcInitialTempList = TermTaggedFileParser.ParseTermTaggedFile(srcFile,Encoding.UTF8, configuration.concLen);
                        trgInitialTempList = TermTaggedFileParser.ParseTermTaggedFile(trgFile,Encoding.UTF8, configuration.concLen);
                    }
                    else
                    {
                        //Read terms from the term list files.
                        srcInitialTempList = ListFileParser.Parse(srcFile,Encoding.UTF8);
                        trgInitialTempList = ListFileParser.Parse(trgFile,Encoding.UTF8);
                    }

                    //Search for already pre-processed source terms in the cache.
                    foreach(string term in srcInitialTempList.Keys)
                    {
                        string lower = term.ToLower();
                        if (srcTermCache.ContainsKey(lower))
                        {
                            if (!srcTermList.ContainsKey(lower)) srcTermList.Add(lower, srcTermCache[lower]);
                        }
                        else
                        {
                            srcInitialList.Add(term, srcInitialTempList[term]);
                        }
                    }

                    //Search for already pre-processed target terms in the cache.
                    foreach(string term in trgInitialTempList.Keys)
                    {
                        string lower = term.ToLower();
                        if (trgTermCache.ContainsKey(lower))
                        {
                            if (!trgTermList.ContainsKey(lower)) trgTermList.Add(lower, trgTermCache[lower]);
                        }
                        else
                        {
                            trgInitialList.Add(term, trgInitialTempList[term]);
                        }
                    }

                    //Now pre-process terms that have not been pre-processed again.
                    if (srcDict!=null||trgDict!=null)
                    {
                        if (srcTranslitConf!=null && trgTranslitConf!=null)
                        {
                            srcTermTempList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermTempList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                        else
                        {
                            srcTermTempList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermTempList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                    }
                    else
                    {
                        if (srcTranslitConf!=null && trgTranslitConf!=null)
                        {
                            srcTermTempList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcToTrgDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermTempList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgToSrcDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                        else
                        {
                            srcTermTempList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcToTrgDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermTempList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgToSrcDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                    }

                    //Update the pre-processed term list for alignment.
                    foreach(string srcTerm in srcTermTempList.Keys)
                    {
                        if (!srcTermList.ContainsKey(srcTerm)) srcTermList.Add(srcTerm,srcTermTempList[srcTerm]);
                        if (!srcTermCache.ContainsKey(srcTerm)) srcTermCache.Add(srcTerm, srcTermTempList[srcTerm]);
                    }

                    foreach(string trgTerm in trgTermTempList.Keys)
                    {
                        if (!trgTermList.ContainsKey(trgTerm)) trgTermList.Add(trgTerm,trgTermTempList[trgTerm]);
                        if (!trgTermCache.ContainsKey(trgTerm)) trgTermCache.Add(trgTerm, trgTermTempList[trgTerm]);
                    }

                    //Execute alignment for one file pair.
                    List<AlignmentInfoElement> alignment = new List<AlignmentInfoElement>();
                    //The execution may be multi-threaded or single-threaded. The multi-threaded execution may be instable. Therefore, be careful when using multi-threading.
                    if (configuration.useMultiThreadedExecution)
                    {
                        alignment = Alignment.AlignPairsMultiThreaded(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcFile, trgFile, excDict, srcStopWords, trgStopWords);
                    }
                    else
                    {
                        alignment = Alignment.AlignPairs(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcFile, trgFile, excDict, srcStopWords, trgStopWords);
                    }
                    if (alignment!=null)
                    {
                        foreach(AlignmentInfoElement aie in alignment)
                        {
                            if (!alignments.ContainsKey(aie.srcEntry.lowercaceForm))
                            {
                                alignments.Add(aie.srcEntry.lowercaceForm, new Dictionary<string, AlignmentInfoElement>());
                            }
                            if (!alignments[aie.srcEntry.lowercaceForm].ContainsKey(aie.trgEntry.lowercaceForm))
                            {
                                alignments[aie.srcEntry.lowercaceForm].Add(aie.trgEntry.lowercaceForm, aie);
                            }
                        }
                    }
                    //If pre-processed term cache is full, empty it (this maybe can be imrpoved with the help of some sort of a flowing cache (always circulating).
                    if (srcTermCache.Count>50000)
                    {
                        srcTermCache.Clear();
                        srcTermCache = new Dictionary<string, ProcessedTermEntry>();
                        GC.Collect();
                        GC.WaitForPendingFinalizers();
                    }
                    if (trgTermCache.Count>50000)
                    {
                        trgTermCache.Clear();
                        trgTermCache = new Dictionary<string, ProcessedTermEntry>();
                        GC.Collect();
                        GC.WaitForPendingFinalizers();
                    }
                    //After each 50 pairs, print rsults.
                    if (pairCounter%50==0||alignments.Count>50000)
                    {
                        Log.Write("Printing intermediate results after "+pairCounter.ToString()+" file pairs",LogLevelType.LIMITED_OUTPUT,configuration);
                        List<AlignmentInfoElement> resAlignment = new List<AlignmentInfoElement>();
                        foreach(string src in alignments.Keys)
                        {
                            foreach(string trg in alignments[src].Keys)
                            {
                                resAlignment.Add(alignments[src][trg]);
                            }
                        }
                        AlignmentInfoElement.AppendList(outputFormat,outputFile,resAlignment,lpeConf,srcLang,trgLang,collectionId,domainId);
                        alignments.Clear();
                        alignments = new Dictionary<string, Dictionary<string, AlignmentInfoElement>>();
                        GC.Collect();
                        GC.WaitForPendingFinalizers();
                    }
                }
                sr.Close();
                //If there are alignments left, write them to the output file.
                if (!string.IsNullOrWhiteSpace(outputFile))
                {
                    Log.Write("Printing final results after "+pairCounter.ToString()+" file pairs",LogLevelType.LIMITED_OUTPUT,configuration);
                    List<AlignmentInfoElement> resAlignment = new List<AlignmentInfoElement>();
                    foreach(string src in alignments.Keys)
                    {
                        foreach(string trg in alignments[src].Keys)
                        {
                            resAlignment.Add(alignments[src][trg]);
                        }
                    }
                    AlignmentInfoElement.AppendList(outputFormat,outputFile,resAlignment,lpeConf,srcLang,trgLang,collectionId,domainId);
                }
            }
            else if (method.ToLower () == "singletaggedpair") //TODO: REFACTOR (the file pair list processing could be handled (wisely) through a single file pair processing method!!!
            {
                //Define the instances of source and target processed term lists.
                Dictionary<string, ProcessedTermEntry> srcTermList = new Dictionary<string, ProcessedTermEntry>();
                Dictionary<string, ProcessedTermEntry> trgTermList = new Dictionary<string, ProcessedTermEntry>();
                bool interlinguaDictUsed = false;
                bool interlinguaTranslitUsed = false;

                if (inputFormat=="preprocessed_terms")
                {
                    if (string.IsNullOrWhiteSpace(inputFile)||!File.Exists(inputFile))
                    {
                        Log.Write("Pre-processed term input file not specified or cannot be found!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }
                    if (string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
                    {
                        Log.Write("Source and/or target languages not specified!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }
                    PreprocessedTermData ptd = PreprocessedTermData.ReadFromFile(inputFile);
                    foreach(ProcessedTermEntry pte in ptd.srcTerms)
                    {
                        if(!srcTermList.ContainsKey(pte.lowercaceForm))
                        {
                            srcTermList.Add(pte.lowercaceForm,pte);
                        }
                    }
                    foreach(ProcessedTermEntry pte in ptd.trgTerms)
                    {
                        if(!trgTermList.ContainsKey(pte.lowercaceForm))
                        {
                            trgTermList.Add(pte.lowercaceForm,pte);
                        }
                    }
                    srcLang = ptd.srcLang;
                    trgLang = ptd.trgLang;
                    interlinguaDictUsed = ptd.interlinguaDictUsed;
                    interlinguaTranslitUsed = ptd.interlinguaTranslitUsed;

                    Dictionary<string, Dictionary<string, bool>> excDict = null;
                    ReadExceptionDictionary(configuration,srcLang, trgLang,out excDict);

                    Dictionary<string,bool> srcStopWords = null;
                    ReadStopwordList(configuration,srcLang,out srcStopWords);

                    Dictionary<string,bool> trgStopWords = null;
                    ReadStopwordList(configuration,trgLang,out trgStopWords);

                    if (!string.IsNullOrWhiteSpace(outputFile))
                    {
                        List<AlignmentInfoElement> alignment = new List<AlignmentInfoElement>();
                        if (configuration.useMultiThreadedExecution)
                        {
                            alignment = Alignment.AlignPairsMultiThreaded(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                        }
                        else
                        {
                            alignment = Alignment.AlignPairs(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                        }
                        AlignmentInfoElement.PrintList(outputFormat,outputFile,alignment, configuration.printTopTrgForSrc,null,srcLang,trgLang,collectionId,domainId);
                    }
                }
                else if (inputFormat=="term_list"||inputFormat=="tagged_plaintext")
                {
                    if (string.IsNullOrWhiteSpace(srcInputFile)||!File.Exists(srcInputFile)||string.IsNullOrWhiteSpace(trgInputFile)||!File.Exists(trgInputFile))
                    {
                        Log.Write("Source and/or target files not specified or cannot be found!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }
                    if (string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
                    {
                        Log.Write("Source and/or target languages not specified!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }

                    Dictionary<string,SimpleTermEntry> srcInitialList = new Dictionary<string, SimpleTermEntry>();
                    Dictionary<string,SimpleTermEntry> trgInitialList = new Dictionary<string, SimpleTermEntry>();

                    if (inputFormat=="tagged_plaintext")
                    {
                        srcInitialList = TermTaggedFileParser.ParseTermTaggedFile(srcInputFile,Encoding.UTF8, configuration.concLen);
                        trgInitialList = TermTaggedFileParser.ParseTermTaggedFile(trgInputFile,Encoding.UTF8, configuration.concLen);
                    }
                    else
                    {
                        srcInitialList = ListFileParser.Parse(srcInputFile,Encoding.UTF8);
                        trgInitialList = ListFileParser.Parse(trgInputFile,Encoding.UTF8);
                    }
                    Log.Write ("Unprocessed source terms: "+srcInitialList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                    Log.Write ("Unprocessed target terms: "+trgInitialList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                    Dictionary<string, Dictionary<string, double>> srcDict = null;
                    Dictionary<string, Dictionary<string, double>> trgDict = null;
                    Dictionary<string, Dictionary<string, double>> srcToTrgDict = null;
                    Dictionary<string, Dictionary<string, double>> trgToSrcDict = null;

                    MPAlignerConfigurationTranslEntry srcTranslitConf = null;
                    MPAlignerConfigurationTranslEntry trgTranslitConf = null;
                    MPAlignerConfigurationTranslEntry srcToTrgTranslitConf = null;
                    MPAlignerConfigurationTranslEntry trgToSrcTranslitConf = null;

                    interlinguaDictUsed = ReadDictionaries(configuration,srcLang,trgLang, out srcDict, out trgDict, out srcToTrgDict, out trgToSrcDict);
                    interlinguaTranslitUsed = GetTranslitConfig(configuration,srcLang,trgLang,out srcTranslitConf,out trgTranslitConf,out srcToTrgTranslitConf, out trgToSrcTranslitConf);

                    if (srcDict!=null||trgDict!=null)
                    {
                        if (srcTranslitConf!=null && trgTranslitConf!=null)
                        {
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                        else
                        {
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                    }
                    else
                    {
                        if (srcTranslitConf!=null && trgTranslitConf!=null)
                        {
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcToTrgDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgToSrcDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                        else
                        {
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcToTrgDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgToSrcDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                    }
                    Log.Write ("Pre-processed source terms: "+srcTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                    Log.Write ("Pre-processed target terms: "+trgTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);

                    ///If pre-processed terms should be saved for future use, they are serialized and written to the pre-processed term output file.
                    /// This functionality is not available for the file pair list-based processing.
                    if (!string.IsNullOrWhiteSpace(preProcessedTermOutputFile))
                    {
                        List<ProcessedTermEntry> srcTerms = new List<ProcessedTermEntry>(srcTermList.Values);
                        List<ProcessedTermEntry> trgTerms = new List<ProcessedTermEntry>(trgTermList.Values);
                        PreprocessedTermData ptd = new PreprocessedTermData();
                        ptd.interlinguaDictUsed = interlinguaDictUsed;
                        ptd.interlinguaTranslitUsed = interlinguaTranslitUsed;
                        ptd.srcTerms = srcTerms.ToArray();
                        ptd.trgTerms = trgTerms.ToArray();
                        ptd.srcLang = srcLang;
                        ptd.trgLang = trgLang;
                        string outStr = MPFramework.MPFrameworkFunctions.SerializeObjectInstance<PreprocessedTermData>(ptd);
                        File.WriteAllText(preProcessedTermOutputFile,outStr);
                    }

                    Dictionary<string, Dictionary<string, bool>> excDict = null;
                    ReadExceptionDictionary(configuration,srcLang, trgLang,out excDict);

                    Dictionary<string,bool> srcStopWords = null;
                    ReadStopwordList(configuration,srcLang,out srcStopWords);

                    Dictionary<string,bool> trgStopWords = null;
                    ReadStopwordList(configuration,trgLang,out trgStopWords);

                    if (!string.IsNullOrWhiteSpace(outputFile))
                    {
                        List<AlignmentInfoElement> alignment = new List<AlignmentInfoElement>();
                        if (configuration.useMultiThreadedExecution)
                        {
                            alignment = Alignment.AlignPairsMultiThreaded(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                        }
                        else
                        {
                            alignment = Alignment.AlignPairs(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                        }
                        AlignmentInfoElement.PrintList(outputFormat,outputFile,alignment, configuration.printTopTrgForSrc,null,srcLang,trgLang,collectionId,domainId);
                    }
                }
                else
                {
                    Log.Write ("Input format UNKNOWN or UNDEFINED.",LogLevelType.ERROR,configuration);
                    return;
                }
            }
            else if (method.ToLower () == "singletermpairlist") //Use this method only if filtering of term pairs or some sort of evaluation is necessary!
            {
                //Define the instances of source and target processed term lists.
                List<ProcessedTermEntry> srcTermList = new List<ProcessedTermEntry>();
                List<ProcessedTermEntry> trgTermList = new List<ProcessedTermEntry>();
                bool interlinguaDictUsed = false;
                bool interlinguaTranslitUsed = false;
                if (inputFormat=="preprocessed_terms")
                {
                    if (string.IsNullOrWhiteSpace(inputFile)||!File.Exists(inputFile))
                    {
                        Log.Write("Pre-processed term input file not specified or cannot be found!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }
                    if (string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
                    {
                        Log.Write("Source and/or target languages not specified!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }
                    PreprocessedTermData ptd = PreprocessedTermData.ReadFromFile(inputFile);
                    srcTermList.AddRange(ptd.srcTerms);
                    trgTermList.AddRange(ptd.trgTerms);
                    Log.Write ("Pre-processed source terms: "+srcTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                    Log.Write ("Pre-processed target terms: "+trgTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);

                    srcLang = ptd.srcLang;
                    trgLang = ptd.trgLang;
                    interlinguaDictUsed = ptd.interlinguaDictUsed;
                    interlinguaTranslitUsed = ptd.interlinguaTranslitUsed;

                    Dictionary<string, Dictionary<string, bool>> excDict = null;
                    ReadExceptionDictionary(configuration,srcLang, trgLang,out excDict);
                    Log.Write ("Exception dictionary entries: "+excDict.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);

                    Dictionary<string,bool> srcStopWords = null;
                    ReadStopwordList(configuration,srcLang,out srcStopWords);
                    Log.Write ("Source language stopwords: "+srcStopWords.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);

                    Dictionary<string,bool> trgStopWords = null;
                    ReadStopwordList(configuration,trgLang,out trgStopWords);
                    Log.Write ("Target language stopwords: "+trgStopWords.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);

                    if (!string.IsNullOrWhiteSpace(outputFile))
                    {
                        List<AlignmentInfoElement> alignment = Alignment.AlignListPairs(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                        Log.Write ("Alignment elements after alignment: "+alignment.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                        AlignmentInfoElement.PrintList(outputFormat,outputFile,alignment, configuration.printTopTrgForSrc,null,srcLang,trgLang,collectionId,domainId);
                    }
                }
                else
                {

                    if (string.IsNullOrWhiteSpace(srcInputFile)||!File.Exists(srcInputFile)||string.IsNullOrWhiteSpace(trgInputFile)||!File.Exists(trgInputFile))
                    {
                        Log.Write("Source and/or target files not specified or cannot be found!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }
                    if (string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
                    {
                        Log.Write("Source and/or target languages not specified!",LogLevelType.ERROR,configuration);
                        PrintUsage();
                        return;
                    }

                    List<string> srcInitialList = new List<string>();
                    List<string> trgInitialList = new List<string>();
                    srcInitialList = ListFileParser.ParseList(srcInputFile,Encoding.UTF8);
                    trgInitialList = ListFileParser.ParseList(trgInputFile,Encoding.UTF8);
                    if (srcInitialList.Count!=trgInitialList.Count)
                    {
                        Log.Write("Source and target term lists are with different lengths",LogLevelType.ERROR,configuration);
                        throw new ArgumentException("Source and target term lists are with different lengths");
                    }
                    Log.Write ("Unprocessed source terms: "+srcInitialList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                    Log.Write ("Unprocessed target terms: "+trgInitialList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                    Dictionary<string, Dictionary<string, double>> srcDict = null;
                    Dictionary<string, Dictionary<string, double>> trgDict = null;
                    Dictionary<string, Dictionary<string, double>> srcToTrgDict = null;
                    Dictionary<string, Dictionary<string, double>> trgToSrcDict = null;

                    MPAlignerConfigurationTranslEntry srcTranslitConf = null;
                    MPAlignerConfigurationTranslEntry trgTranslitConf = null;
                    MPAlignerConfigurationTranslEntry srcToTrgTranslitConf = null;
                    MPAlignerConfigurationTranslEntry trgToSrcTranslitConf = null;

                    interlinguaDictUsed = ReadDictionaries(configuration,srcLang,trgLang, out srcDict, out trgDict, out srcToTrgDict, out trgToSrcDict);
                    interlinguaTranslitUsed = GetTranslitConfig(configuration,srcLang,trgLang,out srcTranslitConf,out trgTranslitConf,out srcToTrgTranslitConf, out trgToSrcTranslitConf);

                    if (srcDict!=null||trgDict!=null)
                    {
                        if (srcTranslitConf!=null && trgTranslitConf!=null)
                        {
                            srcTermList = ProcessedTermEntry.ProcessTermsList(srcInitialList,srcDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTermsList(trgInitialList,trgDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                        else
                        {
                            srcTermList = ProcessedTermEntry.ProcessTermsList(srcInitialList,srcDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTermsList(trgInitialList,trgDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                    }
                    else
                    {
                        if (srcTranslitConf!=null && trgTranslitConf!=null)
                        {
                            srcTermList = ProcessedTermEntry.ProcessTermsList(srcInitialList,srcToTrgDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTermsList(trgInitialList,trgToSrcDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                        else
                        {
                            srcTermList = ProcessedTermEntry.ProcessTermsList(srcInitialList,srcToTrgDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                            trgTermList = ProcessedTermEntry.ProcessTermsList(trgInitialList,trgToSrcDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        }
                    }
                    Log.Write ("Pre-processed source terms: "+srcTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                    Log.Write ("Pre-processed target terms: "+trgTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);

                    ///If pre-processed terms should be saved for future use, they are serialized and written to the pre-processed term output file.
                    if (!string.IsNullOrWhiteSpace(preProcessedTermOutputFile))
                    {
                        PreprocessedTermData ptd = new PreprocessedTermData();
                        ptd.interlinguaDictUsed = interlinguaDictUsed;
                        ptd.interlinguaTranslitUsed = interlinguaTranslitUsed;
                        ptd.srcTerms = srcTermList.ToArray();
                        ptd.trgTerms = trgTermList.ToArray();
                        ptd.srcLang = srcLang;
                        ptd.trgLang = trgLang;

                        string outStr = MPFramework.MPFrameworkFunctions.SerializeObjectInstance<PreprocessedTermData>(ptd);
                        File.WriteAllText(preProcessedTermOutputFile,outStr);
                    }

                    Dictionary<string, Dictionary<string, bool>> excDict = null;
                    ReadExceptionDictionary(configuration,srcLang, trgLang,out excDict);

                    Dictionary<string,bool> srcStopWords = null;
                    ReadStopwordList(configuration,srcLang,out srcStopWords);

                    Dictionary<string,bool> trgStopWords = null;
                    ReadStopwordList(configuration,trgLang,out trgStopWords);

                    if (!string.IsNullOrWhiteSpace(outputFile))
                    {
                        List<AlignmentInfoElement> alignment = Alignment.AlignListPairs(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                        AlignmentInfoElement.PrintList(outputFormat,outputFile,alignment, configuration.printTopTrgForSrc,null,srcLang,trgLang,collectionId,domainId);
                    }
                }
            }
            else if (method.ToLower () == "eurovoceval")
            {
                if (string.IsNullOrWhiteSpace(inputFile)||!File.Exists(inputFile))
                {
                    Log.Write("Eurovoc input file not specified or cannot be found!",LogLevelType.ERROR,configuration);
                    PrintUsage();
                    return;
                }
                if (string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
                {
                    Log.Write("Source or target language not specified!",LogLevelType.ERROR,configuration);
                    PrintUsage();
                    return;
                }

                configuration.allowTrimmedAlignments = false;
                //configuration.useMultiThreadedExecution = false;
                configuration.printTopTrgForSrc = true;

                string logFile = outputFile+".res.log";
                StreamWriter sw = new StreamWriter(logFile, true, Encoding.UTF8);

                Dictionary<string,List<string>> eurovocDict = ReadEurovocDict(inputFile);;
                //List<string> langList = GetLangsFromConf(configuration);

                //for(int i = 0;i<langList.Count;i++)
                //{
                    //for(int j = 0;j<langList.Count;j++)
                    //{
                        //if (i==j) continue;
                        //srcLang = langList[i];
                        //trgLang = langList[j];
                Log.Write("Processing pair "+srcLang+"_"+trgLang,LogLevelType.LIMITED_OUTPUT,configuration);
                if (Char.IsDigit(outputFile[outputFile.Length-1])) outputFile = outputFile.Substring(0,outputFile.Length-1);
                string alignmentOutputFile = outputFile+"."+srcLang+"_"+trgLang+".align.txt";
                if (File.Exists(alignmentOutputFile))
                {
                    Log.Write("Pair "+srcLang+"_"+trgLang+" already processed! Evaluating...",LogLevelType.LIMITED_OUTPUT,configuration);
                    List<StringComparisonElement> terms = new List<StringComparisonElement>();
                    StreamReader sr = new StreamReader(alignmentOutputFile,Encoding.UTF8);
                    char[] sep = {'\t'};
                    NumberFormatInfo nfi = new NumberFormatInfo();
                    nfi.CurrencyDecimalSeparator=".";
                    nfi.NumberDecimalSeparator=".";
                    nfi.PercentDecimalSeparator=".";
                    while(!sr.EndOfStream)
                    {
                        string line = sr.ReadLine().Trim();
                        string[] arr = line.Split(sep,StringSplitOptions.None);
                        if (arr.Length>=3)
                        {
                            StringComparisonElement sce = new StringComparisonElement();
                            sce.src = arr[0];
                            sce.trg = arr[1];
                            sce.similarity = Convert.ToDouble(arr[2],nfi);
                            terms.Add(sce);
                        }
                    }
                    sr.Close();
                    terms.Sort();

                    List<double> scores = new List<double>();
                    double tmp = 0;
                    while (tmp<=1)
                    {
                        scores.Add(tmp);
                        tmp+=0.01;
                    }
                    List<double> correct = new List<double>();
                    for(int t=0;t<scores.Count;t++)
                    {
                        correct.Add(0);
                    }
                    List<double> total = new List<double>();
                    for(int t=0;t<scores.Count;t++)
                    {
                        total.Add(0);
                    }

                    int totalForRec = 0;
                    Dictionary<string,Dictionary<string,bool>> goldList = new Dictionary<string, Dictionary<string, bool>>();
                    for (int s = 0;s<eurovocDict[srcLang].Count;s++)
                    {
                        if (!eurovocDict[srcLang][s].Contains("(under translation)")&&!eurovocDict[trgLang][s].Contains("(under translation)"))
                        {
                            totalForRec++;
                            if (!goldList.ContainsKey(eurovocDict[srcLang][s].ToLower())) goldList.Add(eurovocDict[srcLang][s].ToLower(), new Dictionary<string,bool>());
                            if (!goldList[eurovocDict[srcLang][s].ToLower()].ContainsKey(eurovocDict[trgLang][s].ToLower())) goldList[eurovocDict[srcLang][s].ToLower()].Add(eurovocDict[trgLang][s].ToLower(),true);
                        }
                    }

                    string previousSrc = null;
                    foreach(StringComparisonElement sce in terms)
                    {
                        string currSrc = sce.src;
                        if (previousSrc!=currSrc.ToLower())
                        {
                            string src = sce.src.ToLower();
                            string trg = sce.trg.ToLower();
                            double alignScore = sce.similarity;
                            bool corr = false;
                            if (goldList.ContainsKey(src)&&goldList[src].ContainsKey(trg)) corr = true;
                            for (int s =0;s<scores.Count;s++)
                            {
                                if (scores[s]<=alignScore)
                                {
                                    if (corr) correct[s]++;
                                    total[s]++;
                                }
                            }
                            previousSrc = currSrc.ToLower();
                        }
                    }

                    for(int s=0;s<scores.Count;s++)
                    {
                        double corr = correct[s];
                        double tot = total[s];
                        double totCorr = totalForRec;
                        double prec = corr/tot*100;
                        double rec = corr/totCorr*100;
                        double f1 = prec*rec*2/(prec+rec);
                        sw.WriteLine(srcLang+"\t"+trgLang+"\t"+scores[s].ToString()+"\t"+corr.ToString()+"\t"+tot.ToString()+"\t"+totCorr.ToString()+"\t"+prec.ToString()+"\t"+rec.ToString()+"\t"+f1.ToString());
                    }
                    sw.Flush();
                    //}
                    //}
                    sw.Close();
                    //continue;
                    return;
                }
                string preprocessedOutputFile = outputFile+"."+srcLang+"_"+trgLang+".prep.txt";
                Dictionary<string,SimpleTermEntry> srcInitialList = StringListToDict(eurovocDict[srcLang]);
                Dictionary<string,SimpleTermEntry> trgInitialList = StringListToDict(eurovocDict[trgLang]);

                Log.Write ("Unprocessed source terms: "+srcInitialList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                Log.Write ("Unprocessed target terms: "+trgInitialList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                Dictionary<string, Dictionary<string, double>> srcDict = null;
                Dictionary<string, Dictionary<string, double>> trgDict = null;
                Dictionary<string, Dictionary<string, double>> srcToTrgDict = null;
                Dictionary<string, Dictionary<string, double>> trgToSrcDict = null;

                MPAlignerConfigurationTranslEntry srcTranslitConf = null;
                MPAlignerConfigurationTranslEntry trgTranslitConf = null;
                MPAlignerConfigurationTranslEntry srcToTrgTranslitConf = null;
                MPAlignerConfigurationTranslEntry trgToSrcTranslitConf = null;

                bool interlinguaDictUsed = ReadDictionaries(configuration,srcLang,trgLang, out srcDict, out trgDict, out srcToTrgDict, out trgToSrcDict);
                bool interlinguaTranslitUsed = GetTranslitConfig(configuration,srcLang,trgLang,out srcTranslitConf,out trgTranslitConf,out srcToTrgTranslitConf, out trgToSrcTranslitConf);

                Dictionary<string,ProcessedTermEntry> srcTermList = new Dictionary<string,ProcessedTermEntry>();
                Dictionary<string,ProcessedTermEntry> trgTermList = new Dictionary<string,ProcessedTermEntry>();

                if (File.Exists(preprocessedOutputFile))
                {
                    Log.Write("Preprocessed term data found! Reading pre-processed data to save time!", LogLevelType.WARNING,configuration);
                    PreprocessedTermData ptd1 = PreprocessedTermData.ReadFromFile(preprocessedOutputFile);
                    interlinguaDictUsed = ptd1.interlinguaDictUsed;
                    interlinguaTranslitUsed = ptd1.interlinguaTranslitUsed;
                    foreach(ProcessedTermEntry pte in ptd1.srcTerms)
                    {
                        if (!srcTermList.ContainsKey(pte.lowercaceForm))
                        {
                            srcTermList.Add(pte.lowercaceForm,pte);
                        }
                    }
                    foreach(ProcessedTermEntry pte in ptd1.trgTerms)
                    {
                        if (!trgTermList.ContainsKey(pte.lowercaceForm))
                        {
                            trgTermList.Add(pte.lowercaceForm,pte);
                        }
                    }
                }
                else if (interlinguaDictUsed&&interlinguaTranslitUsed)
                {
                    string dir = Path.GetDirectoryName(preprocessedOutputFile);
                    if (!dir.EndsWith(Path.DirectorySeparatorChar.ToString())) dir+=Path.DirectorySeparatorChar.ToString();
                    string prepSrcToTrgFile = dir+"eurovoc_preprocessed_"+srcLang+"_en.xml";
                    string prepTrgToSrcFile = dir+"eurovoc_preprocessed_"+trgLang+"_en.xml";
                    if (File.Exists(prepSrcToTrgFile))
                    {
                        Log.Write ("Reading processed term list: eurovoc_preprocessed_"+srcLang+"_en.xml",LogLevelType.LIMITED_OUTPUT,configuration);
                        srcTermList = ProcessedTermEntry.ReadFromFile(prepSrcToTrgFile);
                    }
                    if (File.Exists(prepTrgToSrcFile))
                    {
                        Log.Write ("Reading processed term list: eurovoc_preprocessed_"+trgLang+"_en.xml",LogLevelType.LIMITED_OUTPUT,configuration);
                        trgTermList = ProcessedTermEntry.ReadFromFile(prepTrgToSrcFile);
                    }
                }
                else if (!interlinguaDictUsed&&!interlinguaTranslitUsed)
                {
                    string dir = Path.GetDirectoryName(preprocessedOutputFile);
                    if (!dir.EndsWith(Path.DirectorySeparatorChar.ToString())) dir+=Path.DirectorySeparatorChar.ToString();
                    string prepSrcToTrgFile = dir+"eurovoc_preprocessed_"+srcLang+"_"+trgLang+".xml";
                    string prepTrgToSrcFile = dir+"eurovoc_preprocessed_"+trgLang+"_"+srcLang+".xml";
                    if (File.Exists(prepSrcToTrgFile))
                    {
                        Log.Write ("Reading processed term list: eurovoc_preprocessed_"+srcLang+"_"+trgLang+".xml",LogLevelType.LIMITED_OUTPUT,configuration);
                        srcTermList = ProcessedTermEntry.ReadFromFile(prepSrcToTrgFile);
                    }
                    if (File.Exists(prepTrgToSrcFile))
                    {
                        Log.Write ("Reading processed term list: eurovoc_preprocessed_"+trgLang+"_"+srcLang+".xml",LogLevelType.LIMITED_OUTPUT,configuration);
                        trgTermList = ProcessedTermEntry.ReadFromFile(prepTrgToSrcFile);
                    }
                }

                if (srcDict!=null||trgDict!=null)
                {
                    if (srcTranslitConf!=null && trgTranslitConf!=null)
                    {
                        if (srcTermList.Count<1)
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        if (trgTermList.Count<1)
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                    }
                    else
                    {
                        if (srcTermList.Count<1)
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        if (trgTermList.Count<1)
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                    }
                }
                else
                {
                    if (srcTranslitConf!=null && trgTranslitConf!=null)
                    {
                        if (srcTermList.Count<1)
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcToTrgDict,srcLang,srcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        if (trgTermList.Count<1)
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgToSrcDict,trgLang,trgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                    }
                    else
                    {
                        if (srcTermList.Count<1)
                            srcTermList = ProcessedTermEntry.ProcessTerms(srcInitialList,srcToTrgDict,srcLang,srcToTrgTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                        if (trgTermList.Count<1)
                            trgTermList = ProcessedTermEntry.ProcessTerms(trgInitialList,trgToSrcDict,trgLang,trgToSrcTranslitConf, configuration.mosesPath, tempTranslitFile,configuration.alignmentThreads);
                    }
                }
                Log.Write ("Pre-processed source terms: "+srcTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                Log.Write ("Pre-processed target terms: "+trgTermList.Count.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);

                ///If pre-processed terms should be saved for future use an output format is created.

                List<ProcessedTermEntry> srcTerms = new List<ProcessedTermEntry>(srcTermList.Values);
                List<ProcessedTermEntry> trgTerms = new List<ProcessedTermEntry>(trgTermList.Values);
                PreprocessedTermData ptd = new PreprocessedTermData();
                ptd.interlinguaDictUsed = interlinguaDictUsed;
                ptd.interlinguaTranslitUsed = interlinguaTranslitUsed;
                ptd.srcTerms = srcTerms.ToArray();
                ptd.trgTerms = trgTerms.ToArray();
                ptd.srcLang = srcLang;
                ptd.trgLang = trgLang;

                string outStr = MPFramework.MPFrameworkFunctions.SerializeObjectInstance<PreprocessedTermData>(ptd);
                File.WriteAllText(preprocessedOutputFile,outStr);

                Dictionary<string, Dictionary<string, bool>> excDict = null;
                ReadExceptionDictionary(configuration,srcLang, trgLang,out excDict);

                Dictionary<string,bool> srcStopWords = null;
                ReadStopwordList(configuration,srcLang,out srcStopWords);

                Dictionary<string,bool> trgStopWords = null;
                ReadStopwordList(configuration,trgLang,out trgStopWords);

                //Need to pre-set the alignment thresholds, otherwise these will be overriden by defaults.
                MPAlignerConfigurationLangPairEntry lpeConf = ReadLangPairConfig (srcLang, trgLang, configuration);

                List<AlignmentInfoElement> alignment = new List<AlignmentInfoElement>();
                if (configuration.useMultiThreadedExecution)
                {
                    alignment = Alignment.AlignPairsMultiThreaded(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                }
                else
                {
                    alignment = Alignment.AlignPairs(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                }
                //Multi-threaded execution is not stable at the moment...
                //List<AlignmentInfoElement> alignment = Alignment.AlignPairsMultiThreaded(configuration,srcTermList,trgTermList,interlinguaDictUsed,interlinguaTranslitUsed,srcLang,trgLang, srcInputFile, trgInputFile, excDict, srcStopWords, trgStopWords);
                AlignmentInfoElement.PrintList(outputFormat, alignmentOutputFile, alignment, configuration.printTopTrgForSrc, lpeConf,srcLang,trgLang,collectionId,domainId);
                {
                    List<double> scores = new List<double>();
                    double tmp = 0;
                    while (tmp<=1)
                    {
                        scores.Add(tmp);
                        tmp+=0.01;
                    }
                    List<double> correct = new List<double>();
                    for(int t=0;t<scores.Count;t++)
                    {
                        correct.Add(0);
                    }
                    List<double> total = new List<double>();
                    for(int t=0;t<scores.Count;t++)
                    {
                        total.Add(0);
                    }

                    int totalForRec = 0;
                    Dictionary<string,Dictionary<string,bool>> goldList = new Dictionary<string, Dictionary<string, bool>>();
                    for (int s = 0;s<eurovocDict[srcLang].Count;s++)
                    {
                            if (!eurovocDict[srcLang][s].ToLower().Contains("(under translation)")&&!eurovocDict[trgLang][s].ToLower().Contains("(under translation)"))
                        {
                            totalForRec++;
                                if (!goldList.ContainsKey(eurovocDict[srcLang][s].ToLower())) goldList.Add(eurovocDict[srcLang][s].ToLower(), new Dictionary<string,bool>());
                                if (!goldList[eurovocDict[srcLang][s].ToLower()].ContainsKey(eurovocDict[trgLang][s].ToLower())) goldList[eurovocDict[srcLang][s].ToLower()].Add(eurovocDict[trgLang][s].ToLower(),true);
                        }
                    }

                    string previousSrc = null;
                    alignment.Sort();
                    foreach(AlignmentInfoElement aie in alignment)
                    {
                        string currSrc = AlignmentInfoElement.GetStrFromEntry(aie.srcEntry.surfaceFormWords, aie.minSrcId, aie.maxSrcId);
                        if (previousSrc!=currSrc.ToLower())
                        {
                            string src = aie.srcEntry.surfaceForm.ToLower();
                            string trg = aie.trgEntry.surfaceForm.ToLower();
                            double alignScore = aie.alignmentScore;
                            bool corr = false;
                            if (goldList.ContainsKey(src)&&goldList[src].ContainsKey(trg)) corr = true;
                            for (int s =0;s<scores.Count;s++)
                            {
                                if (scores[s]<=alignScore)
                                {
                                    if (corr) correct[s]++;
                                    total[s]++;
                                }
                            }
                            previousSrc = currSrc.ToLower();
                        }
                    }

                    for(int s=0;s<scores.Count;s++)
                    {
                        double corr = correct[s];
                        double tot = total[s];
                        double totCorr = totalForRec;
                        double prec = corr/tot*100;
                        double rec = corr/totCorr*100;
                        double f1 = prec*rec*2/(prec+rec);
                        sw.WriteLine(srcLang+"\t"+trgLang+"\t"+scores[s].ToString()+"\t"+corr.ToString()+"\t"+tot.ToString()+"\t"+totCorr.ToString()+"\t"+prec.ToString()+"\t"+rec.ToString()+"\t"+f1.ToString());
                    }
                    sw.Flush();
                    //}
                    //}
                    sw.Close();
                }
            }
            if (File.Exists(tempTranslitFile)) File.Delete(tempTranslitFile);
            if (consolidateResults) {
                Log.Write ("Consolidating aligned term pairs with a threshold of: "+consolidationThreshold.ToString(),LogLevelType.LIMITED_OUTPUT,configuration);
                //In the case if -ct (consolidation threshold) was defined and the output format has been ref_tabsep, the consolidation of results is perfomed.
                ConsolidationElement.ConsolidateRefTabsep(outputFile, consolidatedOutputFile,consolidationThreshold);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Writes the alignment entries of <paramref name="list"/> to <paramref name="sw"/>, one tab-separated line per printed entry.
        /// </summary>
        /// <remarks>
        /// The list is sorted in place before printing. If <paramref name="outputFormat"/> lower-cases to "tabsep",
        /// each line holds only the source text, the target text and the score; any other value produces the full
        /// line with language codes, domain, collection id, MSD/lemma/normalised sequences, concordances and file
        /// names (the full line ends with a trailing tab).
        /// An entry is skipped when its source or target text is blank, when its score does not reach
        /// <c>lpeConf.printThr</c>, or - if <paramref name="printTopSrc"/> is set - when its lower-cased source text
        /// equals that of the previously printed line.
        /// </remarks>
        /// <param name="sw">Writer that receives the output; it is neither flushed nor closed here.</param>
        /// <param name="outputFormat">Output format name; only "tabsep" (case-insensitive) selects the short line.</param>
        /// <param name="list">Alignment entries to print; sorted in place by this method.</param>
        /// <param name="printTopSrc">When true, consecutive entries with the same source text are printed only once.</param>
        /// <param name="lpeConf">Language pair configuration supplying the print threshold.</param>
        /// <param name="srcLang">Source language code written in the full format.</param>
        /// <param name="trgLang">Target language code written in the full format.</param>
        /// <param name="collectionId">Collection identifier written in the full format.</param>
        /// <param name="domain">Domain identifier written in the full format.</param>
        private static void PrintTabsep(StreamWriter sw, string outputFormat, List <AlignmentInfoElement> list, bool printTopSrc, MPAlignerConfigurationLangPairEntry lpeConf, string srcLang = "", string trgLang = "", string collectionId = "", string domain = "")
        {
            const string tab = "\t";

            // Scores are always printed with '.' as the decimal separator, whatever the current culture is.
            NumberFormatInfo scoreFormat = new NumberFormatInfo
            {
                CurrencyDecimalSeparator = ".",
                NumberDecimalSeparator   = ".",
                PercentDecimalSeparator  = "."
            };
            bool extended = outputFormat.ToLower() != "tabsep";
            string lastPrintedSrc = null;

            list.Sort();
            foreach (AlignmentInfoElement entry in list)
            {
                string srcText     = GetStrFromEntry(entry.srcEntry.surfaceFormWords, entry.minSrcId, entry.maxSrcId, entry.srcEntry.lemmaSeq).Trim();
                string srcLemmaSeq = GetStrFromEntry(entry.srcEntry.lemmaSeq, entry.minSrcId, entry.maxSrcId).Trim();
                string srcMsdSeq   = GetStrFromEntry(entry.srcEntry.msdSeq, entry.minSrcId, entry.maxSrcId).Trim();
                string srcNormSeq  = GetStrFromEntry(entry.srcEntry.normSeq, entry.minSrcId, entry.maxSrcId).Trim();
                string srcNormMsd  = GetStrFromEntry(entry.srcEntry.normMsdSeq, entry.minSrcId, entry.maxSrcId).Trim();
                if (string.IsNullOrWhiteSpace(srcText))
                {
                    continue;
                }

                // Same source text as the line printed last: only relevant when printTopSrc is requested.
                bool repeatedSrc = printTopSrc && lastPrintedSrc == srcText.ToLower();
                if (repeatedSrc || !(entry.alignmentScore >= lpeConf.printThr))
                {
                    continue;
                }

                string trgText     = GetStrFromEntry(entry.trgEntry.surfaceFormWords, entry.minTrgId, entry.maxTrgId, entry.trgEntry.lemmaSeq).Trim();
                string trgLemmaSeq = GetStrFromEntry(entry.trgEntry.lemmaSeq, entry.minTrgId, entry.maxTrgId).Trim();
                string trgMsdSeq   = GetStrFromEntry(entry.trgEntry.msdSeq, entry.minTrgId, entry.maxTrgId).Trim();
                string trgNormSeq  = GetStrFromEntry(entry.trgEntry.normSeq, entry.minTrgId, entry.maxTrgId).Trim();
                string trgNormMsd  = GetStrFromEntry(entry.trgEntry.normMsdSeq, entry.minTrgId, entry.maxTrgId).Trim();
                if (string.IsNullOrWhiteSpace(trgText))
                {
                    // Nothing is printed, so the "previous source" marker is left untouched.
                    continue;
                }

                if (extended)
                {
                    sw.Write(srcLang + tab);
                }
                sw.Write(srcText + tab);
                if (extended)
                {
                    sw.Write(trgLang + tab);
                }
                sw.Write(trgText + tab);
                if (extended)
                {
                    sw.Write(domain + tab + collectionId + tab);
                }
                sw.Write(entry.alignmentScore.ToString("0.000000", scoreFormat));
                if (extended)
                {
                    sw.Write(tab + srcMsdSeq + tab + srcLemmaSeq + tab + srcNormSeq + tab + srcNormMsd + tab);
                    sw.Write(entry.srcEntry.concordance);
                    sw.Write(tab + trgMsdSeq + tab + trgLemmaSeq + tab + trgNormSeq + tab + trgNormMsd + tab);
                    sw.Write(entry.trgEntry.concordance);
                    sw.Write(tab);
                    sw.Write(entry.srcFile);
                    sw.Write(tab);
                    sw.Write(entry.trgFile);
                    sw.Write(tab);
                }
                sw.WriteLine();
                lastPrintedSrc = srcText.ToLower();
            }
        }
Esempio n. 7
0
 /// <summary>
 /// Checks whether an overlap value reaches the threshold that the language pair configuration
 /// defines for the given combination of string types.
 /// </summary>
 /// <param name="lpeConf">Language pair configuration holding the overlap thresholds; must not be null.</param>
 /// <param name="overlap">Overlap value that is compared against the selected threshold.</param>
 /// <param name="fromType">Type code of the first string (see the type list in the body).</param>
 /// <param name="toType">Type code of the second string (see the type list in the body).</param>
 /// <returns>
 /// True when <paramref name="overlap"/> is at least the threshold selected by the type pair;
 /// false when it is lower or when no threshold is mapped to the type pair.
 /// </returns>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="lpeConf"/> is null.</exception>
 public static bool CheckOverlapThreshold(MPAlignerConfigurationLangPairEntry lpeConf, double overlap, short fromType, short toType)
 {
     ///Types:
     /// 0 - dictionary,
     /// 1 - simple translit,
     /// 2 - target,
     /// 3 - translit
     if (lpeConf == null)
     {
         // The single-string constructor interprets its argument as the parameter name,
         // so the parameter name and the message are passed separately.
         throw new ArgumentNullException("lpeConf", "The language pair configuration entry is empty!");
     }

     switch (fromType)
     {
         case 0:
             switch (toType)
             {
                 case 0: return overlap >= lpeConf.dictEntryOverlapThr;
                 case 1: return overlap >= lpeConf.dictToSTranslitOverlapThr;
                 case 2: return overlap >= lpeConf.dictToWordOverlapThr;
                 case 3: return overlap >= lpeConf.dictToTranslitOverlapThr;
             }
             break;
         case 1:
             switch (toType)
             {
                 case 0: return overlap >= lpeConf.dictToSTranslitOverlapThr;
                 case 1: return overlap >= lpeConf.sTranslitOverlapThr;
                 case 2: return overlap >= lpeConf.sTranslitToWordOverlapThr;
                 case 3: return overlap >= lpeConf.translitToSTranslitOverlapThr;
             }
             break;
         case 3:
             switch (toType)
             {
                 case 0: return overlap >= lpeConf.dictToTranslitOverlapThr;
                 case 1: return overlap >= lpeConf.translitToSTranslitOverlapThr;
                 case 2: return overlap >= lpeConf.translitToWordOverlapThr;
                 case 3: return overlap >= lpeConf.translitEntryOverlapThr;
             }
             break;
     }

     // NOTE(review): fromType 2 has no mapping here and therefore always yields false;
     // confirm that callers never pass type 2 as the first type.
     return false; //Such a combination should not be possible at all...
 }
Esempio n. 8
0
        /// <summary>
        /// Aligns terms pairwise by position: the entry at index i of <paramref name="srcTermList"/> is aligned
        /// with the entry at index i of <paramref name="trgTermList"/>.
        /// </summary>
        /// <remarks>
        /// Only indices present in both lists are processed, so surplus entries of the longer list are ignored.
        /// Index positions where either entry is null produce no result element.
        /// The language pair configuration is looked up in <c>configuration.langPairEntryDict</c> under the key
        /// "srcLang_trgLang"; a fresh entry with only the two language codes set is used when no entry is found.
        /// </remarks>
        /// <param name="configuration">Aligner configuration; its <c>langPairEntryDict</c> must not be null.</param>
        /// <param name="srcTermList">Pre-processed source terms.</param>
        /// <param name="trgTermList">Pre-processed target terms, positionally parallel to <paramref name="srcTermList"/>.</param>
        /// <param name="interlinguaDictUsed">Selects, together with <paramref name="interlinguaTranslitUsed"/>, which combinations of translations, transliterations and surface words are compared.</param>
        /// <param name="interlinguaTranslitUsed">See <paramref name="interlinguaDictUsed"/>.</param>
        /// <param name="srcLang">Source language code; must not be blank.</param>
        /// <param name="trgLang">Target language code; must not be blank.</param>
        /// <param name="srcFile">Stored in the <c>srcFile</c> field of every result element.</param>
        /// <param name="trgFile">Stored in the <c>trgFile</c> field of every result element.</param>
        /// <param name="excDict">Exception dictionary handed to <c>ConsolidateOverlaps</c>.</param>
        /// <param name="srcStopWords">Source stop words handed to <c>CreateStrListsForEval</c>.</param>
        /// <param name="trgStopWords">Target stop words handed to <c>CreateStrListsForEval</c>.</param>
        /// <returns>
        /// One scored <c>AlignmentInfoElement</c> per processed pair, or null when the configuration, a language
        /// code or one of the term lists is missing.
        /// </returns>
        public static List<AlignmentInfoElement> AlignListPairs(MPAlignerConfiguration configuration, List<ProcessedTermEntry> srcTermList, List<ProcessedTermEntry> trgTermList, bool interlinguaDictUsed, bool interlinguaTranslitUsed, string srcLang, string trgLang, string srcFile, string trgFile, Dictionary<string, Dictionary<string, bool>> excDict, Dictionary<string, bool> srcStopWords, Dictionary<string, bool> trgStopWords)
        {
            if (configuration == null||configuration.langPairEntryDict==null||string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
            {
                return null;
            }
            // Missing term lists are treated like the other invalid inputs instead of failing with a NullReferenceException.
            if (srcTermList == null || trgTermList == null)
            {
                return null;
            }

            string langKey = srcLang+"_"+trgLang;

            MPAlignerConfigurationLangPairEntry lpeConf;
            if (configuration.langPairEntryDict.ContainsKey(langKey))
            {
                lpeConf = configuration.langPairEntryDict[langKey];
            }
            else
            {
                lpeConf = new MPAlignerConfigurationLangPairEntry();
                lpeConf.srcLang = srcLang;
                lpeConf.trgLang = trgLang;
            }

            // The lists are consumed in parallel; stop at the end of the shorter one so that a
            // length mismatch cannot cause an out-of-range access half-way through the run.
            int pairCount = srcTermList.Count < trgTermList.Count ? srcTermList.Count : trgTermList.Count;

            List<AlignmentInfoElement> res = new List<AlignmentInfoElement>();
            for(int i=0;i<pairCount;i++)
            {
                ProcessedTermEntry srcPte = srcTermList[i];
                ProcessedTermEntry trgPte = trgTermList[i];
                if (srcPte==null || trgPte==null)
                {
                    continue;
                }

                AlignmentInfoElement aie = new AlignmentInfoElement();
                List<WordAlignmentElement> srcToTrg = new List<WordAlignmentElement>();
                List<WordAlignmentElement> trgToSrc = new List<WordAlignmentElement>();
                // maxStrLen is declared outside this method; it is reset before every pair.
                maxStrLen = 0;

                if (interlinguaDictUsed && interlinguaTranslitUsed)
                {

                    ///Types:
                    /// 0 - dictionary,
                    /// 1 - simple translit,
                    /// 2 - target or source,
                    /// 3 - translit

                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLATION
                    AlignStringProbabEntryListLists (lpeConf, srcPte.translationList, trgPte.translationList, srcToTrg, trgToSrc, 0, 0);

                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 0, 1);

                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLITERATION
                    AlignStringProbabEntryListLists (lpeConf, srcPte.translationList, trgPte.transliterationList, srcToTrg, trgToSrc, 0, 3);

                    //Translation is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.translationList, srcToTrg, trgToSrc, 1, 0);

                    //Translation is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLATION
                    AlignStringProbabEntryListLists (lpeConf, srcPte.transliterationList, trgPte.translationList, srcToTrg, trgToSrc, 3, 0);

                    //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 3, 1);

                    //Transliteration is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLITERATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.transliterationList, srcToTrg, trgToSrc, 1, 3);

                    //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLITERATION
                    AlignStringProbabEntryListLists (lpeConf, srcPte.transliterationList, trgPte.transliterationList, srcToTrg, trgToSrc, 3, 3);

                    //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);

                }
                else if (interlinguaTranslitUsed)
                {
                    //Translation is in target language; SOURCE TRANSLATION vs TARGET
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 0, 2);

                    //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 3, 1);

                    //Transliteration is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLITERATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.transliterationList, srcToTrg, trgToSrc, 1, 3);

                    //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLITERATION
                    //NOTE(review): the type codes are 3, 2 although both lists are transliterations (the
                    //two-interlingua branch above passes 3, 3 for the same lists) - possibly a typo; left unchanged, confirm.
                    AlignStringProbabEntryListLists (lpeConf, srcPte.transliterationList, trgPte.transliterationList, srcToTrg, trgToSrc, 3, 2);

                    //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);

                    //Translation is in target language; SOURCE vs TARGET TRANSLATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.translationList, srcToTrg, trgToSrc, 2, 0);
                }
                else if (interlinguaDictUsed)
                {
                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLATION
                    AlignStringProbabEntryListLists (lpeConf, srcPte.translationList, trgPte.translationList, srcToTrg, trgToSrc, 0, 0);

                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 0, 1);

                    //Translation is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.translationList, srcToTrg, trgToSrc, 1, 0);

                    //Transliteration is in target language; SOURCE vs TARGET TRANSLITERATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.transliterationList, srcToTrg, trgToSrc, 2, 3);

                    //Transliteration is in target language; SOURCE TRANSLITERATION vs TARGET
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 3, 2);

                    //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);

                }
                else
                {
                    //Translation is in target language; SOURCE TRANSLATION vs TARGET
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 0, 2);

                    //Transliteration is in target language; SOURCE TRANSLITERATION vs TARGET
                    AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 3, 2);

                    //Translation is in target language; SOURCE vs TARGET TRANSLATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.translationList, srcToTrg, trgToSrc, 2, 0);

                    //Transliteration is in target language; SOURCE vs TARGET TRANSLITERATION
                    AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.transliterationList, srcToTrg, trgToSrc, 2, 3);

                    //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);

                }
                aie.srcToTrgAlignments = srcToTrg;
                aie.trgToSrcAlignments = trgToSrc;

                aie.srcEntry = srcPte;
                aie.trgEntry = trgPte;

                ConsolidateOverlaps(lpeConf,aie, excDict);
                // The return value is not used here; the call is kept because it runs before the pair is scored.
                CreateStrListsForEval(configuration,aie,srcStopWords,trgStopWords,false);
                aie.alignmentScore = EvaluateAlignmentScore(lpeConf,aie);
                //If you wish to debug the process, comment the lines below that clear the alignments...
                aie.srcToTrgAlignments.Clear();
                aie.trgToSrcAlignments.Clear();
                aie.consolidatedAlignment.Clear();
                aie.srcFile = srcFile;
                aie.trgFile = trgFile;
                res.Add(aie);
            }
            return res;
        }
Esempio n. 9
0
 //, Dictionary<string, Dictionary<string, bool>> excDict)
 /// <summary>
 /// Passes every combination of a source string variant and a target string to <c>AlignStrings</c>.
 /// </summary>
 /// <remarks>
 /// For each position in <paramref name="srcList"/>, each variant at that position is paired with each
 /// entry of <paramref name="trgList"/>. The target side has no variants, so -1 is passed as its variant index.
 /// If <paramref name="srcToTrg"/> or <paramref name="trgToSrc"/> is null, a new list is created for this call
 /// only; because the parameters are passed by value, the caller does not see that list.
 /// </remarks>
 /// <param name="lpeConf">Language pair configuration forwarded to <c>AlignStrings</c>.</param>
 /// <param name="srcList">Source side: one list of string variants per position.</param>
 /// <param name="trgList">Target side: one string per position.</param>
 /// <param name="srcToTrg">List forwarded to <c>AlignStrings</c> as the source-to-target alignments.</param>
 /// <param name="trgToSrc">List forwarded to <c>AlignStrings</c> as the target-to-source alignments.</param>
 /// <param name="fromType">Type code of the source strings, forwarded unchanged.</param>
 /// <param name="toType">Type code of the target strings, forwarded unchanged.</param>
 public static void AlignStringProbabEntryListToStringList(MPAlignerConfigurationLangPairEntry lpeConf, List<List<StringProbabEntry>> srcList, List<string> trgList, List<WordAlignmentElement> srcToTrg, List<WordAlignmentElement> trgToSrc, short fromType, short toType)
 {
     srcToTrg = srcToTrg ?? new List<WordAlignmentElement>();
     trgToSrc = trgToSrc ?? new List<WordAlignmentElement>();

     // Plain target strings carry no variant index.
     const int noTrgVariant = -1;

     for (int srcPos = 0; srcPos < srcList.Count; srcPos++)
     {
         List<StringProbabEntry> srcVariants = srcList[srcPos];
         for (int variantPos = 0; variantPos < srcVariants.Count; variantPos++)
         {
             for (int trgPos = 0; trgPos < trgList.Count; trgPos++)
             {
                 AlignStrings(lpeConf, srcToTrg, trgToSrc, fromType, toType, srcPos, variantPos, trgPos, noTrgVariant, srcVariants[variantPos].str, trgList[trgPos]);
             }
         }
     }
 }
Esempio n. 10
0
        /// <summary>
        /// Aligns every source term with every target term on the calling thread and returns the pairs
        /// whose alignment score reaches the <c>finalAlignmentThr</c> of the language pair configuration.
        /// </summary>
        /// <remarks>
        /// The two interlingua flags select which representations of the terms (translations,
        /// transliterations, simple transliterations, lowercased words) are compared with each other.
        /// A combination of aligned lowercased source/target strings is scored only once; later term
        /// pairs that produce the same combination are skipped. The <c>maxStrLen</c> member is reset
        /// to 0 for every term pair. Progress dots are written to the console.
        /// </remarks>
        /// <param name="configuration">Aligner configuration; its <c>langPairEntryDict</c> is searched for the key "srcLang_trgLang".</param>
        /// <param name="srcTerms">Pre-processed source terms.</param>
        /// <param name="trgTerms">Pre-processed target terms.</param>
        /// <param name="interlinguaDictUsed">Selects the comparison set together with <paramref name="interlinguaTranslitUsed"/>.</param>
        /// <param name="interlinguaTranslitUsed">Selects the comparison set together with <paramref name="interlinguaDictUsed"/>.</param>
        /// <param name="srcLang">Source language code.</param>
        /// <param name="trgLang">Target language code.</param>
        /// <param name="srcFile">Stored in <c>srcFile</c> of every returned element.</param>
        /// <param name="trgFile">Stored in <c>trgFile</c> of every returned element.</param>
        /// <param name="excDict">Handed to <c>ConsolidateOverlaps</c>.</param>
        /// <param name="srcStopWords">Handed to <c>CreateStrListsForEval</c>.</param>
        /// <param name="trgStopWords">Handed to <c>CreateStrListsForEval</c>.</param>
        /// <returns>
        /// The accepted alignments, or <c>null</c> when the configuration, its language pair dictionary
        /// or one of the term dictionaries is <c>null</c>, or when a language code is blank.
        /// </returns>
        public static List<AlignmentInfoElement> AlignPairs(MPAlignerConfiguration configuration, Dictionary<string, ProcessedTermEntry> srcTerms, Dictionary<string, ProcessedTermEntry> trgTerms, bool interlinguaDictUsed, bool interlinguaTranslitUsed, string srcLang, string trgLang, string srcFile, string trgFile, Dictionary<string, Dictionary<string, bool>> excDict, Dictionary<string, bool> srcStopWords, Dictionary<string, bool> trgStopWords)
        {
            // The term dictionaries are dereferenced by the log statement right below, so they are part of the guard.
            if (configuration == null||configuration.langPairEntryDict==null||srcTerms==null||trgTerms==null||string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
            {
                return null;
            }
            Log.Write ("Starting alignmet of "+ srcTerms.Count.ToString()+" "+srcLang+" and "+ trgTerms.Count.ToString()+" "+trgLang+" terms.",LogLevelType.LIMITED_OUTPUT);

            string langKey = srcLang+"_"+trgLang;

            // Use the configured entry of the language pair; otherwise a default entry that only knows the two languages.
            MPAlignerConfigurationLangPairEntry lpeConf;
            if (configuration.langPairEntryDict.ContainsKey(langKey))
            {
                lpeConf = configuration.langPairEntryDict[langKey];
            }
            else
            {
                lpeConf = new MPAlignerConfigurationLangPairEntry();
                lpeConf.srcLang = srcLang;
                lpeConf.trgLang = trgLang;
            }
            int counter = 0;

            // aligned lowercased source string -> aligned lowercased target strings that were already scored
            Dictionary<string,Dictionary<string,bool>> alignedList = new Dictionary<string, Dictionary<string, bool>>();
            List<AlignmentInfoElement> res = new List<AlignmentInfoElement>();
            foreach(KeyValuePair<string, ProcessedTermEntry> srcPair in srcTerms)
            {
                // Progress output: one dot per 50 source terms, the running count every 1000.
                counter++;
                if (counter%50==0)
                {
                    Console.Write(".");
                    if (counter%1000==0)
                    {
                        Console.WriteLine(" - "+counter.ToString());
                    }
                }
                ProcessedTermEntry srcPte = srcPair.Value;
                if (srcPte==null)
                {
                    continue;
                }
                foreach(KeyValuePair<string, ProcessedTermEntry> trgPair in trgTerms)
                {
                    ProcessedTermEntry trgPte = trgPair.Value;
                    if (trgPte==null)
                    {
                        continue;
                    }
                    AlignmentInfoElement aie = new AlignmentInfoElement();
                    List<WordAlignmentElement> srcToTrg = new List<WordAlignmentElement>();
                    List<WordAlignmentElement> trgToSrc = new List<WordAlignmentElement>();
                    maxStrLen = 0;

                    ///Types:
                    /// 0 - dictionary,
                    /// 1 - simple translit,
                    /// 2 - target,
                    /// 3 - translit
                    if (interlinguaDictUsed && interlinguaTranslitUsed)
                    {
                        //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLATION
                        AlignStringProbabEntryListLists (lpeConf, srcPte.translationList, trgPte.translationList, srcToTrg, trgToSrc, 0, 0);

                        //Translation is in EN language; SOURCE TRANSLATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 0, 1);

                        //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLITERATION
                        AlignStringProbabEntryListLists (lpeConf, srcPte.translationList, trgPte.transliterationList, srcToTrg, trgToSrc, 0, 3);

                        //Translation is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLATION
                        AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.translationList, srcToTrg, trgToSrc, 1, 0);

                        //Translation is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLATION
                        AlignStringProbabEntryListLists (lpeConf, srcPte.transliterationList, trgPte.translationList, srcToTrg, trgToSrc, 3, 0);

                        //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 3, 1);

                        //Transliteration is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLITERATION
                        AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.transliterationList, srcToTrg, trgToSrc, 1, 3);

                        //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLITERATION
                        AlignStringProbabEntryListLists (lpeConf, srcPte.transliterationList, trgPte.transliterationList, srcToTrg, trgToSrc, 3, 3);

                        //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);
                    }
                    else if (interlinguaTranslitUsed)
                    {
                        //Translation is in target language; SOURCE TRANSLATION vs TARGET
                        AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 0, 2);

                        //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 3, 1);

                        //Transliteration is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLITERATION
                        AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.transliterationList, srcToTrg, trgToSrc, 1, 3);

                        //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLITERATION
                        //NOTE(review): the branch above passes types (3, 3) for this same comparison - confirm that (3, 2) is intended here.
                        AlignStringProbabEntryListLists (lpeConf, srcPte.transliterationList, trgPte.transliterationList, srcToTrg, trgToSrc, 3, 2);

                        //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);

                        //Translation is in target language; SOURCE vs TARGET TRANSLATION
                        AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.translationList, srcToTrg, trgToSrc, 2, 0);
                    }
                    else if (interlinguaDictUsed)
                    {
                        //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLATION
                        AlignStringProbabEntryListLists (lpeConf, srcPte.translationList, trgPte.translationList, srcToTrg, trgToSrc, 0, 0);

                        //Translation is in EN language; SOURCE TRANSLATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 0, 1);

                        //Translation is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLATION
                        AlignStringListToStringProbabEntryList (lpeConf, srcPte.simpleTransliteration, trgPte.translationList, srcToTrg, trgToSrc, 1, 0);

                        //Transliteration is in target language; SOURCE TRANSLITERATION vs TARGET
                        AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 3, 2);

                        //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);

                        //Transliteration is in target language; SOURCE vs TARGET TRANSLITERATION
                        AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.transliterationList, srcToTrg, trgToSrc, 2, 3);
                    }
                    else
                    {
                        //Translation is in target language; SOURCE TRANSLATION vs TARGET
                        AlignStringProbabEntryListToStringList (lpeConf, srcPte.translationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 0, 2);

                        //Transliteration is in target language; SOURCE TRANSLITERATION vs TARGET
                        AlignStringProbabEntryListToStringList (lpeConf, srcPte.transliterationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 3, 2);

                        //Translation is in target language; SOURCE vs TARGET TRANSLATION
                        AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.translationList, srcToTrg, trgToSrc, 2, 0);

                        //Transliteration is in target language; SOURCE vs TARGET TRANSLITERATION
                        AlignStringListToStringProbabEntryList (lpeConf, srcPte.lowercaseWords, trgPte.transliterationList, srcToTrg, trgToSrc, 2, 3);

                        //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringLists (lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);
                    }
                    aie.srcToTrgAlignments = srcToTrg;
                    aie.trgToSrcAlignments = trgToSrc;
                    aie.srcToTrgAlignments.Sort(
                        delegate(WordAlignmentElement w1, WordAlignmentElement w2)
                        {
                            // Order: average overlap (descending), then fromLen (descending),
                            // then toOverlap (descending), then fromId (ascending).
                            double avgW1Overlap = (w1.fromOverlap+w1.toOverlap)/2;
                            double avgW2Overlap = (w2.fromOverlap+w2.toOverlap)/2;
                            if (avgW1Overlap!=avgW2Overlap)
                            {
                                return avgW2Overlap.CompareTo(avgW1Overlap);
                            }
                            if (w2.fromLen == w1.fromLen)
                            {
                                if (w2.toOverlap==w1.toOverlap)
                                {
                                    return w1.fromId.CompareTo(w2.fromId);
                                }
                                return w2.toOverlap.CompareTo(w1.toOverlap);
                            }
                            return w2.fromLen.CompareTo(w1.fromLen);
                        }
                    );
                    aie.trgToSrcAlignments.Sort(
                        delegate(WordAlignmentElement w1, WordAlignmentElement w2)
                        {
                            // Mirror image of the comparison above: average overlap (descending), then toLen
                            // (descending), then fromOverlap (descending), then toId (ascending).
                            double avgW1Overlap = (w1.fromOverlap+w1.toOverlap)/2;
                            double avgW2Overlap = (w2.fromOverlap+w2.toOverlap)/2;
                            if (avgW1Overlap!=avgW2Overlap)
                            {
                                return avgW2Overlap.CompareTo(avgW1Overlap);
                            }
                            if (w2.toLen == w1.toLen)
                            {
                                if (w2.fromOverlap==w1.fromOverlap)
                                {
                                    return w1.toId.CompareTo(w2.toId);
                                }
                                return w2.fromOverlap.CompareTo(w1.fromOverlap);
                            }
                            return w2.toLen.CompareTo(w1.toLen);
                        }
                    );

                    aie.srcEntry = srcPte;
                    aie.trgEntry = trgPte;

                    ConsolidateOverlaps(lpeConf,aie, excDict);
                    if(!CreateStrListsForEval(configuration,aie,srcStopWords,trgStopWords))
                    {
                        continue;
                    }

                    // Register the aligned string combination; it is scored only the first time it shows up,
                    // whether or not that first score passes the threshold.
                    Dictionary<string,bool> seenTargets;
                    if (!alignedList.TryGetValue(aie.alignedLowSrcStr, out seenTargets))
                    {
                        seenTargets = new Dictionary<string, bool>();
                        alignedList.Add(aie.alignedLowSrcStr, seenTargets);
                    }
                    if (seenTargets.ContainsKey(aie.alignedLowTrgStr))
                    {
                        continue;
                    }
                    seenTargets.Add(aie.alignedLowTrgStr, true);

                    aie.alignmentScore = EvaluateAlignmentScore(lpeConf,aie);
                    if (aie.alignmentScore>=lpeConf.finalAlignmentThr)
                    {
                        //If you wish to debug the process, comment the lines below that clear the alignments...
                        aie.srcToTrgAlignments.Clear();
                        aie.trgToSrcAlignments.Clear();
                        aie.consolidatedAlignment.Clear();
                        aie.srcFile = srcFile;
                        aie.trgFile = trgFile;
                        res.Add(aie);
                    }
                }
            }
            Console.WriteLine(" - "+counter.ToString());
            Log.Write ("Alignmet finished - "+ res.Count.ToString()+" term pairs aligned over the alignment threshold " +lpeConf.finalAlignmentThr.ToString()+".\n",LogLevelType.LIMITED_OUTPUT);
            return res;
        }
Esempio n. 11
0
        /// <summary>
        /// Aligns every source term with every target term on a <c>SmartThreadPool</c> and returns the
        /// non-null results of <c>AlignSingleTermPair</c>, keeping only the first result for each
        /// combination of aligned lowercased source/target strings.
        /// </summary>
        /// <remarks>
        /// <c>AlignSingleTermPair</c> only receives the two term entries; all other arguments are published
        /// through the static members <c>_configuration</c>, <c>_interlinguaDictUsed</c>,
        /// <c>_interlinguaTranslitUsed</c>, <c>_srcFile</c>, <c>_trgFile</c>, <c>_excDict</c>,
        /// <c>_srcStopWords</c>, <c>_trgStopWords</c> and <c>_lpeConf</c> before any work item is queued.
        /// Work item results are harvested whenever 100000 of them are pending and once more at the end.
        /// The thread pool is shut down and disposed even when the method exits with an exception.
        /// </remarks>
        /// <param name="configuration">Aligner configuration; <c>alignmentThreads</c> sizes the thread pool.</param>
        /// <param name="srcTerms">Pre-processed source terms.</param>
        /// <param name="trgTerms">Pre-processed target terms.</param>
        /// <param name="interlinguaDictUsed">Published to the workers through <c>_interlinguaDictUsed</c>.</param>
        /// <param name="interlinguaTranslitUsed">Published to the workers through <c>_interlinguaTranslitUsed</c>.</param>
        /// <param name="srcLang">Source language code.</param>
        /// <param name="trgLang">Target language code.</param>
        /// <param name="srcFile">Published to the workers through <c>_srcFile</c>.</param>
        /// <param name="trgFile">Published to the workers through <c>_trgFile</c>.</param>
        /// <param name="excDict">Published to the workers through <c>_excDict</c>.</param>
        /// <param name="srcStopWords">Published to the workers through <c>_srcStopWords</c>.</param>
        /// <param name="trgStopWords">Published to the workers through <c>_trgStopWords</c>.</param>
        /// <returns>
        /// The collected alignments, or <c>null</c> when the configuration, its language pair dictionary
        /// or one of the term dictionaries is <c>null</c>, or when a language code is blank.
        /// </returns>
        public static List<AlignmentInfoElement> AlignPairsMultiThreaded(MPAlignerConfiguration configuration, Dictionary<string, ProcessedTermEntry> srcTerms, Dictionary<string, ProcessedTermEntry> trgTerms, bool interlinguaDictUsed, bool interlinguaTranslitUsed, string srcLang, string trgLang, string srcFile, string trgFile, Dictionary<string, Dictionary<string, bool>> excDict, Dictionary<string, bool> srcStopWords, Dictionary<string, bool> trgStopWords)
        {
            // The term dictionaries are dereferenced by the log statement right below, so they are part of the guard.
            if (configuration == null||configuration.langPairEntryDict==null||srcTerms==null||trgTerms==null||string.IsNullOrWhiteSpace(srcLang)||string.IsNullOrWhiteSpace(trgLang))
            {
                return null;
            }
            Log.Write ("Starting alignmet of "+ srcTerms.Count.ToString()+" "+srcLang+" and "+ trgTerms.Count.ToString()+" "+trgLang+" terms.",LogLevelType.LIMITED_OUTPUT);

            string langKey = srcLang+"_"+trgLang;

            // Use the configured entry of the language pair; otherwise a default entry that only knows the two languages.
            MPAlignerConfigurationLangPairEntry lpeConf;
            if (configuration.langPairEntryDict.ContainsKey(langKey))
            {
                lpeConf = configuration.langPairEntryDict[langKey];
            }
            else
            {
                lpeConf = new MPAlignerConfigurationLangPairEntry();
                lpeConf.srcLang = srcLang;
                lpeConf.trgLang = trgLang;
            }

            // Everything besides the two term entries reaches AlignSingleTermPair through these static members.
            _configuration = configuration;
            _interlinguaDictUsed=interlinguaDictUsed;
            _interlinguaTranslitUsed=interlinguaTranslitUsed;
            _srcFile=srcFile;
            _trgFile=trgFile;
            _excDict=excDict;
            _srcStopWords=srcStopWords;
            _trgStopWords=trgStopWords;
            _lpeConf=lpeConf;

            int threadCount = configuration.alignmentThreads;

            STPStartInfo stpStartInfo = new STPStartInfo();
            stpStartInfo.IdleTimeout = 100*1000;
            stpStartInfo.MaxWorkerThreads = 5*threadCount;
            stpStartInfo.MinWorkerThreads = threadCount;
            stpStartInfo.EnableLocalPerformanceCounters = true;

            SmartThreadPool smartThreadPool = new SmartThreadPool(stpStartInfo);

            int counter = 0;
            List<AlignmentInfoElement> res = new List<AlignmentInfoElement>();
            // aligned lowercased source string -> aligned lowercased target strings that were already collected
            Dictionary<string,Dictionary<string,bool>> alignedList = new Dictionary<string, Dictionary<string, bool>>();
            List<IWorkItemResult<AlignmentInfoElement>> wirList = new List<IWorkItemResult<AlignmentInfoElement>>(1000);
            try
            {
                foreach(KeyValuePair<string, ProcessedTermEntry> srcPair in srcTerms)
                {
                    // Progress output: one dot per 50 source terms, the running count every 1000.
                    counter++;
                    if (counter%50==0)
                    {
                        Console.Write(".");
                        if (counter%1000==0)
                        {
                            Console.WriteLine(" - "+counter.ToString());
                        }
                    }
                    ProcessedTermEntry srcPte = srcPair.Value;
                    foreach(KeyValuePair<string, ProcessedTermEntry> trgPair in trgTerms)
                    {
                        // Harvest in batches so that the list of pending work item results stays bounded.
                        if (wirList.Count>=100000)
                        {
                            smartThreadPool.WaitForIdle();
                            CollectFinishedAlignments(wirList, alignedList, res);
                        }
                        try
                        {
                            IWorkItemResult<AlignmentInfoElement> wir = smartThreadPool.QueueWorkItem(
                                new Amib.Threading.Func<ProcessedTermEntry, ProcessedTermEntry, AlignmentInfoElement>(AlignSingleTermPair), srcPte, trgPair.Value);
                            if (wir!=null) wirList.Add(wir);
                        }
                        catch
                        {
                            // Best effort: a pair that cannot be queued is reported and skipped.
                            Log.Write("Thread exception catched - cannot create a new thread within term alignment!", LogLevelType.WARNING);
                        }
                    }
                }
                if (wirList.Count>0)
                {
                    smartThreadPool.WaitForIdle();
                    CollectFinishedAlignments(wirList, alignedList, res);
                }
            }
            finally
            {
                // Release the worker threads on every exit path; a failing shutdown must not hide the alignment outcome.
                try
                {
                    smartThreadPool.Shutdown(true,100);
                }
                catch
                {
                    Log.Write("The alignment thread pool could not be shut down cleanly.", LogLevelType.WARNING);
                }
                try
                {
                    smartThreadPool.Dispose();
                }
                catch
                {
                    Log.Write("The alignment thread pool could not be disposed cleanly.", LogLevelType.WARNING);
                }
                smartThreadPool = null;
                // Kept from the original implementation, which forced a collection after every run.
                GC.Collect();
                GC.WaitForPendingFinalizers();
            }
            Log.Write ("Alignmet finished - "+ res.Count.ToString()+" term pairs aligned over the alignment threshold " +lpeConf.finalAlignmentThr.ToString()+".\n",LogLevelType.LIMITED_OUTPUT);
            return res;
        }

        /// <summary>
        /// Moves the results of a batch of work items into <paramref name="res"/> and empties the batch.
        /// </summary>
        /// <remarks>
        /// A work item that is not completed yet is polled up to 100 times with 100 ms pauses. Items that
        /// still have not completed, or that ended with an exception, are skipped; their number is logged
        /// once per batch. A non-null result is kept only when its combination of aligned lowercased
        /// source/target strings is not yet registered in <paramref name="alignedList"/>.
        /// </remarks>
        /// <param name="wirList">Work item results to harvest; cleared before returning.</param>
        /// <param name="alignedList">Registry of the string combinations collected so far; updated in place.</param>
        /// <param name="res">Receives the accepted alignments.</param>
        private static void CollectFinishedAlignments(List<IWorkItemResult<AlignmentInfoElement>> wirList, Dictionary<string,Dictionary<string,bool>> alignedList, List<AlignmentInfoElement> res)
        {
            int skipped = 0;
            for(int i=0;i<wirList.Count;i++)
            {
                IWorkItemResult<AlignmentInfoElement> wir = wirList[i];
                int times = 100;
                while(!wir.IsCompleted && times>0)
                {
                    times--;
                    System.Threading.Thread.Sleep(100);
                }
                if (!wir.IsCompleted || wir.Exception!=null)
                {
                    skipped++;
                    continue;
                }
                AlignmentInfoElement aie = (AlignmentInfoElement)wir.Result;
                if (aie==null)
                {
                    continue;
                }
                Dictionary<string,bool> seenTargets;
                if (!alignedList.TryGetValue(aie.alignedLowSrcStr, out seenTargets))
                {
                    seenTargets = new Dictionary<string, bool>();
                    alignedList.Add(aie.alignedLowSrcStr, seenTargets);
                }
                if (!seenTargets.ContainsKey(aie.alignedLowTrgStr))
                {
                    seenTargets.Add(aie.alignedLowTrgStr, true);
                    res.Add(aie);
                }
            }
            wirList.Clear();
            if (skipped>0)
            {
                Log.Write(skipped.ToString()+" term pair alignment work item(s) failed or did not complete in time and were skipped.", LogLevelType.WARNING);
            }
        }
Esempio n. 12
0
        // Dictionary<string, Dictionary<string, bool>> excDict)
        //private static List<WordAlignmentElement> backupList;
        /// <summary>
        /// Compares one source string with one target string and, when they are similar enough, appends
        /// one alignment element to each of the two output lists.
        /// </summary>
        /// <remarks>
        /// The longest common substring is tried first: its length relative to the shorter string is
        /// checked with <c>CheckOverlapThreshold</c> and, if configured, with the first/second character
        /// checks. Otherwise a Levenshtein-based fallback is used, restricted to strings longer than two
        /// characters whose first two characters are identical. Null or empty input strings, and a
        /// <c>null</c> common substring, produce no alignment.
        /// TODO (carried over from the original code): the exclusion dictionary check was removed here
        /// because it was applied to translation equivalents instead of lowercased tokens.
        /// </remarks>
        /// <param name="lpeConf">Language pair configuration (thresholds, <c>enforce1stChar</c>, <c>enforce2ndChar</c>).</param>
        /// <param name="srcToTrg">Receives the source-to-target alignment element.</param>
        /// <param name="trgToSrc">Receives the target-to-source alignment element.</param>
        /// <param name="fromType">Type code of the source string (0 dictionary, 1 simple translit, 2 target, 3 translit).</param>
        /// <param name="toType">Type code of the target string (same codes as <paramref name="fromType"/>).</param>
        /// <param name="srcId">Index of the source token.</param>
        /// <param name="srcTypeId">Index of the source string variant.</param>
        /// <param name="trgId">Index of the target token.</param>
        /// <param name="trgTypeId">Index of the target string variant.</param>
        /// <param name="srcStr">Source string.</param>
        /// <param name="trgStr">Target string.</param>
        static void AlignStrings(MPAlignerConfigurationLangPairEntry lpeConf, List<WordAlignmentElement> srcToTrg, List<WordAlignmentElement> trgToSrc, short fromType, short toType,  int srcId, int srcTypeId, int trgId, int trgTypeId, string srcStr, string trgStr)
        {
            // Nothing can be aligned with a missing or empty string; returning here also keeps the
            // length ratio below from becoming 0/0.
            if (string.IsNullOrEmpty(srcStr)||string.IsNullOrEmpty(trgStr))
            {
                return;
            }
            int srcIdStart = -1;
            int srcIdEnd = -1;
            int trgIdStart = -1;
            int trgIdEnd = -1;
            string matching = LongestCommonSubstring.Get(srcStr, trgStr,out srcIdStart, out srcIdEnd, out trgIdStart, out trgIdEnd);
            // Must be checked before matching.Length is read.
            if (matching==null)
            {
                return;
            }

            // Length of the common substring relative to the shorter of the two strings.
            double lenDiff = matching.Length;
            lenDiff/=Math.Min(srcStr.Length, trgStr.Length);
            if (CheckOverlapThreshold(lpeConf, lenDiff, fromType, toType)
            && (!lpeConf.enforce1stChar || (Check1stCharOverlapValidity(srcIdStart,srcIdEnd,trgIdStart,trgIdEnd) && (!lpeConf.enforce2ndChar || Check2ndCharOverlapValidity(srcIdStart,srcIdEnd,trgIdStart,trgIdEnd)))))
            {
                double srcOverlap = ((double)(srcIdEnd-srcIdStart+1))/((double)srcStr.Length);
                double trgOverlap = ((double)(trgIdEnd-trgIdStart+1))/((double)trgStr.Length);
                WordAlignmentElement wae = CreateWordAlignmentElement(trgStr, trgIdStart, trgIdEnd, 1, srcId, trgId, fromType, toType, srcTypeId, trgTypeId, srcOverlap,srcStr.Length, srcIdStart, trgIdStart);
                srcToTrg.Add(wae);
                wae = CreateWordAlignmentElement(srcStr, srcIdStart, srcIdEnd, -1, srcId, trgId, fromType, toType, srcTypeId, trgTypeId, trgOverlap,trgStr.Length, srcIdStart, trgIdStart);
                trgToSrc.Add(wae);
                return;
            }

            //We limit the levenshtein similarity results so that the first two characters have to match!
            //This is done to get rid of a lot of pre-fix mismatching mistakes...
            //The cheap prefix test runs first so that the similarity is only computed for strings that can still qualify.
            if (srcStr.Length>2&&trgStr.Length>2&&srcStr[0]==trgStr[0]&&srcStr[1]==trgStr[1])
            {
                double similarity = EvaluateLevenshteinSimilarity(srcStr,trgStr);
                if (CheckLevenshteinThreshold(lpeConf, similarity, fromType, toType))
                {
                    // A Levenshtein alignment covers both strings entirely and uses the similarity as overlap on both sides.
                    WordAlignmentElement wae = CreateWordAlignmentElement(trgStr, 0, trgStr.Length-1, 1, srcId, trgId, fromType, toType, srcTypeId, trgTypeId, similarity,srcStr.Length, 0, 0, false);
                    wae.isLevenshtein = true;
                    srcToTrg.Add(wae);
                    wae = CreateWordAlignmentElement(srcStr, 0, srcStr.Length-1, -1, srcId, trgId, fromType, toType, srcTypeId, trgTypeId, similarity,trgStr.Length, 0, 0,false);
                    wae.isLevenshtein = true;
                    trgToSrc.Add(wae);
                }
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Clears every position of <paramref name="mapOne"/> whose proportionally scaled counterpart
        /// in <paramref name="mapTwo"/> is set.
        /// </summary>
        /// <remarks>
        /// The two maps may differ in length; position <c>i</c> of <paramref name="mapOne"/> is compared
        /// with position <c>i * mapTwo.Length / mapOne.Length</c> of <paramref name="mapTwo"/>.
        /// <paramref name="mapOne"/> is modified in place; the returned array is the same instance.
        /// An empty <paramref name="mapTwo"/> removes nothing.
        /// </remarks>
        /// <param name="lpeConf">Language pair configuration; not used by this method.</param>
        /// <param name="mapOne">Map to clear positions in.</param>
        /// <param name="mapTwo">Map whose set positions are removed from <paramref name="mapOne"/>.</param>
        /// <returns><paramref name="mapOne"/> after the removal.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="mapOne"/> or <paramref name="mapTwo"/> is <c>null</c>.
        /// </exception>
        public static bool[] RemoveFromAlignmentMap(MPAlignerConfigurationLangPairEntry lpeConf,bool[] mapOne, bool[] mapTwo)
        {
            // The two-argument constructor is needed: the single-string overload would treat the
            // message as the parameter name.
            if (mapOne==null)
            {
                throw new ArgumentNullException("mapOne", "Either mapOne or mapTwo are empty (null)!");
            }
            if (mapTwo==null)
            {
                throw new ArgumentNullException("mapTwo", "Either mapOne or mapTwo are empty (null)!");
            }

            int l1 = mapOne.Length;
            int l2 = mapTwo.Length;
            if (l2==0)
            {
                // Nothing to remove; indexing an empty map below would throw.
                return mapOne;
            }

            for (int i=0;i<l1;i++)
            {
                // Scale the index into mapTwo; the product is computed as long so that it cannot overflow.
                int adjM2Index = (int)(((long)i*l2)/l1);
                if (mapTwo[adjM2Index])
                {
                    mapOne[i]=false;
                }
            }
            return mapOne;
        }
Esempio n. 14
0
 /// <summary>
 /// Determines whether there is an overlap conflict between mapOne and mapTwo. Positions that are
 /// set in validMap are not counted as conflicting.
 /// This is an experimental implementation!
 /// TODO: Analyse the effect of the variations in length on the conflict detection!
 /// </summary>
 /// <remarks>
 /// The maps may differ in length; position <c>i</c> of mapOne is compared with the proportionally
 /// scaled position of mapTwo (and of validMap). A conflict is reported as soon as the number of
 /// positions set in both maps exceeds <c>lpeConf.maxOverlapCharsInCompounds</c>. An empty mapTwo
 /// never conflicts; an empty validMap is treated like a missing one.
 /// </remarks>
 /// <returns>
 /// <c>true</c> if there is an overlap conflict between mapOne and mapTwo; otherwise, <c>false</c>.
 /// </returns>
 /// <param name='lpeConf'>
 /// The language pair configuration entry.
 /// </param>
 /// <param name='mapOne'>
 /// Map one - boolean array.
 /// </param>
 /// <param name='mapTwo'>
 /// Map two - boolean array.
 /// </param>
 /// <param name='validMap'>
 /// Valid alignment map - boolean array; may be <see langword="null" />.
 /// </param>
 /// <exception cref='ArgumentNullException'>
 /// Is thrown when mapOne or mapTwo is <see langword="null" /> .
 /// </exception>
 public static bool IsOverlapConflict(MPAlignerConfigurationLangPairEntry lpeConf,bool[] mapOne, bool[] mapTwo, bool[] validMap)
 {
     // validMap is optional, so only the two compared maps are rejected when null. The two-argument
     // constructor is needed: the single-string overload would treat the message as the parameter name.
     if (mapOne==null)
     {
         throw new ArgumentNullException("mapOne", "Either mapOne or mapTwo are empty (null)!");
     }
     if (mapTwo==null)
     {
         throw new ArgumentNullException("mapTwo", "Either mapOne or mapTwo are empty (null)!");
     }
     int l1 = mapOne.Length;
     int l2 = mapTwo.Length;
     if (l2==0)
     {
         // Nothing can overlap with an empty map; indexing it below would throw.
         return false;
     }
     bool useValidMap = validMap!=null && validMap.Length>0;
     int l3 = useValidMap?validMap.Length:0;

     int overlaps = 0;
     for (int i=0;i<l1;i++)
     {
         if (!mapOne[i])
         {
             continue;
         }
         // Scaled indices; the products are computed as long so that they cannot overflow.
         int adjM2Index = (int)(((long)i*l2)/l1);
         if (!mapTwo[adjM2Index])
         {
             continue;
         }
         if (useValidMap && validMap[(int)(((long)i*l3)/l1)])
         {
             // Overlap on a position that is marked valid - not a conflict.
             continue;
         }
         overlaps++;
         if (overlaps>lpeConf.maxOverlapCharsInCompounds)
         {
             return true;
         }
     }
     return false;
 }
Esempio n. 15
0
 /// <summary>
 /// Scores an alignment: the Levenshtein-based similarity of the two strings prepared for alignment,
 /// scaled by the mean of the source and target multipliers.
 /// </summary>
 /// <param name="lpeConf">Language pair configuration; not used by this method.</param>
 /// <param name="aie">Alignment whose <c>srcStrForAlignment</c> and <c>trgStrForAlignment</c> are compared.</param>
 /// <returns>
 /// The score, or 0 when <paramref name="aie"/> is <c>null</c> or one of its two strings is null, empty or blank.
 /// </returns>
 public static double EvaluateAlignmentScore(MPAlignerConfigurationLangPairEntry lpeConf,AlignmentInfoElement aie)
 {
     bool nothingToScore = aie==null
         || string.IsNullOrWhiteSpace(aie.srcStrForAlignment)
         || string.IsNullOrWhiteSpace(aie.trgStrForAlignment);
     if (nothingToScore)
     {
         return 0;
     }

     // Similarity = share of the longer string that is left untouched by the edit distance.
     double longest = Math.Max(aie.srcStrForAlignment.Length,aie.trgStrForAlignment.Length);
     double similarity = (longest-LevenshteinDistance.Compute(aie.srcStrForAlignment,aie.trgStrForAlignment))/longest;
     similarity *= (aie.srcMultiplier+aie.trgMultiplier)/2;
     return similarity;
 }
Esempio n. 16
0
        /// <summary>
        /// Picks the best word alignment per source word (from <c>aie.srcToTrgAlignments</c>) and per target
        /// word (from <c>aie.trgToSrcAlignments</c>), then stores the merged result in
        /// <c>aie.consolidatedAlignment</c>.
        /// </summary>
        /// <remarks>
        /// Each list is traversed exactly once. Because of that, the outcome depends on the order of the
        /// alignments: an alignment rejected because of a conflict is not reconsidered when the conflicting
        /// alignment is later replaced.
        /// </remarks>
        /// <param name="lpeConf">Language pair configuration supplying the fragment and overlap thresholds.</param>
        /// <param name="aie">Alignment to consolidate; nothing is done when it, its entries or its alignment lists are null.</param>
        /// <param name="excDict">
        /// Optional exclusion dictionary (source word -> target words). Word pairs found in it are ignored.
        /// May be null.
        /// </param>
        public static void ConsolidateOverlaps(MPAlignerConfigurationLangPairEntry lpeConf,AlignmentInfoElement aie, Dictionary<string, Dictionary<string, bool>> excDict)
        {
            if (lpeConf==null||aie==null||aie.srcEntry==null||aie.trgEntry==null)
            {
                return;
            }
            if (aie.srcToTrgAlignments==null || aie.trgToSrcAlignments==null)
            {
                return;
            }

            //Pass 1: find the top 1 overlap for each source ID (do not worry about repetitive word overlaps at this point!).
            Dictionary<int, WordAlignmentElement> maxSrcToTrgOverlaps = new Dictionary<int, WordAlignmentElement>();
            Dictionary<int, bool[]> trgStrOverlapDict = new Dictionary<int, bool[]>();

            foreach (WordAlignmentElement wae in aie.srcToTrgAlignments)
            {
                if (IsExcludedWordPair(aie, wae, excDict))
                {
                    continue;
                }
                if (wae.fromOverlap<lpeConf.minShortFragmentOverlap && wae.fromLen<lpeConf.minShortFragmentLen||
                    wae.fromLen<lpeConf.minShortFragmentLen && wae.toLen>lpeConf.maxShortFragmentTargetLen)
                {
                    //In the case if there is a "short" fragment and the target length is "big", the alignment won't be applied (this is to limit false alignments!).
                    continue;
                }
                if (Math.Max(wae.fromOverlap,wae.toOverlap)>=lpeConf.minSrcOrTrgOverlap)
                {
                    RegisterBestOverlap(lpeConf, wae, true, maxSrcToTrgOverlaps, trgStrOverlapDict);
                }
            }

            //Pass 2: the mirrored search - the top 1 overlap for each target ID.
            Dictionary<int, WordAlignmentElement> maxTrgToSrcOverlaps = new Dictionary<int, WordAlignmentElement>();
            Dictionary<int, bool[]> srcStrOverlapDict = new Dictionary<int, bool[]>();

            foreach (WordAlignmentElement wae in aie.trgToSrcAlignments)
            {
                if (IsExcludedWordPair(aie, wae, excDict))
                {
                    continue;
                }
                //NOTE(review): the first clause tests wae.fromLen although the source-to-target pass pairs the overlap
                //and the length of the same side; wae.toLen may have been intended here - confirm before changing.
                if (wae.toOverlap<lpeConf.minShortFragmentOverlap && wae.fromLen<lpeConf.minShortFragmentLen||
                    wae.toLen<lpeConf.minShortFragmentLen && wae.fromLen>lpeConf.maxShortFragmentTargetLen)
                {
                    //In the case if there is a "short" fragment and the opposite side is "big", the alignment won't be applied (this is to limit false alignments!).
                    continue;
                }
                if (Math.Max(wae.fromOverlap,wae.toOverlap)>=lpeConf.minSrcOrTrgOverlap)
                {
                    RegisterBestOverlap(lpeConf, wae, false, maxTrgToSrcOverlaps, srcStrOverlapDict);
                }
            }

            //At this stage we have one alignment from SRC to TRG and one from TRG to SRC. We need to consolidate both and also trim the beginning and the end...
            aie.consolidatedAlignment = ConsolidateAlignments(aie.srcEntry.lowercaseWords.Count, aie.trgEntry.lowercaseWords.Count, maxSrcToTrgOverlaps,maxTrgToSrcOverlaps);
        }

        /// <summary>
        /// Tells whether the word pair of <paramref name="wae"/> is listed in the exclusion dictionary.
        /// </summary>
        /// <returns>
        /// <c>false</c> when <paramref name="excDict"/> is null or does not list the pair; <c>true</c> otherwise.
        /// </returns>
        private static bool IsExcludedWordPair(AlignmentInfoElement aie, WordAlignmentElement wae, Dictionary<string, Dictionary<string, bool>> excDict)
        {
            if (excDict==null)
            {
                return false;
            }
            //The target word is looked up only when the source word has an entry (same evaluation order as a chained ||).
            Dictionary<string, bool> excludedTargets;
            return excDict.TryGetValue(aie.srcEntry.lowercaseWords[wae.fromId], out excludedTargets)
                && excludedTargets.ContainsKey(aie.trgEntry.lowercaseWords[wae.toId]);
        }

        /// <summary>
        /// Tries to make <paramref name="wae"/> the best alignment of its "owner" word and keeps the character
        /// coverage map of the word on the opposite side in sync.
        /// </summary>
        /// <param name="lpeConf">Language pair configuration passed through to the alignment map helpers.</param>
        /// <param name="wae">Candidate word alignment.</param>
        /// <param name="bestPerSource">
        /// <c>true</c>: the owner is the source word (<c>fromId</c>, ranked by <c>fromOverlap</c>) and coverage is
        /// tracked per target word (<c>toId</c>). <c>false</c>: the roles are swapped.
        /// </param>
        /// <param name="bestByOwner">Best alignment found so far for each owner word ID; updated in place.</param>
        /// <param name="coverageByWord">Coverage map for each word ID of the opposite side; updated in place.</param>
        private static void RegisterBestOverlap(MPAlignerConfigurationLangPairEntry lpeConf, WordAlignmentElement wae, bool bestPerSource, Dictionary<int, WordAlignmentElement> bestByOwner, Dictionary<int, bool[]> coverageByWord)
        {
            int ownerId = bestPerSource ? wae.fromId : wae.toId;
            int coveredId = bestPerSource ? wae.toId : wae.fromId;

            WordAlignmentElement previous;
            bool hasPrevious = bestByOwner.TryGetValue(ownerId, out previous);
            if (hasPrevious)
            {
                bool isBetter = bestPerSource ? wae.fromOverlap>previous.fromOverlap : wae.toOverlap>previous.toOverlap;
                if (!isBetter)
                {
                    return;
                }
            }

            int previousCoveredId = hasPrevious ? (bestPerSource ? previous.toId : previous.fromId) : -1;
            bool sameCoveredWord = hasPrevious && previousCoveredId==coveredId;
            //When the owner stays on the same opposite word, the characters it covered before must not count as a conflict.
            bool[] formerMap = sameCoveredWord ? previous.alignmentMap : null;

            bool[] coverage;
            bool hasCoverage = coverageByWord.TryGetValue(coveredId, out coverage);
            if (hasCoverage && IsOverlapConflict(lpeConf, coverage, wae.alignmentMap, formerMap))
            {
                //Another alignment already occupies too much of this word; keep what we have.
                return;
            }

            if (hasPrevious && !sameCoveredWord)
            {
                //The owner moves to a different word: at first, remove its alignment from the previous word.
                //As we do only one pass through the alignments, some important updates can be lost in this way if the list is not sorted.
                bool[] previousCoverage;
                if (coverageByWord.TryGetValue(previousCoveredId, out previousCoverage))
                {
                    coverageByWord[previousCoveredId] = RemoveFromAlignmentMap(lpeConf, previousCoverage, previous.alignmentMap);
                }
            }

            if (!hasCoverage)
            {
                //No coverage map exists for this word yet. A fixed-size map ensures that small alignments do not cause big problems!
                coverage = new bool[maxStrLen];
                formerMap = null;
            }

            //The result is always stored under the covered word's own ID, so coverage maps of different words never share an array.
            coverageByWord[coveredId] = AdjustAlignmentMap(lpeConf, coverage, wae.alignmentMap, formerMap);
            bestByOwner[ownerId] = wae;
        }