Beispiel #1
0
        /// <summary>
        /// Aligns two parallel term lists position by position: the entry at index i of
        /// <paramref name="srcTermList"/> is aligned only with the entry at index i of
        /// <paramref name="trgTermList"/>.
        /// </summary>
        /// <remarks>
        /// Every pair in which both entries are non-null yields one result element; no score
        /// threshold is applied here and the return value of <c>CreateStrListsForEval</c> is ignored
        /// (it is called with <c>stripListsOnError</c> set to <c>false</c>).
        /// If the lists differ in length, only the common prefix is aligned.
        /// </remarks>
        /// <param name="configuration">Aligner configuration; its <c>langPairEntryDict</c> is searched for the key "srcLang_trgLang".</param>
        /// <param name="srcTermList">Source term entries (null entries are skipped).</param>
        /// <param name="trgTermList">Target term entries (null entries are skipped).</param>
        /// <param name="interlinguaDictUsed">Selects, together with <paramref name="interlinguaTranslitUsed"/>, which combinations of term representations are compared.</param>
        /// <param name="interlinguaTranslitUsed">See <paramref name="interlinguaDictUsed"/>.</param>
        /// <param name="srcLang">Source language code (must not be blank).</param>
        /// <param name="trgLang">Target language code (must not be blank).</param>
        /// <param name="srcFile">Value stored in <c>srcFile</c> of every result element.</param>
        /// <param name="trgFile">Value stored in <c>trgFile</c> of every result element.</param>
        /// <param name="excDict">Passed through to <c>ConsolidateOverlaps</c>.</param>
        /// <param name="srcStopWords">Passed through to <c>CreateStrListsForEval</c>.</param>
        /// <param name="trgStopWords">Passed through to <c>CreateStrListsForEval</c>.</param>
        /// <returns>
        /// The list of scored alignment elements, or <c>null</c> when the configuration, the language
        /// codes or either term list is missing.
        /// </returns>
        public static List<AlignmentInfoElement> AlignListPairs(MPAlignerConfiguration configuration, List<ProcessedTermEntry> srcTermList, List<ProcessedTermEntry> trgTermList, bool interlinguaDictUsed, bool interlinguaTranslitUsed, string srcLang, string trgLang, string srcFile, string trgFile, Dictionary<string, Dictionary<string, bool>> excDict, Dictionary<string, bool> srcStopWords, Dictionary<string, bool> trgStopWords)
        {
            if (configuration == null || configuration.langPairEntryDict == null || string.IsNullOrWhiteSpace(srcLang) || string.IsNullOrWhiteSpace(trgLang))
            {
                return null;
            }
            // Missing lists are treated like any other invalid argument instead of failing with a NullReferenceException.
            if (srcTermList == null || trgTermList == null)
            {
                return null;
            }

            string langKey = srcLang + "_" + trgLang;

            // Use the configured language pair entry if there is one; otherwise fall back to a default entry.
            MPAlignerConfigurationLangPairEntry lpeConf;
            if (!configuration.langPairEntryDict.TryGetValue(langKey, out lpeConf))
            {
                lpeConf = new MPAlignerConfigurationLangPairEntry();
                lpeConf.srcLang = srcLang;
                lpeConf.trgLang = trgLang;
            }

            // Only indices present in BOTH lists can be paired; indexing the target list with the
            // source count would throw when the target list is shorter.
            int pairCount = srcTermList.Count < trgTermList.Count ? srcTermList.Count : trgTermList.Count;

            List<AlignmentInfoElement> res = new List<AlignmentInfoElement>();
            for (int i = 0; i < pairCount; i++)
            {
                ProcessedTermEntry srcPte = srcTermList[i];
                ProcessedTermEntry trgPte = trgTermList[i];
                if (srcPte == null || trgPte == null)
                {
                    continue;
                }

                AlignmentInfoElement aie = new AlignmentInfoElement();
                List<WordAlignmentElement> srcToTrg = new List<WordAlignmentElement>();
                List<WordAlignmentElement> trgToSrc = new List<WordAlignmentElement>();
                maxStrLen = 0;

                // Type codes passed as the last two arguments of the Align* helpers:
                //   0 - dictionary,
                //   1 - simple translit,
                //   2 - target or source,
                //   3 - translit
                // The calls append to srcToTrg/trgToSrc, so their order is preserved exactly.
                if (interlinguaDictUsed && interlinguaTranslitUsed)
                {
                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLATION
                    AlignStringProbabEntryListLists(lpeConf, srcPte.translationList, trgPte.translationList, srcToTrg, trgToSrc, 0, 0);

                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringProbabEntryListToStringList(lpeConf, srcPte.translationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 0, 1);

                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLITERATION
                    AlignStringProbabEntryListLists(lpeConf, srcPte.translationList, trgPte.transliterationList, srcToTrg, trgToSrc, 0, 3);

                    //Translation is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLATION
                    AlignStringListToStringProbabEntryList(lpeConf, srcPte.simpleTransliteration, trgPte.translationList, srcToTrg, trgToSrc, 1, 0);

                    //Translation is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLATION
                    AlignStringProbabEntryListLists(lpeConf, srcPte.transliterationList, trgPte.translationList, srcToTrg, trgToSrc, 3, 0);

                    //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringProbabEntryListToStringList(lpeConf, srcPte.transliterationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 3, 1);

                    //Transliteration is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLITERATION
                    AlignStringListToStringProbabEntryList(lpeConf, srcPte.simpleTransliteration, trgPte.transliterationList, srcToTrg, trgToSrc, 1, 3);

                    //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLITERATION
                    AlignStringProbabEntryListLists(lpeConf, srcPte.transliterationList, trgPte.transliterationList, srcToTrg, trgToSrc, 3, 3);

                    //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringLists(lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);
                }
                else if (interlinguaTranslitUsed)
                {
                    //Translation is in target language; SOURCE TRANSLATION vs TARGET
                    AlignStringProbabEntryListToStringList(lpeConf, srcPte.translationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 0, 2);

                    //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringProbabEntryListToStringList(lpeConf, srcPte.transliterationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 3, 1);

                    //Transliteration is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLITERATION
                    AlignStringListToStringProbabEntryList(lpeConf, srcPte.simpleTransliteration, trgPte.transliterationList, srcToTrg, trgToSrc, 1, 3);

                    //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLITERATION
                    // NOTE(review): the type codes here are 3, 2 whereas the branch above passes 3, 3 for
                    // the same pair of lists - confirm this is intended.
                    AlignStringProbabEntryListLists(lpeConf, srcPte.transliterationList, trgPte.transliterationList, srcToTrg, trgToSrc, 3, 2);

                    //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringLists(lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);

                    //Translation is in target language; SOURCE vs TARGET TRANSLATION
                    AlignStringListToStringProbabEntryList(lpeConf, srcPte.lowercaseWords, trgPte.translationList, srcToTrg, trgToSrc, 2, 0);
                }
                else if (interlinguaDictUsed)
                {
                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLATION
                    AlignStringProbabEntryListLists(lpeConf, srcPte.translationList, trgPte.translationList, srcToTrg, trgToSrc, 0, 0);

                    //Translation is in EN language; SOURCE TRANSLATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringProbabEntryListToStringList(lpeConf, srcPte.translationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 0, 1);

                    //Translation is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLATION
                    AlignStringListToStringProbabEntryList(lpeConf, srcPte.simpleTransliteration, trgPte.translationList, srcToTrg, trgToSrc, 1, 0);

                    //Transliteration is in target language; SOURCE vs TARGET TRANSLITERATION
                    AlignStringListToStringProbabEntryList(lpeConf, srcPte.lowercaseWords, trgPte.transliterationList, srcToTrg, trgToSrc, 2, 3);

                    //Transliteration is in target language; SOURCE TRANSLITERATION vs TARGET
                    AlignStringProbabEntryListToStringList(lpeConf, srcPte.transliterationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 3, 2);

                    //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringLists(lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);
                }
                else
                {
                    //Translation is in target language; SOURCE TRANSLATION vs TARGET
                    AlignStringProbabEntryListToStringList(lpeConf, srcPte.translationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 0, 2);

                    //Transliteration is in target language; SOURCE TRANSLITERATION vs TARGET
                    AlignStringProbabEntryListToStringList(lpeConf, srcPte.transliterationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 3, 2);

                    //Translation is in target language; SOURCE vs TARGET TRANSLATION
                    AlignStringListToStringProbabEntryList(lpeConf, srcPte.lowercaseWords, trgPte.translationList, srcToTrg, trgToSrc, 2, 0);

                    //Transliteration is in target language; SOURCE vs TARGET TRANSLITERATION
                    AlignStringListToStringProbabEntryList(lpeConf, srcPte.lowercaseWords, trgPte.transliterationList, srcToTrg, trgToSrc, 2, 3);

                    //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                    AlignStringLists(lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);
                }

                aie.srcToTrgAlignments = srcToTrg;
                aie.trgToSrcAlignments = trgToSrc;

                aie.srcEntry = srcPte;
                aie.trgEntry = trgPte;

                ConsolidateOverlaps(lpeConf, aie, excDict);
                // The success flag is deliberately not inspected: every pair is scored and returned.
                CreateStrListsForEval(configuration, aie, srcStopWords, trgStopWords, false);
                aie.alignmentScore = EvaluateAlignmentScore(lpeConf, aie);
                //If you wish to debug the process, comment the lines below that clear the alignments...
                aie.srcToTrgAlignments.Clear();
                aie.trgToSrcAlignments.Clear();
                aie.consolidatedAlignment.Clear();
                aie.srcFile = srcFile;
                aie.trgFile = trgFile;
                res.Add(aie);
            }
            return res;
        }
Beispiel #2
0
        /// <summary>
        /// Aligns one source term entry with one target term entry using the settings held in the
        /// static fields (<c>_lpeConf</c>, <c>_configuration</c>, <c>_excDict</c>, the stop word
        /// dictionaries, the file names and the two interlingua flags).
        /// </summary>
        /// <param name="srcPte">Source term entry.</param>
        /// <param name="trgPte">Target term entry.</param>
        /// <returns>
        /// The alignment element when <c>CreateStrListsForEval</c> succeeds and the alignment score
        /// is at least <c>_lpeConf.finalAlignmentThr</c>; otherwise <c>null</c> (also when either
        /// entry is <c>null</c>).
        /// </returns>
        public static AlignmentInfoElement AlignSingleTermPair(ProcessedTermEntry srcPte, ProcessedTermEntry trgPte)
        {
            if (srcPte == null || trgPte == null)
            {
                return null;
            }

            AlignmentInfoElement candidate = new AlignmentInfoElement();
            List<WordAlignmentElement> forward = new List<WordAlignmentElement>();
            List<WordAlignmentElement> backward = new List<WordAlignmentElement>();
            maxStrLen = 0;

            // Type codes (last two arguments of each Align* call):
            //   0 - dictionary, 1 - simple translit, 2 - target, 3 - translit
            // Calls append to the two lists, so they are issued in a fixed order.
            if (_interlinguaDictUsed && _interlinguaTranslitUsed)
            {
                // Translations (EN) on both sides.
                AlignStringProbabEntryListLists(_lpeConf, srcPte.translationList, trgPte.translationList, forward, backward, 0, 0);
                // Source translation (EN) against target simple transliteration.
                AlignStringProbabEntryListToStringList(_lpeConf, srcPte.translationList, trgPte.simpleTransliteration, forward, backward, 0, 1);
                // Source translation (EN) against target transliteration.
                AlignStringProbabEntryListLists(_lpeConf, srcPte.translationList, trgPte.transliterationList, forward, backward, 0, 3);
                // Source simple transliteration against target translation (EN).
                AlignStringListToStringProbabEntryList(_lpeConf, srcPte.simpleTransliteration, trgPte.translationList, forward, backward, 1, 0);
                // Source transliteration against target translation (EN).
                AlignStringProbabEntryListLists(_lpeConf, srcPte.transliterationList, trgPte.translationList, forward, backward, 3, 0);
                // Source transliteration (EN) against target simple transliteration.
                AlignStringProbabEntryListToStringList(_lpeConf, srcPte.transliterationList, trgPte.simpleTransliteration, forward, backward, 3, 1);
                // Source simple transliteration against target transliteration (EN).
                AlignStringListToStringProbabEntryList(_lpeConf, srcPte.simpleTransliteration, trgPte.transliterationList, forward, backward, 1, 3);
                // Transliterations (EN) on both sides.
                AlignStringProbabEntryListLists(_lpeConf, srcPte.transliterationList, trgPte.transliterationList, forward, backward, 3, 3);
                // Simple transliterations (EN) on both sides.
                AlignStringLists(_lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, forward, backward, 1, 1);
            }
            else if (_interlinguaTranslitUsed)
            {
                // Source translation (target language) against the target words.
                AlignStringProbabEntryListToStringList(_lpeConf, srcPte.translationList, trgPte.lowercaseWords, forward, backward, 0, 2);
                // Source transliteration (EN) against target simple transliteration.
                AlignStringProbabEntryListToStringList(_lpeConf, srcPte.transliterationList, trgPte.simpleTransliteration, forward, backward, 3, 1);
                // Source simple transliteration against target transliteration (EN).
                AlignStringListToStringProbabEntryList(_lpeConf, srcPte.simpleTransliteration, trgPte.transliterationList, forward, backward, 1, 3);
                // Transliterations (EN) on both sides.
                // NOTE(review): type codes are 3, 2 here but 3, 3 for the same lists in the branch above - confirm.
                AlignStringProbabEntryListLists(_lpeConf, srcPte.transliterationList, trgPte.transliterationList, forward, backward, 3, 2);
                // Simple transliterations (EN) on both sides.
                AlignStringLists(_lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, forward, backward, 1, 1);
                // Source words against target translation (target language).
                AlignStringListToStringProbabEntryList(_lpeConf, srcPte.lowercaseWords, trgPte.translationList, forward, backward, 2, 0);
            }
            else if (_interlinguaDictUsed)
            {
                // Translations (EN) on both sides.
                AlignStringProbabEntryListLists(_lpeConf, srcPte.translationList, trgPte.translationList, forward, backward, 0, 0);
                // Source translation (EN) against target simple transliteration.
                AlignStringProbabEntryListToStringList(_lpeConf, srcPte.translationList, trgPte.simpleTransliteration, forward, backward, 0, 1);
                // Source simple transliteration against target translation (EN).
                AlignStringListToStringProbabEntryList(_lpeConf, srcPte.simpleTransliteration, trgPte.translationList, forward, backward, 1, 0);
                // Source transliteration (target language) against the target words.
                AlignStringProbabEntryListToStringList(_lpeConf, srcPte.transliterationList, trgPte.lowercaseWords, forward, backward, 3, 2);
                // Simple transliterations (EN) on both sides.
                AlignStringLists(_lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, forward, backward, 1, 1);
                // Source words against target transliteration (target language).
                AlignStringListToStringProbabEntryList(_lpeConf, srcPte.lowercaseWords, trgPte.transliterationList, forward, backward, 2, 3);
            }
            else
            {
                // Source translation (target language) against the target words.
                AlignStringProbabEntryListToStringList(_lpeConf, srcPte.translationList, trgPte.lowercaseWords, forward, backward, 0, 2);
                // Source transliteration (target language) against the target words.
                AlignStringProbabEntryListToStringList(_lpeConf, srcPte.transliterationList, trgPte.lowercaseWords, forward, backward, 3, 2);
                // Source words against target translation (target language).
                AlignStringListToStringProbabEntryList(_lpeConf, srcPte.lowercaseWords, trgPte.translationList, forward, backward, 2, 0);
                // Source words against target transliteration (target language).
                AlignStringListToStringProbabEntryList(_lpeConf, srcPte.lowercaseWords, trgPte.transliterationList, forward, backward, 2, 3);
                // Simple transliterations (EN) on both sides.
                AlignStringLists(_lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, forward, backward, 1, 1);
            }

            candidate.srcToTrgAlignments = forward;
            candidate.trgToSrcAlignments = backward;

            candidate.srcEntry = srcPte;
            candidate.trgEntry = trgPte;

            ConsolidateOverlaps(_lpeConf, candidate, _excDict);
            if (!CreateStrListsForEval(_configuration, candidate, _srcStopWords, _trgStopWords))
            {
                return null;
            }

            candidate.alignmentScore = EvaluateAlignmentScore(_lpeConf, candidate);
            if (candidate.alignmentScore >= _lpeConf.finalAlignmentThr)
            {
                // To debug the process, comment out the three Clear() calls below.
                candidate.srcToTrgAlignments.Clear();
                candidate.trgToSrcAlignments.Clear();
                candidate.consolidatedAlignment.Clear();
                candidate.srcFile = _srcFile;
                candidate.trgFile = _trgFile;
                return candidate;
            }

            return null;
        }
Beispiel #3
0
        /// <summary>
        /// Aligns every source term entry with every target term entry (full cross product) and
        /// returns the pairs whose alignment score reaches the language pair's
        /// <c>finalAlignmentThr</c>.
        /// </summary>
        /// <remarks>
        /// Progress is written to the console (a dot every 50 source terms, a running count every
        /// 1000). A pair of aligned lower-cased strings is evaluated only once: it is recorded as seen
        /// before it is scored, so a later pair producing the same strings is skipped even if the
        /// first one stayed below the threshold.
        /// </remarks>
        /// <param name="configuration">Aligner configuration; its <c>langPairEntryDict</c> is searched for the key "srcLang_trgLang".</param>
        /// <param name="srcTerms">Source term entries; only the values are used (null values are skipped).</param>
        /// <param name="trgTerms">Target term entries; only the values are used (null values are skipped).</param>
        /// <param name="interlinguaDictUsed">Selects, together with <paramref name="interlinguaTranslitUsed"/>, which combinations of term representations are compared.</param>
        /// <param name="interlinguaTranslitUsed">See <paramref name="interlinguaDictUsed"/>.</param>
        /// <param name="srcLang">Source language code (must not be blank).</param>
        /// <param name="trgLang">Target language code (must not be blank).</param>
        /// <param name="srcFile">Value stored in <c>srcFile</c> of every result element.</param>
        /// <param name="trgFile">Value stored in <c>trgFile</c> of every result element.</param>
        /// <param name="excDict">Passed through to <c>ConsolidateOverlaps</c>.</param>
        /// <param name="srcStopWords">Passed through to <c>CreateStrListsForEval</c>.</param>
        /// <param name="trgStopWords">Passed through to <c>CreateStrListsForEval</c>.</param>
        /// <returns>
        /// The accepted alignment elements, or <c>null</c> when the configuration, the language codes
        /// or either term dictionary is missing.
        /// </returns>
        public static List<AlignmentInfoElement> AlignPairs(MPAlignerConfiguration configuration, Dictionary<string, ProcessedTermEntry> srcTerms, Dictionary<string, ProcessedTermEntry> trgTerms, bool interlinguaDictUsed, bool interlinguaTranslitUsed, string srcLang, string trgLang, string srcFile, string trgFile, Dictionary<string, Dictionary<string, bool>> excDict, Dictionary<string, bool> srcStopWords, Dictionary<string, bool> trgStopWords)
        {
            if (configuration == null || configuration.langPairEntryDict == null || string.IsNullOrWhiteSpace(srcLang) || string.IsNullOrWhiteSpace(trgLang))
            {
                return null;
            }
            // Missing term dictionaries are treated like any other invalid argument instead of
            // failing with a NullReferenceException in the log statement below.
            if (srcTerms == null || trgTerms == null)
            {
                return null;
            }
            Log.Write ("Starting alignmet of "+ srcTerms.Count.ToString()+" "+srcLang+" and "+ trgTerms.Count.ToString()+" "+trgLang+" terms.",LogLevelType.LIMITED_OUTPUT);

            string langKey = srcLang + "_" + trgLang;

            // Use the configured language pair entry if there is one; otherwise fall back to a default entry.
            MPAlignerConfigurationLangPairEntry lpeConf;
            if (!configuration.langPairEntryDict.TryGetValue(langKey, out lpeConf))
            {
                lpeConf = new MPAlignerConfigurationLangPairEntry();
                lpeConf.srcLang = srcLang;
                lpeConf.trgLang = trgLang;
            }

            // Ordering for source-to-target alignments: higher average overlap first, then longer
            // source span, then higher target overlap, then lower source id.
            Comparison<WordAlignmentElement> srcToTrgOrder = delegate(WordAlignmentElement w1, WordAlignmentElement w2)
            {
                double avgW1Overlap = (w1.fromOverlap+w1.toOverlap)/2;
                double avgW2Overlap = (w2.fromOverlap+w2.toOverlap)/2;
                if (avgW1Overlap != avgW2Overlap)
                {
                    return avgW2Overlap.CompareTo(avgW1Overlap);
                }
                if (w2.fromLen != w1.fromLen)
                {
                    return w2.fromLen.CompareTo(w1.fromLen);
                }
                if (w2.toOverlap != w1.toOverlap)
                {
                    return w2.toOverlap.CompareTo(w1.toOverlap);
                }
                return w1.fromId.CompareTo(w2.fromId);
            };

            // Mirror ordering for target-to-source alignments: higher average overlap first, then
            // longer target span, then higher source overlap, then lower target id.
            Comparison<WordAlignmentElement> trgToSrcOrder = delegate(WordAlignmentElement w1, WordAlignmentElement w2)
            {
                double avgW1Overlap = (w1.fromOverlap+w1.toOverlap)/2;
                double avgW2Overlap = (w2.fromOverlap+w2.toOverlap)/2;
                if (avgW1Overlap != avgW2Overlap)
                {
                    return avgW2Overlap.CompareTo(avgW1Overlap);
                }
                if (w2.toLen != w1.toLen)
                {
                    return w2.toLen.CompareTo(w1.toLen);
                }
                if (w2.fromOverlap != w1.fromOverlap)
                {
                    return w2.fromOverlap.CompareTo(w1.fromOverlap);
                }
                return w1.toId.CompareTo(w2.toId);
            };

            int counter = 0;

            // aligned source string -> aligned target strings already evaluated for it
            Dictionary<string, Dictionary<string, bool>> alignedList = new Dictionary<string, Dictionary<string, bool>>();
            List<AlignmentInfoElement> res = new List<AlignmentInfoElement>();
            foreach (ProcessedTermEntry srcPte in srcTerms.Values)
            {
                counter++;
                if (counter%50==0)
                {
                    Console.Write(".");
                    if (counter%1000==0)
                    {
                        Console.WriteLine(" - "+counter.ToString());
                    }
                }
                if (srcPte == null)
                {
                    // Nothing can be aligned with a missing source entry.
                    continue;
                }

                foreach (ProcessedTermEntry trgPte in trgTerms.Values)
                {
                    if (trgPte == null)
                    {
                        continue;
                    }

                    AlignmentInfoElement aie = new AlignmentInfoElement();
                    List<WordAlignmentElement> srcToTrg = new List<WordAlignmentElement>();
                    List<WordAlignmentElement> trgToSrc = new List<WordAlignmentElement>();
                    maxStrLen = 0;

                    // Type codes passed as the last two arguments of the Align* helpers:
                    //   0 - dictionary,
                    //   1 - simple translit,
                    //   2 - target,
                    //   3 - translit
                    if (interlinguaDictUsed && interlinguaTranslitUsed)
                    {
                        //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLATION
                        AlignStringProbabEntryListLists(lpeConf, srcPte.translationList, trgPte.translationList, srcToTrg, trgToSrc, 0, 0);

                        //Translation is in EN language; SOURCE TRANSLATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringProbabEntryListToStringList(lpeConf, srcPte.translationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 0, 1);

                        //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLITERATION
                        AlignStringProbabEntryListLists(lpeConf, srcPte.translationList, trgPte.transliterationList, srcToTrg, trgToSrc, 0, 3);

                        //Translation is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLATION
                        AlignStringListToStringProbabEntryList(lpeConf, srcPte.simpleTransliteration, trgPte.translationList, srcToTrg, trgToSrc, 1, 0);

                        //Translation is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLATION
                        AlignStringProbabEntryListLists(lpeConf, srcPte.transliterationList, trgPte.translationList, srcToTrg, trgToSrc, 3, 0);

                        //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringProbabEntryListToStringList(lpeConf, srcPte.transliterationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 3, 1);

                        //Transliteration is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLITERATION
                        AlignStringListToStringProbabEntryList(lpeConf, srcPte.simpleTransliteration, trgPte.transliterationList, srcToTrg, trgToSrc, 1, 3);

                        //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLITERATION
                        AlignStringProbabEntryListLists(lpeConf, srcPte.transliterationList, trgPte.transliterationList, srcToTrg, trgToSrc, 3, 3);

                        //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringLists(lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);
                    }
                    else if (interlinguaTranslitUsed)
                    {
                        //Translation is in target language; SOURCE TRANSLATION vs TARGET
                        AlignStringProbabEntryListToStringList(lpeConf, srcPte.translationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 0, 2);

                        //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringProbabEntryListToStringList(lpeConf, srcPte.transliterationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 3, 1);

                        //Transliteration is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLITERATION
                        AlignStringListToStringProbabEntryList(lpeConf, srcPte.simpleTransliteration, trgPte.transliterationList, srcToTrg, trgToSrc, 1, 3);

                        //Transliteration is in EN language; SOURCE TRANSLITERATION vs TARGET TRANSLITERATION
                        // NOTE(review): the type codes here are 3, 2 whereas the branch above passes 3, 3
                        // for the same pair of lists - confirm this is intended.
                        AlignStringProbabEntryListLists(lpeConf, srcPte.transliterationList, trgPte.transliterationList, srcToTrg, trgToSrc, 3, 2);

                        //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringLists(lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);

                        //Translation is in target language; SOURCE vs TARGET TRANSLATION
                        AlignStringListToStringProbabEntryList(lpeConf, srcPte.lowercaseWords, trgPte.translationList, srcToTrg, trgToSrc, 2, 0);
                    }
                    else if (interlinguaDictUsed)
                    {
                        //Translation is in EN language; SOURCE TRANSLATION vs TARGET TRANSLATION
                        AlignStringProbabEntryListLists(lpeConf, srcPte.translationList, trgPte.translationList, srcToTrg, trgToSrc, 0, 0);

                        //Translation is in EN language; SOURCE TRANSLATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringProbabEntryListToStringList(lpeConf, srcPte.translationList, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 0, 1);

                        //Translation is in EN language; SOURCE SIMPLE TRANSLITERATION vs TARGET TRANSLATION
                        AlignStringListToStringProbabEntryList(lpeConf, srcPte.simpleTransliteration, trgPte.translationList, srcToTrg, trgToSrc, 1, 0);

                        //Transliteration is in target language; SOURCE TRANSLITERATION vs TARGET
                        AlignStringProbabEntryListToStringList(lpeConf, srcPte.transliterationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 3, 2);

                        //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringLists(lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);

                        //Transliteration is in target language; SOURCE vs TARGET TRANSLITERATION
                        AlignStringListToStringProbabEntryList(lpeConf, srcPte.lowercaseWords, trgPte.transliterationList, srcToTrg, trgToSrc, 2, 3);
                    }
                    else
                    {
                        //Translation is in target language; SOURCE TRANSLATION vs TARGET
                        AlignStringProbabEntryListToStringList(lpeConf, srcPte.translationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 0, 2);

                        //Transliteration is in target language; SOURCE TRANSLITERATION vs TARGET
                        AlignStringProbabEntryListToStringList(lpeConf, srcPte.transliterationList, trgPte.lowercaseWords, srcToTrg, trgToSrc, 3, 2);

                        //Translation is in target language; SOURCE vs TARGET TRANSLATION
                        AlignStringListToStringProbabEntryList(lpeConf, srcPte.lowercaseWords, trgPte.translationList, srcToTrg, trgToSrc, 2, 0);

                        //Transliteration is in target language; SOURCE vs TARGET TRANSLITERATION
                        AlignStringListToStringProbabEntryList(lpeConf, srcPte.lowercaseWords, trgPte.transliterationList, srcToTrg, trgToSrc, 2, 3);

                        //Simple translit of both is in EN; SOURCE SIMPLE TRANSLITERATION vs TARGET SIMPLE TRANSLITERATION
                        AlignStringLists(lpeConf, srcPte.simpleTransliteration, trgPte.simpleTransliteration, srcToTrg, trgToSrc, 1, 1);
                    }

                    aie.srcToTrgAlignments = srcToTrg;
                    aie.trgToSrcAlignments = trgToSrc;
                    aie.srcToTrgAlignments.Sort(srcToTrgOrder);
                    aie.trgToSrcAlignments.Sort(trgToSrcOrder);

                    aie.srcEntry = srcPte;
                    aie.trgEntry = trgPte;

                    ConsolidateOverlaps(lpeConf, aie, excDict);
                    if (!CreateStrListsForEval(configuration, aie, srcStopWords, trgStopWords))
                    {
                        continue;
                    }

                    // Skip string pairs that were already evaluated; otherwise remember this one.
                    Dictionary<string, bool> seenTargets;
                    if (!alignedList.TryGetValue(aie.alignedLowSrcStr, out seenTargets))
                    {
                        seenTargets = new Dictionary<string, bool>();
                        alignedList.Add(aie.alignedLowSrcStr, seenTargets);
                    }
                    if (seenTargets.ContainsKey(aie.alignedLowTrgStr))
                    {
                        continue;
                    }
                    seenTargets.Add(aie.alignedLowTrgStr, true);

                    aie.alignmentScore = EvaluateAlignmentScore(lpeConf, aie);
                    if (aie.alignmentScore >= lpeConf.finalAlignmentThr)
                    {
                        //If you wish to debug the process, comment the lines below that clear the alignments...
                        aie.srcToTrgAlignments.Clear();
                        aie.trgToSrcAlignments.Clear();
                        aie.consolidatedAlignment.Clear();
                        aie.srcFile = srcFile;
                        aie.trgFile = trgFile;
                        res.Add(aie);
                    }
                }
            }
            Console.WriteLine(" - "+counter.ToString());
            Log.Write ("Alignmet finished - "+ res.Count.ToString()+" term pairs aligned over the alignment threshold " +lpeConf.finalAlignmentThr.ToString()+".\n",LogLevelType.LIMITED_OUTPUT);
            return res;
        }
Beispiel #4
0
 /// <summary>
 /// Scores an alignment as the normalised Levenshtein similarity of the two
 /// strings prepared for alignment, scaled by the mean of the source and
 /// target multipliers.
 /// </summary>
 /// <param name="lpeConf">Language pair configuration (not read by this method).</param>
 /// <param name="aie">Alignment element holding the strings and multipliers to score.</param>
 /// <returns>
 /// 0 when <paramref name="aie"/> is null or either alignment string is null,
 /// empty or whitespace-only; otherwise the scaled similarity.
 /// </returns>
 public static double EvaluateAlignmentScore(MPAlignerConfigurationLangPairEntry lpeConf,AlignmentInfoElement aie)
 {
     bool hasBothStrings = aie != null
         && !string.IsNullOrWhiteSpace(aie.srcStrForAlignment)
         && !string.IsNullOrWhiteSpace(aie.trgStrForAlignment);
     if (!hasBothStrings)
     {
         return 0;
     }

     string source = aie.srcStrForAlignment;
     string target = aie.trgStrForAlignment;

     // The longer string bounds the edit distance, so it is used for normalisation.
     double longest = Math.Max(source.Length, target.Length);
     double similarity = (longest - LevenshteinDistance.Compute(source, target)) / longest;

     double meanMultiplier = (aie.srcMultiplier + aie.trgMultiplier) / 2;
     return similarity * meanMultiplier;
 }
Beispiel #5
0
        /// <summary>
        /// Builds the source and target strings used for scoring an alignment
        /// (<c>srcStrForAlignment</c> / <c>trgStrForAlignment</c>), the source and target
        /// multipliers, the aligned word-index boundaries and the aligned lowercase strings
        /// of <paramref name="aie"/> from its consolidated word alignment.
        /// </summary>
        /// <param name="conf">Configuration; only <c>allowTrimmedAlignments</c> is read. It is dereferenced without a null check.</param>
        /// <param name="aie">Alignment element to update in place. Its <c>consolidatedAlignment</c>, <c>srcEntry</c> and <c>trgEntry</c> must be set.</param>
        /// <param name="srcStopWords">Source stop-word lookup (keys are compared against lowercase words). Dereferenced without a null check.</param>
        /// <param name="trgStopWords">Target stop-word lookup (keys are compared against lowercase words). Dereferenced without a null check.</param>
        /// <param name="stripListsOnError">When true, the alignment strings and aligned lowercase strings are reset to empty strings on the failure paths that follow the alignment loop.</param>
        /// <returns>
        /// True when the alignment is usable and the aligned lowercase strings were produced;
        /// false when a required input is missing, the boundaries are invalid, the alignment
        /// does not span both complete entries while trimming is disallowed, or one side
        /// consists of stop words only.
        /// </returns>
        public static bool CreateStrListsForEval(MPAlignerConfiguration conf, AlignmentInfoElement aie, Dictionary<string,bool> srcStopWords, Dictionary<string,bool> trgStopWords, bool stripListsOnError = true)
        {
            if (aie == null || aie.consolidatedAlignment == null || aie.srcEntry == null || aie.trgEntry == null) {
                return false;
            }
            // Start with inverted boundaries so that the first aligned word always narrows them.
            aie.minSrcId = Int32.MaxValue;
            aie.minTrgId = Int32.MaxValue;
            aie.maxSrcId = Int32.MinValue;
            aie.maxTrgId = Int32.MinValue;

            // Running products of the per-word factors computed in the alignment loop below.
            double srcMultiplier = 1;
            double trgMultiplier = 1;

            // IDs of the most recently emitted source/target word; used to avoid appending
            // the same word twice when consecutive alignment elements share one side.
            int prevFromId = -1;
            int prevToId = -1;

            // Boundaries restricted to aligned words that are not stop words.
            int minSrcNonStop = Int32.MaxValue;
            int maxSrcNonStop = Int32.MinValue;
            int minTrgNonStop = Int32.MaxValue;
            int maxTrgNonStop = Int32.MinValue;

            //aie.finalAlignment = new List<StringComparisonElement>();

            StringBuilder src = new StringBuilder ();
            StringBuilder trg = new StringBuilder ();
            //StringBuilder srcL = new StringBuilder ();
            //StringBuilder trgL = new StringBuilder ();

            // Word IDs that have been emitted from the consolidated alignment (used as sets).
            Dictionary<int,bool> srcIds = new Dictionary<int, bool> ();
            Dictionary<int,bool> trgIds = new Dictionary<int, bool> ();

            // Stay true until a non-stop word is appended to the respective string.
            bool onlyStopSrc = true;
            bool onlyStopTrg = true;
            /*if (!conf.allowTrimmedAlignments)
            {
                aie.minSrcId = 0;
                aie.maxSrcId = aie.srcEntry.lowercaseWords.Count-1;
                aie.minTrgId = 0;
                aie.maxTrgId = aie.trgEntry.lowercaseWords.Count-1;
            }*/

            foreach (WordAlignmentElement wae in aie.consolidatedAlignment) {
                double srcLen = aie.srcEntry.lowercaseWords [wae.fromId].Length;
                double trgLen = aie.trgEntry.lowercaseWords [wae.toId].Length;
                //if (conf.allowTrimmedAlignments)
                //{
                // Widen the overall and the non-stop-word boundaries with the current element.
                if (wae.fromId < aie.minSrcId) {
                    aie.minSrcId = wae.fromId;
                }
                if (wae.fromId < minSrcNonStop && !srcStopWords.ContainsKey (aie.srcEntry.lowercaseWords [wae.fromId])) {
                    minSrcNonStop = wae.fromId;
                }
                if (wae.fromId > aie.maxSrcId) {
                    aie.maxSrcId = wae.fromId;
                }
                if (wae.fromId > maxSrcNonStop && !srcStopWords.ContainsKey (aie.srcEntry.lowercaseWords [wae.fromId])) {
                    maxSrcNonStop = wae.fromId;
                }
                if (wae.toId < aie.minTrgId) {
                    aie.minTrgId = wae.toId;
                }
                if (wae.toId < minTrgNonStop && !trgStopWords.ContainsKey (aie.trgEntry.lowercaseWords [wae.toId])) {
                    minTrgNonStop = wae.toId;
                }
                if (wae.toId > aie.maxTrgId) {
                    aie.maxTrgId = wae.toId;
                }
                if (wae.toId > maxTrgNonStop && !trgStopWords.ContainsKey (aie.trgEntry.lowercaseWords [wae.toId])) {
                    maxTrgNonStop = wae.toId;
                }
                //}
                //TODO: For the future - there is a limitation that you cannot evaluate alignments where for one token you have acquired overlaps in different languages (f.e., term is in [EN][LV] aligned segments, but the final string can only be in EN - it cannot be split in two parts!).
                // With trimming allowed, an element is emitted only if it lies inside the
                // non-stop-word boundaries seen SO FAR (they are updated incrementally above).
                if (!conf.allowTrimmedAlignments
                    ||(minSrcNonStop<=wae.fromId && maxSrcNonStop>=wae.fromId
                           && minTrgNonStop<=wae.toId && maxTrgNonStop>=wae.toId))
                {
                    if (wae.fromId == prevFromId) {
                        // Same source word as the previous element: emit only a new target word.
                        if (wae.toId != prevToId) {
                            prevToId = wae.toId;
                            //trg.Append(" ");
                            string trgStr = GetCorrectString (aie.trgEntry, wae.toId, wae.toType, wae.toTypeId);
                            if (!trgStopWords.ContainsKey (aie.trgEntry.lowercaseWords [wae.toId]))
                                onlyStopTrg = false;
                            trg.Append (trgStr);
                            // Factor = share of the entry outside this word + share of this word weighted by GetProbab.
                            trgMultiplier *= (((aie.trgEntry.len - trgLen) / aie.trgEntry.len) + (trgLen / aie.trgEntry.len) * GetProbab (aie.trgEntry, wae.toId, wae.toType, wae.toTypeId));
                        }
                    } else if (wae.toId == prevToId) {
                        // Same target word as the previous element: emit only a new source word.
                        // (The inner check is always true here because the first branch was not taken.)
                        if (wae.fromId != prevFromId) {
                            prevFromId = wae.fromId;
                            //src.Append(" ");
                            string srcStr = GetCorrectString (aie.srcEntry, wae.fromId, wae.fromType, wae.fromTypeId);
                            if (!srcStopWords.ContainsKey (aie.srcEntry.lowercaseWords [wae.fromId]))
                                onlyStopSrc = false;
                            src.Append (srcStr);
                            srcMultiplier *= (((aie.srcEntry.len - srcLen) / aie.srcEntry.len) + (srcLen / aie.srcEntry.len) * GetProbab (aie.srcEntry, wae.fromId, wae.fromType, wae.fromTypeId));
                        }
                    } else {
                        // Both sides differ from the previous element: emit both words.
                        prevToId = wae.toId;
                        prevFromId = wae.fromId;
                        string srcStr = GetCorrectString (aie.srcEntry, wae.fromId, wae.fromType, wae.fromTypeId);
                        if (!srcStopWords.ContainsKey (aie.srcEntry.lowercaseWords [wae.fromId]))
                            onlyStopSrc = false;
                        src.Append (srcStr);
                        string trgStr = GetCorrectString (aie.trgEntry, wae.toId, wae.toType, wae.toTypeId);
                        if (!trgStopWords.ContainsKey (aie.trgEntry.lowercaseWords [wae.toId]))
                            onlyStopTrg = false;
                        trg.Append (trgStr);
                        srcMultiplier *= (((aie.srcEntry.len - srcLen) / aie.srcEntry.len) + (srcLen / aie.srcEntry.len) * GetProbab (aie.srcEntry, wae.fromId, wae.fromType, wae.fromTypeId));
                        trgMultiplier *= (((aie.trgEntry.len - trgLen) / aie.trgEntry.len) + (trgLen / aie.trgEntry.len) * GetProbab (aie.trgEntry, wae.toId, wae.toType, wae.toTypeId));
                    }
                    if (!srcIds.ContainsKey (wae.fromId))
                        srcIds.Add (wae.fromId, true);
                    if (!trgIds.ContainsKey (wae.toId))
                        trgIds.Add (wae.toId, true);
                }
            }

            //Try to find words in the middle of the current alignment that have not been aligned.
            //If such are found, penalise the source and target alignments by the length of the wrong alignment.
            // The unaligned word is appended to its own side and the same number of spaces to
            // the other side, so the two strings differ over that whole stretch.
            for (int i=0; i<aie.srcEntry.lowercaseWords.Count; i++) {
                if (i >= aie.minSrcId && i <= aie.maxSrcId && !srcIds.ContainsKey (i)) {
                    string str = aie.srcEntry.lowercaseWords [i];
                    if (!srcStopWords.ContainsKey (str))
                        onlyStopSrc = false;
                    src.Append (str);
                    trg.Append (new String (' ', str.Length));
                }
            }

            for (int i=0; i<aie.trgEntry.lowercaseWords.Count; i++) {
                if (i >= aie.minTrgId && i <= aie.maxTrgId && !trgIds.ContainsKey (i)) {
                    string str = aie.trgEntry.lowercaseWords [i];
                    if (!trgStopWords.ContainsKey (str))
                        onlyStopTrg = false;
                    trg.Append (str);
                    src.Append (new String (' ', str.Length));
                }
            }

            // The alignment strings are overwritten only when something was written to the
            // source builder; otherwise their previous values are kept.
            if (src.Length > 0) {
                aie.srcStrForAlignment = src.ToString ();
                aie.trgStrForAlignment = trg.ToString ();
            }
            aie.srcMultiplier = srcMultiplier;
            aie.trgMultiplier = trgMultiplier;
            bool wasBad = false;
            if (!conf.allowTrimmedAlignments) {
                // Without trimming the alignment must span both entries from the first to the
                // last word; remember a violation, then report the full range as boundaries.
                if (aie.minSrcId > 0 || aie.minTrgId > 0 || aie.maxSrcId + 1 < aie.srcEntry.lowercaseWords.Count || aie.maxTrgId + 1 < aie.trgEntry.lowercaseWords.Count) {
                    wasBad = true;
                }
                aie.minSrcId = 0;
                aie.minTrgId = 0;
                aie.maxSrcId = aie.srcEntry.lowercaseWords.Count - 1;
                aie.maxTrgId = aie.trgEntry.lowercaseWords.Count - 1;
            } else {
                // With trimming the boundaries shrink to the outermost non-stop words, provided
                // all four were found (i.e. are valid indices into the word lists).
                if (minSrcNonStop >= 0 && minSrcNonStop < aie.srcEntry.lowercaseWords.Count
                    && maxSrcNonStop >= 0 && maxSrcNonStop < aie.srcEntry.lowercaseWords.Count
                    && minTrgNonStop >= 0 && minTrgNonStop < aie.trgEntry.lowercaseWords.Count
                    && maxTrgNonStop >= 0 && maxTrgNonStop < aie.trgEntry.lowercaseWords.Count) {
                    aie.minSrcId = minSrcNonStop;
                    aie.minTrgId = minTrgNonStop;
                    aie.maxSrcId = maxSrcNonStop;
                    aie.maxTrgId = maxTrgNonStop;
                }
                else if (stripListsOnError)
                {
                    aie.srcStrForAlignment = "";
                    aie.trgStrForAlignment = "";
                    aie.alignedLowSrcStr = "";
                    aie.alignedLowTrgStr = "";
                    return false;
                }
                // When stripListsOnError is false the unmodified boundaries fall through to
                // the range check below.
            }

            //Just to be on the safe side - check whether the ID's are in index boundaries:
            if (aie.minSrcId < 0 || aie.minTrgId < 0 || aie.maxSrcId + 1 > aie.srcEntry.lowercaseWords.Count || aie.maxTrgId + 1 > aie.trgEntry.lowercaseWords.Count) {
                aie.minSrcId = 0;
                aie.minTrgId = 0;
                aie.maxSrcId = aie.srcEntry.lowercaseWords.Count - 1;
                aie.maxTrgId = aie.trgEntry.lowercaseWords.Count - 1;
                if (stripListsOnError)
                {
                    aie.srcStrForAlignment = "";
                    aie.trgStrForAlignment = "";
                    aie.alignedLowSrcStr = "";
                    aie.alignedLowTrgStr = "";
                }
                return false;
            }

            // Trimming was disallowed but the alignment did not cover both complete entries.
            if (wasBad) {
                if (stripListsOnError)
                {
                    aie.srcStrForAlignment = "";
                    aie.trgStrForAlignment = "";
                    aie.alignedLowSrcStr = "";
                    aie.alignedLowTrgStr = "";
                }
                return false;
            }

            // Reject alignments in which one side contributed nothing but stop words.
            if (onlyStopSrc || onlyStopTrg) {
                if (stripListsOnError)
                {
                    aie.srcStrForAlignment = "";
                    aie.trgStrForAlignment = "";
                    aie.alignedLowSrcStr = "";
                    aie.alignedLowTrgStr = "";
                }
                return false;
            }

            aie.alignedLowSrcStr = AlignmentInfoElement.GetStrFromEntry(aie.srcEntry.lowercaseWords,aie.minSrcId,aie.maxSrcId);
            aie.alignedLowTrgStr = AlignmentInfoElement.GetStrFromEntry(aie.trgEntry.lowercaseWords,aie.minTrgId,aie.maxTrgId);
            return true;
        }
Beispiel #6
0
        /// <summary>
        /// Selects, for every source word, the best source-to-target word alignment and, for
        /// every target word, the best target-to-source word alignment, then merges both
        /// selections into <c>aie.consolidatedAlignment</c>.
        /// </summary>
        /// <remarks>
        /// Each direction is processed in a single pass. A per-word overlap map
        /// (<c>bool[maxStrLen]</c>) is kept for the opposite side so that a new alignment is only
        /// accepted when <c>IsOverlapConflict</c> reports no conflict with what is already
        /// recorded for that word. Because only one pass is made, the outcome depends on the
        /// order of the input alignment lists.
        /// </remarks>
        /// <param name="lpeConf">Language pair configuration supplying the overlap and fragment-length thresholds.</param>
        /// <param name="aie">Alignment element whose <c>srcToTrgAlignments</c> and <c>trgToSrcAlignments</c> are read and whose <c>consolidatedAlignment</c> is written.</param>
        /// <param name="excDict">Optional exclusion dictionary (source word to target words); listed pairs are ignored. May be null.</param>
        public static void ConsolidateOverlaps(MPAlignerConfigurationLangPairEntry lpeConf,AlignmentInfoElement aie, Dictionary<string, Dictionary<string, bool>> excDict)
        {
            // Nothing can be consolidated without configuration, entries and both alignment lists;
            // in that case aie.consolidatedAlignment is left untouched.
            if (lpeConf==null||aie==null||aie.srcEntry==null||aie.trgEntry==null)
            {
                return;
            }
            if (aie.srcToTrgAlignments==null || aie.trgToSrcAlignments==null)
            {
                return;
            }
            //Options:
            //Iterate through srcToTrg and find top 1 overlap for each source ID (do not worry about repetitive word overlaps at this point!)
            // Best alignment found so far per source word ID.
            Dictionary<int, WordAlignmentElement> maxSrcToTrgOverlaps = new Dictionary<int, WordAlignmentElement>();
            // Overlap map per target word ID, accumulated from the accepted alignments.
            Dictionary<int, bool[]> trgStrOverlapDict = new Dictionary<int, bool[]>();

            foreach(WordAlignmentElement wae in aie.srcToTrgAlignments)
            {
                // Skip word pairs that are explicitly listed in the exclusion dictionary.
                if (excDict==null||!excDict.ContainsKey(aie.srcEntry.lowercaseWords[wae.fromId])||!excDict[aie.srcEntry.lowercaseWords[wae.fromId]].ContainsKey(aie.trgEntry.lowercaseWords[wae.toId]))
                {
                    if (wae.fromOverlap<lpeConf.minShortFragmentOverlap && wae.fromLen<lpeConf.minShortFragmentLen||
                        wae.fromLen<lpeConf.minShortFragmentLen && wae.toLen>lpeConf.maxShortFragmentTargetLen)
                    {
                        //In the case if there is a "short" fragment and the target length is "big", the alignment won't be applied (this is to limit false alignments!).
                    }
                    else if (Math.Max(wae.fromOverlap,wae.toOverlap)>=lpeConf.minSrcOrTrgOverlap) // this
                    {
                        if (!maxSrcToTrgOverlaps.ContainsKey(wae.fromId))
                        {
                            // First candidate for this source word.
                            if (trgStrOverlapDict.ContainsKey(wae.toId) && !IsOverlapConflict(lpeConf, trgStrOverlapDict[wae.toId], wae.alignmentMap, null))
                            {
                                trgStrOverlapDict[wae.toId] = AdjustAlignmentMap(lpeConf,trgStrOverlapDict[wae.toId],wae.alignmentMap, null);
                                maxSrcToTrgOverlaps.Add(wae.fromId, wae);
                            }
                            else if (!trgStrOverlapDict.ContainsKey(wae.toId))
                            {
                                trgStrOverlapDict.Add(wae.toId,new bool[maxStrLen]); //This way we ensure that small alignments do not cause big problems!
                                trgStrOverlapDict[wae.toId] = AdjustAlignmentMap(lpeConf,trgStrOverlapDict[wae.toId],wae.alignmentMap, null);
                                maxSrcToTrgOverlaps.Add(wae.fromId, wae);
                                //This is when for a target ID there is no alignment map already specified (should not happen, but just in case...
                            }
                        }
                        else if (wae.fromOverlap>maxSrcToTrgOverlaps[wae.fromId].fromOverlap)
                        {
                            // A better candidate than the one currently stored for this source word.
                            if (wae.toId == maxSrcToTrgOverlaps[wae.fromId].toId)
                            {
                                // Same target word: the stored alignment's map is passed along so
                                // that it can be taken into account when checking and adjusting.
                                if (trgStrOverlapDict.ContainsKey(wae.toId) && !IsOverlapConflict(lpeConf, trgStrOverlapDict[wae.toId], wae.alignmentMap, maxSrcToTrgOverlaps[wae.fromId].alignmentMap))
                                {
                                    trgStrOverlapDict[wae.toId] = AdjustAlignmentMap(lpeConf,trgStrOverlapDict[wae.toId],wae.alignmentMap, maxSrcToTrgOverlaps[wae.fromId].alignmentMap);
                                    maxSrcToTrgOverlaps[wae.fromId] = wae;
                                }
                                else if (!trgStrOverlapDict.ContainsKey(wae.toId))
                                {
                                    trgStrOverlapDict.Add(wae.toId,new bool[maxStrLen]);
                                    trgStrOverlapDict[wae.toId] = AdjustAlignmentMap(lpeConf,trgStrOverlapDict[wae.toId],wae.alignmentMap, null);
                                    maxSrcToTrgOverlaps[wae.fromId] = wae;
                                    //This is when for a target ID there is no alignment map already specified (should not happen, but just in case...
                                }
                            }
                            else
                            {
                                //If the target ID changes then we have to remove alignment from the target map. As we do only one pass through the alignments, some important updates can be lost in this way if the list is not sorted.
                                if (trgStrOverlapDict.ContainsKey(wae.toId) && !IsOverlapConflict(lpeConf, trgStrOverlapDict[wae.toId], wae.alignmentMap, null))
                                {
                                    //At first, remove alignment from the previous target.
                                    if (trgStrOverlapDict.ContainsKey(maxSrcToTrgOverlaps[wae.fromId].toId)) trgStrOverlapDict[maxSrcToTrgOverlaps[wae.fromId].toId] = RemoveFromAlignmentMap (lpeConf,trgStrOverlapDict[maxSrcToTrgOverlaps[wae.fromId].toId], maxSrcToTrgOverlaps[wae.fromId].alignmentMap);
                                    //Now adjust the new target alignment.
                                    trgStrOverlapDict[wae.toId] = AdjustAlignmentMap(lpeConf,trgStrOverlapDict[wae.toId],wae.alignmentMap, null);
                                    maxSrcToTrgOverlaps[wae.fromId] = wae;
                                }
                                else if (!trgStrOverlapDict.ContainsKey(wae.toId))
                                {
                                    //At first, remove alignment from the previous target.
                                    if (trgStrOverlapDict.ContainsKey(maxSrcToTrgOverlaps[wae.fromId].toId)) trgStrOverlapDict[maxSrcToTrgOverlaps[wae.fromId].toId] = RemoveFromAlignmentMap (lpeConf,trgStrOverlapDict[maxSrcToTrgOverlaps[wae.fromId].toId], maxSrcToTrgOverlaps[wae.fromId].alignmentMap);
                                    //Now create the new target alignment.
                                    trgStrOverlapDict.Add(wae.toId,new bool[maxStrLen]);
                                    trgStrOverlapDict[wae.toId] = AdjustAlignmentMap(lpeConf,trgStrOverlapDict[wae.toId],wae.alignmentMap, null);
                                    maxSrcToTrgOverlaps[wae.fromId] = wae;
                                    //This is when for a target ID there is no alignment map already specified (should not happen, but just in case...
                                }
                                //This is when for a source ID the target ID changes.
                            }
                        }
                    }
                }
            }

            // Mirror of the pass above: best alignment per target word ID, with overlap maps
            // kept per source word ID.
            Dictionary<int, WordAlignmentElement> maxTrgToSrcOverlaps = new Dictionary<int, WordAlignmentElement>();
            Dictionary<int, bool[]> srcStrOverlapDict = new Dictionary<int, bool[]>();

            foreach(WordAlignmentElement wae in aie.trgToSrcAlignments)
            {
                if (excDict==null||!excDict.ContainsKey(aie.srcEntry.lowercaseWords[wae.fromId])||!excDict[aie.srcEntry.lowercaseWords[wae.fromId]].ContainsKey(aie.trgEntry.lowercaseWords[wae.toId]))
                {
                    // NOTE(review): the first clause tests wae.fromLen, whereas a strict mirror of the
                    // source-to-target filter would test wae.toLen. Kept as-is; confirm the intent.
                    if (wae.toOverlap<lpeConf.minShortFragmentOverlap && wae.fromLen<lpeConf.minShortFragmentLen||
                        wae.toLen<lpeConf.minShortFragmentLen && wae.fromLen>lpeConf.maxShortFragmentTargetLen)
                    {
                        //In the case if there is a "short" fragment and the target length is "big", the alignment won't be applied (this is to limit false alignments!).
                    }
                    else if (Math.Max(wae.fromOverlap,wae.toOverlap)>=lpeConf.minSrcOrTrgOverlap)
                    {
                        if (!maxTrgToSrcOverlaps.ContainsKey(wae.toId))
                        {
                            // First candidate for this target word.
                            if (srcStrOverlapDict.ContainsKey(wae.fromId) && !IsOverlapConflict(lpeConf, srcStrOverlapDict[wae.fromId], wae.alignmentMap, null))
                            {
                                srcStrOverlapDict[wae.fromId] = AdjustAlignmentMap(lpeConf,srcStrOverlapDict[wae.fromId],wae.alignmentMap, null);
                                maxTrgToSrcOverlaps.Add(wae.toId, wae);
                            }
                            else if (!srcStrOverlapDict.ContainsKey(wae.fromId))
                            {
                                srcStrOverlapDict.Add(wae.fromId,new bool[maxStrLen]); //This way we ensure that small alignments do not cause big problems!
                                // The source overlap maps are keyed by source word ID, so the adjusted
                                // map must be stored under fromId (the key that was just added), not
                                // under the target word ID.
                                srcStrOverlapDict[wae.fromId] = AdjustAlignmentMap(lpeConf,srcStrOverlapDict[wae.fromId],wae.alignmentMap, null);
                                maxTrgToSrcOverlaps.Add(wae.toId, wae);
                                //This is when for a source ID there is no alignment map already specified (should not happen, but just in case...
                            }
                        }
                        else if (wae.toOverlap>maxTrgToSrcOverlaps[wae.toId].toOverlap)
                        {
                            // A better candidate than the one currently stored for this target word.
                            if (wae.fromId == maxTrgToSrcOverlaps[wae.toId].fromId)
                            {
                                if (srcStrOverlapDict.ContainsKey(wae.fromId) && !IsOverlapConflict(lpeConf, srcStrOverlapDict[wae.fromId], wae.alignmentMap, maxTrgToSrcOverlaps[wae.toId].alignmentMap))
                                {
                                    srcStrOverlapDict[wae.fromId] = AdjustAlignmentMap(lpeConf,srcStrOverlapDict[wae.fromId],wae.alignmentMap, maxTrgToSrcOverlaps[wae.toId].alignmentMap);
                                    maxTrgToSrcOverlaps[wae.toId] = wae;
                                }
                                else if (!srcStrOverlapDict.ContainsKey(wae.fromId))
                                {
                                    srcStrOverlapDict.Add(wae.fromId,new bool[maxStrLen]);
                                    srcStrOverlapDict[wae.fromId] = AdjustAlignmentMap(lpeConf,srcStrOverlapDict[wae.fromId],wae.alignmentMap, null);
                                    maxTrgToSrcOverlaps[wae.toId] = wae;
                                    //This is when for a source ID there is no alignment map already specified (should not happen, but just in case...
                                }
                            }
                            else
                            {
                                //If the source ID changes then we have to remove alignment from the source map. As we do only one pass through the alignments, some important updates can be lost in this way if the list is not sorted.
                                if (srcStrOverlapDict.ContainsKey(wae.fromId) && !IsOverlapConflict(lpeConf, srcStrOverlapDict[wae.fromId], wae.alignmentMap, null))
                                {
                                    //At first, remove alignment from the previous source.
                                    if (srcStrOverlapDict.ContainsKey(maxTrgToSrcOverlaps[wae.toId].fromId)) srcStrOverlapDict[maxTrgToSrcOverlaps[wae.toId].fromId] = RemoveFromAlignmentMap (lpeConf,srcStrOverlapDict[maxTrgToSrcOverlaps[wae.toId].fromId], maxTrgToSrcOverlaps[wae.toId].alignmentMap);
                                    //Now adjust the new source alignment.
                                    srcStrOverlapDict[wae.fromId] = AdjustAlignmentMap(lpeConf,srcStrOverlapDict[wae.fromId],wae.alignmentMap, null);
                                    maxTrgToSrcOverlaps[wae.toId] = wae;
                                }
                                else if (!srcStrOverlapDict.ContainsKey(wae.fromId))
                                {
                                    //At first, remove alignment from the previous source.
                                    if (srcStrOverlapDict.ContainsKey(maxTrgToSrcOverlaps[wae.toId].fromId)) srcStrOverlapDict[maxTrgToSrcOverlaps[wae.toId].fromId] = RemoveFromAlignmentMap (lpeConf,srcStrOverlapDict[maxTrgToSrcOverlaps[wae.toId].fromId], maxTrgToSrcOverlaps[wae.toId].alignmentMap);
                                    //Now create the new source alignment.
                                    srcStrOverlapDict.Add(wae.fromId,new bool[maxStrLen]);
                                    srcStrOverlapDict[wae.fromId] = AdjustAlignmentMap(lpeConf,srcStrOverlapDict[wae.fromId],wae.alignmentMap, null);
                                    maxTrgToSrcOverlaps[wae.toId] = wae;
                                    //This is when for a source ID there is no alignment map already specified (should not happen, but just in case...
                                }
                                //This is when for a target ID the source ID changes.
                            }
                        }
                    }
                }
            }

            //At this stage we have one alignment from SRC to TRG and one from TRG to SRC. We need to consolidate both and also trim the beginning and the end...
            aie.consolidatedAlignment = ConsolidateAlignments(aie.srcEntry.lowercaseWords.Count, aie.trgEntry.lowercaseWords.Count, maxSrcToTrgOverlaps,maxTrgToSrcOverlaps);
        }