Exemplo n.º 1
0
        /// <summary>
        /// Builds a new <c>LemmaExample</c> from the given values and registers it through <c>Add</c>.
        /// </summary>
        /// <param name="sWord">Word form of the example.</param>
        /// <param name="sLemma">Lemma paired with the word.</param>
        /// <param name="dWeight">Weight handed to the new example.</param>
        /// <param name="sMsd">MSD tag; replaced by null when the settings say MSDs are ignored.</param>
        /// <returns>The example returned by <c>Add</c> for the newly built example.</returns>
        public LemmaExample AddExample(string sWord, string sLemma, double dWeight, string sMsd)
        {
            // The MSD is only kept when the settings do not ask for it to be ignored.
            string sEffectiveMsd = null;

            if (lsett.eMsdConsider != LemmatizerSettings.MsdConsideration.Ignore)
            {
                sEffectiveMsd = sMsd;
            }

            LemmaExample leCandidate = new LemmaExample(sWord, sLemma, dWeight, sEffectiveMsd, rlRules, lsett);

            return Add(leCandidate);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Collapses examples that share the same word, lemma and MSD into a single example
        /// whose weight is the sum of the weights of all merged examples.
        /// </summary>
        /// <param name="examples">Examples to compact; the list itself is not modified.</param>
        /// <returns>
        /// A new list holding one freshly constructed example per distinct word/lemma/MSD
        /// combination (built with null for the last two constructor arguments).
        /// </returns>
        private static List <LemmaExample> CompactExamples(List <LemmaExample> examples)
        {
            Dictionary <string, LemmaExample> firstBySignature  = new Dictionary <string, LemmaExample>();
            Dictionary <string, double>       weightBySignature = new Dictionary <string, double>();

            foreach (LemmaExample le in examples)
            {
                // Tab-separated key; a null Msd concatenates as an empty string.
                string signature = le.Word + "\t" + le.Lemma + "\t" + le.Msd;

                // Single TryGetValue instead of ContainsKey followed by an indexer read.
                double accumulated;
                if (weightBySignature.TryGetValue(signature, out accumulated))
                {
                    weightBySignature[signature] = accumulated + le.Weight;
                }
                else
                {
                    firstBySignature[signature]  = le;
                    weightBySignature[signature] = le.Weight;
                }
            }

            // The final size is known up front, so presize the result list.
            List <LemmaExample> compacted = new List <LemmaExample>(firstBySignature.Count);

            foreach (KeyValuePair <string, LemmaExample> kvp in firstBySignature)
            {
                LemmaExample first = kvp.Value;
                compacted.Add(new LemmaExample(first.Word, first.Lemma, weightBySignature[kvp.Key], first.Msd, null, null));
            }

            return compacted;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Restores this object from a binary stream: an optional <c>MsdSpec</c>, then the
        /// example list, then the remaining state via the private <c>Deserialize</c> overload.
        /// </summary>
        /// <param name="binRead">Reader positioned at the start of the serialized data.</param>
        public void Deserialize(BinaryReader binRead)
        {
            // A leading flag tells whether an MsdSpec follows in the stream.
            bool hasMsdSpec = binRead.ReadBoolean();
            msdSpec = hasMsdSpec ? new MsdSpec(binRead) : null;

            // Examples are indexed by their position in the stream so the overload
            // called below can look them up by id.
            Dictionary <int, LemmaExample> examplesById = new Dictionary <int, LemmaExample>();

            // A negative stored count stands for "no list" (null).
            int storedCount = binRead.ReadInt32();
            exampleList = storedCount < 0 ? null : new List <LemmaExample>(storedCount);

            for (int id = 0; id < storedCount; id++)
            {
                LemmaExample example = new LemmaExample(binRead, null, null);
                examplesById[id] = example;
                exampleList.Add(example);
            }

            Deserialize(binRead, examplesById, msdSpec);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds a one-level split tree that partitions the examples by the value
        /// <c>msdSpec.GetAttrValue</c> returns for attribute <paramref name="attrId"/> of each example's MSD.
        /// </summary>
        /// <param name="el">Examples to partition; also stored as the example list of the returned node.</param>
        /// <param name="attrId">Id of the MSD attribute to split on.</param>
        /// <param name="msdSpec">MSD specification used to extract attribute values and to construct the nodes.</param>
        /// <returns>The new parent node with one subtree per encountered attribute value, plus the '#' subtree.</returns>
        private static MsdSplitTree SplitByMsdAttribute(List <LemmaExample> el, int attrId, MsdSpec msdSpec)
        {
            MsdSplitTree root = new MsdSplitTree(msdSpec);

            root.attrId      = attrId;
            root.subTrees    = new Dictionary <char, MsdSplitTree>();
            root.exampleList = el;

            // TODO (carried over from the original author: "FIX IT"): the '#' subtree is
            // always created, even when no example ends up in it.
            MsdSplitTree fallback = new MsdSplitTree(msdSpec);

            fallback.exampleList = new List <LemmaExample>();
            root.subTrees['#']   = fallback;

            // Distribute every example into the bucket of its attribute value.
            foreach (LemmaExample example in el)
            {
                char attrValue = msdSpec.GetAttrValue(example.Msd, attrId);

                MsdSplitTree bucket;
                if (!root.subTrees.TryGetValue(attrValue, out bucket))
                {
                    bucket = new MsdSplitTree(msdSpec);
                    root.subTrees[attrValue] = bucket;
                    bucket.exampleList       = new List <LemmaExample>();
                }

                bucket.exampleList.Add(example);
            }

            // Each child is a leaf here: all three ambiguity figures equal the
            // ambiguity of its own example list, and its recursive size is 1.
            double childAmbiguityTotal = 0;

            foreach (MsdSplitTree child in root.subTrees.Values)
            {
                double ambiguity = GetListAmbiguities(child.exampleList);

                child.ambigThis         = ambiguity;
                child.ambigChild        = ambiguity;
                child.ambigRecurs       = ambiguity;
                child.subTreeSizeRecurs = 1;

                childAmbiguityTotal += ambiguity;
            }

            root.ambigChild        = childAmbiguityTotal;
            root.ambigRecurs       = childAmbiguityTotal;
            root.subTreeSizeRecurs = root.subTrees.Count;

            return root;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Computes the weighted ambiguity of a list of examples. For every distinct word the
        /// weights are totalled per lemma; the word contributes the sum of all its lemma
        /// weights minus the weight of its heaviest lemma.
        /// </summary>
        /// <param name="el">Examples to analyse.</param>
        /// <returns>The sum of the per-word contributions; 0 when every word has a single lemma.</returns>
        private static double GetListAmbiguities(List <LemmaExample> el)
        {
            // word -> lemma -> msd -> accumulated weight
            Dictionary <string, Dictionary <string, Dictionary <string, double> > > wordLemmaMsdWeight = new Dictionary <string, Dictionary <string, Dictionary <string, double> > >();

            for (int i = 0; i < el.Count; i++)
            {
                LemmaExample exmp = el[i];

                // Resolve each nesting level once with TryGetValue instead of
                // re-walking the whole dictionary chain on every access.
                Dictionary <string, Dictionary <string, double> > lemmaMsdWeight;
                if (!wordLemmaMsdWeight.TryGetValue(exmp.Word, out lemmaMsdWeight))
                {
                    lemmaMsdWeight = new Dictionary <string, Dictionary <string, double> >();
                    wordLemmaMsdWeight[exmp.Word] = lemmaMsdWeight;
                }

                Dictionary <string, double> msdWeight;
                if (!lemmaMsdWeight.TryGetValue(exmp.Lemma, out msdWeight))
                {
                    msdWeight = new Dictionary <string, double>();
                    lemmaMsdWeight[exmp.Lemma] = msdWeight;
                }

                double weightSoFar;
                if (msdWeight.TryGetValue(exmp.Msd, out weightSoFar))
                {
                    msdWeight[exmp.Msd] = weightSoFar + exmp.Weight;
                }
                else
                {
                    msdWeight[exmp.Msd] = exmp.Weight;
                }
            }

            double wghtAmbiguities = 0;

            foreach (Dictionary <string, Dictionary <string, double> > lemmasOfWord in wordLemmaMsdWeight.Values)
            {
                double weightLemmaSum = 0;
                double weightLemmaMax = 0;

                foreach (Dictionary <string, double> msdsOfLemma in lemmasOfWord.Values)
                {
                    // Total weight of this lemma over all its MSDs.
                    double weightLemma = 0;
                    foreach (double weight in msdsOfLemma.Values)
                    {
                        weightLemma += weight;
                    }

                    weightLemmaSum += weightLemma;
                    if (weightLemma > weightLemmaMax)
                    {
                        weightLemmaMax = weightLemma;
                    }
                }

                // Everything except the dominant lemma counts as ambiguous weight.
                wghtAmbiguities += weightLemmaSum - weightLemmaMax;
            }

            return wghtAmbiguities;
        }
Exemplo n.º 6
0
 /// <summary>
 /// Lemmatizes a word. When the settings enable the front lemmatizer, the reversed word is
 /// first run through the front tree, the result is reversed back, and that is then run
 /// through the regular tree; otherwise only the regular tree is used.
 /// </summary>
 /// <param name="sWord">Word to be lemmatized.</param>
 /// <returns>Lemmatized word.</returns>
 public string Lemmatize(string sWord)
 {
     if (lsett.bBuildFrontLemmatizer)
     {
         string sReversedWord  = LemmaExample.StringReverse(sWord);
         string sReversedLemma = ltrRootNodeFrontSafe.Lemmatize(sReversedWord);
         string sFrontStripped = LemmaExample.StringReverse(sReversedLemma);

         return ltrRootNodeSafe.Lemmatize(sFrontStripped);
     }

     return ltrRootNodeSafe.Lemmatize(sWord);
 }
Exemplo n.º 7
0
        /// <summary>
        /// Comparison that orders examples by word, then by lemma, then by MSD,
        /// each compared with <c>String.Compare</c>.
        /// </summary>
        /// <param name="x">First example.</param>
        /// <param name="y">Second example.</param>
        /// <returns>Negative, zero or positive, following the usual comparison convention.</returns>
        private static int CompareExamplesWordLemmaMsd(LemmaExample x, LemmaExample y)
        {
            int byWord = String.Compare(x.Word, y.Word);

            if (byWord != 0)
            {
                return byWord;
            }

            int byLemma = String.Compare(x.Lemma, y.Lemma);

            if (byLemma != 0)
            {
                return byLemma;
            }

            // Word and lemma are equal: the MSD decides.
            return String.Compare(x.Msd, y.Msd);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Registers an example under its signature. If an example with the same signature is
        /// already stored, the new one is merged into it via <c>Join</c>; otherwise the new one is stored.
        /// </summary>
        /// <param name="leNew">Example to add.</param>
        /// <returns>The example stored under the signature (the pre-existing one or <paramref name="leNew"/>).</returns>
        private LemmaExample Add(LemmaExample leNew)
        {
            LemmaExample leStored;
            bool         bAlreadyKnown = dictExamples.TryGetValue(leNew.Signature, out leStored);

            if (bAlreadyKnown)
            {
                leStored.Join(leNew);
            }
            else
            {
                leStored = leNew;
                dictExamples.Add(leNew.Signature, leNew);
            }

            // The list is reset on every add (presumably a cached view rebuilt elsewhere; confirm).
            lstExamples = null;

            return leStored;
        }
Exemplo n.º 9
0
        /// <summary>
        /// Extended lemmatization interface with more options.
        /// </summary>
        /// <param name="sWord">Word to be lemmatized.</param>
        /// <param name="ignoreCase">If true, casing is ignored. If false, the lemmatizer matches the longest rule it knows, but requires the rule and the word to have the same casing.</param>
        /// <param name="sMsd">Morphosyntactic descriptor (MSD) of the word to be lemmatized; may be null.</param>
        /// <returns>Lemmatized word.</returns>
        public string Lemmatize(string sWord, bool ignoreCase, string sMsd)
        {
            // The MSD is run through the split tree only when one was supplied and the
            // settings enable the split-tree optimization with distinct MSD consideration.
            bool bTransformMsd = sMsd != null
                                 && lsett.bUseMsdSplitTreeOptimization
                                 && lsett.eMsdConsider == LemmatizerSettings.MsdConsideration.Distinct;

            string sEffectiveMsd = bTransformMsd ? msdSplitTree.TransformMsd(sMsd) : sMsd;

            if (lsett.bBuildFrontLemmatizer)
            {
                // Front pass on the reversed word, then the regular pass on the re-reversed result.
                string sReversedWord  = LemmaExample.StringReverse(sWord);
                string sReversedLemma = ltrRootNodeFrontSafe.Lemmatize(sReversedWord, ignoreCase, sEffectiveMsd);
                string sFrontStripped = LemmaExample.StringReverse(sReversedLemma);

                return ltrRootNodeSafe.Lemmatize(sFrontStripped, ignoreCase, sEffectiveMsd);
            }

            return ltrRootNodeSafe.Lemmatize(sWord, ignoreCase, sEffectiveMsd);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Lemmatizes a word. The lower-cased word is first looked up in <c>Exceptions</c>; on a hit
        /// the stored value is returned. Otherwise the word goes through the regular tree, preceded
        /// by a pass through the front tree on the reversed word when the settings enable it.
        /// </summary>
        /// <param name="word">Word to be lemmatized.</param>
        /// <returns>Lemmatized word.</returns>
        public string Lemmatize(string word)
        {
            string lowered = word.ToLower();

            if (this.Exceptions.ContainsKey(lowered))
            {
                return this.Exceptions[lowered];
            }

            if (Lsett.bBuildFrontLemmatizer)
            {
                // Note: the trees receive the word in its original casing, not the lowered one.
                string sReversedWord  = LemmaExample.StringReverse(word);
                string sReversedLemma = LtrRootNodeFrontSafe.Lemmatize(sReversedWord);
                string sFrontStripped = LemmaExample.StringReverse(sReversedLemma);

                return LtrRootNodeSafe.Lemmatize(sFrontStripped);
            }

            return LtrRootNodeSafe.Lemmatize(word);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Comparison that orders examples by word, then by MSD, then by weight in descending
        /// order (heavier first), and finally by lemma.
        /// </summary>
        /// <param name="x">First example.</param>
        /// <param name="y">Second example.</param>
        /// <returns>Negative, zero or positive, following the usual comparison convention.</returns>
        private static int CompareExamplesWordMsdWeightLemma(LemmaExample x, LemmaExample y)
        {
            int byWord = String.Compare(x.Word, y.Word);

            if (byWord != 0)
            {
                return byWord;
            }

            int byMsd = String.Compare(x.Msd, y.Msd);

            if (byMsd != 0)
            {
                return byMsd;
            }

            // Larger weight sorts first.
            if (x.Weight > y.Weight)
            {
                return -1;
            }

            if (x.Weight < y.Weight)
            {
                return 1;
            }

            return String.Compare(x.Lemma, y.Lemma);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Restores this object from a binary stream: settings (read from the stream or taken
        /// from the argument), the rule list, and the examples.
        /// </summary>
        /// <param name="binRead">Reader positioned at the start of the serialized data.</param>
        /// <param name="lsett">Settings to use when the stream does not carry its own (top-object flag is false).</param>
        public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett)
        {
            // Metadata: a flag telling whether the settings are embedded in the stream.
            bool bSettingsInStream = binRead.ReadBoolean();

            this.lsett = bSettingsInStream ? new LemmatizerSettings(binRead) : lsett;

            // Rules come next and are read with the settings chosen above.
            rlRules = new RuleList(binRead, this.lsett);

            // Examples: a flag says whether the list should be populated alongside the dictionary.
            bool bKeepList = binRead.ReadBoolean();

            lstExamples = null;
            if (bKeepList)
            {
                lstExamples = new List <LemmaExample>();
            }

            dictExamples = new Dictionary <string, LemmaExample>();

            int iStoredCount = binRead.ReadInt32();

            for (int iIndex = 0; iIndex < iStoredCount; iIndex++)
            {
                // Each entry starts with a string used to look up its rule, followed by the example data.
                string       sRuleKey = binRead.ReadString();
                LemmaRule    lrRule   = rlRules[sRuleKey];
                LemmaExample example  = new LemmaExample(binRead, this.lsett, lrRule);

                dictExamples.Add(example.Signature, example);

                if (bKeepList)
                {
                    lstExamples.Add(example);
                }
            }
        }
Exemplo n.º 13
0
 /// <summary>
 /// Creates a rule from the example's word and lemma, passing the current <c>Count</c> and
 /// the settings to the rule constructor, and forwards it to the <c>AddRule</c> overload that takes a rule.
 /// </summary>
 /// <param name="le">Example the rule is built from.</param>
 /// <returns>The rule returned by the forwarded <c>AddRule</c> call.</returns>
 public LemmaRule AddRule(LemmaExample le)
 {
     LemmaRule lrCandidate = new LemmaRule(le.Word, le.Lemma, this.Count, lsett);

     return AddRule(lrCandidate);
 }
Exemplo n.º 14
0
        /// <summary>
        /// Reads the state of this tree node from a binary stream, in this order: attribute id,
        /// example references, ambiguity statistics, subtrees, and beam siblings.
        /// </summary>
        /// <param name="binRead">Reader positioned at the start of this node's data.</param>
        /// <param name="exampleMapping">Lookup from example id to example; every id stored for this node must be present.</param>
        /// <param name="msdSpec">MSD specification stored on this node and passed on to nested nodes.</param>
        private void Deserialize(BinaryReader binRead, Dictionary <int, LemmaExample> exampleMapping, MsdSpec msdSpec)
        {
            this.msdSpec = msdSpec;

            attrId = binRead.ReadInt32();

            // Examples are stored as ids; a negative count stands for a null list.
            int exampleListCount = binRead.ReadInt32();

            if (exampleListCount < 0)
            {
                exampleList = null;
            }
            else
            {
                exampleList = new List <LemmaExample>(exampleListCount);
                for (int i = 0; i < exampleListCount; i++)
                {
                    int          leId = binRead.ReadInt32();
                    // The dictionary indexer throws if the id is not in the mapping.
                    LemmaExample le   = exampleMapping[leId];
                    exampleList.Add(le);
                }
            }

            // Statistics are read in this fixed order.
            ambigThis         = binRead.ReadDouble();
            ambigChild        = binRead.ReadDouble();
            ambigRecurs       = binRead.ReadDouble();
            subTreeSizeRecurs = binRead.ReadInt32();

            // Subtrees: a negative count stands for a null dictionary.
            int subTreesCount = binRead.ReadInt32();

            if (subTreesCount < 0)
            {
                subTrees = null;
            }
            else
            {
                subTrees = new Dictionary <char, MsdSplitTree>();
                for (int i = 0; i < subTreesCount; i++)
                {
                    // Each entry is a key character followed by the nested node's data,
                    // which the constructor consumes from the same reader.
                    char         key = binRead.ReadChar();
                    MsdSplitTree mst = new MsdSplitTree(binRead, exampleMapping, msdSpec);
                    subTrees.Add(key, mst);
                }
            }

            // Beam siblings: a negative count stands for a null list.
            int beamSiblingsCount = binRead.ReadInt32();

            if (beamSiblingsCount < 0)
            {
                beamSiblings = null;
            }
            else
            {
                beamSiblings = new List <MsdSplitTree>(beamSiblingsCount);
                for (int i = 0; i < beamSiblingsCount; i++)
                {
                    // A true flag means the sibling is this very node, so no nested data
                    // follows; otherwise a full node is read from the stream.
                    bool bThisTree = binRead.ReadBoolean();
                    if (bThisTree)
                    {
                        beamSiblings.Add(this);
                    }
                    else
                    {
                        MsdSplitTree mst = new MsdSplitTree(binRead, exampleMapping, msdSpec);
                        beamSiblings.Add(mst);
                    }
                }
            }
        }