예제 #1
0
        /// <summary>
        /// Restores the settings, the example list and the tree root(s) of this lemmatizer from a binary stream.
        /// </summary>
        /// <param name="binRead">Reader positioned at the start of the serialized lemmatizer data.</param>
        public void Deserialize(BinaryReader binRead)
        {
            // The order of the reads below defines the expected stream layout; do not reorder.
            lsett = new LemmatizerSettings(binRead);
            bool bExamplesFlag = binRead.ReadBoolean();
            elExamples = new ExampleList(binRead, lsett);

            ExampleList elRear;
            ExampleList elFront;
            if (!bExamplesFlag)
            {
                // Flag not set: the rear and front lists are read from the stream.
                elRear  = new ExampleList(binRead, lsett);
                elFront = new ExampleList(binRead, lsett);
            }
            else
            {
                // Flag set: the rear and front lists are derived from the main list.
                elRear  = elExamples.GetFrontRearExampleList(false);
                elFront = elExamples.GetFrontRearExampleList(true);
            }

            if (lsett.bBuildFrontLemmatizer)
            {
                ltnRootNode      = new LemmaTreeNode(binRead, lsett, elRear, null);
                ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elFront, null);
                return;
            }

            ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null);
        }
예제 #2
0
 /// <summary>
 /// Creates a lemmatizer that stores the given settings, owns a new ExampleList built from them
 /// and has no tree roots yet.
 /// </summary>
 /// <param name="lsett">Settings kept on the instance and handed to the new ExampleList.</param>
 public Lemmatizer(LemmatizerSettings lsett)
 {
     // Both roots start out as null.
     ltnRootNode      = null;
     ltnRootNodeFront = null;

     this.lsett = lsett;
     elExamples = new ExampleList(lsett);
 }
예제 #3
0
        /// <summary>
        /// Lemmatizes a word: descends into the matching sub-node when there is one, otherwise applies
        /// a rule stored on this node (or on one of its ancestors when MSD tags are considered).
        /// </summary>
        /// <param name="sWord">Word to lemmatize.</param>
        /// <param name="ignoreCase">When true, the sub-node keyed by the opposite casing of the branching character is tried as well.</param>
        /// <param name="sMsd">MSD tag; only consulted when the settings use MsdConsideration.Distinct. May be null.</param>
        /// <returns>The result of applying the selected rule to <paramref name="sWord"/>.</returns>
        public string Lemmatize(string sWord, bool ignoreCase, string sMsd)
        {
            if (sWord.Length >= iSimilarity && dictSubNodes != null)
            {
                // Character that selects the sub-node; '\0' when the word is exactly iSimilarity long.
                char chChar = sWord.Length > iSimilarity ? sWord[sWord.Length - 1 - iSimilarity] : '\0';
                LemmaTreeNode ltnSub;

                // Try the character as written first (single lookup instead of ContainsKey + indexer).
                if (dictSubNodes.TryGetValue(chChar, out ltnSub) && ltnSub.ConditionSatisfied(sWord, ignoreCase, sMsd))
                {
                    return ltnSub.Lemmatize(sWord, ignoreCase, sMsd);
                }

                // Then try the inverted casing.
                if (ignoreCase && char.IsLetter(chChar))
                {
                    char chCharInvert = char.IsLower(chChar) ? char.ToUpper(chChar) : char.ToLower(chChar);
                    if (dictSubNodes.TryGetValue(chCharInvert, out ltnSub) && ltnSub.ConditionSatisfied(sWord, ignoreCase, sMsd))
                    {
                        return ltnSub.Lemmatize(sWord, ignoreCase, sMsd);
                    }
                }
            }

            if (lsett.eMsdConsider != LemmatizerSettings.MsdConsideration.Distinct || sMsd == null)
            {
                return lrBestRule.Lemmatize(sWord);
            }

            // Walk towards the root until a node offers a usable rule for this MSD.
            // A missing dictionary, a missing/empty rule array or a null rule all count as "no rule here";
            // previously these cases raised NullReferenceException/IndexOutOfRangeException or looped forever.
            LemmaTreeNode ltnValid = this;
            while (true)
            {
                if (ltnValid.dictMsdBestRules != null && ltnValid.dictMsdBestRules.ContainsKey(sMsd))
                {
                    var aMsdRules = ltnValid.dictMsdBestRules[sMsd];
                    if (aMsdRules != null && aMsdRules.Length > 0 && aMsdRules[0].Rule != null)
                    {
                        return aMsdRules[0].Rule.Lemmatize(sWord);
                    }
                }

                if (ltnValid.ltnParentNode == null)
                {
                    // No ancestor knows this MSD: fall back to the best rule of the topmost node reached.
                    return ltnValid.lrBestRule.Lemmatize(sWord);
                }

                ltnValid = ltnValid.ltnParentNode;
            }
        }
예제 #4
0
 /// <summary>
 /// Builds the lemma tree from the current examples unless a root node already exists.
 /// </summary>
 public void BuildModel()
 {
     bool bAlreadyBuilt = ltnRootNode != null;
     if (!bAlreadyBuilt)
     {
         //TODO remove: elExamples.FinalizeAdditions();
         ltnRootNode = new LemmaTreeNode(lsett, elExamples);
     }
 }
예제 #5
0
        /// <summary>
        /// Builds the node for the example range iStart..iEnd, recursively builds its sub-nodes and then
        /// replaces some children by their single grandchild (see the heuristic at the end).
        /// </summary>
        /// <param name="lsett">Settings forwarded to the chained constructor.</param>
        /// <param name="elExamples">Example list the indices refer to.</param>
        /// <param name="iStart">Index of the first word of the current group</param>
        /// <param name="iEnd">Index of the last word of the current group</param>
        /// <param name="ltnParentNode">Parent node; null for a node without parent.</param>
        private LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples, int iStart, int iEnd,
                              LemmaTreeNode ltnParentNode) : this(lsett)
        {
            this.ltnParentNode = ltnParentNode;
            this.dictSubNodes  = null;
            this.iStart        = iStart;
            this.iEnd          = iEnd;
            this.elExamples    = elExamples;

            // An index range outside the list (or an inverted one) yields a node holding only the default rule.
            bool bRangeUsable = iStart < elExamples.Count && iEnd < elExamples.Count && iStart <= iEnd;
            if (!bRangeUsable)
            {
                lrBestRule = elExamples.Rules.DefaultRule;
                aBestRules = new RuleWeighted[] { new RuleWeighted(lrBestRule, 0) };
                dWeight    = 0;
                return;
            }

            // The condition is one character longer than the parent's similarity, capped by the word length.
            int iWantedLength = 0;
            if (ltnParentNode != null)
            {
                iWantedLength = ltnParentNode.iSimilarity + 1;
            }
            int iConditionLength = Math.Min(iWantedLength, elExamples[iStart].Word.Length);

            sCondition  = elExamples[iStart].Word.Substring(elExamples[iStart].Word.Length - iConditionLength);
            iSimilarity = elExamples[iStart].Similarity(elExamples[iEnd]);
            bWholeWord  = ltnParentNode != null && elExamples[iEnd].Word.Length == ltnParentNode.iSimilarity;

            FindBestRules();
            AddSubAll();

            if (dictSubNodes == null)
            {
                return;
            }

            //TODO check this heuristics, can be problematic when there are more applicable rules
            // Collect replacements first; the dictionary must not be modified while it is enumerated.
            List <KeyValuePair <char, LemmaTreeNode> > lPromoted = new List <KeyValuePair <char, LemmaTreeNode> >();
            foreach (KeyValuePair <char, LemmaTreeNode> kvpChild in dictSubNodes)
            {
                LemmaTreeNode ltnChild = kvpChild.Value;
                if (ltnChild.dictSubNodes == null || ltnChild.dictSubNodes.Count != 1)
                {
                    continue;
                }

                if (ltnChild.lrBestRule == lrBestRule)
                {
                    // Count is 1, so the first enumerated value is the only grandchild.
                    foreach (LemmaTreeNode ltnGrandChild in ltnChild.dictSubNodes.Values)
                    {
                        lPromoted.Add(new KeyValuePair <char, LemmaTreeNode>(kvpChild.Key, ltnGrandChild));
                        break;
                    }
                }
            }

            foreach (KeyValuePair <char, LemmaTreeNode> kvpPromoted in lPromoted)
            {
                dictSubNodes[kvpPromoted.Key]   = kvpPromoted.Value;
                kvpPromoted.Value.ltnParentNode = this;
            }
        }
예제 #6
0
 /// <summary>
 /// Loads the settings, the example list and the tree root(s) from a Latino binary serializer.
 /// </summary>
 /// <param name="binRead">Serializer to read from; the reads happen in the order written below.</param>
 public void Load(Latino.BinarySerializer binRead)
 {
     this.lsett      = new LemmatizerSettings(binRead);
     this.elExamples = new ExampleList(binRead, this.lsett);

     if (this.lsett.bBuildFrontLemmatizer)
     {
         // Two trees: the first uses the list from GetFrontRearExampleList(false), the second the one from (true).
         this.ltnRootNode      = new LemmaTreeNode(binRead, this.lsett, this.elExamples.GetFrontRearExampleList(false), null);
         this.ltnRootNodeFront = new LemmaTreeNode(binRead, this.lsett, this.elExamples.GetFrontRearExampleList(true), null);
         return;
     }

     this.ltnRootNode = new LemmaTreeNode(binRead, this.lsett, this.elExamples, null);
 }
예제 #7
0
        /// <summary>
        /// Builds the lemma tree(s) from the current examples unless a root node already exists.
        /// When the settings request it and an MSD specification is supplied, the examples are first
        /// re-created with MSD tags transformed by a newly built MsdSplitTree.
        /// </summary>
        /// <param name="msdSpec">MSD specification text; the MSD transformation is skipped when it is null or empty.</param>
        /// <param name="beamSearchOpt">Beam search parameters passed to the MsdSplitTree constructor.</param>
        public void BuildModel(string msdSpec, MsdSplitTree.BeamSearchParams beamSearchOpt)
        {
            if (ltnRootNode != null)
            {
                return;
            }

            // The split-tree optimization needs all three: the setting, distinct MSD handling and a specification.
            bool bUseMsdSplitTree = lsett.bUseMsdSplitTreeOptimization
                                    && lsett.eMsdConsider == LemmatizerSettings.MsdConsideration.Distinct
                                    && !string.IsNullOrEmpty(msdSpec);
            if (bUseMsdSplitTree)
            {
                msdSplitTree = new MsdSplitTree(elExamples.ListExamples, new MsdSpec(msdSpec), beamSearchOpt);

                // Replace the example list by a copy whose MSD tags went through the split tree.
                ExampleList elOriginal = elExamples;
                elExamples = new ExampleList(lsett);
                foreach (LemmaExample le in elOriginal.ListExamples)
                {
                    elExamples.AddExample(le.Word, le.Lemma, le.Weight, msdSplitTree.TransformMsd(le.Msd));
                }
                //TODO (original author): a problem occurs if BuildModel is called twice -
                //     presumably because the already transformed examples would be transformed again.
            }
            elExamples.FinalizeAdditions();

            if (!lsett.bBuildFrontLemmatizer)
            {
                ltnRootNode = new LemmaTreeNode(lsett, elExamples);
            }
            else
            {
                ltnRootNode      = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(false));
                ltnRootNodeFront = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(true));
            }
        }
예제 #8
0
        /// <summary>
        /// Restores the settings, examples, tree root(s) and, when present, the exception list of this
        /// lemmatizer from a binary stream. The reader is disposed when the method returns.
        /// </summary>
        /// <param name="binRead">Reader positioned at the start of the serialized lemmatizer data.</param>
        public void Deserialize(BinaryReader binRead)
        {
            using (binRead)
            {
                // settings
                Lsett = new LemmatizerSettings(binRead);

                // examples
                bool bSerializeExamples = binRead.ReadBoolean();
                ElExamples = new ExampleList(binRead, Lsett);
                ExampleList elExamplesRear;
                ExampleList elExamplesFront;
                if (bSerializeExamples)
                {
                    elExamplesRear  = ElExamples.GetFrontRearExampleList(false);
                    elExamplesFront = ElExamples.GetFrontRearExampleList(true);
                }
                else
                {
                    elExamplesRear  = new ExampleList(binRead, Lsett);
                    elExamplesFront = new ExampleList(binRead, Lsett);
                }

                // root node
                LtnRootNode = new LemmaTreeNode(binRead, Lsett, Lsett.bBuildFrontLemmatizer ? elExamplesRear : ElExamples, null);

                // root node front
                if (Lsett.bBuildFrontLemmatizer)
                {
                    LtnRootNodeFront = new LemmaTreeNode(binRead, Lsett, elExamplesFront, null);
                }

                // exceptions - use try catch for retro compatibility
                // --> this section is missing in the old lemmatizer files
                try
                {
                    var nbOfExceptions = binRead.ReadInt32();
                    for (var i = 0; i < nbOfExceptions; i++)
                    {
                        var exception = binRead.ReadString();
                        var parts     = exception.Split(' ');

                        // Each entry is expected to hold two space-separated tokens. Skip a malformed
                        // entry instead of letting an index error abort the loading of all the entries
                        // that follow it (every entry is its own string, so the stream stays in sync).
                        if (parts.Length < 2)
                        {
                            Trace.WriteLine("Skipping malformed exception entry in Lemmatizer file");
                            continue;
                        }

                        this.AddException(parts[0], parts[1]);
                    }
                }
                catch (Exception)
                {
                    // Deliberately best-effort: old files simply end before this section.
                    Trace.WriteLine("Couldn't deserialize exceptions in Lemmatizer file");
                }
            }
        }
        /// <summary>
        /// Builds a sub-node for the example range iStart..iEnd and registers it under the given
        /// character, unless the sub-node would add nothing (same best rule and no sub-nodes of its own).
        /// </summary>
        /// <param name="iStart">Index of the first example of the sub-node's range.</param>
        /// <param name="iEnd">Index of the last example of the sub-node's range.</param>
        /// <param name="chChar">Key under which the sub-node is stored in dictSubNodes.</param>
        private void AddSub(int iStart, int iEnd, char chChar)
        {
            LemmaTreeNode ltnChild = new LemmaTreeNode(lsett, elExamples, iStart, iEnd, this);

            //TODO - maybe not really appropriate because statistics from multiple possible rules are lost
            bool bAddsNothing = ltnChild.lrBestRule == lrBestRule && ltnChild.dictSubNodes == null;
            if (!bAddsNothing)
            {
                // The dictionary is created lazily on the first sub-node that is kept.
                if (dictSubNodes == null)
                {
                    dictSubNodes = new Dictionary <char, LemmaTreeNode>();
                }
                dictSubNodes.Add(chChar, ltnChild);
            }
        }
예제 #10
0
        /// <summary>
        /// Reads this node, and recursively all of its sub-nodes, from a binary stream.
        /// </summary>
        /// <param name="binRead">Reader positioned at the start of the serialized node.</param>
        /// <param name="lsett">Settings stored on the node and passed on to the sub-nodes.</param>
        /// <param name="elExamples">Example list; its Rules collection resolves the rule signatures read from the stream.</param>
        /// <param name="ltnParentNode">Parent of this node; null for a node without parent.</param>
        public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
        {
            this.lsett = lsett;

            // A leading boolean tells whether a sub-node dictionary follows.
            bool bHasSubNodes = binRead.ReadBoolean();
            if (!bHasSubNodes)
            {
                dictSubNodes = null;
            }
            else
            {
                dictSubNodes = new ConcurrentDictionary <char, LemmaTreeNode>();
                // Each entry is a key character followed by the serialized sub-node.
                for (int iRemaining = binRead.ReadInt32(); iRemaining > 0; iRemaining--)
                {
                    char chKey = binRead.ReadChar();
                    dictSubNodes.TryAdd(chKey, new LemmaTreeNode(binRead, this.lsett, elExamples, this));
                }
            }

            this.ltnParentNode = ltnParentNode;

            // Scalar node data, in stream order.
            iSimilarity = binRead.ReadInt32();
            sCondition  = binRead.ReadString();
            bWholeWord  = binRead.ReadBoolean();

            // The best rule is stored as a signature and looked up in the rule collection.
            lrBestRule = elExamples.Rules[binRead.ReadString()];

            // Weighted best rules: a count, then (signature, weight) pairs.
            aBestRules = new RuleWeighted[binRead.ReadInt32()];
            for (int iRule = 0; iRule < aBestRules.Length; iRule++)
            {
                var    lrRule      = elExamples.Rules[binRead.ReadString()];
                double dRuleWeight = binRead.ReadDouble();
                aBestRules[iRule] = new RuleWeighted(lrRule, dRuleWeight);
            }

            // Weight and example range.
            dWeight = binRead.ReadDouble();
            iStart  = binRead.ReadInt32();
            iEnd    = binRead.ReadInt32();

            this.elExamples = elExamples;
        }
예제 #11
0
        /// <summary>
        /// Creates the tree node for the example range iStart..iEnd, builds its sub-nodes recursively and
        /// finally replaces some children by their only grandchild.
        /// </summary>
        /// <param name="lsett">Settings forwarded to the chained constructor.</param>
        /// <param name="elExamples">Example list the indices refer to.</param>
        /// <param name="iStart">Index of the first word of the current group</param>
        /// <param name="iEnd">Index of the last word of the current group</param>
        /// <param name="ltnParentNode">Parent node; null for a node without parent.</param>
        private LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples, int iStart, int iEnd, LemmaTreeNode ltnParentNode) : this(lsett) {
            this.ltnParentNode = ltnParentNode;
            this.dictSubNodes = null;
            this.iStart = iStart;
            this.iEnd = iEnd;
            this.elExamples = elExamples;

            // Indices outside the list, or an inverted range, give a node that only carries the default rule.
            bool bValidRange = iStart < elExamples.Count && iEnd < elExamples.Count && iStart <= iEnd;
            if (!bValidRange) {
                lrBestRule = elExamples.Rules.DefaultRule;
                aBestRules = new RuleWeighted[] { new RuleWeighted(lrBestRule, 0) };
                dWeight = 0;
                return;
            }

            // Condition length: parent's similarity plus one (0 without parent), capped by the word length.
            int iFromParent;
            if (ltnParentNode == null) {
                iFromParent = 0;
            }
            else {
                iFromParent = ltnParentNode.iSimilarity + 1;
            }
            int iConditionLength = Math.Min(iFromParent, elExamples[iStart].Word.Length);

            this.sCondition = elExamples[iStart].Word.Substring(elExamples[iStart].Word.Length - iConditionLength);
            this.iSimilarity = elExamples[iStart].Similarity(elExamples[iEnd]);
            this.bWholeWord = ltnParentNode != null && elExamples[iEnd].Word.Length == ltnParentNode.iSimilarity;

            FindBestRules();
            AddSubAll();

            if (dictSubNodes == null) {
                return;
            }

            //TODO check this heuristics, can be problematic when there are more applicable rules
            // Replacements are gathered first because the dictionary cannot change while being enumerated.
            List<KeyValuePair<char, LemmaTreeNode>> lSkipLevel = new List<KeyValuePair<char, LemmaTreeNode>>();
            foreach (KeyValuePair<char, LemmaTreeNode> kvpChild in dictSubNodes) {
                LemmaTreeNode ltnChild = kvpChild.Value;
                bool bSingleGrandChild = ltnChild.dictSubNodes != null && ltnChild.dictSubNodes.Count == 1;
                if (bSingleGrandChild && ltnChild.lrBestRule == lrBestRule) {
                    // Exactly one value exists, so the first one enumerated is the grandchild.
                    foreach (LemmaTreeNode ltnGrandChild in ltnChild.dictSubNodes.Values) {
                        lSkipLevel.Add(new KeyValuePair<char, LemmaTreeNode>(kvpChild.Key, ltnGrandChild));
                        break;
                    }
                }
            }

            foreach (KeyValuePair<char, LemmaTreeNode> kvpReplacement in lSkipLevel) {
                dictSubNodes[kvpReplacement.Key] = kvpReplacement.Value;
                kvpReplacement.Value.ltnParentNode = this;
            }
        }
예제 #12
0
        /// <summary>
        /// Converts a tree node and, recursively, all of its sub-nodes into a SerializationModel tree.
        /// </summary>
        /// <param name="ltn">Node to convert.</param>
        /// <param name="sb">Writer handed on to the recursive calls; not written to in this method.</param>
        /// <param name="iLevel">Depth of <paramref name="ltn"/>; only incremented for the recursive calls.</param>
        /// <returns>The model for <paramref name="ltn"/> with one child model per sub-node.</returns>
        private SerializationModel SerializeModel(LemmaTreeNode ltn, StreamWriter sb, int iLevel)
        {
            SerializationModel result = new SerializationModel
            {
                matchWholeWord  = ltn.bWholeWord,
                suffixCondition = ltn.sCondition,
                // The last iFrom characters of the condition.
                ruleFrom        = ltn.sCondition.Substring(ltn.sCondition.Length - ltn.lrBestRule.iFrom),
                ruleTo          = ltn.lrBestRule.sTo,
                childNodes      = new List <SerializationModel>()
            };

            if (ltn.dictSubNodes == null)
            {
                return result;
            }

            foreach (LemmaTreeNode ltnSub in ltn.dictSubNodes.Values)
            {
                result.childNodes.Add(SerializeModel(ltnSub, sb, iLevel + 1));
            }
            return result;
        }
예제 #13
0
        /// <summary>
        /// Builds the lemma tree(s) from the current examples unless a root node already exists.
        /// </summary>
        public void BuildModel()
        {
            if (ltnRootNode != null)
            {
                return;
            }

            if (lsett.bBuildFrontLemmatizer)
            {
                // Two trees, built from the lists returned by GetFrontRearExampleList(false) and (true).
                ltnRootNode      = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(false));
                ltnRootNodeFront = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(true));
                return;
            }

            //TODO remove: elExamples.FinalizeAdditions();
            elExamples.FinalizeAdditions();
            ltnRootNode = new LemmaTreeNode(lsett, elExamples);
        }
예제 #14
0
        /// <summary>
        /// Reads this node, and recursively all of its sub-nodes, from a binary stream.
        /// </summary>
        /// <param name="binRead">Reader positioned at the start of the serialized node.</param>
        /// <param name="lsett">Settings stored on the node and passed on to the sub-nodes.</param>
        /// <param name="elExamples">Example list; its Rules collection resolves the rule signatures read from the stream.</param>
        /// <param name="ltnParentNode">Parent of this node; null for a node without parent.</param>
        public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples,
                                LemmaTreeNode ltnParentNode)
        {
            this.lsett = lsett;

            // Leading boolean: does a sub-node dictionary follow?
            bool bHasSubNodes = binRead.ReadBoolean();
            if (!bHasSubNodes)
            {
                dictSubNodes = null;
            }
            else
            {
                dictSubNodes = new Dictionary <char, LemmaTreeNode>();
                int iSubNodeCount = binRead.ReadInt32();
                int iRead         = 0;
                while (iRead < iSubNodeCount)
                {
                    // Key character first, then the serialized sub-node itself.
                    char chKey = binRead.ReadChar();
                    dictSubNodes.Add(chKey, new LemmaTreeNode(binRead, this.lsett, elExamples, this));
                    iRead++;
                }
            }

            this.ltnParentNode = ltnParentNode;

            iSimilarity = binRead.ReadInt32();
            sCondition  = binRead.ReadString();
            bWholeWord  = binRead.ReadBoolean();

            // Rules are stored as signatures and resolved through the rule collection.
            lrBestRule = elExamples.Rules[binRead.ReadString()];

            aBestRules = new RuleWeighted[binRead.ReadInt32()];
            for (int iRule = 0; iRule < aBestRules.Length; iRule++)
            {
                var    lrRule      = elExamples.Rules[binRead.ReadString()];
                double dRuleWeight = binRead.ReadDouble();
                aBestRules[iRule] = new RuleWeighted(lrRule, dRuleWeight);
            }

            dWeight = binRead.ReadDouble();
            iStart  = binRead.ReadInt32();
            iEnd    = binRead.ReadInt32();

            this.elExamples = elExamples;
        }
 /// <summary>
 /// Creates a node by deserializing it from a binary stream.
 /// </summary>
 /// <param name="binRead">Reader to deserialize from.</param>
 /// <param name="lsett">Settings passed through to Deserialize.</param>
 /// <param name="elExamples">Example list passed through to Deserialize.</param>
 /// <param name="ltnParentNode">Parent node passed through to Deserialize.</param>
 public LemmaTreeNode(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
 {
     // All the work is delegated; the constructor only forwards its arguments.
     this.Deserialize(binRead, lsett, elExamples, ltnParentNode);
 }
예제 #16
0
 /// <summary>
 /// Adds one example to the example list and clears the root node.
 /// </summary>
 /// <param name="sWord">Word, forwarded to the example list.</param>
 /// <param name="sLemma">Lemma, forwarded to the example list.</param>
 /// <param name="dWeight">Weight, forwarded to the example list.</param>
 /// <param name="sMsd">MSD tag, forwarded to the example list.</param>
 public void AddExample(string sWord, string sLemma, double dWeight, string sMsd)
 {
     this.elExamples.AddExample(sWord, sLemma, dWeight, sMsd);

     // The root is reset after the example set changes.
     this.ltnRootNode = null;
 }
예제 #17
0
 /// <summary>
 /// Forwards a new example to the example list, then resets the root node to null.
 /// </summary>
 /// <param name="sWord">Word of the example.</param>
 /// <param name="sLemma">Lemma of the example.</param>
 /// <param name="dWeight">Weight of the example.</param>
 /// <param name="sMsd">MSD tag of the example.</param>
 public void AddExample(string sWord, string sLemma, double dWeight, string sMsd) {
     this.elExamples.AddExample(sWord, sLemma, dWeight, sMsd);
     // Resetting the root happens only after the example was accepted by the list.
     this.ltnRootNode = null;
 }
예제 #18
0
 /// <summary>
 /// Loads the settings, the example list and the root node from a Latino binary serializer.
 /// </summary>
 /// <param name="binRead">Serializer to read from; the three parts are read in this order.</param>
 public void Load(Latino.BinarySerializer binRead)
 {
     // Each later part is constructed with the parts read before it.
     this.lsett       = new LemmatizerSettings(binRead);
     this.elExamples  = new ExampleList(binRead, this.lsett);
     this.ltnRootNode = new LemmaTreeNode(binRead, this.lsett, this.elExamples, null);
 }
예제 #19
0
 /// <summary>
 /// Passes a reader and a format string to the example list's AddMultextFile and clears the root node.
 /// </summary>
 /// <param name="srIn">Reader forwarded to the example list.</param>
 /// <param name="sFormat">Format string forwarded to the example list.</param>
 public void AddMultextFile(StreamReader srIn, string sFormat)
 {
     elExamples.AddMultextFile(srIn, sFormat);

     // The root is reset after the example list was changed.
     this.ltnRootNode = null;
 }
예제 #20
0
        /// <summary>
        /// Reads this node from a binary stream: its sub-nodes (recursively), scalar data, best rules,
        /// the per-MSD best-rule dictionary, the per-MSD weight dictionary and the example range.
        /// </summary>
        /// <param name="binRead">Reader positioned at the start of the serialized node.</param>
        /// <param name="lsett">Settings stored on the node and passed on to the sub-nodes.</param>
        /// <param name="elExamples">Example list; its Rules collection resolves the rule signatures read from the stream.</param>
        /// <param name="ltnParentNode">Parent of this node; null for a node without parent.</param>
        public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
        {
            this.lsett = lsett;

            // Sub-nodes: a boolean marker, then a count and (key, node) entries.
            if (!binRead.ReadBoolean())
            {
                dictSubNodes = null;
            }
            else
            {
                dictSubNodes = new Dictionary <char, LemmaTreeNode>();
                int iSubNodeCount = binRead.ReadInt32();
                int iSubNode      = 0;
                while (iSubNode < iSubNodeCount)
                {
                    char chKey = binRead.ReadChar();
                    dictSubNodes.Add(chKey, new LemmaTreeNode(binRead, this.lsett, elExamples, this));
                    iSubNode++;
                }
            }

            this.ltnParentNode = ltnParentNode;

            iSimilarity = binRead.ReadInt32();
            sCondition  = binRead.ReadString();
            bWholeWord  = binRead.ReadBoolean();

            lrBestRule = elExamples.Rules[binRead.ReadString()];

            // Weighted best rules: a count, then (signature, weight) pairs.
            aBestRules = new RuleWeighted[binRead.ReadInt32()];
            for (int iRule = 0; iRule < aBestRules.Length; iRule++)
            {
                LemmaRule lrRule      = elExamples.Rules[binRead.ReadString()];
                double    dRuleWeight = binRead.ReadDouble();
                aBestRules[iRule] = new RuleWeighted(lrRule, dRuleWeight);
            }

            dWeight = binRead.ReadDouble();

            // Per-MSD best rules; a count of -1 stands for a null dictionary.
            int iMsdRuleEntries = binRead.ReadInt32();
            if (iMsdRuleEntries != -1)
            {
                dictMsdBestRules = new Dictionary <string, RuleWeighted[]>();
                for (int iEntry = 0; iEntry < iMsdRuleEntries; iEntry++)
                {
                    string sKey          = binRead.ReadString();
                    int    iRulesForMsd  = binRead.ReadInt32();

                    // A rule count of -1 stands for a null array under this key.
                    RuleWeighted[] aRulesForMsd = null;
                    if (iRulesForMsd != -1)
                    {
                        aRulesForMsd = new RuleWeighted[iRulesForMsd];
                        for (int iSlot = 0; iSlot < iRulesForMsd; iSlot++)
                        {
                            // Both values are read before the signature is resolved.
                            string sSignature = binRead.ReadString();
                            double dWeightOfRule = binRead.ReadDouble();
                            aRulesForMsd[iSlot] = new RuleWeighted(elExamples.Rules[sSignature], dWeightOfRule);
                        }
                    }
                    dictMsdBestRules.Add(sKey, aRulesForMsd);
                }
            }
            else
            {
                dictMsdBestRules = null;
            }

            // Per-MSD weights; again -1 stands for a null dictionary.
            int iMsdWeightEntries = binRead.ReadInt32();
            if (iMsdWeightEntries != -1)
            {
                dictMsdWeights = new Dictionary <string, double>();
                for (int iEntry = 0; iEntry < iMsdWeightEntries; iEntry++)
                {
                    string sKey   = binRead.ReadString();
                    double dValue = binRead.ReadDouble();
                    dictMsdWeights.Add(sKey, dValue);
                }
            }
            else
            {
                dictMsdWeights = null;
            }

            iStart = binRead.ReadInt32();
            iEnd   = binRead.ReadInt32();

            this.elExamples = elExamples;
        }
예제 #21
0
        /// <summary>
        /// Builds the lemma tree(s) from the current examples; does nothing when a root node already exists.
        /// </summary>
        public void BuildModel() {
            bool bAlreadyBuilt = ltnRootNode != null;
            if (bAlreadyBuilt) {
                return;
            }

            if (lsett.bBuildFrontLemmatizer) {
                // One tree per list returned by GetFrontRearExampleList(false) and (true).
                ltnRootNode = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(false));
                ltnRootNodeFront = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(true));
            }
            else {
                //TODO remove: elExamples.FinalizeAdditions();
                elExamples.FinalizeAdditions();
                ltnRootNode = new LemmaTreeNode(lsett, elExamples);
            }
        }
예제 #22
0
        /// <summary>
        /// Restores the settings, the example list and the tree root(s) of this lemmatizer from a binary stream.
        /// </summary>
        /// <param name="binRead">Reader positioned at the start of the serialized lemmatizer data.</param>
        public void Deserialize(BinaryReader binRead) {
            // Stream order: settings, flag, main example list, (optional) rear and front lists, tree(s).
            lsett = new LemmatizerSettings(binRead);
            bool bExamplesFlag = binRead.ReadBoolean();
            elExamples = new ExampleList(binRead, lsett);

            // With the flag set the two lists are derived from the main list, otherwise they are read.
            // The rear list is always obtained before the front list.
            ExampleList elRear = bExamplesFlag
                ? elExamples.GetFrontRearExampleList(false)
                : new ExampleList(binRead, lsett);
            ExampleList elFront = bExamplesFlag
                ? elExamples.GetFrontRearExampleList(true)
                : new ExampleList(binRead, lsett);

            if (lsett.bBuildFrontLemmatizer) {
                ltnRootNode = new LemmaTreeNode(binRead, lsett, elRear, null);
                ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elFront, null);
                return;
            }

            ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null);
        }
예제 #23
0
 /// <summary>
 /// Creates a lemmatizer holding the given settings, a new ExampleList built from them and no tree roots.
 /// </summary>
 /// <param name="lsett">Settings kept on the instance and handed to the new ExampleList.</param>
 public Lemmatizer(LemmatizerSettings lsett) {
     // Roots are null until another member assigns them.
     ltnRootNode = null;
     ltnRootNodeFront = null;

     this.lsett = lsett;
     elExamples = new ExampleList(lsett);
 }
예제 #24
0
 /// <summary>
 /// Loads the settings, the example list and the tree root(s) from a Latino binary serializer.
 /// </summary>
 /// <param name="binRead">Serializer to read from; the reads happen in the order written below.</param>
 public void Load(Latino.BinarySerializer binRead) {
     lsett = new LemmatizerSettings(binRead);
     elExamples = new ExampleList(binRead, lsett);

     if (lsett.bBuildFrontLemmatizer) {
         // Each list is fetched immediately before the tree that uses it is read.
         ExampleList elRear = elExamples.GetFrontRearExampleList(false);
         ltnRootNode = new LemmaTreeNode(binRead, lsett, elRear, null);

         ExampleList elFront = elExamples.GetFrontRearExampleList(true);
         ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elFront, null);
         return;
     }

     ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null);
 }
예제 #25
0
 /// <summary>
 /// Creates a node by loading it from a Latino binary serializer.
 /// </summary>
 /// <param name="binRead">Serializer to load from.</param>
 /// <param name="lsett">Settings passed through to Load.</param>
 /// <param name="elExamples">Example list passed through to Load.</param>
 /// <param name="ltnParentNode">Parent node passed through to Load.</param>
 public LemmaTreeNode(Latino.BinarySerializer binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) {
     // The constructor only forwards its arguments.
     this.Load(binRead, lsett, elExamples, ltnParentNode);
 }
예제 #26
0
 /// <summary>
 /// Forwards a reader and a format string to the example list's AddMultextFile, then sets the root node to null.
 /// </summary>
 /// <param name="srIn">Reader handed to the example list.</param>
 /// <param name="sFormat">Format string handed to the example list.</param>
 public void AddMultextFile(StreamReader srIn, string sFormat) {
     elExamples.AddMultextFile(srIn, sFormat);
     // Reset the root once the list has taken the file.
     this.ltnRootNode = null;
 }
예제 #27
0
 /// <summary>
 /// Creates a node whose content is read from a binary stream.
 /// </summary>
 /// <param name="binRead">Reader to deserialize from.</param>
 /// <param name="lsett">Settings handed on to Deserialize.</param>
 /// <param name="elExamples">Example list handed on to Deserialize.</param>
 /// <param name="ltnParentNode">Parent node handed on to Deserialize.</param>
 public LemmaTreeNode(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) {
     // Pure delegation to Deserialize.
     this.Deserialize(binRead, lsett, elExamples, ltnParentNode);
 }
예제 #28
0
        /// <summary>
        /// Loads this node, and recursively all of its sub-nodes, from a Latino binary serializer.
        /// </summary>
        /// <param name="binRead">Serializer positioned at the start of the node data.</param>
        /// <param name="lsett">Settings stored on the node and passed on to the sub-nodes.</param>
        /// <param name="elExamples">Example list; its Rules collection resolves the rule signatures that are read.</param>
        /// <param name="ltnParentNode">Parent of this node; null for a node without parent.</param>
        public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) {
            this.lsett = lsett;

            // Leading boolean: does a sub-node dictionary follow?
            bool bHasSubNodes = binRead.ReadBool();
            if (!bHasSubNodes) {
                dictSubNodes = null;
            }
            else {
                dictSubNodes = new Dictionary<char, LemmaTreeNode>();
                int iSubNodeCount = binRead.ReadInt();
                int iLoaded = 0;
                while (iLoaded < iSubNodeCount) {
                    // Key character first, then the sub-node itself.
                    char chKey = binRead.ReadChar();
                    dictSubNodes.Add(chKey, new LemmaTreeNode(binRead, this.lsett, elExamples, this));
                    iLoaded++;
                }
            }

            this.ltnParentNode = ltnParentNode;

            iSimilarity = binRead.ReadInt();
            sCondition = binRead.ReadString();
            bWholeWord = binRead.ReadBool();

            // Rules are stored as signatures and resolved through the rule collection.
            lrBestRule = elExamples.Rules[binRead.ReadString()];

            aBestRules = new RuleWeighted[binRead.ReadInt()];
            for (int iRule = 0; iRule < aBestRules.Length; iRule++) {
                var lrRule = elExamples.Rules[binRead.ReadString()];
                double dRuleWeight = binRead.ReadDouble();
                aBestRules[iRule] = new RuleWeighted(lrRule, dRuleWeight);
            }

            dWeight = binRead.ReadDouble();
            iStart = binRead.ReadInt();
            iEnd = binRead.ReadInt();

            this.elExamples = elExamples;
        }
 /// <summary>
 /// Creates a node whose content is loaded from a Latino binary serializer.
 /// </summary>
 /// <param name="binRead">Serializer to load from.</param>
 /// <param name="lsett">Settings handed on to Load.</param>
 /// <param name="elExamples">Example list handed on to Load.</param>
 /// <param name="ltnParentNode">Parent node handed on to Load.</param>
 public LemmaTreeNode(Latino.BinarySerializer binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
 {
     // Pure delegation to Load.
     this.Load(binRead, lsett, elExamples, ltnParentNode);
 }
예제 #30
0
        /// <summary>
        /// Builds a sub-node for the example range iStart..iEnd and stores it under the given character,
        /// except when it has the same best rule as this node and no sub-nodes of its own.
        /// </summary>
        /// <param name="iStart">Index of the first example of the sub-node's range.</param>
        /// <param name="iEnd">Index of the last example of the sub-node's range.</param>
        /// <param name="chChar">Key under which the sub-node is stored in dictSubNodes.</param>
        private void AddSub(int iStart, int iEnd, char chChar) {
            LemmaTreeNode ltnChild = new LemmaTreeNode(lsett, elExamples, iStart, iEnd, this);

            //TODO - maybe not really appropriate because statistics from multiple possible rules are lost
            bool bSameRuleLeaf = ltnChild.lrBestRule == lrBestRule && ltnChild.dictSubNodes == null;
            if (bSameRuleLeaf) {
                return;
            }

            // Create the dictionary only when the first sub-node is actually kept.
            if (dictSubNodes == null) {
                dictSubNodes = new Dictionary<char, LemmaTreeNode>();
            }
            dictSubNodes.Add(chChar, ltnChild);
        }