Beispiel #1
0
        /// <summary>
        /// Builds the tree node that covers the examples from <paramref name="iStart"/> to
        /// <paramref name="iEnd"/> and, when that range is valid, everything below it.
        /// </summary>
        /// <remarks>
        /// If either index lies outside <paramref name="elExamples"/> or the range is inverted, the
        /// node only receives the default rule (weight 0) and construction stops there.
        /// Otherwise the condition, similarity and whole-word flag are derived from the first and
        /// last example of the range, <c>FindBestRules</c> and <c>AddSubAll</c> are run, and finally
        /// every child that has exactly one sub-node and the same best rule as this node is
        /// replaced by that single sub-node.
        /// </remarks>
        /// <param name="lsett">Settings handed to the chained <c>this(lsett)</c> constructor.</param>
        /// <param name="elExamples">Example list the index range refers to; also the source of the default rule.</param>
        /// <param name="iStart">Index of the first word of the current group</param>
        /// <param name="iEnd">Index of the last word of the current group</param>
        /// <param name="ltnParentNode">Parent of the new node, or <c>null</c> when there is none.</param>
        private LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples, int iStart, int iEnd,
                              LemmaTreeNode ltnParentNode) : this(lsett)
        {
            this.ltnParentNode = ltnParentNode;
            dictSubNodes       = null;

            this.iStart     = iStart;
            this.iEnd       = iEnd;
            this.elExamples = elExamples;

            // Range does not address existing examples: fall back to the default rule and stop.
            if (iStart >= elExamples.Count || iEnd >= elExamples.Count || iStart > iEnd)
            {
                lrBestRule = elExamples.Rules.DefaultRule;
                aBestRules = new RuleWeighted[] { new RuleWeighted(lrBestRule, 0) };
                dWeight    = 0;
                return;
            }

            // The condition is the trailing part of the first word: one character longer than the
            // parent's similarity (zero characters without a parent), capped at the word length.
            string sFirstWord       = elExamples[iStart].Word;
            int    iParentBased     = ltnParentNode == null ? 0 : ltnParentNode.iSimilarity + 1;
            int    iConditionLength = Math.Min(iParentBased, sFirstWord.Length);

            sCondition  = sFirstWord.Substring(sFirstWord.Length - iConditionLength);
            iSimilarity = elExamples[iStart].Similarity(elExamples[iEnd]);
            bWholeWord  = ltnParentNode != null && elExamples[iEnd].Word.Length == ltnParentNode.iSimilarity;

            FindBestRules();
            AddSubAll();

            //TODO check this heuristics, can be problematic when there are more applicable rules
            if (dictSubNodes == null)
            {
                return;
            }

            // Replacements are collected first because dictSubNodes must not be modified while
            // it is being enumerated.
            List<KeyValuePair<char, LemmaTreeNode>> lReplaceNodes = new List<KeyValuePair<char, LemmaTreeNode>>();
            foreach (KeyValuePair<char, LemmaTreeNode> kvpChild in dictSubNodes)
            {
                LemmaTreeNode ltnChild = kvpChild.Value;
                if (ltnChild.dictSubNodes == null || ltnChild.dictSubNodes.Count != 1)
                {
                    continue;
                }

                if (ltnChild.lrBestRule == lrBestRule)
                {
                    // foreach + break takes the single grandchild and disposes the enumerator.
                    foreach (LemmaTreeNode ltnGrandChild in ltnChild.dictSubNodes.Values)
                    {
                        lReplaceNodes.Add(new KeyValuePair<char, LemmaTreeNode>(kvpChild.Key, ltnGrandChild));
                        break;
                    }
                }
            }

            // Splice each grandchild in where its parent used to be and re-parent it to this node.
            foreach (KeyValuePair<char, LemmaTreeNode> kvpReplacement in lReplaceNodes)
            {
                dictSubNodes[kvpReplacement.Key]   = kvpReplacement.Value;
                kvpReplacement.Value.ltnParentNode = this;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Fills this node, and recursively its sub-nodes, from a binary stream.
        /// </summary>
        /// <remarks>
        /// Values are consumed in this order: sub-node presence flag, sub-node count followed by
        /// (key, node) pairs, similarity, condition, whole-word flag, best-rule signature,
        /// best-rule count followed by (signature, weight) pairs, weight, start index, end index.
        /// </remarks>
        /// <param name="binRead">Reader the values listed above are taken from.</param>
        /// <param name="lsett">Settings stored on this node and passed to every sub-node.</param>
        /// <param name="elExamples">Example list whose <c>Rules</c> indexer resolves rule signatures.</param>
        /// <param name="ltnParentNode">Node recorded as this node's parent.</param>
        public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
        {
            this.lsett = lsett;

            // The first boolean says whether a sub-node dictionary follows.
            bool hasSubNodes = binRead.ReadBoolean();
            if (!hasSubNodes)
            {
                dictSubNodes = null;
            }
            else
            {
                dictSubNodes = new ConcurrentDictionary<char, LemmaTreeNode>();

                // Each entry is a key character followed by a complete serialized sub-node.
                for (int remaining = binRead.ReadInt32(); remaining > 0; remaining--)
                {
                    char branchKey  = binRead.ReadChar();
                    var  branchNode = new LemmaTreeNode(binRead, this.lsett, elExamples, this);

                    // The TryAdd result is ignored, so a repeated key keeps its first node.
                    dictSubNodes.TryAdd(branchKey, branchNode);
                }
            }

            this.ltnParentNode = ltnParentNode;

            // Scalar node properties.
            iSimilarity = binRead.ReadInt32();
            sCondition  = binRead.ReadString();
            bWholeWord  = binRead.ReadBoolean();

            // The best rule is stored as its signature and looked up in the rule list.
            string bestSignature = binRead.ReadString();
            lrBestRule = elExamples.Rules[bestSignature];

            // Weighted best rules: a count, then (signature, weight) pairs.
            int bestCount = binRead.ReadInt32();
            aBestRules = new RuleWeighted[bestCount];
            for (int slot = 0; slot < bestCount; slot++)
            {
                var    rule   = elExamples.Rules[binRead.ReadString()];
                double weight = binRead.ReadDouble();
                aBestRules[slot] = new RuleWeighted(rule, weight);
            }

            // Node weight and the example range the node covers.
            dWeight = binRead.ReadDouble();
            iStart  = binRead.ReadInt32();
            iEnd    = binRead.ReadInt32();

            this.elExamples = elExamples;
        }
Beispiel #3
0
        /// <summary>
        /// Restores the state of this node and of its whole sub-tree from a binary stream.
        /// </summary>
        /// <remarks>
        /// Stream layout, in read order: flag for the sub-node table, sub-node count and
        /// (key, node) entries, similarity, condition, whole-word flag, signature of the best
        /// rule, number of weighted rules and their (signature, weight) entries, node weight,
        /// start index, end index.
        /// </remarks>
        /// <param name="binRead">Source of the serialized values.</param>
        /// <param name="lsett">Settings kept on this node and forwarded to the sub-nodes.</param>
        /// <param name="elExamples">Example list; its <c>Rules</c> indexer maps signatures to rules.</param>
        /// <param name="ltnParentNode">Parent to associate with this node.</param>
        public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples,
                                LemmaTreeNode ltnParentNode)
        {
            this.lsett = lsett;

            bool subNodesPresent = binRead.ReadBoolean();
            if (subNodesPresent)
            {
                // The field is assigned before the children are built, as the children receive this node.
                Dictionary<char, LemmaTreeNode> children = new Dictionary<char, LemmaTreeNode>();
                dictSubNodes = children;

                int childCount = binRead.ReadInt32();
                int childIndex = 0;
                while (childIndex < childCount)
                {
                    // Key first, then the recursively deserialized sub-node.
                    char branchChar = binRead.ReadChar();
                    children.Add(branchChar, new LemmaTreeNode(binRead, this.lsett, elExamples, this));
                    childIndex++;
                }
            }
            else
            {
                dictSubNodes = null;
            }

            this.ltnParentNode = ltnParentNode;

            iSimilarity = binRead.ReadInt32();
            sCondition  = binRead.ReadString();
            bWholeWord  = binRead.ReadBoolean();

            // Rules are persisted by signature only and resolved against the rule list.
            string bestRuleSignature = binRead.ReadString();
            lrBestRule = elExamples.Rules[bestRuleSignature];

            RuleWeighted[] weightedRules = new RuleWeighted[binRead.ReadInt32()];
            aBestRules = weightedRules;
            for (int slot = 0; slot < weightedRules.Length; slot++)
            {
                weightedRules[slot] = new RuleWeighted(
                    elExamples.Rules[binRead.ReadString()],
                    binRead.ReadDouble());
            }

            dWeight = binRead.ReadDouble();

            // Range of examples covered by this node.
            iStart = binRead.ReadInt32();
            iEnd   = binRead.ReadInt32();

            this.elExamples = elExamples;
        }
        /// <summary>
        /// Restores this node, its sub-tree and its two per-MSD tables from a binary stream.
        /// </summary>
        /// <remarks>
        /// Read order: sub-node flag, sub-node count and (key, node) entries, similarity,
        /// condition, whole-word flag, best-rule signature, best-rule count and (signature, weight)
        /// entries, weight, <c>dictMsdBestRules</c>, <c>dictMsdWeights</c>, start index, end index.
        /// A stored count of -1 stands for <c>null</c> for either dictionary and for an individual
        /// rule array inside <c>dictMsdBestRules</c>.
        /// </remarks>
        /// <param name="binRead">Reader the serialized values are taken from.</param>
        /// <param name="lsett">Settings stored on this node and passed to every sub-node.</param>
        /// <param name="elExamples">Example list whose <c>Rules</c> indexer resolves rule signatures.</param>
        /// <param name="ltnParentNode">Node recorded as this node's parent.</param>
        public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
        {
            this.lsett = lsett;

            if (binRead.ReadBoolean())
            {
                dictSubNodes = new Dictionary<char, LemmaTreeNode>();
                int iCount = binRead.ReadInt32();
                for (int i = 0; i < iCount; i++)
                {
                    // Key character first, then the recursively deserialized sub-node.
                    char          cKey   = binRead.ReadChar();
                    LemmaTreeNode ltrSub = new LemmaTreeNode(binRead, this.lsett, elExamples, this);
                    dictSubNodes.Add(cKey, ltrSub);
                }
            }
            else
            {
                dictSubNodes = null;
            }

            this.ltnParentNode = ltnParentNode;

            iSimilarity = binRead.ReadInt32();
            sCondition  = binRead.ReadString();
            bWholeWord  = binRead.ReadBoolean();

            // Rules are stored by signature and looked up in the rule list.
            lrBestRule = elExamples.Rules[binRead.ReadString()];

            int iCountBest = binRead.ReadInt32();
            aBestRules = ReadWeightedRules(binRead, elExamples, iCountBest);

            dWeight = binRead.ReadDouble();

            dictMsdBestRules = ReadMsdBestRules(binRead, elExamples);
            dictMsdWeights   = ReadMsdWeights(binRead);

            iStart          = binRead.ReadInt32();
            iEnd            = binRead.ReadInt32();
            this.elExamples = elExamples;
        }

        /// <summary>
        /// Reads <paramref name="count"/> (signature, weight) pairs and turns each into a
        /// <c>RuleWeighted</c>, resolving the signature through <c>elExamples.Rules</c>.
        /// </summary>
        private static RuleWeighted[] ReadWeightedRules(BinaryReader binRead, ExampleList elExamples, int count)
        {
            RuleWeighted[] rules = new RuleWeighted[count];
            for (int ruleId = 0; ruleId < count; ruleId++)
            {
                string    ruleSignature = binRead.ReadString();
                double    ruleWeight    = binRead.ReadDouble();
                LemmaRule rule          = elExamples.Rules[ruleSignature];
                rules[ruleId] = new RuleWeighted(rule, ruleWeight);
            }
            return rules;
        }

        /// <summary>
        /// Reads the MSD-keyed table of weighted rule arrays; a leading count of -1 yields <c>null</c>.
        /// </summary>
        private static Dictionary<string, RuleWeighted[]> ReadMsdBestRules(BinaryReader binRead, ExampleList elExamples)
        {
            int entryCount = binRead.ReadInt32();
            if (entryCount == -1)
            {
                return null;
            }

            Dictionary<string, RuleWeighted[]> table = new Dictionary<string, RuleWeighted[]>();
            for (int msdId = 0; msdId < entryCount; msdId++)
            {
                string sMsd              = binRead.ReadString();
                int    ruleWeightedCount = binRead.ReadInt32();

                // -1 marks an entry whose rule array was null when it was written.
                RuleWeighted[] lRuleWeighted = ruleWeightedCount == -1
                    ? null
                    : ReadWeightedRules(binRead, elExamples, ruleWeightedCount);

                table.Add(sMsd, lRuleWeighted);
            }
            return table;
        }

        /// <summary>
        /// Reads the MSD-keyed table of weights; a leading count of -1 yields <c>null</c>.
        /// </summary>
        private static Dictionary<string, double> ReadMsdWeights(BinaryReader binRead)
        {
            int entryCount = binRead.ReadInt32();
            if (entryCount == -1)
            {
                return null;
            }

            Dictionary<string, double> table = new Dictionary<string, double>();
            for (int msdId = 0; msdId < entryCount; msdId++)
            {
                string sMsd       = binRead.ReadString();
                double dMsdWeight = binRead.ReadDouble();
                table.Add(sMsd, dMsdWeight);
            }
            return table;
        }