/// <summary>
        /// Creates the tree node covering the examples with indices <paramref name="iStart"/>
        /// through <paramref name="iEnd"/> and builds its sub-nodes.
        /// </summary>
        /// <remarks>
        /// When the index range is not valid for <paramref name="elExamples"/> (either index is
        /// past the end of the list, or the start lies after the end), the node only receives the
        /// default rule of the example list with weight 0 and no further processing is done.
        /// Otherwise the condition, similarity and whole-word flag are derived from the first and
        /// last example of the range, then <c>FindBestRules()</c> and <c>AddSubAll()</c> are
        /// invoked, and finally redundant single-child sub-nodes are collapsed.
        /// </remarks>
        /// <param name="lsett">Settings forwarded to the chained <c>this(lsett)</c> constructor.</param>
        /// <param name="elExamples">Example list this node refers to; also the source of the default rule.</param>
        /// <param name="iStart">Index of the first word of the current group</param>
        /// <param name="iEnd">Index of the last word of the current group</param>
        /// <param name="ltnParentNode">Parent of this node, or <c>null</c> when no parent exists.</param>
        private LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples, int iStart, int iEnd, LemmaTreeNode ltnParentNode) : this(lsett) {
            this.ltnParentNode = ltnParentNode;
            this.dictSubNodes = null;

            this.iStart = iStart;
            this.iEnd = iEnd;
            this.elExamples = elExamples;

            // Invalid range: fall back to the default rule and stop.
            if (iStart >= elExamples.Count || iEnd >= elExamples.Count || iStart > iEnd) {
                lrBestRule = elExamples.Rules.DefaultRule;
                aBestRules = new RuleWeighted[] { new RuleWeighted(lrBestRule, 0) };
                dWeight = 0;
                return;
            }

            // The condition is the trailing part of the first word: one character longer than the
            // parent's similarity (0 characters without a parent), capped at the word length.
            int iConditionLength = Math.Min(ltnParentNode == null ? 0 : ltnParentNode.iSimilarity + 1, elExamples[iStart].Word.Length);
            this.sCondition = elExamples[iStart].Word.Substring(elExamples[iStart].Word.Length - iConditionLength);
            this.iSimilarity = elExamples[iStart].Similarity(elExamples[iEnd]);
            // Only a node with a parent can be flagged as whole word: the last word of the range
            // must be exactly as long as the parent's similarity.
            this.bWholeWord = ltnParentNode != null && elExamples[iEnd].Word.Length == ltnParentNode.iSimilarity;

            FindBestRules();
            AddSubAll();

            //TODO check this heuristics, can be problematic when there are more applicable rules
            if (dictSubNodes == null)
                return;

            // Collect replacements first; the dictionary must not be modified while it is enumerated.
            List<KeyValuePair<char, LemmaTreeNode>> lReplaceNodes = new List<KeyValuePair<char, LemmaTreeNode>>();
            foreach (KeyValuePair<char, LemmaTreeNode> kvpChild in dictSubNodes) {
                LemmaTreeNode ltnChild = kvpChild.Value;
                if (ltnChild.dictSubNodes == null || ltnChild.dictSubNodes.Count != 1)
                    continue;

                // A child that has a single sub-node and the same best rule as this node is
                // bypassed: its only sub-node takes over the child's key.
                if (ltnChild.lrBestRule == lrBestRule) {
                    // foreach disposes the enumerator; the break leaves after the single element.
                    foreach (LemmaTreeNode ltnGrandChild in ltnChild.dictSubNodes.Values) {
                        lReplaceNodes.Add(new KeyValuePair<char, LemmaTreeNode>(kvpChild.Key, ltnGrandChild));
                        break;
                    }
                }
            }

            foreach (KeyValuePair<char, LemmaTreeNode> kvpReplace in lReplaceNodes) {
                dictSubNodes[kvpReplace.Key] = kvpReplace.Value;
                kvpReplace.Value.ltnParentNode = this;
            }
        }
Example #2
0
        /// <summary>
        /// Fills this node, and recursively its sub-nodes, with data read from a binary stream.
        /// </summary>
        /// <remarks>
        /// Values are consumed in a fixed order: sub-node flag, then (when the flag is set) the
        /// sub-node count followed by key/sub-node pairs, similarity, condition, whole-word flag,
        /// key of the best rule, number of weighted rules, (rule key, weight) pairs, node weight,
        /// start index and end index.
        /// </remarks>
        /// <param name="binRead">Reader the node data is taken from.</param>
        /// <param name="lsett">Settings stored on this node and passed on to every sub-node.</param>
        /// <param name="elExamples">Example list stored on this node; its <c>Rules</c> collection resolves the rule keys read from the stream.</param>
        /// <param name="ltnParentNode">Node recorded as the parent of this node.</param>
        public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
        {
            this.lsett = lsett;

            // A leading flag tells whether any sub-nodes were stored for this node.
            bool bHasSubNodes = binRead.ReadBool();
            if (!bHasSubNodes) {
                dictSubNodes = null;
            }
            else {
                dictSubNodes = new Dictionary<char, LemmaTreeNode>();
                int iSubNodeCount = binRead.ReadInt();
                int iLoaded = 0;
                while (iLoaded < iSubNodeCount) {
                    // Each entry is a key character followed by the sub-node, which reads its own
                    // data from the same stream and gets this node as its parent.
                    char cSubKey = binRead.ReadChar();
                    LemmaTreeNode ltnSubNode = new LemmaTreeNode(binRead, this.lsett, elExamples, this);
                    dictSubNodes.Add(cSubKey, ltnSubNode);
                    iLoaded++;
                }
            }

            this.ltnParentNode = ltnParentNode;

            iSimilarity = binRead.ReadInt();
            sCondition = binRead.ReadString();
            bWholeWord = binRead.ReadBool();

            // Rules are stored as string keys and resolved through the rule collection.
            lrBestRule = elExamples.Rules[binRead.ReadString()];

            int iBestRuleCount = binRead.ReadInt();
            aBestRules = new RuleWeighted[iBestRuleCount];
            for (int iRule = 0; iRule < iBestRuleCount; iRule++) {
                // Arguments are evaluated left to right: rule key first, then its weight.
                aBestRules[iRule] = new RuleWeighted(elExamples.Rules[binRead.ReadString()], binRead.ReadDouble());
            }

            dWeight = binRead.ReadDouble();

            iStart = binRead.ReadInt();
            iEnd = binRead.ReadInt();
            this.elExamples = elExamples;
        }