Example #1
0
        /// <summary>
        /// Indexes every developing word group and publishes the results as a
        /// table addressed by <c>TransformCharToIndex</c> of the group's key.
        /// </summary>
        /// <param name="wordGroups">Developing word groups keyed by a character.</param>
        void DoIndex(Dictionary <char, DevelopingWordGroup> wordGroups)
        {
            // table sized to span firstChar..lastChar inclusive
            int slotCount = this.lastChar - this.firstChar + 1;
            WordGroup[] indexedGroups = new WordGroup[slotCount];

            foreach (KeyValuePair<char, DevelopingWordGroup> entry in wordGroups)
            {
                int slot = TransformCharToIndex(entry.Key);
                DevelopingWordGroup developing = entry.Value;

                // build the group's index, then keep only its finished result
                developing.DoIndex(this.textBuffer, this);
                indexedGroups[slot] = developing.ResultWordGroup;
            }

            this.wordGroups = indexedGroups;
        }
Example #2
0
        /// <summary>
        /// Reads a word list (one word per line) from a text file into
        /// <c>textBuffer</c> and builds the word-group index from it.
        /// </summary>
        /// <remarks>
        /// Only the first call does any work: once <c>textBuffer</c> is non-null the
        /// method returns immediately. Each line is trimmed; empty lines and lines
        /// starting with '#' are skipped.
        /// </remarks>
        /// <param name="filename">Path of the word-list file to read.</param>
        /// <exception cref="NotSupportedException">
        /// <c>firstChar</c> or <c>lastChar</c> is still '\0'; in DEBUG builds also
        /// thrown when a word is longer than <see cref="byte.MaxValue"/> characters.
        /// </exception>
        public void LoadFromTextfile(string filename)
        {
            //once only
            if (textBuffer != null)
            {
                return;
            }
            if (firstChar == '\0' || lastChar == '\0')
            {
                throw new NotSupportedException();
            }

            //---------------
            Dictionary <char, DevelopingWordGroup> wordGroups = new Dictionary <char, DevelopingWordGroup>();

            // The file is only read, so ask for read access explicitly: the
            // (path, mode) constructor requests read/write access, which fails
            // on read-only files.
            using (FileStream fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
            using (StreamReader reader = new StreamReader(fs))
            {
                //init with filesize
                // NOTE(review): textBuffer is assigned before reading finishes, so a
                // load that throws part-way still trips the once-only guard above
                // on the next call - confirm this is intended.
                textBuffer = new TextBuffer((int)fs.Length);

                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    char[] lineBuffer = line.Trim().ToCharArray();
                    int    lineLen    = lineBuffer.Length;

                    // skip blank lines and '#' lines
                    if (lineLen == 0 || lineBuffer[0] == '#')
                    {
                        continue;
                    }
                    char c0 = lineBuffer[0];

                    // remember where this word starts before appending it
                    int startAt = textBuffer.CurrentPosition;
                    textBuffer.AddWord(lineBuffer);

#if DEBUG
                    if (lineLen > byte.MaxValue)
                    {
                        throw new NotSupportedException();
                    }
#endif

                    // NOTE(review): outside DEBUG builds a length above 255 is
                    // narrowed by the byte cast below - confirm inputs never exceed it.
                    WordSpan wordspan = new WordSpan(startAt, (byte)lineLen);

                    //each wordgroup contains text span
                    DevelopingWordGroup found;
                    if (!wordGroups.TryGetValue(c0, out found))
                    {
                        // first word seen for this leading char: its group's
                        // prefix span is that single char
                        found = new DevelopingWordGroup(new WordSpan(startAt, 1));
                        wordGroups.Add(c0, found);
                    }
                    found.AddWordSpan(wordspan);
                }
                // reader and fs are disposed by the using blocks; no explicit Close() needed
            }
            //------------------------------------------------------------------
            textBuffer.Freeze();
            //------------------------------------------------------------------
            //do index
            DoIndex(wordGroups);

            //clear, not used
            wordGroups.Clear();
        }
Example #3
0
        WordGroup _resultWordGroup; // available after DoIndex() has been called

        /// <summary>
        /// Splits this group's word spans into sub-groups keyed by the character
        /// that follows the current prefix, indexes each sub-group, and stores the
        /// finished <c>WordGroup</c> in <c>_resultWordGroup</c>.
        /// </summary>
        /// <param name="textBuffer">Buffer the word spans refer to.</param>
        /// <param name="owner">Dictionary supplying <c>FirstChar</c> and <c>LastChar</c>, which size and offset the sub-group table.</param>
        internal void DoIndex(TextBuffer textBuffer, CustomDic owner)
        {
            int splitAt = this.PrefixLen;

            // prefix longer than 7: stop recursing and use the small-amount path
            if (splitAt > 7)
            {
                DoIndexOfSmallAmount(textBuffer);
#if DEBUG
                dbugDataState = debugDataState.TooLongPrefix;
#endif
                return;
            }

            if (subGroups == null)
            {
                int slotCount = owner.LastChar - owner.FirstChar + 1;
                subGroups = new DevelopingWordGroup[slotCount];
            }

            // ---- pass 1: hand every span to the sub-group of its next char ----
            bool prefixMatched = false;
            int  spanCount     = wordSpanList.Count;
            for (int n = 0; n < spanCount; ++n)
            {
                WordSpan span = wordSpanList[n];

                if (span.len <= splitAt)
                {
                    // span is no longer than the prefix: it can only be the prefix itself
                    if (!prefixMatched && span.SameTextContent(this.prefixSpan, textBuffer))
                    {
                        prefixMatched     = true;
                        this.PrefixIsWord = true;
                    }
                    continue;
                }

                char nextChar = span.GetChar(splitAt, textBuffer);
                int  slot     = nextChar - owner.FirstChar;
                DevelopingWordGroup target = subGroups[slot];
                if (target == null)
                {
                    // first span for this char: the new group's prefix is one char longer
                    target = new DevelopingWordGroup(new WordSpan(span.startAt, (byte)(splitAt + 1)));
                    subGroups[slot] = target;
                }
                target.AddWordSpan(span);
            }
#if DEBUG
            this.dbugDataState = debugDataState.Indexed;
#endif
            // the spans now live in the sub-groups; drop this group's own list
            wordSpanList.Clear();
            wordSpanList = null;

            // ---- pass 2: index each populated sub-group ----
            bool anyPopulated = false;
            foreach (DevelopingWordGroup child in this.subGroups)
            {
                if (child == null)
                {
                    continue;
                }
                anyPopulated = true;

                // Performance trade-off: a sub-group that is not indexed further
                // must be searched linearly, so groups with more than two spans
                // are indexed recursively. That makes lookup faster but may
                // lengthen dictionary building.
                if (child.WordSpanListCount <= 2)
                {
#if DEBUG
                    child.dbugDataState = debugDataState.SmallAmountOfMembers;
#endif
                    child.DoIndexOfSmallAmount(textBuffer);
                }
                else
                {
                    child.DoIndex(textBuffer, owner);
                }
            }
#if DEBUG
            this.dbugDataState = debugDataState.Indexed;
#endif

            // ---- pass 3: gather the finished results (or drop an empty table) ----
            WordGroup[] indexedChildren = null;
            if (anyPopulated)
            {
                indexedChildren = new WordGroup[subGroups.Length];
                for (int n = subGroups.Length - 1; n >= 0; --n)
                {
                    DevelopingWordGroup child = subGroups[n];
                    if (child != null)
                    {
                        indexedChildren[n] = child.ResultWordGroup;
                    }
                }
            }
            else
            {
                subGroups = null;
            }

            this._resultWordGroup = new WordGroup(this.prefixSpan, indexedChildren, null, this.PrefixIsWord);
        }
Example #4
0
        WordGroup _resultWordGroup; // filled in by DoIndex()

        /// <summary>
        /// Recursively indexes this developing group: distributes its word spans
        /// into per-character sub-groups, indexes those, and builds the final
        /// <c>WordGroup</c> into <c>_resultWordGroup</c>.
        /// </summary>
        /// <param name="textBuffer">Buffer the word spans point into.</param>
        /// <param name="owner">Dictionary whose <c>FirstChar</c>/<c>LastChar</c> define the sub-group table.</param>
        internal void DoIndex(TextBuffer textBuffer, CustomDic owner)
        {
            // a prefix longer than 7 is not split any further
            if (this.PrefixLen > 7)
            {
                DoIndexOfSmallAmount(textBuffer);
#if DEBUG
                dbugDataState = debugDataState.TooLongPrefix;
#endif
                return;
            }

            if (subGroups == null)
            {
                subGroups = new DevelopingWordGroup[owner.LastChar - owner.FirstChar + 1];
            }

            DistributeSpansBySeparatorChar(textBuffer, owner, this.PrefixLen);
#if DEBUG
            this.dbugDataState = debugDataState.Indexed;
#endif
            // spans have been handed to the sub-groups; release this group's list
            wordSpanList.Clear();
            wordSpanList = null;

            bool hasChildren = IndexExistingSubGroups(textBuffer, owner);
#if DEBUG
            this.dbugDataState = debugDataState.Indexed;
#endif
            if (!hasChildren)
            {
                subGroups = null;
            }

            WordGroup[] childResults = CollectSubGroupResults();
            this._resultWordGroup = new WordGroup(
                this.prefixSpan,
                childResults,
                null,
                this.PrefixIsWord);
        }

        // Moves each span longer than separatorAt into the sub-group chosen by its
        // char at separatorAt; flags PrefixIsWord when a remaining span equals the prefix.
        private void DistributeSpansBySeparatorChar(TextBuffer textBuffer, CustomDic owner, int separatorAt)
        {
            bool prefixAlreadyFound = false;
            int  total              = wordSpanList.Count;
            for (int k = 0; k < total; ++k)
            {
                WordSpan current = wordSpanList[k];
                if (current.len > separatorAt)
                {
                    char key      = current.GetChar(separatorAt, textBuffer);
                    int  keyIndex = key - owner.FirstChar;
                    DevelopingWordGroup bucket = subGroups[keyIndex];
                    if (bucket == null)
                    {
                        // no group for this char yet: create one whose prefix is one char longer
                        bucket = new DevelopingWordGroup(new WordSpan(current.startAt, (byte)(separatorAt + 1)));
                        subGroups[keyIndex] = bucket;
                    }
                    bucket.AddWordSpan(current);
                }
                else if (!prefixAlreadyFound && current.SameTextContent(this.prefixSpan, textBuffer))
                {
                    prefixAlreadyFound = true;
                    this.PrefixIsWord  = true;
                }
            }
        }

        // Indexes every non-null sub-group; returns true when at least one exists.
        private bool IndexExistingSubGroups(TextBuffer textBuffer, CustomDic owner)
        {
            bool sawAny = false;
            foreach (DevelopingWordGroup sub in this.subGroups)
            {
                if (sub == null)
                {
                    continue;
                }
                sawAny = true;

                // Performance factor: a group that is not indexed further has to
                // be searched linearly, so only groups with at most two spans
                // skip the recursive DoIndex(). Recursing speeds up search but
                // may lengthen dictionary-building time.
                if (sub.WordSpanListCount > 2)
                {
                    sub.DoIndex(textBuffer, owner);
                    continue;
                }
#if DEBUG
                sub.dbugDataState = debugDataState.SmallAmountOfMembers;
#endif
                sub.DoIndexOfSmallAmount(textBuffer);
            }
            return sawAny;
        }

        // Copies each sub-group's ResultWordGroup into a parallel array; null when subGroups is null.
        private WordGroup[] CollectSubGroupResults()
        {
            if (subGroups == null)
            {
                return null;
            }

            WordGroup[] results = new WordGroup[subGroups.Length];
            for (int k = subGroups.Length - 1; k >= 0; --k)
            {
                DevelopingWordGroup sub = subGroups[k];
                if (sub != null)
                {
                    results[k] = sub.ResultWordGroup;
                }
            }
            return results;
        }
Example #5
0
        /// <summary>
        /// Loads words from a text file (one per line) into <c>textBuffer</c>,
        /// groups them by first character, and indexes the groups.
        /// </summary>
        /// <remarks>
        /// Does nothing if <c>textBuffer</c> has already been created. Lines are
        /// trimmed before use; blank lines and lines beginning with '#' are ignored.
        /// </remarks>
        /// <param name="filename">Path of the text file to load.</param>
        /// <exception cref="NotSupportedException">
        /// Thrown when <c>firstChar</c> or <c>lastChar</c> is '\0', and, in DEBUG
        /// builds, when a word has more than <see cref="byte.MaxValue"/> characters.
        /// </exception>
        public void LoadFromTextfile(string filename)
        {
            //once only
            if (textBuffer != null)
            {
                return;
            }
            if (firstChar == '\0' || lastChar == '\0')
            {
                throw new NotSupportedException();
            }

            //---------------
            Dictionary<char, DevelopingWordGroup> wordGroups = new Dictionary<char, DevelopingWordGroup>();

            // Open read-only: FileStream(path, mode) would request read/write
            // access and fail on a read-only file even though we never write.
            using (FileStream fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
            using (StreamReader reader = new StreamReader(fs))
            {
                //init with filesize
                textBuffer = new TextBuffer((int)fs.Length);

                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    char[] lineBuffer = line.Trim().ToCharArray();
                    int lineLen = lineBuffer.Length;

                    // nothing to store for blank lines or '#' lines
                    if (lineLen == 0 || lineBuffer[0] == '#')
                    {
                        continue;
                    }
                    char c0 = lineBuffer[0];

                    // capture the write position first: it is where this word begins
                    int startAt = textBuffer.CurrentPosition;
                    textBuffer.AddWord(lineBuffer);

#if DEBUG
                    if (lineLen > byte.MaxValue)
                    {
                        throw new NotSupportedException();
                    }
#endif

                    // NOTE(review): the length is stored as a byte; outside DEBUG
                    // builds an over-long word is not rejected - confirm inputs.
                    WordSpan wordspan = new WordSpan(startAt, (byte)lineLen);

                    //each wordgroup contains text span
                    DevelopingWordGroup found;
                    if (!wordGroups.TryGetValue(c0, out found))
                    {
                        // new leading char: start a group with a one-char prefix span
                        found = new DevelopingWordGroup(new WordSpan(startAt, 1));
                        wordGroups.Add(c0, found);
                    }
                    found.AddWordSpan(wordspan);
                }
                // the using blocks dispose reader and fs; explicit Close() calls are unnecessary
            }
            //------------------------------------------------------------------
            textBuffer.Freeze();
            //------------------------------------------------------------------
            //do index
            DoIndex(wordGroups);

            //clear, not used
            wordGroups.Clear();
        }