Exemple #1
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Compares the given word (lower-cased) with the previously processed word and
        /// records it through AddWord when the two are identical. The word then becomes the
        /// remembered "previous" word, unless non-whitespace characters follow it.
        /// </summary>
        /// <param name="tok">The token that contains the word.</param>
        /// <param name="wap">The word and the characters that follow it.</param>
        /// ------------------------------------------------------------------------------------
        private void ProcessWord(ITextToken tok, WordAndPunct wap)
        {
            // Entries without a word contribute nothing and leave the remembered word alone.
            if (wap.Word == string.Empty)
            {
                return;
            }

            string currentWord = wap.Word.ToLower();
            bool repeatsPrevious = (prevWord == currentWord);

            if (repeatsPrevious)
            {
                AddWord(tok, wap);
            }

            prevWord = currentWord;

            // Anything other than white space after the word (such as quotes) means the
            // next word is not considered a repetition of this one, even if identical.
            bool followedByNonWhiteSpace = false;

            foreach (char trailing in wap.Punct)
            {
                if (!char.IsWhiteSpace(trailing))
                {
                    followedByNonWhiteSpace = true;
                    break;
                }
            }

            if (followedByNonWhiteSpace)
            {
                // NOTE(review): Reset() is defined elsewhere; presumably it clears prevWord.
                Reset();
            }
        }
Exemple #2
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Splits the word into an <c>AWord</c> and records it through AddWord unless one of
        /// the prefix/suffix exemptions checked below applies.
        /// </summary>
        /// <param name="tok">The token that contains the word.</param>
        /// <param name="wap">The word (and its offset) to examine.</param>
        /// <param name="desiredKey">Forwarded unchanged to AddWord.</param>
        /// ------------------------------------------------------------------------------------
        public void ProcessWord(ITextToken tok, WordAndPunct wap, string desiredKey)
        {
            AWord word = new AWord(wap.Word, m_categorizer);

            // Nothing to report when the word has neither a prefix nor a suffix.
            if (word.Prefix == string.Empty && word.Suffix == string.Empty)
            {
                return;
            }
            if (m_uncapitalizedPrefixes.Contains(word.Prefix))
            {
                return;
            }
            // Wildcard entry of the form "*x": matches any prefix ending in character x.
            // The prefix may be empty here (only the suffix was non-empty), so guard the
            // index to avoid reading position -1.
            if (word.Prefix.Length > 0 &&
                m_uncapitalizedPrefixes.Contains("*" + word.Prefix[word.Prefix.Length - 1]))
            {
                return;
            }
            // A bare "*" entry exempts every word.
            if (m_uncapitalizedPrefixes.Contains("*"))
            {
                return;
            }
            if (m_capitalizedSuffixes.Contains(word.Suffix))
            {
                return;
            }
            if (m_capitalizedPrefixes.Contains(word.Prefix))
            {
                return;
            }

            AddWord(tok, wap, desiredKey);
        }
Exemple #3
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Builds a substring reference for the word and appends it to <c>result</c> when
        /// <c>desiredKey</c> is the empty string or equals the substring's inventory text.
        /// </summary>
        /// <param name="tok">The token that contains the word.</param>
        /// <param name="wap">Supplies the word's offset and length within the token.</param>
        /// ------------------------------------------------------------------------------------
        private void AddWord(ITextToken tok, WordAndPunct wap)
        {
            TextTokenSubstring candidate =
                new TextTokenSubstring(tok, wap.Offset, wap.Word.Length);

            // A non-empty key acts as a filter: only matching inventory text is kept.
            if (desiredKey != "")
            {
                if (desiredKey != candidate.InventoryText)
                {
                    return;
                }
            }

            result.Add(candidate);
        }
Exemple #4
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Builds a substring reference for the word and appends it to <c>m_result</c> when
        /// no key is requested or the key equals the substring's inventory text.
        /// </summary>
        /// <param name="tok">The token that contains the word.</param>
        /// <param name="wap">Supplies the word's offset and length within the token.</param>
        /// <param name="desiredKey">Null or empty to accept every word; otherwise only a word
        /// whose inventory text equals this value is added.</param>
        /// ------------------------------------------------------------------------------------
        private void AddWord(ITextToken tok, WordAndPunct wap, string desiredKey)
        {
            TextTokenSubstring candidate =
                new TextTokenSubstring(tok, wap.Offset, wap.Word.Length);

            // A non-empty key acts as a filter: only matching inventory text is kept.
            if (!string.IsNullOrEmpty(desiredKey))
            {
                if (desiredKey != candidate.InventoryText)
                {
                    return;
                }
            }

            m_result.Add(candidate);
        }
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Splits text into a sequence of word/punctuation pairs. Each pair holds a word
        /// (possibly empty), the offset at which it starts, and the run of characters that
        /// follows it up to the next word-forming character or digit.
        /// </summary>
        /// <param name="text">The text to split.</param>
        /// <returns>The pairs found, in the order they occur in the text.</returns>
        /// ------------------------------------------------------------------------------------
        public override List<WordAndPunct> WordAndPuncts(string text)
        {
            List<WordAndPunct> pairs = new List<WordAndPunct>();
            int pos = 0;

            while (pos < text.Length)
            {
                WordAndPunct pair = new WordAndPunct();

                // Leading separator characters belong to neither word nor punctuation.
                while (pos < text.Length && m_charPropEngine.get_IsSeparator(text[pos]))
                {
                    pos++;
                }

                // Only separators remained: there is no further pair to emit.
                if (pos == text.Length)
                {
                    break;
                }

                pair.Offset = pos;

                // Scan the word. The iterator part only runs when the character was accepted.
                for (bool atWordStart = true; pos < text.Length; pos++, atWordStart = false)
                {
                    char wordChar = text[pos];

                    if (IsSingleCharacterWord(wordChar))
                    {
                        // At the start, the character is a complete word by itself and is
                        // consumed. Otherwise it ends the current word and is left in place.
                        if (atWordStart)
                        {
                            pos++;
                        }
                        break;
                    }

                    // Digits are allowed inside words; anything else must be word-forming.
                    if (!m_charPropEngine.get_IsNumber(wordChar) && !IsWordFormingCharacter(wordChar))
                    {
                        break;
                    }
                }

                pair.Word = text.Substring(pair.Offset, pos - pair.Offset);

                // Everything up to the next word-forming character or digit is punctuation.
                int punctStart = pos;

                while (pos < text.Length
                    && !IsWordFormingCharacter(text[pos])
                    && !m_charPropEngine.get_IsNumber(text[pos]))
                {
                    pos++;
                }

                pair.Punct = text.Substring(punctStart, pos - punctStart);
                pairs.Add(pair);
            }

            return pairs;
        }