Beispiel #1
0
 /// <summary>
 /// Asserts that a <see cref="WordAndPunct"/> carries the expected word,
 /// trailing punctuation and offset. The checks run in that order, so the
 /// first mismatch is the one reported.
 /// </summary>
 /// <param name="wordAndPunct">The item under test.</param>
 /// <param name="word">Expected value of <c>Word</c>.</param>
 /// <param name="punct">Expected value of <c>Punct</c>.</param>
 /// <param name="offset">Expected value of <c>Offset</c>.</param>
 private void CheckWordAndPunct(WordAndPunct wordAndPunct, string word, string punct, int offset)
 {
     // Each value is read immediately before its own assertion so that a failing
     // check stops the helper before any later member is touched.
     var actualWord = wordAndPunct.Word;
     Assert.AreEqual(word, actualWord, "The word is not correct");

     var actualPunct = wordAndPunct.Punct;
     Assert.AreEqual(punct, actualPunct, "The punctuation is not correct");

     var actualOffset = wordAndPunct.Offset;
     Assert.AreEqual(offset, actualOffset, "The offset is not correct");
 }
Beispiel #2
0
        //public string PunctuationCharacters { get { return punctuationCharacters; } }

        /// <summary>
        /// Scans <paramref name="text"/> from left to right and splits it into
        /// consecutive word / trailing-punctuation pairs.
        /// </summary>
        /// <param name="text">
        /// The text to scan. Its length is read immediately, so passing null
        /// results in a <see cref="System.NullReferenceException"/>.
        /// </param>
        /// <returns>
        /// One <see cref="WordAndPunct"/> per scan step, in text order. For each item,
        /// <c>Offset</c> is the index in <paramref name="text"/> at which the word starts
        /// (after any separators skipped at the start of the step), <c>Word</c> is the
        /// word found there (empty when the character at that position cannot start a
        /// word), and <c>Punct</c> is the run of characters following the word up to the
        /// next word-forming character or digit. An empty <paramref name="text"/> yields
        /// an empty list.
        /// </returns>
        public virtual List <WordAndPunct> WordAndPuncts(string text)
        {
            List <WordAndPunct> pairs = new List <WordAndPunct>();
            int pos = 0;

            while (pos < text.Length)
            {
                // Separators ahead of the word are skipped; they end up in neither
                // Word nor Punct of this item.
                while (pos < text.Length && char.IsSeparator(text[pos]))
                {
                    pos++;
                }

                int wordStart = pos;

                // --- Collect the word ---
                while (pos < text.Length)
                {
                    char current = text[pos];

                    // Nothing has been consumed for this word yet exactly when the
                    // scan position still equals the word's start.
                    bool atWordStart = (pos == wordStart);

                    if (IsSingleCharacterWord(current))
                    {
                        // Standing first, a single-character word is the whole word and
                        // is consumed. Anywhere else it only terminates the word being
                        // built and is left in place for the following scan.
                        if (atWordStart)
                        {
                            pos++;
                        }
                        break;
                    }

                    if (IsWordMedialPunctuation(current))
                    {
                        // Medial punctuation stays inside the word only when it is not
                        // the first character AND a word-forming character follows it.
                        // Open question carried over from the original author: can
                        // several medial punctuation characters occur in a row?
                        bool joinsWord = !atWordStart
                                         && pos + 1 < text.Length
                                         && IsWordFormingCharacter(text[pos + 1]);
                        if (!joinsWord)
                        {
                            break;
                        }
                    }
                    else if (!char.IsDigit(current) && !IsWordFormingCharacter(current))
                    {
                        // Digits are allowed inside words; any other character that is
                        // not word-forming ends the word.
                        break;
                    }

                    pos++;
                }

                // --- Collect everything up to the next word-forming character or digit ---
                int punctStart = pos;

                while (pos < text.Length
                       && !IsWordFormingCharacter(text[pos])
                       && !char.IsDigit(text[pos]))
                {
                    pos++;
                }

                pairs.Add(new WordAndPunct
                {
                    Offset = wordStart,
                    Word = text.Substring(wordStart, punctStart - wordStart),
                    Punct = text.Substring(punctStart, pos - punctStart)
                });
            }

            return pairs;
        }