/// ------------------------------------------------------------------------------------
/// <summary>
/// Compares the given word (lower-cased) with the previously processed word and
/// records it via AddWord when the two are equal. The given word then becomes the
/// remembered "previous" word, unless non-whitespace characters follow it.
/// </summary>
/// <param name="tok">The token containing the word; passed on to AddWord.</param>
/// <param name="wap">The word and the punctuation that follows it. Nothing happens
/// when the word is the empty string.</param>
/// ------------------------------------------------------------------------------------
private void ProcessWord(ITextToken tok, WordAndPunct wap)
{
	if (wap.Word == "")
	{
		return;
	}

	// NOTE(review): ToLower() lower-cases using the current culture - confirm that is
	// the intended comparison for the text being checked.
	string lowered = wap.Word.ToLower();
	bool repeatsPrevious = (prevWord == lowered);
	if (repeatsPrevious)
	{
		AddWord(tok, wap);
	}
	prevWord = lowered;

	// Anything other than whitespace after the word (a quote mark, for example)
	// means the next word must not be treated as a repetition of this one, even if
	// the two are identical.
	bool followedByNonWhitespace = false;
	foreach (char ch in wap.Punct)
	{
		if (!char.IsWhiteSpace(ch))
		{
			followedByNonWhitespace = true;
			break;
		}
	}

	if (followedByNonWhitespace)
	{
		// Reset() is defined elsewhere; presumably it clears the remembered word - verify.
		Reset();
	}
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Analyzes a word as an <c>AWord</c> and records it via AddWord unless one of the
/// exemptions below applies. The word is skipped when:
/// - both its prefix and suffix are empty;
/// - m_uncapitalizedPrefixes contains the prefix, contains "*" followed by the last
///   character of the prefix, or contains the bare entry "*";
/// - m_capitalizedSuffixes contains the suffix;
/// - m_capitalizedPrefixes contains the prefix.
/// </summary>
/// <param name="tok">The token containing the word; passed on to AddWord.</param>
/// <param name="wap">The word (and following punctuation) to examine.</param>
/// <param name="desiredKey">Filter key forwarded unchanged to AddWord.</param>
/// ------------------------------------------------------------------------------------
public void ProcessWord(ITextToken tok, WordAndPunct wap, string desiredKey)
{
	// How AWord splits a word into Prefix and Suffix is defined elsewhere.
	AWord word = new AWord(wap.Word, m_categorizer);

	if (word.Prefix == string.Empty && word.Suffix == string.Empty)
	{
		return;
	}

	if (m_uncapitalizedPrefixes.Contains(word.Prefix))
	{
		return;
	}

	// The guard above only rejects words where BOTH parts are empty, so the prefix
	// may still be empty here. Indexing Prefix[Length - 1] on an empty string throws
	// IndexOutOfRangeException, so the "*<last char>" lookup is only attempted when
	// there is a last character to look up.
	if (!string.IsNullOrEmpty(word.Prefix) &&
		m_uncapitalizedPrefixes.Contains("*" + word.Prefix[word.Prefix.Length - 1]))
	{
		return;
	}

	if (m_uncapitalizedPrefixes.Contains("*"))
	{
		return;
	}

	if (m_capitalizedSuffixes.Contains(word.Suffix))
	{
		return;
	}

	if (m_capitalizedPrefixes.Contains(word.Prefix))
	{
		return;
	}

	AddWord(tok, wap, desiredKey);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Builds a <c>TextTokenSubstring</c> covering the word inside the token and adds it
/// to the result list when it passes the desiredKey filter. A null or empty
/// desiredKey means "no filter"; otherwise the substring is added only when its
/// InventoryText equals desiredKey.
/// </summary>
/// <param name="tok">The token containing the word.</param>
/// <param name="wap">Supplies the word's offset within the token and its length.</param>
/// ------------------------------------------------------------------------------------
private void AddWord(ITextToken tok, WordAndPunct wap)
{
	TextTokenSubstring tts = new TextTokenSubstring(tok, wap.Offset, wap.Word.Length);

	// Treat null the same as "": comparing against "" alone made a null key act as a
	// filter that matches nothing, silently discarding every result.
	if (string.IsNullOrEmpty(desiredKey) || desiredKey == tts.InventoryText)
	{
		result.Add(tts);
	}
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Builds a <c>TextTokenSubstring</c> covering the word inside the token and adds it
/// to m_result, unless a non-empty <paramref name="desiredKey"/> is given that differs
/// from the substring's InventoryText.
/// </summary>
/// <param name="tok">The token containing the word.</param>
/// <param name="wap">Supplies the word's offset within the token and its length.</param>
/// <param name="desiredKey">Null or empty to accept every word; otherwise only a word
/// whose InventoryText equals this key is added.</param>
/// ------------------------------------------------------------------------------------
private void AddWord(ITextToken tok, WordAndPunct wap, string desiredKey)
{
	TextTokenSubstring candidate = new TextTokenSubstring(tok, wap.Offset, wap.Word.Length);

	bool filterActive = !String.IsNullOrEmpty(desiredKey);
	if (filterActive && desiredKey != candidate.InventoryText)
	{
		// A specific key was requested and this word is not it.
		return;
	}

	m_result.Add(candidate);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Splits text into a sequence of words, each paired with the run of non-word
/// characters that follows it.
/// </summary>
/// <remarks>
/// For each entry: leading separator characters are skipped; the word then extends
/// over digits and word-forming characters. A "single character word" character forms
/// a word by itself when it is the first character, and otherwise ends the word being
/// built without being consumed. The following punctuation runs up to (not including)
/// the next word-forming character or digit. An entry's word may be the empty string
/// when the first non-separator character is neither a digit, a word-forming
/// character nor a single-character word.
/// </remarks>
/// <param name="text">The text.</param>
/// <returns>a collection of words and punctuation</returns>
/// ------------------------------------------------------------------------------------
public override List<WordAndPunct> WordAndPuncts(string text)
{
	List<WordAndPunct> entries = new List<WordAndPunct>();
	int length = text.Length;
	int pos = 0;

	while (pos < length)
	{
		WordAndPunct entry = new WordAndPunct();

		// Step over separators that precede the word.
		while (pos < length && m_charPropEngine.get_IsSeparator(text[pos]))
		{
			pos++;
		}
		if (pos == length)
		{
			// Only separators were left; there is no further entry to record.
			break;
		}

		entry.Offset = pos;

		// --- Word part ---
		bool atWordStart = true;
		while (pos < length)
		{
			char current = text[pos];

			if (IsSingleCharacterWord(current))
			{
				// At the start, the character is a complete word on its own and is
				// consumed. Otherwise it terminates the current word and is left in
				// place (pos is not advanced).
				if (atWordStart)
				{
					pos++;
				}
				break;
			}

			// Digits are accepted inside words; anything else must be word-forming.
			bool extendsWord = m_charPropEngine.get_IsNumber(current) || IsWordFormingCharacter(current);
			if (!extendsWord)
			{
				break;
			}

			pos++;
			atWordStart = false;
		}
		entry.Word = text.Substring(entry.Offset, pos - entry.Offset);

		// --- Punctuation part: everything up to the next word-forming char or digit ---
		int punctStart = pos;
		while (pos < length
			&& !IsWordFormingCharacter(text[pos])
			&& !m_charPropEngine.get_IsNumber(text[pos]))
		{
			pos++;
		}
		entry.Punct = text.Substring(punctStart, pos - punctStart);

		entries.Add(entry);
	}

	return entries;
}