/// <summary>
/// Verifies that <c>Scorer.MarkWordBoundaries</c> flags the search characters 'f', 'm' and 'k'
/// as word boundaries and leaves 'i' unflagged for a five-word title.
/// </summary>
public void MarkWordBoundariesTest()
{
    // Arrange: five words joined by the normalizer's word separator.
    String title = String.Format("fear{0}is{0}the{0}mind{0}killer", Normalizer.WORD_SEPARATOR);
    String search = "fmik";
    int[] searchPositions = Scorer.FindRightmostCharPositions(title, search);

    // Act: the flags array is produced by the callee through the out parameter,
    // so there is no point in allocating one up front.
    SearchCharFlags[] searchCharFlags;
    Scorer.MarkWordBoundaries(title, search, searchPositions, out searchCharFlags);

    // Assert: one flag entry per search character.
    Assert.AreEqual(search.Length, searchCharFlags.Length);

    // 'f' and 'm' are expected to be flagged as word boundaries.
    Assert.AreEqual(SearchCharFlags.WordBoundary, searchCharFlags[0]);
    Assert.AreEqual(SearchCharFlags.WordBoundary, searchCharFlags[1]);

    // 'i' is expected to carry no flag.
    Assert.AreEqual(SearchCharFlags.None, searchCharFlags[2]);

    // 'k' is expected to be flagged as a word boundary.
    Assert.AreEqual(SearchCharFlags.WordBoundary, searchCharFlags[3]);
}
/// <summary>
/// Sums a per-character contribution for every entry in <paramref name="searchCharFlags"/>.
/// </summary>
/// <param name="searchCharPositions">
/// Positions of the search characters; only the array length is used, to size the
/// per-character share of the score.
/// </param>
/// <param name="searchCharFlags">Flags describing each search character.</param>
/// <returns>The accumulated score, converted to <see cref="decimal"/>.</returns>
private static decimal SimpleScore(int[] searchCharPositions, SearchCharFlags[] searchCharFlags)
{
    // A single character can contribute at most an equal share of 1.0.
    double fullCredit = 1.0 / searchCharPositions.Length;
    double halfCredit = fullCredit / 2.0;

    // Word-boundary characters and characters with matches on both sides earn the full share.
    SearchCharFlags fullCreditMask = SearchCharFlags.WordBoundary | SearchCharFlags.SurroundedChar;

    // Characters that merely precede or follow another match earn half a share.
    SearchCharFlags halfCreditMask = SearchCharFlags.PreceedingChar | SearchCharFlags.SucceedingChar;

    double runningTotal = 0;
    for (int i = 0; i < searchCharFlags.Length; i++)
    {
        SearchCharFlags current = searchCharFlags[i];

        if ((current & fullCreditMask) != 0)
        {
            runningTotal += fullCredit;
        }
        else if ((current & halfCreditMask) != 0)
        {
            runningTotal += halfCredit;
        }
        // Anything else contributes nothing.
    }

    return (decimal)runningTotal;
}
/// <summary>
/// Flags each search character that can be aligned with the first character of a word in
/// <paramref name="title"/>, at a position no later than that character's entry in
/// <paramref name="searchCharPositions"/>.
/// </summary>
/// <param name="title">The title being searched.</param>
/// <param name="searchString">The search string; one flag entry is produced per character.</param>
/// <param name="searchCharPositions">
/// For each search character, its position in <paramref name="title"/> (the callers in this
/// file pass the result of <c>FindRightmostCharPositions</c>).
/// </param>
/// <param name="searchCharFlags">
/// Receives a new array of length <c>searchString.Length</c>. Entries for characters that align
/// with a word start are set to <c>SearchCharFlags.WordBoundary</c>; all others keep the default value.
/// </param>
public static void MarkWordBoundaries(string title, string searchString, int[] searchCharPositions, out SearchCharFlags[] searchCharFlags)
{
    searchCharFlags = new SearchCharFlags[searchString.Length];

    // Parallel arrays: position and character of every word start in the title.
    // NOTE(review): assumes FindWordStarts returns both arrays with equal length and with
    // positions in ascending order - confirm against its implementation.
    int[] wordStartPositions;
    Char[] wordStartCharacters;
    FindWordStarts(title, out wordStartPositions, out wordStartCharacters);
    List<Char> wordStartCharsList = new List<char>(wordStartCharacters);

    if (wordStartPositions.Length == 0)
    {
        // No word starts at all: nothing can sit on a word boundary. Returning here also
        // prevents the fallback branch below from indexing wordStartPositions[0].
        return;
    }

    // Index of the first word that is still available for matching.
    int wordIdx = 0;
    for (int idx = 0; idx < searchCharPositions.Length; idx++)
    {
        int rightmostPos = searchCharPositions[idx];
        Char searchChar = searchString[idx];

        // Look for a remaining word that starts with this character.
        int matchingWordIdx = wordStartCharsList.IndexOf(searchChar, wordIdx);
        if (matchingWordIdx != -1 && wordStartPositions[matchingWordIdx] <= rightmostPos)
        {
            // A word starting with this character occurs on or before the recorded
            // position of the character, so it counts as a word-boundary match.
            searchCharFlags[idx] = SearchCharFlags.WordBoundary;
            wordIdx = matchingWordIdx + 1;
            if (wordIdx == wordStartCharacters.Length)
            {
                // Every word has been consumed.
                break;
            }

            continue;
        }

        // No usable word starts with this character. Resume the word-start search for the
        // following characters after the nearest occurrence of this one.
        // NOTE(review): the search begins at the current word's start, so occurrences inside
        // the previously matched word are not considered - confirm this is intended.
        int nearestCharIdx = title.IndexOf(searchChar, wordStartPositions[wordIdx]);
        if (nearestCharIdx == -1)
        {
            // The character does not occur from the current word onwards; stop marking.
            break;
        }

        // Find the FIRST word start located after that occurrence. Stopping at the first
        // hit matters: continuing the scan would select the last such word and skip every
        // word in between.
        wordIdx = -1;
        for (int wordStartPosIdx = 0; wordStartPosIdx < wordStartPositions.Length; wordStartPosIdx++)
        {
            if (wordStartPositions[wordStartPosIdx] > nearestCharIdx)
            {
                wordIdx = wordStartPosIdx;
                break;
            }
        }

        if (wordIdx == -1)
        {
            // No word starts remain after this character, so no later search character
            // can align with a word boundary.
            break;
        }
    }
}
/// <summary>
/// Locates the search characters in <paramref name="title"/> and flags those that fall on
/// word boundaries.
/// </summary>
/// <param name="title">The title being searched.</param>
/// <param name="searchString">The characters to locate.</param>
/// <param name="searchCharPositions">
/// Receives the result of <c>FindRightmostCharPositions</c> for the title and search string.
/// </param>
/// <param name="searchCharFlags">
/// Receives the flags produced by <c>MarkWordBoundaries</c> for those positions.
/// </param>
private static void FindSearchCharsInTitle(string title, string searchString, out int[] searchCharPositions, out SearchCharFlags[] searchCharFlags)
{
    // Step 1: resolve the position of each search character in the title.
    int[] rightmostPositions = FindRightmostCharPositions(title, searchString);
    searchCharPositions = rightmostPositions;

    // Step 2: flag the characters that line up with the start of a word.
    MarkWordBoundaries(title, searchString, rightmostPositions, out searchCharFlags);
}