コード例 #1
0
ファイル: ScorerTests.cs プロジェクト: anelson/new_mercury
        public void MarkWordBoundariesTest()
        {
            // Checks which characters of the search string Scorer.MarkWordBoundaries flags as
            // word boundaries for a five-word title: "fear", "is", "the", "mind", "killer",
            // joined with the normalizer's word separator.
            String title = String.Format("fear{0}is{0}the{0}mind{0}killer", Normalizer.WORD_SEPARATOR);
            String search = "fmik";

            int[] searchPositions = Scorer.FindRightmostCharPositions(title, search);

            // The flags array is an out parameter, so the callee supplies the array;
            // pre-allocating one here would only create garbage that is overwritten.
            SearchCharFlags[] searchCharFlags;

            Scorer.MarkWordBoundaries(title, search, searchPositions, out searchCharFlags);

            // 'f' is expected to be flagged (title word "fear" starts with it).
            Assert.AreEqual(SearchCharFlags.WordBoundary, searchCharFlags[0]);
            // 'm' is expected to be flagged (title word "mind" starts with it).
            Assert.AreEqual(SearchCharFlags.WordBoundary, searchCharFlags[1]);
            // 'i' is expected to stay unflagged: the only word starting with 'i' ("is")
            // comes before "mind" in the title.
            Assert.AreEqual(SearchCharFlags.None, searchCharFlags[2]);
            // 'k' is expected to be flagged (title word "killer" starts with it).
            Assert.AreEqual(SearchCharFlags.WordBoundary, searchCharFlags[3]);
        }
コード例 #2
0
ファイル: Scorer.cs プロジェクト: anelson/new_mercury
        /// <summary>
        /// Sums a per-character credit over <paramref name="searchCharFlags"/> and returns the total.
        /// A full credit is 1 / <c>searchCharPositions.Length</c>.
        /// </summary>
        /// <param name="searchCharPositions">Only its length is used, as the divisor for one full credit.</param>
        /// <param name="searchCharFlags">Flags for each search character; each entry earns a full credit, a half credit, or nothing.</param>
        /// <returns>The accumulated credit, converted to <see cref="decimal"/>.</returns>
        private static decimal SimpleScore(int[] searchCharPositions, SearchCharFlags[] searchCharFlags)
        {
            // Any of these bits earns the full credit: the character sits on a word boundary,
            // or has matching search characters on both sides.
            SearchCharFlags fullCreditMask = SearchCharFlags.WordBoundary | SearchCharFlags.SurroundedChar;

            // Any of these bits (when no full-credit bit is set) earns half: the character
            // precedes or succeeds a matching character.
            SearchCharFlags halfCreditMask = SearchCharFlags.PreceedingChar | SearchCharFlags.SucceedingChar;

            double fullCredit = 1.0 / (double)searchCharPositions.Length;
            double halfCredit = fullCredit / 2.0;
            double runningTotal = 0;

            for (int i = 0; i < searchCharFlags.Length; i++) {
                SearchCharFlags current = searchCharFlags[i];

                if ((current & fullCreditMask) != 0) {
                    runningTotal += fullCredit;
                    continue;
                }

                if ((current & halfCreditMask) != 0) {
                    runningTotal += halfCredit;
                }
            }

            return (decimal)runningTotal;
        }
コード例 #3
0
ファイル: Scorer.cs プロジェクト: anelson/new_mercury
        /// <summary>
        /// Flags every search character that can be paired with the first character of a word in
        /// <paramref name="title"/>, walking the search string and the title's words left to right.
        /// </summary>
        /// <param name="title">Title to inspect; its word starts are obtained from FindWordStarts.</param>
        /// <param name="searchString">Search characters, in the order they must appear.</param>
        /// <param name="searchCharPositions">
        /// For each search character, the highest title index at which a word start may still be
        /// paired with it. Callers in this file pass the result of FindRightmostCharPositions.
        /// </param>
        /// <param name="searchCharFlags">
        /// Receives a new array with one entry per character of <paramref name="searchString"/>.
        /// Entries paired with a word start are set to <c>SearchCharFlags.WordBoundary</c>; all
        /// other entries keep the array's default value.
        /// </param>
        public static void MarkWordBoundaries(string title, string searchString, int[] searchCharPositions, out SearchCharFlags[] searchCharFlags)
        {
            //  Check for search letters at the start of words, at positions equal to or earlier than
            //  the right-most positions found above
            int[] wordStartPositions;
            Char[] wordStartCharacters;

            searchCharFlags = new SearchCharFlags[searchString.Length];

            FindWordStarts(title, out wordStartPositions, out wordStartCharacters);

            if (wordStartPositions.Length == 0)
            {
                // No words at all, so no search character can sit on a word boundary.
                return;
            }

            List<Char> wordStartCharsList = new List<char>(wordStartCharacters);

            // Index (into the word-start arrays) of the first word still available for pairing.
            int wordIdx = 0;

            // Title index just past the place where the previous search character was located.
            // Characters that do not start a word are looked up from here, so an occurrence inside
            // the word that was just paired (or between words) is not skipped.
            int titleSearchStart = 0;

            for (int idx = 0; idx < searchCharPositions.Length; idx++)
            {
                int val = searchCharPositions[idx];
                Char searchChar = searchString[idx];

                // Does a remaining word start with this character?
                int tempIdx = wordStartCharsList.IndexOf(searchChar, wordIdx);

                if (tempIdx != -1 &&
                    wordStartPositions[tempIdx] <= val)
                {
                    // A word starting with this character is present in the string on or before
                    // the right-most occurrence of this char.
                    searchCharFlags[idx] = SearchCharFlags.WordBoundary;
                    titleSearchStart = wordStartPositions[tempIdx] + 1;
                    wordIdx = tempIdx + 1;

                    if (wordIdx == wordStartCharacters.Length)
                    {
                        // No more words
                        break;
                    }
                }
                else
                {
                    // No usable word starts with this character.  Locate its nearest occurrence
                    // after the previous match and resume the word-start search behind it.
                    int nearestCharIdx = title.IndexOf(searchChar, titleSearchStart);

                    if (nearestCharIdx == -1)
                    {
                        // This character isn't present past the previous match, which means no
                        // further characters can be placed either; abort the search
                        break;
                    }

                    titleSearchStart = nearestCharIdx + 1;

                    // Find the first word start index AFTER this char.
                    // Assumes FindWordStarts reports positions in ascending order - TODO confirm.
                    wordIdx = -1;

                    for (int wordStartPosIdx = 0;
                        wordStartPosIdx < wordStartPositions.Length;
                        wordStartPosIdx++)
                    {
                        if (wordStartPositions[wordStartPosIdx] > nearestCharIdx)
                        {
                            wordIdx = wordStartPosIdx;
                            // Stop at the first hit; continuing would skip the words in between.
                            break;
                        }
                    }

                    if (wordIdx == -1)
                    {
                        // No more word starts; that's the last of the search terms that align on
                        // word boundaries
                        break;
                    }
                }
            }
        }
コード例 #4
0
ファイル: Scorer.cs プロジェクト: anelson/new_mercury
        /// <summary>
        /// Locates the search characters in the title and flags those that fall on word boundaries.
        /// </summary>
        /// <param name="title">Title to search.</param>
        /// <param name="searchString">Characters to look for, in order.</param>
        /// <param name="searchCharPositions">Receives the result of FindRightmostCharPositions for the two strings.</param>
        /// <param name="searchCharFlags">Receives the flags produced by MarkWordBoundaries for those positions.</param>
        private static void FindSearchCharsInTitle(string title, string searchString, out int[] searchCharPositions, out SearchCharFlags[] searchCharFlags)
        {
            // Step 1: right-most occurrence of every search character, keeping search order.
            int[] rightmostPositions = FindRightmostCharPositions(title, searchString);
            searchCharPositions = rightmostPositions;

            // Step 2: flag the characters that coincide with word starts.
            MarkWordBoundaries(title, searchString, rightmostPositions, out searchCharFlags);
        }