示例#1
0
        // Tokens grouped by hash value; each list holds the distinct tokens that share one hash.
        private readonly Dictionary <int, List <Token> > materializedWords = new Dictionary <int, List <Token> >(); // Pooling? Configuration for expected unique words per document?

        /// <summary>
        /// Records an occurrence of <paramref name="word"/> at <paramref name="location"/>.
        /// If a token with the same text is already stored under the hash, the location is
        /// appended to it; otherwise a new token is created.
        /// </summary>
        /// <param name="hash">The hash whose <c>HashValue</c> selects the bucket of candidate tokens.</param>
        /// <param name="word">The characters of the word being recorded.</param>
        /// <param name="location">The location to associate with the word.</param>
        public void MergeOrAdd(TokenHash hash, StringBuilder word, WordLocation location)
        {
            // First word seen for this hash: start a new bucket holding just this token.
            if (!this.materializedWords.TryGetValue(hash.HashValue, out var bucket))
            {
                var freshBucket = new List <Token>();
                freshBucket.Add(new Token(word.ToString(), location));
                this.materializedWords.Add(hash.HashValue, freshBucket);
                return;
            }

            // Different words can share a hash, so compare the actual characters.
            for (var i = 0; i < bucket.Count; i++)
            {
                var candidate = bucket[i];
                if (word.SequenceEqual(candidate.Value))
                {
                    candidate.AddLocation(location);
                    return;
                }
            }

            // Same hash but no matching text: add a new token alongside the existing ones.
            bucket.Add(new Token(word.ToString(), location));
        }
示例#2
0
文件: Token.cs 项目: lulzzz/lifti
 /// <summary>
 /// Creates a token with the given text and a single initial location.
 /// </summary>
 /// <param name="token">The text stored in <c>Value</c>.</param>
 /// <param name="location">The first location recorded for this token.</param>
 public Token(string token, WordLocation location)
 {
     var initialLocations = new List <WordLocation>();
     initialLocations.Add(location);

     this.locations = initialLocations;
     this.Value = token;
 }
示例#3
0
        /// <summary>
        /// Reads <paramref name="locationCount"/> location entries from the reader and appends
        /// them to <paramref name="locationMatches"/>.
        /// </summary>
        /// <param name="locationCount">The number of entries to read.</param>
        /// <param name="locationMatches">The list that receives the decoded locations, in read order.</param>
        /// <exception cref="DeserializationException">
        /// Thrown when a non-full entry is encountered before any location has been read.
        /// </exception>
        private void ReadLocations(int locationCount, List <WordLocation> locationMatches)
        {
            WordLocation? previous = null;

            for (var index = 0; index < locationCount; index++)
            {
                // Every entry starts with one byte describing how the rest is encoded.
                var entryKind = (LocationEntryStructure)this.reader.ReadByte();
                WordLocation current;

                if (structureIsFull(entryKind))
                {
                    // Read the three fields in stream order before constructing the location.
                    var wordIndex = this.reader.ReadInt32();
                    var start     = this.reader.ReadInt32();
                    var length    = this.reader.ReadUInt16();
                    current = new WordLocation(wordIndex, start, length);
                }
                else
                {
                    // A non-full entry is decoded relative to the previous one, so one must exist.
                    if (!previous.HasValue)
                    {
                        throw new DeserializationException(ExceptionMessages.MalformedDataExpectedFullLocationEntry);
                    }

                    current = this.DeserializeLocationData(previous.Value, entryKind);
                }

                locationMatches.Add(current);
                previous = current;
            }

            bool structureIsFull(LocationEntryStructure kind)
            {
                return kind == LocationEntryStructure.Full;
            }
        }
        /// <summary>
        /// Verifies that <c>GetWord</c> throws an <see cref="ArgumentException"/> carrying
        /// <c>WordSearch.LocationOutOfBoundsError</c> for a location built with a final argument of 10.
        /// </summary>
        public void GetWord_GivenTooLongWord_ThrowsProperException()
        {
            var search        = new WordSearch(ThreeByFourGrid());
            var tooLongSpan   = new WordLocation(0, 0, 1, 1, 10);

            var thrown = Assert.Throws <ArgumentException>(() => search.GetWord(tooLongSpan));

            Assert.Equal(WordSearch.LocationOutOfBoundsError, thrown.Message);
        }
        /// <summary>
        /// Verifies the formatted output when a single solution is rendered with
        /// <c>TestSolutionFormatter</c> in its default configuration.
        /// </summary>
        public void FormatWithSolutions_WithSingleCharFormatterWithOneSolution_FormatsCorrectly()
        {
            var formatter = new WordSearchFormatter(new TestSolutionFormatter());
            var expected  = "* b c\nd * f\ng h *\nj k l";
            var solution  = new WordLocation(0, 0, 1, 1, 3);

            var actual = formatter.Format(ThreeByFourWordSearch(), new [] { solution });

            Assert.Equal(expected, actual);
        }
        /// <summary>
        /// Verifies that formatting with <c>ParenthesesSolutionFormatter</c> and a second
        /// constructor argument of 0 throws a <see cref="FormatException"/> whose message is
        /// <c>WordSearchFormatter.TooLittleSpacingError</c>.
        /// </summary>
        public void FormatWithSolutions_WithSingleCharFormatterTooLittleSpacing_ThrowsProperException()
        {
            var formatter = new WordSearchFormatter(new ParenthesesSolutionFormatter(), 0);
            var solution  = new WordLocation(0, 0, 1, 1, 3);

            var thrown = Assert.Throws <FormatException>(() => formatter.Format(ThreeByFourWordSearch(), new[] { solution }));

            Assert.Equal(WordSearchFormatter.TooLittleSpacingError, thrown.Message);
        }
示例#7
0
        /// <summary>
        /// Verifies that two <c>WordLocation</c> values built from identical arguments compare
        /// equal through <c>==</c>, <c>!=</c> and the fluent <c>Be</c> assertion.
        /// </summary>
        public void WordsWithTheSameValues_ShouldBeEqual()
        {
            var left  = new WordLocation(1, 3, 100);
            var right = new WordLocation(1, 3, 100);

            var areEqual   = left == right;
            areEqual.Should().BeTrue();

            var areUnequal = left != right;
            areUnequal.Should().BeFalse();

            left.Should().Be(right);
        }
        /// <summary>
        /// Verifies that <c>GetWord</c> returns the single character stored at [1, 1] when the
        /// location's final argument is 1.
        /// </summary>
        public void GetWord_GivenOneLengthWordLocation_ReturnsTheSingleCharacter()
        {
            var search = new WordSearch(ThreeByFourGrid());

            // Plant a known character so the expected result does not depend on the grid contents.
            search.Chars[1, 1] = 'A';
            var singleCell = new WordLocation(1, 1, 1, 1, 1);

            var word = search.GetWord(singleCell);

            Assert.Equal("A", word);
        }
示例#9
0
        /// <summary>
        /// Verifies that two <c>WordLocation</c> values differing in their second argument
        /// compare unequal through <c>==</c>, <c>!=</c> and the fluent <c>NotBe</c> assertion.
        /// </summary>
        public void WordsWithDifferentValues_ShouldNotBeEqual()
        {
            var left  = new WordLocation(1, 3, 100);
            var right = new WordLocation(1, 4, 100);

            var areEqual   = left == right;
            areEqual.Should().BeFalse();

            var areUnequal = left != right;
            areUnequal.Should().BeTrue();

            left.Should().NotBe(right);
        }
        /// <summary>
        /// Verifies that a <c>TestSolutionFormatter</c> with <c>Length</c> 21 combined with a
        /// second constructor argument of 9 makes <c>Format</c> throw a
        /// <see cref="FormatException"/> with <c>WordSearchFormatter.TooLittleSpacingError</c>.
        /// </summary>
        public void FormatWithSolutions_WithLongFormatterTooLittleSpacing_ThrowsProperException()
        {
            var longFormatter = new TestSolutionFormatter();
            longFormatter.Length = 21;

            var formatter = new WordSearchFormatter(longFormatter, 9);
            var solution  = new WordLocation(0, 0, 1, 1, 3);

            var thrown = Assert.Throws <FormatException>(() => formatter.Format(ThreeByFourWordSearch(), new[] { solution }));

            Assert.Equal(WordSearchFormatter.TooLittleSpacingError, thrown.Message);
        }
示例#11
0
 /// <summary>
 /// Copies the fields of a <c>WordLocation</c> into a new <c>WordLocation1</c> DTO.
 /// </summary>
 /// <param name="w">The source object whose properties are copied.</param>
 /// <returns>A new <c>WordLocation1</c> carrying the same five property values.</returns>
 public static WordLocation1 ConvertToDto(WordLocation w)
 {
     var dto = new WordLocation1();

     dto.BookSenteceID = w.BookSenteceID;
     dto.Counter       = w.Counter;
     dto.ID            = w.ID;
     dto.SearchId      = w.SearchId;
     dto.SubjectId     = w.SubjectId;

     return dto;
 }
示例#12
0
        /// <summary>
        /// Verifies that two reads of <c>WordLocation.Title</c> print as "Title", compare equal
        /// by <c>==</c> and <c>Equals</c>, and yet are not the same object reference.
        /// </summary>
        public void StaticInstances_Title()
        {
            WordLocation first  = WordLocation.Title;
            WordLocation second = WordLocation.Title;

            // String representation.
            Assert.Equal("Title", first.ToString());
            Assert.Equal("Title", second.ToString());

            // Value equality.
            Assert.True(first == second, "loc1 should equal loc2");
            Assert.True(first.Equals(second), "loc1 should equal loc2");

            // Reference identity.
            Assert.NotSame(second, first);
        }
        /// <summary>
        /// Verifies that two reads of <c>WordLocation.Title</c> print as "Title", compare equal
        /// by <c>==</c> and <c>Equals</c>, and yet are not the same object reference.
        /// </summary>
        public void StaticInstances_Title()
        {
            WordLocation first  = WordLocation.Title;
            WordLocation second = WordLocation.Title;

            // String representation.
            Assert.AreEqual("Title", first.ToString(), "Invalid string representation");
            Assert.AreEqual("Title", second.ToString(), "Invalid string representation");

            // Value equality.
            Assert.IsTrue(first == second, "loc1 should equal loc2");
            Assert.IsTrue(first.Equals(second), "loc1 should equal loc2");

            // Reference identity.
            Assert.AreNotSame(second, first, "loc1 should not be the same object as loc2");
        }
示例#14
0
        /// <summary>
        /// Verifies that two reads of <c>WordLocation.Keywords</c> print as "Keywords", compare
        /// equal by <c>==</c> and <c>Equals</c>, and yet are not the same object reference.
        /// </summary>
        public void StaticInstances_Keywords()
        {
            WordLocation first  = WordLocation.Keywords;
            WordLocation second = WordLocation.Keywords;

            // String representation.
            Assert.Equal("Keywords", first.ToString());
            Assert.Equal("Keywords", second.ToString());

            // Value equality.
            Assert.True(first == second, "loc1 should equal loc2");
            Assert.True(first.Equals(second), "loc1 should equal loc2");

            // Reference identity.
            Assert.NotSame(second, first);
        }
示例#15
0
        /// <summary>
        /// Verifies that two reads of <c>WordLocation.Content</c> print as "Content", compare
        /// equal by <c>==</c> and <c>Equals</c>, and yet are not the same object reference.
        /// </summary>
        public void StaticInstances_Content()
        {
            WordLocation first  = WordLocation.Content;
            WordLocation second = WordLocation.Content;

            // String representation.
            Assert.Equal("Content", first.ToString());
            Assert.Equal("Content", second.ToString());

            // Value equality.
            Assert.True(first == second, "loc1 should equal loc2");
            Assert.True(first.Equals(second), "loc1 should equal loc2");

            // Reference identity.
            Assert.NotSame(second, first);
        }
        /// <summary>
        /// Verifies the formatted output when <c>TestSolutionFormatter.Length</c> is 3 and the
        /// formatter's second constructor argument is 3.
        /// </summary>
        public void FormatWithSolutions_WithLongFormatter_FormatsCorrectly()
        {
            var threeWide = new TestSolutionFormatter();
            threeWide.Length = 3;

            var formatter = new WordSearchFormatter(threeWide, 3);
            var solution  = new WordLocation(0, 0, 1, 1, 3);
            var expected  = "*** b   c\n" +
                            "d  ***  f\n" +
                            "g   h  ***\n" +
                            "j   k   l";

            var actual = formatter.Format(ThreeByFourWordSearch(), new[] { solution });

            Assert.Equal(expected, actual);
        }
        /// <summary>
        /// Verifies the formatted output when <c>TestSolutionFormatter.Length</c> is 2 and the
        /// formatter is built without an explicit second constructor argument.
        /// </summary>
        public void FormatWithSolutions_WithEvenFormatter_PlacesSpaceAtBeginning()
        {
            var twoWide = new TestSolutionFormatter();
            twoWide.Length = 2;

            var formatter = new WordSearchFormatter(twoWide);
            var solution  = new WordLocation(0, 0, 1, 1, 3);
            var expected  = "**b c\n" +
                            "d** f\n" +
                            "g h**\n" +
                            "j k l";

            var actual = formatter.Format(ThreeByFourWordSearch(), new [] { solution });

            Assert.Equal(expected, actual);
        }
        /// <summary>
        /// Verifies that <c>GetWord</c> returns "jot" for the location (2, 1, 1, 1, 3) on a
        /// 5x4 grid filled with the letters 'a' through 't'.
        /// </summary>
        public void GetWord_GivenTypicalWordLocation_ReturnsProperWord()
        {
            var grid = new[, ]
            {
                { 'a', 'b', 'c', 'd' },
                { 'e', 'f', 'g', 'h' },
                { 'i', 'j', 'k', 'l' },
                { 'm', 'n', 'o', 'p' },
                { 'q', 'r', 's', 't' }
            };
            var search   = new WordSearch(grid);
            var diagonal = new WordLocation(2, 1, 1, 1, 3);

            var word = search.GetWord(diagonal);

            Assert.Equal("jot", word);
        }
示例#19
0
 /// <summary>
 /// Decodes a location entry that is stored relative to the previous location.
 /// </summary>
 /// <param name="previous">The location decoded immediately before this one.</param>
 /// <param name="structureType">The flags describing how this entry is encoded.</param>
 /// <returns>
 /// A location whose word index and start are the previous values plus the decoded offsets, and
 /// whose length is either copied from <paramref name="previous"/> or read from the reader.
 /// </returns>
 private WordLocation DeserializeLocationData(WordLocation previous, LocationEntryStructure structureType)
 {
     // The reads below must happen in this order: word index, then start, then (optionally) length.
     var wordIndexOffset = this.DeserializeAbbreviatedData(
         structureType,
         LocationEntryStructure.WordIndexByte,
         LocationEntryStructure.WordIndexUInt16);
     var wordIndex = previous.WordIndex + wordIndexOffset;

     var startOffset = this.DeserializeAbbreviatedData(
         structureType,
         LocationEntryStructure.WordStartByte,
         LocationEntryStructure.WordStartUInt16);
     var start = previous.Start + startOffset;

     // When the flag is set no length was written, so reuse the previous one.
     var reusesLength = (structureType & LocationEntryStructure.LengthSameAsLast) == LocationEntryStructure.LengthSameAsLast;
     var length = reusesLength ? previous.Length : this.reader.ReadUInt16();

     return new WordLocation(wordIndex, start, length);
 }
示例#20
0
        /// <summary>
        /// Find out if any exception is matched to current word.
        /// Compare word to exceptions which correspond to the location of the word
        /// </summary>
        /// <param name="processContext">A processing context which lives until the process is finished,
        /// and stores data for the process</param>
        /// <param name="wordLocation">A place of a word relative to a sentence: at first, in the middle or at the end</param>
        /// <returns>
        /// <c>true</c> when at least one exception rule registered for <paramref name="wordLocation"/>
        /// matches <paramref name="processContext"/>; <c>false</c> when no rule matches, when no rules are
        /// registered for the location, or when the context, its configuration or its exception table is <c>null</c>.
        /// </returns>
        private bool IsAnyExceptionMatch(AnalysisProcessContext processContext, WordLocation wordLocation)
        {
            IList <StopSignExceptionRule> exceptions = null;

            // Any null link in the chain skips the lookup and leaves 'exceptions' null.
            // The boolean result is not needed: the null check below covers every case.
            processContext?.StopSignConfiguration?.Exceptions?.TryGetValue(wordLocation, out exceptions);

            // Any() is already false for an empty list, so no separate count check is required.
            return(exceptions != null && exceptions.Any(exception => exception.IsMatch(processContext)));
        }
示例#21
0
        /// <summary>
        /// Verifies the ordering operators and fluent comparison assertions for two
        /// <c>WordLocation</c> values that differ only in their second constructor argument.
        /// </summary>
        /// <param name="firstWordStart">The second constructor argument of the value expected to sort first.</param>
        /// <param name="secondWordStart">The second constructor argument of the value expected to sort second.</param>
        public void WordsWithLowerStartValues_ShouldBeLessThanWordsWithHigherStartValues(int firstWordStart, int secondWordStart)
        {
            var earlier = new WordLocation(1, firstWordStart, 100);
            var later   = new WordLocation(1, secondWordStart, 100);

            // Strict comparisons.
            (earlier < later).Should().BeTrue();
            (later < earlier).Should().BeFalse();
            (earlier > later).Should().BeFalse();
            (later > earlier).Should().BeTrue();

            // Inclusive comparisons.
            (earlier <= later).Should().BeTrue();
            (later <= earlier).Should().BeFalse();
            (earlier >= later).Should().BeFalse();
            (later >= earlier).Should().BeTrue();

            // Fluent comparison assertions.
            earlier.Should().BeLessThan(later);
            later.Should().BeGreaterThan(earlier);
            earlier.Should().NotBe(later);
        }
示例#22
0
文件: Word.cs 项目: pcstx/OA
        /// <summary>
        /// Increases the occurrence weight of this word by an amount that depends on where the
        /// word was found: 10 for a subject, 5 for a section or excerpt, 1 for anything else.
        /// </summary>
        /// <param name="location">Where the word occurred.</param>
        public void IncrementOccurence(WordLocation location)
        {
            //ScottW:
            //This may need to be Application specific since only blog has Excerpt.
            //It should not break the search logic, but just a little messy to drop it
            //in here.
            switch(location)
            {
                case WordLocation.Subject:
                    _occurenceWeight += 10;
                    break;

                // Section and Excerpt carry the same weight.
                case WordLocation.Section:
                case WordLocation.Excerpt:
                    _occurenceWeight += 5;
                    break;

                default:
                    _occurenceWeight++;
                    break;
            }
        }
示例#23
0
 /// <summary>
 /// Computes the signed difference between another word location and this one.
 /// </summary>
 /// <param name="wl">The other word location.</param>
 /// <returns><c>wl.Location</c> minus this instance's <c>Location</c>; negative when <paramref name="wl"/> has the smaller value.</returns>
 public int DistanceFrom(WordLocation wl)
 {
     var distance = wl.Location - Location;

     return distance;
 }
示例#24
0
        /// <summary>
        /// Tokenizes a string.
        /// </summary>
        /// <param name="text">The text to tokenize.</param>
        /// <param name="location">The location of the words that are extracted.</param>
        /// <returns>The tokens.</returns>
        /// <remarks>Only words that start before character index 65500 are extracted, because
        /// word positions are stored as 16-bit unsigned values.</remarks>
        /// <exception cref="ArgumentNullException">If <paramref name="text"/> is <c>null</c>.</exception>
        public static WordInfo[] Tokenize(string text, WordLocation location)
        {
            if(text == null) throw new ArgumentNullException("text");

            // Words starting at or after this index are not tokenized
            const int maxStartIndex = 65500;

            List<WordInfo> words = new List<WordInfo>(text.Length / 5); // Average 5 chars/word

            // Skip all leading splitChars
            ushort currentWordStart = SkipSplitChars(0, text);

            while(currentWordStart < text.Length && currentWordStart < maxStartIndex) {
                // Scan with an int: a word that runs past index 65535 would wrap a ushort
                // counter back to zero and produce a negative Substring length
                int wordEnd = currentWordStart;
                while(wordEnd < text.Length && !Tools.IsSplitChar(text[wordEnd])) wordEnd++;

                string w = text.Substring(currentWordStart, wordEnd - currentWordStart);
                w = Tools.RemoveDiacriticsAndPunctuation(w, true);
                if(!string.IsNullOrEmpty(w)) {
                    words.Add(new WordInfo(w, currentWordStart, (ushort)words.Count, location));
                }

                // Any later word would start beyond the limit, so stop here; this also keeps
                // the cast below from wrapping to zero when the word ends at index 65535.
                // NOTE(review): assumes SkipSplitChars never returns an index lower than the one it is given
                if(wordEnd >= maxStartIndex) break;

                currentWordStart = SkipSplitChars((ushort)(wordEnd + 1), text);
            }

            return words.ToArray();
        }
示例#25
0
文件: Token.cs 项目: lulzzz/lifti
 /// <summary>
 /// Appends another location to the list of locations recorded for this token.
 /// </summary>
 /// <param name="location">The location to append.</param>
 public void AddLocation(WordLocation location) => this.locations.Add(location);
示例#26
0
 /// <summary>
 /// Creates a match that wraps a single word location.
 /// </summary>
 /// <param name="original">The word location this match refers to.</param>
 public SingleWordLocationMatch(WordLocation original) => this.original = original;
示例#27
0
文件: Word.cs 项目: pcstx/OA
 /// <summary>
 /// Creates a word and registers its first occurrence.
 /// </summary>
 /// <param name="word">The text of the word.</param>
 /// <param name="location">Where the first occurrence was found; passed to <c>IncrementOccurence</c>.</param>
 public Word(string word, WordLocation location)
 {
     // Store the text first, then count the occurrence that caused this word to be created.
     this.word = word;

     this.IncrementOccurence(location);
 }
 /// <summary>
 /// Calls <c>WordLocation.GetInstance</c> with the supplied value and discards the result.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the method contains no assertion; presumably an attribute not visible here
 /// (e.g. an expected-exception attribute) defines the pass condition - confirm.
 /// </remarks>
 /// <param name="location">The location value handed to <c>GetInstance</c>.</param>
 public void StaticMethods_GetInstance_InvalidLocation(byte location)
 {
     WordLocation.GetInstance(location);
 }
 /// <summary>
 /// Asserts that a word location has the expected start and end coordinates.
 /// </summary>
 /// <param name="startRow">Expected value of <c>actual.StartRow</c>.</param>
 /// <param name="startCol">Expected value of <c>actual.StartCol</c>.</param>
 /// <param name="endRow">Expected value of <c>actual.EndRow</c>.</param>
 /// <param name="endCol">Expected value of <c>actual.EndCol</c>.</param>
 /// <param name="actual">The location under test.</param>
 private void AssertWordLocation(int startRow, int startCol, int endRow, int endCol, WordLocation actual)
 {
     // Checked in order, so the first mismatching coordinate is the one reported.
     Assert.Equal(startRow, actual.StartRow);
     Assert.Equal(startCol, actual.StartCol);
     Assert.Equal(endRow, actual.EndRow);
     Assert.Equal(endCol, actual.EndCol);
 }
 /// <summary>
 /// Verifies that <c>WordLocation.GetInstance</c> maps 1, 2 and 3 to
 /// <c>Title</c>, <c>Keywords</c> and <c>Content</c> respectively.
 /// </summary>
 public void StaticMethods_GetInstance()
 {
     const string failureMessage = "Wrong instance";

     Assert.AreEqual(WordLocation.Title, WordLocation.GetInstance(1), failureMessage);
     Assert.AreEqual(WordLocation.Keywords, WordLocation.GetInstance(2), failureMessage);
     Assert.AreEqual(WordLocation.Content, WordLocation.GetInstance(3), failureMessage);
 }
示例#31
0
        /// <summary>
        /// Works out how a location can be encoded relative to the previous one.
        /// </summary>
        /// <param name="lastLocation">The location written immediately before.</param>
        /// <param name="location">The location about to be written.</param>
        /// <returns>
        /// The structure flags plus the word index and start values to write. When either
        /// difference is negative the result is <c>Full</c> with the location's absolute values;
        /// otherwise the values are the differences from <paramref name="lastLocation"/>.
        /// </returns>
        private static (LocationEntryStructure structure, int wordIndexValue, int startValue) DeriveEntryStructureInformation(WordLocation lastLocation, WordLocation location)
        {
            var wordIndexDelta = location.WordIndex - lastLocation.WordIndex;
            var startDelta     = location.Start - lastLocation.Start;

            // Negative differences cannot be expressed relatively; fall back to absolute values.
            if (wordIndexDelta < 0 || startDelta < 0)
            {
                Debug.Fail("Warning: This shouldn't happen");
                return(LocationEntryStructure.Full, location.WordIndex, location.Start);
            }

            // Picks the flag for the smallest width that holds the delta. Returning Full when
            // neither width fits adds nothing, because Full is already the starting value.
            LocationEntryStructure WidthFlag(int delta, LocationEntryStructure byteFlag, LocationEntryStructure uint16Flag)
            {
                if (delta <= byte.MaxValue)
                {
                    return byteFlag;
                }

                return delta <= ushort.MaxValue ? uint16Flag : LocationEntryStructure.Full;
            }

            var flags = LocationEntryStructure.Full
                        | WidthFlag(wordIndexDelta, LocationEntryStructure.WordIndexByte, LocationEntryStructure.WordIndexUInt16)
                        | WidthFlag(startDelta, LocationEntryStructure.WordStartByte, LocationEntryStructure.WordStartUInt16);

            if (lastLocation.Length == location.Length)
            {
                flags |= LocationEntryStructure.LengthSameAsLast;
            }

            return(flags, wordIndexDelta, startDelta);
        }
示例#32
0
        /// <summary>
        /// Stores an occurrence.
        /// </summary>
        /// <param name="document">The document the occurrence is referred to.</param>
        /// <param name="firstCharIndex">The index of the first character of the word in the document.</param>
        /// <param name="wordIndex">The index of the word in the document.</param>
        /// <param name="location">The location of the word.</param>
        /// <remarks>Adding an occurrence is <b>O(n)</b>, where <b>n</b> is the number of occurrences 
        /// of the word already stored for the same document. If there were no occurrences previously stored, 
        /// the operation is <b>O(1)</b>.
        /// <paramref name="firstCharIndex"/> and <paramref name="wordIndex"/> are unsigned, so they
        /// cannot be negative and need no range validation.</remarks>
        /// <exception cref="ArgumentNullException">If <paramref name="document"/> is <c>null</c>.</exception>
        public void AddOccurrence(IDocument document, ushort firstCharIndex, ushort wordIndex, WordLocation location)
        {
            if(document == null) throw new ArgumentNullException("document");

            lock(occurrences) {
                // Single lookup instead of ContainsKey followed by the indexer
                SortedBasicWordInfoSet set;
                if(occurrences.TryGetValue(document, out set)) {
                    // Existing document
                    set.Add(new BasicWordInfo(firstCharIndex, wordIndex, location));
                }
                else {
                    // New document: fill the set before publishing it in the dictionary
                    set = new SortedBasicWordInfoSet();
                    set.Add(new BasicWordInfo(firstCharIndex, wordIndex, location));
                    occurrences.Add(document, set);
                }
            }
        }
示例#33
0
        /// <summary>
        /// Stores a word in the catalog, creating its entry when the word is not yet known,
        /// and records the occurrence on that entry.
        /// </summary>
        /// <param name="wordText">The word to store; it is lower-cased with the invariant culture before lookup.</param>
        /// <param name="document">The document the word occurs in.</param>
        /// <param name="firstCharIndex">The index of the first character of the word in the document the word occurs at.</param>
        /// <param name="wordIndex">The index of the word in the document.</param>
        /// <param name="location">The location of the word.</param>
        /// <param name="newWord">The newly created word, or <c>null</c> when the word already existed.</param>
        /// <param name="dumpedWord">The dumped data of the newly created word, or <c>null</c> when the word already existed.</param>
        /// <returns>The dumped word mapping data.</returns>
        /// <remarks>Storing a word in the index is <b>O(n log n)</b>, 
        /// where <b>n</b> is the number of words already in the index.</remarks>
        protected DumpedWordMapping StoreWord(string wordText, IDocument document, ushort firstCharIndex, ushort wordIndex,
            WordLocation location, out Word newWord, out DumpedWord dumpedWord)
        {
            wordText = wordText.ToLower(CultureInfo.InvariantCulture);

            lock(this) {
                Word entry = null;
                bool alreadyKnown = catalog.TryGetValue(wordText, out entry);

                if(alreadyKnown) {
                    // Nothing new was created, so there is nothing to hand back to the caller
                    newWord = null;
                    dumpedWord = null;
                }
                else {
                    // Use ZERO as initial ID, update when IndexStorer has stored the word
                    // A reference to this newly-created word must be passed outside this method
                    entry = new Word(0, wordText);
                    catalog.Add(wordText, entry);
                    newWord = entry;
                    dumpedWord = new DumpedWord(entry);
                }

                entry.AddOccurrence(document, firstCharIndex, wordIndex, location);

                DumpedWordMapping mapping = new DumpedWordMapping(entry.ID, document.ID, firstCharIndex, wordIndex, location.Location);
                return mapping;
            }
        }
示例#34
0
        /// <summary>
        /// Verifies that <c>WordLocation.GetInstance(4)</c> throws an
        /// <see cref="ArgumentOutOfRangeException"/> with the expected message text.
        /// </summary>
        public void StaticMethods_GetInstance_Location_High()
        {
            var thrown = Assert.Throws <ArgumentOutOfRangeException>(() => WordLocation.GetInstance(4));

            var actualMessage = thrown.Message;
            Assert.Equal("Invalid location.\r\nParameter name: location", actualMessage);
        }
示例#35
0
 /// <summary>
 /// Verifies that <c>WordLocation.GetInstance</c> throws an
 /// <see cref="ArgumentOutOfRangeException"/> for the supplied value.
 /// </summary>
 /// <param name="location">The location value expected to be rejected.</param>
 public void StaticMethods_GetInstance_InvalidLocation(byte location)
 {
     Assert.Throws <ArgumentOutOfRangeException>(() => WordLocation.GetInstance(location));
 }
示例#36
0
文件: Search.cs 项目: pcstx/OA
        // *********************************************************
        // Index
        //
        /// <summary>
        /// Adds the words found in a piece of content to a hashtable keyed by each word's
        /// lower-cased hash code. Words shorter than three characters and words whose hash
        /// appears in the ignore list are skipped; repeated words have their occurrence
        /// incremented instead of being added again.
        /// </summary>
        /// <param name="contentToIndex">The raw content to split into words.</param>
        /// <param name="words">The hashtable to populate; it is modified in place.</param>
        /// <param name="wordLocation">The location attributed to every word found in the content.</param>
        /// <param name="settingsID">The settings identifier used to load the ignore words.</param>
        /// <returns>The same <paramref name="words"/> instance that was passed in.</returns>
        protected static Hashtable Index(string contentToIndex, Hashtable words, WordLocation wordLocation, int settingsID)
        {
            // Load the ignore list first, then split the content into candidate words
            //
            Hashtable ignoreWords = GetIgnoreWords(settingsID);
            string[] wordsToIndex = CleanSearchTerms(contentToIndex);

            // Nothing to index
            //
            if (wordsToIndex.Length == 0)
            {
                return words;
            }

            foreach (string candidate in wordsToIndex)
            {
                // Skip missing entries and words shorter than three characters
                //
                if (candidate == null || candidate.Length < 3)
                {
                    continue;
                }

                // Words are identified by the hash code of their lower-cased form
                //
                int key = candidate.ToLower().GetHashCode();

                if (ignoreWords.ContainsKey(key))
                {
                    continue;
                }

                // Count a repeat on the existing entry, or register the word for the first time
                //
                if (words.Contains(key))
                {
                    ((Word) words[key]).IncrementOccurence(wordLocation);
                }
                else
                {
                    words.Add(key, new Word(candidate, wordLocation));
                }
            }

            return words;
        }