private readonly Dictionary<int, List<Token>> materializedWords = new Dictionary<int, List<Token>>(); // Pooling? Configuration for expected unique words per document?

public void MergeOrAdd(TokenHash hash, StringBuilder word, WordLocation location)
{
    if (this.materializedWords.TryGetValue(hash.HashValue, out var existingEntries))
    {
        foreach (var existingEntry in existingEntries)
        {
            if (word.SequenceEqual(existingEntry.Value))
            {
                existingEntry.AddLocation(location);
                return;
            }
        }

        existingEntries.Add(new Token(word.ToString(), location));
    }
    else
    {
        this.materializedWords.Add(
            hash.HashValue,
            new List<Token>()
            {
                new Token(word.ToString(), location)
            });
    }
}
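// MergeOrAdd above buckets tokens by hash value and then performs an exact value
// comparison inside the bucket, so two different words that collide on the same hash
// are still stored as separate tokens. A minimal, self-contained sketch of the same
// technique using only standard types (the TokenHash, Token and WordLocation types
// from the snippet are not reproduced here; this is an illustration, not the
// library's implementation):
using System;
using System.Collections.Generic;

class HashBucketedWordSet
{
    private readonly Dictionary<int, List<string>> buckets = new Dictionary<int, List<string>>();

    // Returns true if the word was added, false if it was already present.
    public bool Add(string word)
    {
        int hash = word.GetHashCode();
        if (this.buckets.TryGetValue(hash, out var entries))
        {
            // Same hash: confirm the value really matches before treating it as a duplicate.
            foreach (var entry in entries)
            {
                if (string.Equals(entry, word, StringComparison.Ordinal))
                {
                    return false;
                }
            }

            entries.Add(word);
        }
        else
        {
            this.buckets.Add(hash, new List<string> { word });
        }

        return true;
    }
}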
public Token(string token, WordLocation location)
{
    this.locations = new List<WordLocation> { location };
    this.Value = token;
}
private void ReadLocations(int locationCount, List<WordLocation> locationMatches)
{
    WordLocation? lastLocation = null;
    for (var locationMatch = 0; locationMatch < locationCount; locationMatch++)
    {
        var structureType = (LocationEntryStructure)this.reader.ReadByte();
        WordLocation location;
        if (structureType == LocationEntryStructure.Full)
        {
            location = new WordLocation(this.reader.ReadInt32(), this.reader.ReadInt32(), this.reader.ReadUInt16());
        }
        else
        {
            if (lastLocation == null)
            {
                throw new DeserializationException(ExceptionMessages.MalformedDataExpectedFullLocationEntry);
            }

            location = this.DeserializeLocationData(lastLocation.Value, structureType);
        }

        locationMatches.Add(location);
        lastLocation = location;
    }
}
public void GetWord_GivenTooLongWord_ThrowsProperException()
{
    var w = new WordSearch(ThreeByFourGrid());
    var l = new WordLocation(0, 0, 1, 1, 10);
    var e = Assert.Throws<ArgumentException>(() => w.GetWord(l));
    Assert.Equal(WordSearch.LocationOutOfBoundsError, e.Message);
}
public void FormatWithSolutions_WithSingleCharFormatterWithOneSolution_FormatsCorrectly()
{
    var f = new WordSearchFormatter(new TestSolutionFormatter());
    var expected = "* b c\nd * f\ng h *\nj k l";
    var location = new WordLocation(0, 0, 1, 1, 3);
    Assert.Equal(expected, f.Format(ThreeByFourWordSearch(), new[] { location }));
}
public void FormatWithSolutions_WithSingleCharFormatterTooLittleSpacing_ThrowsProperException()
{
    var f = new WordSearchFormatter(new ParenthesesSolutionFormatter(), 0);
    var location = new WordLocation(0, 0, 1, 1, 3);
    var e = Assert.Throws<FormatException>(() => f.Format(ThreeByFourWordSearch(), new[] { location }));
    Assert.Equal(WordSearchFormatter.TooLittleSpacingError, e.Message);
}
public void WordsWithTheSameValues_ShouldBeEqual()
{
    var firstWord = new WordLocation(1, 3, 100);
    var secondWord = new WordLocation(1, 3, 100);

    (firstWord == secondWord).Should().BeTrue();
    (firstWord != secondWord).Should().BeFalse();
    firstWord.Should().Be(secondWord);
}
public void GetWord_GivenOneLengthWordLocation_ReturnsTheSingleCharacter()
{
    var w = new WordSearch(ThreeByFourGrid());
    w.Chars[1, 1] = 'A';
    var l = new WordLocation(1, 1, 1, 1, 1);
    Assert.Equal("A", w.GetWord(l));
}
public void WordsWithDifferentValues_ShouldNotBeEqual()
{
    var firstWord = new WordLocation(1, 3, 100);
    var secondWord = new WordLocation(1, 4, 100);

    (firstWord == secondWord).Should().BeFalse();
    (firstWord != secondWord).Should().BeTrue();
    firstWord.Should().NotBe(secondWord);
}
public void FormatWithSolutions_WithLongFormatterTooLittleSpacing_ThrowsProperException()
{
    var f = new WordSearchFormatter(new TestSolutionFormatter { Length = 21 }, 9);
    var location = new WordLocation(0, 0, 1, 1, 3);
    var e = Assert.Throws<FormatException>(() => f.Format(ThreeByFourWordSearch(), new[] { location }));
    Assert.Equal(WordSearchFormatter.TooLittleSpacingError, e.Message);
}
public static WordLocation1 ConvertToDto(WordLocation w)
{
    return new WordLocation1()
    {
        BookSenteceID = w.BookSenteceID,
        Counter = w.Counter,
        ID = w.ID,
        SearchId = w.SearchId,
        SubjectId = w.SubjectId,
    };
}
public void StaticInstances_Title()
{
    WordLocation loc1 = WordLocation.Title;
    WordLocation loc2 = WordLocation.Title;

    Assert.Equal("Title", loc1.ToString());
    Assert.Equal("Title", loc2.ToString());
    Assert.True(loc1 == loc2, "loc1 should equal loc2");
    Assert.True(loc1.Equals(loc2), "loc1 should equal loc2");
    Assert.NotSame(loc2, loc1);
}
public void StaticInstances_Title()
{
    WordLocation loc1 = WordLocation.Title;
    WordLocation loc2 = WordLocation.Title;

    Assert.AreEqual("Title", loc1.ToString(), "Invalid string representation");
    Assert.AreEqual("Title", loc2.ToString(), "Invalid string representation");
    Assert.IsTrue(loc1 == loc2, "loc1 should equal loc2");
    Assert.IsTrue(loc1.Equals(loc2), "loc1 should equal loc2");
    Assert.AreNotSame(loc2, loc1, "loc1 should not be the same object as loc2");
}
public void StaticInstances_Keywords()
{
    WordLocation loc1 = WordLocation.Keywords;
    WordLocation loc2 = WordLocation.Keywords;

    Assert.Equal("Keywords", loc1.ToString());
    Assert.Equal("Keywords", loc2.ToString());
    Assert.True(loc1 == loc2, "loc1 should equal loc2");
    Assert.True(loc1.Equals(loc2), "loc1 should equal loc2");
    Assert.NotSame(loc2, loc1);
}
public void StaticInstances_Content()
{
    WordLocation loc1 = WordLocation.Content;
    WordLocation loc2 = WordLocation.Content;

    Assert.Equal("Content", loc1.ToString());
    Assert.Equal("Content", loc2.ToString());
    Assert.True(loc1 == loc2, "loc1 should equal loc2");
    Assert.True(loc1.Equals(loc2), "loc1 should equal loc2");
    Assert.NotSame(loc2, loc1);
}
public void FormatWithSolutions_WithLongFormatter_FormatsCorrectly()
{
    var f = new WordSearchFormatter(new TestSolutionFormatter { Length = 3 }, 3);
    var location = new WordLocation(0, 0, 1, 1, 3);
    var expected = "*** b c\n" +
                   "d *** f\n" +
                   "g h ***\n" +
                   "j k l";
    Assert.Equal(expected, f.Format(ThreeByFourWordSearch(), new[] { location }));
}
public void FormatWithSolutions_WithEvenFormatter_PlacesSpaceAtBeginning()
{
    var f = new WordSearchFormatter(new TestSolutionFormatter { Length = 2 });
    var location = new WordLocation(0, 0, 1, 1, 3);
    var expected = "**b c\n" +
                   "d** f\n" +
                   "g h**\n" +
                   "j k l";
    Assert.Equal(expected, f.Format(ThreeByFourWordSearch(), new[] { location }));
}
public void GetWord_GivenTypicalWordLocation_ReturnsProperWord()
{
    var w = new WordSearch(new[,]
    {
        { 'a', 'b', 'c', 'd' },
        { 'e', 'f', 'g', 'h' },
        { 'i', 'j', 'k', 'l' },
        { 'm', 'n', 'o', 'p' },
        { 'q', 'r', 's', 't' }
    });
    var l = new WordLocation(2, 1, 1, 1, 3);
    Assert.Equal("jot", w.GetWord(l));
}
private WordLocation DeserializeLocationData(WordLocation previous, LocationEntryStructure structureType)
{
    return new WordLocation(
        previous.WordIndex + this.DeserializeAbbreviatedData(
            structureType,
            LocationEntryStructure.WordIndexByte,
            LocationEntryStructure.WordIndexUInt16),
        previous.Start + this.DeserializeAbbreviatedData(
            structureType,
            LocationEntryStructure.WordStartByte,
            LocationEntryStructure.WordStartUInt16),
        ((structureType & LocationEntryStructure.LengthSameAsLast) == LocationEntryStructure.LengthSameAsLast)
            ? previous.Length
            : this.reader.ReadUInt16());
}
/// <summary>
/// Determines whether any exception rule matches the current word by comparing it
/// against the exceptions that correspond to the word's location.
/// </summary>
/// <param name="processContext">A processing context which lives until the process is finished,
/// and stores data for the process.</param>
/// <param name="wordLocation">The place of the word relative to its sentence: at the start, in the middle or at the end.</param>
/// <returns><c>true</c> if any exception rule matches; otherwise <c>false</c>.</returns>
private bool IsAnyExceptionMatch(AnalysisProcessContext processContext, WordLocation wordLocation)
{
    IList<StopSignExceptionRule> exceptions = null;
    bool isAnyExceptionMatch = false;
    bool? exceptionsFound = processContext?.StopSignConfiguration?.Exceptions?.TryGetValue(wordLocation, out exceptions);
    exceptionsFound = exceptionsFound.HasValue && exceptions != null && exceptions.Count() > 0;
    if (exceptionsFound == true)
    {
        isAnyExceptionMatch = exceptions.Any(exception => exception.IsMatch(processContext));
    }

    return isAnyExceptionMatch;
}
public void WordsWithLowerStartValues_ShouldBeLessThanWordsWithHigherStartValues(int firstWordStart, int secondWordStart)
{
    var firstWord = new WordLocation(1, firstWordStart, 100);
    var secondWord = new WordLocation(1, secondWordStart, 100);

    (firstWord < secondWord).Should().BeTrue();
    (secondWord < firstWord).Should().BeFalse();
    (firstWord > secondWord).Should().BeFalse();
    (secondWord > firstWord).Should().BeTrue();
    (firstWord <= secondWord).Should().BeTrue();
    (secondWord <= firstWord).Should().BeFalse();
    (firstWord >= secondWord).Should().BeFalse();
    (secondWord >= firstWord).Should().BeTrue();
    firstWord.Should().BeLessThan(secondWord);
    secondWord.Should().BeGreaterThan(firstWord);
    firstWord.Should().NotBe(secondWord);
}
public void IncrementOccurence(WordLocation location)
{
    // ScottW:
    // This may need to be Application specific since only blog has Excerpt.
    // It should not break the search logic, but just a little messy to drop it
    // in here.
    switch (location)
    {
        case WordLocation.Section:
            _occurenceWeight += 5;
            break;
        case WordLocation.Excerpt:
            _occurenceWeight += 5;
            break;
        case WordLocation.Subject:
            _occurenceWeight += 10;
            break;
        default:
            _occurenceWeight++;
            break;
    }
}
public int DistanceFrom(WordLocation wl)
{
    return wl.Location - Location;
}
/// <summary>
/// Tokenizes a string.
/// </summary>
/// <param name="text">The text to tokenize.</param>
/// <param name="location">The location of the words that are extracted.</param>
/// <returns>The tokens.</returns>
/// <exception cref="ArgumentNullException">If <paramref name="text"/> is <c>null</c>.</exception>
public static WordInfo[] Tokenize(string text, WordLocation location)
{
    if(text == null) throw new ArgumentNullException("text");

    List<WordInfo> words = new List<WordInfo>(text.Length / 5); // Average 5 chars/word
    ushort currentIndex = 0, currentWordStart;

    // Skip all trailing splitChars
    currentIndex = SkipSplitChars(0, text);
    currentWordStart = currentIndex;

    while(currentIndex < text.Length && currentIndex < 65500) {
        while(currentIndex < text.Length && !Tools.IsSplitChar(text[currentIndex])) currentIndex++;

        string w = text.Substring(currentWordStart, currentIndex - currentWordStart);
        w = Tools.RemoveDiacriticsAndPunctuation(w, true);
        if(!string.IsNullOrEmpty(w)) {
            words.Add(new WordInfo(w, currentWordStart, (ushort)words.Count, location));
        }

        currentIndex = SkipSplitChars((ushort)(currentIndex + 1), text);
        currentWordStart = currentIndex;
    }

    return words.ToArray();
}
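// A minimal usage sketch for the tokenizer above. The snippet does not show its
// containing class, so the static holder (Tools) and the WordInfo member names used
// below (Text, FirstCharIndex, WordIndex) are assumptions, inferred from the calls
// and constructor arguments visible in the code:
WordInfo[] words = Tools.Tokenize("The quick brown fox", WordLocation.Content);

foreach(WordInfo word in words) {
    // Each WordInfo is assumed to expose the normalized text, the index of its first
    // character, and its ordinal position within the text.
    Console.WriteLine("{0} @ {1} (#{2})", word.Text, word.FirstCharIndex, word.WordIndex);
}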
public void AddLocation(WordLocation location)
{
    this.locations.Add(location);
}
public SingleWordLocationMatch(WordLocation original)
{
    this.original = original;
}
public Word(string word, WordLocation location)
{
    this.word = word;
    IncrementOccurence(location);
}
public void StaticMethods_GetInstance_InvalidLocation(byte location)
{
    WordLocation.GetInstance(location);
}
private void AssertWordLocation(int startRow, int startCol, int endRow, int endCol, WordLocation actual)
{
    Assert.Equal(startRow, actual.StartRow);
    Assert.Equal(startCol, actual.StartCol);
    Assert.Equal(endRow, actual.EndRow);
    Assert.Equal(endCol, actual.EndCol);
}
public void StaticMethods_GetInstance()
{
    Assert.AreEqual(WordLocation.Title, WordLocation.GetInstance(1), "Wrong instance");
    Assert.AreEqual(WordLocation.Keywords, WordLocation.GetInstance(2), "Wrong instance");
    Assert.AreEqual(WordLocation.Content, WordLocation.GetInstance(3), "Wrong instance");
}
private static (LocationEntryStructure structure, int wordIndexValue, int startValue) DeriveEntryStructureInformation(WordLocation lastLocation, WordLocation location)
{
    var relativeWordIndex = location.WordIndex - lastLocation.WordIndex;
    var relativeStart = location.Start - lastLocation.Start;
    if (relativeWordIndex < 0 || relativeStart < 0)
    {
        Debug.Fail("Warning: This shouldn't happen");
        return (LocationEntryStructure.Full, location.WordIndex, location.Start);
    }

    var entryStructure = LocationEntryStructure.Full;
    if (relativeWordIndex <= byte.MaxValue)
    {
        entryStructure |= LocationEntryStructure.WordIndexByte;
    }
    else if (relativeWordIndex <= ushort.MaxValue)
    {
        entryStructure |= LocationEntryStructure.WordIndexUInt16;
    }

    if (relativeStart <= byte.MaxValue)
    {
        entryStructure |= LocationEntryStructure.WordStartByte;
    }
    else if (relativeStart <= ushort.MaxValue)
    {
        entryStructure |= LocationEntryStructure.WordStartUInt16;
    }

    if (lastLocation.Length == location.Length)
    {
        entryStructure |= LocationEntryStructure.LengthSameAsLast;
    }

    return (entryStructure, relativeWordIndex, relativeStart);
}
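// The serialization snippets above (ReadLocations, DeserializeLocationData and
// DeriveEntryStructureInformation) implement a delta encoding: every location after
// the first is stored relative to the previous one, and a structure byte records
// whether each delta fits in a byte or a ushort and whether the length is unchanged.
// Below is a minimal, self-contained sketch of the structure-derivation step only.
// The [Flags] enum values are hypothetical (the real LocationEntryStructure values
// are not shown above) and the negative-delta guard is omitted.
using System;

[Flags]
enum EntryStructure : byte
{
    Full = 0,
    WordIndexByte = 1,
    WordIndexUInt16 = 2,
    WordStartByte = 4,
    WordStartUInt16 = 8,
    LengthSameAsLast = 16,
}

static class DeltaEncodingSketch
{
    // Mirrors the decision logic of DeriveEntryStructureInformation for two sample locations.
    public static (EntryStructure structure, int wordIndexDelta, int startDelta) Derive(
        (int WordIndex, int Start, ushort Length) last,
        (int WordIndex, int Start, ushort Length) current)
    {
        var structure = EntryStructure.Full;
        int wordIndexDelta = current.WordIndex - last.WordIndex;
        int startDelta = current.Start - last.Start;

        if (wordIndexDelta <= byte.MaxValue) structure |= EntryStructure.WordIndexByte;
        else if (wordIndexDelta <= ushort.MaxValue) structure |= EntryStructure.WordIndexUInt16;

        if (startDelta <= byte.MaxValue) structure |= EntryStructure.WordStartByte;
        else if (startDelta <= ushort.MaxValue) structure |= EntryStructure.WordStartUInt16;

        if (last.Length == current.Length) structure |= EntryStructure.LengthSameAsLast;

        return (structure, wordIndexDelta, startDelta);
    }

    static void Main()
    {
        // Previous location: word #10 starting at char 100, length 5.
        // Current location: word #12 starting at char 130, length 5.
        // Both deltas fit in a byte and the length is unchanged, so only the structure
        // byte plus two single-byte deltas need to be written.
        var result = Derive((10, 100, 5), (12, 130, 5));
        Console.WriteLine(result); // (WordIndexByte, WordStartByte, LengthSameAsLast, 2, 30)
    }
}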
/// <summary>
/// Stores an occurrence.
/// </summary>
/// <param name="document">The document the occurrence is referred to.</param>
/// <param name="firstCharIndex">The index of the first character of the word in the document.</param>
/// <param name="wordIndex">The index of the word in the document.</param>
/// <param name="location">The location of the word.</param>
/// <remarks>Adding an occurrence is <b>O(n)</b>, where <b>n</b> is the number of occurrences
/// of the word already stored for the same document. If there were no occurrences previously stored,
/// the operation is <b>O(1)</b>.</remarks>
/// <exception cref="ArgumentNullException">If <paramref name="document"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentOutOfRangeException">If <paramref name="firstCharIndex"/> or <paramref name="wordIndex"/> are less than zero.</exception>
public void AddOccurrence(IDocument document, ushort firstCharIndex, ushort wordIndex, WordLocation location)
{
    if(document == null) throw new ArgumentNullException("document");
    if(firstCharIndex < 0) throw new ArgumentOutOfRangeException("firstCharIndex", "Invalid first char index: must be greater than or equal to zero");
    if(wordIndex < 0) throw new ArgumentOutOfRangeException("wordIndex", "Invalid word index: must be greater than or equal to zero");

    lock(occurrences) {
        if(occurrences.ContainsKey(document)) {
            // Existing document
            occurrences[document].Add(new BasicWordInfo(firstCharIndex, wordIndex, location));
        }
        else {
            // New document
            SortedBasicWordInfoSet set = new SortedBasicWordInfoSet();
            set.Add(new BasicWordInfo(firstCharIndex, wordIndex, location));
            occurrences.Add(document, set);
        }
    }
}
/// <summary>
/// Stores a word in the catalog.
/// </summary>
/// <param name="wordText">The word to store.</param>
/// <param name="document">The document the word occurs in.</param>
/// <param name="firstCharIndex">The index of the first character of the word in the document the word occurs at.</param>
/// <param name="wordIndex">The index of the word in the document.</param>
/// <param name="location">The location of the word.</param>
/// <param name="newWord">The new word, or <c>null</c>.</param>
/// <param name="dumpedWord">The dumped word data, or <c>null</c>.</param>
/// <returns>The dumped word mapping data.</returns>
/// <remarks>Storing a word in the index is <b>O(n log n)</b>,
/// where <b>n</b> is the number of words already in the index.</remarks>
protected DumpedWordMapping StoreWord(string wordText, IDocument document, ushort firstCharIndex, ushort wordIndex, WordLocation location, out Word newWord, out DumpedWord dumpedWord)
{
    wordText = wordText.ToLower(CultureInfo.InvariantCulture);

    lock(this) {
        Word word = null;

        if(!catalog.TryGetValue(wordText, out word)) {
            // Use ZERO as initial ID, update when IndexStorer has stored the word
            // A reference to this newly-created word must be passed outside this method
            word = new Word(0, wordText);
            catalog.Add(wordText, word);
            newWord = word;
            dumpedWord = new DumpedWord(word);
        }
        else {
            newWord = null;
            dumpedWord = null;
        }

        word.AddOccurrence(document, firstCharIndex, wordIndex, location);

        return new DumpedWordMapping(word.ID, document.ID, firstCharIndex, wordIndex, location.Location);
    }
}
public void StaticMethods_GetInstance_Location_High()
{
    var ex = Assert.Throws<ArgumentOutOfRangeException>(() => WordLocation.GetInstance(4));
    Assert.Equal("Invalid location.\r\nParameter name: location", ex.Message);
}
public void StaticMethods_GetInstance_InvalidLocation(byte location)
{
    Assert.Throws<ArgumentOutOfRangeException>(() => WordLocation.GetInstance(location));
}
// *********************************************************
// Index
//
/// <summary>
/// Populates a hashtable of words that will be entered into
/// the forums search barrel.
/// </summary>
///
protected static Hashtable Index(string contentToIndex, Hashtable words, WordLocation wordLocation, int settingsID)
{
    // Get the ignore words
    //
    Hashtable ignoreWords = GetIgnoreWords(settingsID);

    // Get a string array of the words we want to index
    //
    string[] wordsToIndex = CleanSearchTerms(contentToIndex);

    // Ensure we have data to work with
    //
    if (wordsToIndex.Length == 0)
        return words;

    // Operate on each word in stringArrayOfWords
    //
    foreach (string word in wordsToIndex)
    {
        if (word != null && word.Length >= 3)
        {
            // Get the hash code for the word
            //
            int hashedWord = word.ToLower().GetHashCode();

            // Add the word to our words Hashtable
            //
            if (!ignoreWords.ContainsKey(hashedWord))
            {
                if (!words.Contains(hashedWord))
                    words.Add(hashedWord, new Word(word, wordLocation));
                else
                    ((Word)words[hashedWord]).IncrementOccurence(wordLocation);
            }
        }
    }

    return words;
}
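// A minimal usage sketch for the indexing helper above. Index is protected static,
// so the call is shown from a hypothetical derived class; the base class name
// (SearchIndexer) is an assumption, and only WordLocation members that appear in the
// snippets above (here Subject) are used:
using System.Collections;

public class ForumSubjectIndexer : SearchIndexer
{
    public static Hashtable IndexSubject(string subject, int settingsID)
    {
        // WordLocation.Subject gives subject words the +10 weighting applied by
        // IncrementOccurence above; repeated words increment the existing entry.
        Hashtable words = new Hashtable();
        return Index(subject, words, WordLocation.Subject, settingsID);
    }
}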