public EnglishPluralityStringNormaliser(IEnumerable<PluralEntry> plurals, IStringNormaliser optionalPreNormaliser, PreNormaliserWorkOptions preNormaliserWork)
        {
            if (plurals == null)
            {
                throw new ArgumentNullException(nameof(plurals));
            }
            var allPreNormaliserOptions = (PreNormaliserWorkOptions)0;

            foreach (PreNormaliserWorkOptions option in Enum.GetValues(typeof(PreNormaliserWorkOptions)))
            {
                allPreNormaliserOptions = allPreNormaliserOptions | option;
            }
            if ((preNormaliserWork & allPreNormaliserOptions) != preNormaliserWork)
            {
                throw new ArgumentOutOfRangeException(nameof(preNormaliserWork));
            }

            var pluralsTidied = new List<PluralEntry>();

            foreach (var plural in plurals)
            {
                if (plural == null)
                {
                    throw new ArgumentException("Null reference encountered in plurals set");
                }
                pluralsTidied.Add(plural);
            }

            // Although we don't need the plurals once the normaliser has been generated in normal operation, if the instance is to be serialised then we need to record
            // them so that the normaliser can be re-generated at deserialisation (as the normaliser that is generated cannot be serialised - see GetObjectData)
            _plurals               = pluralsTidied;
            _normaliser            = GenerateNormaliser();
            _optionalPreNormaliser = optionalPreNormaliser;
            _preNormaliserWork     = preNormaliserWork;
        }
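A hedged usage sketch follows (DefaultStringNormaliser and the PreNormaliserWorkOptions flag names are assumptions based on the FullTextIndexer library, not confirmed by the code above):

        // Usage sketch: wrap a basic normaliser so that plural and singular forms of a word
        // normalise to the same value (flag and type names assumed, as noted above)
        var normaliser = new EnglishPluralityStringNormaliser(
            new DefaultStringNormaliser(),
            PreNormaliserWorkOptions.PreNormaliserLowerCases | PreNormaliserWorkOptions.PreNormaliserTrims
            );

        // IStringNormaliser acts as an IEqualityComparer<string>, so "cats" and "cat" compare as equal
        var areEqual = normaliser.Equals("cats", "cat");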
 public TextFileBasedTSTWordLookup(FileInfo file, IStringNormaliser stringNormaliser)
 {
     _file             = file ?? throw new ArgumentNullException(nameof(file));
     _stringNormaliser = stringNormaliser ?? throw new ArgumentNullException(nameof(stringNormaliser));
     _data             = new Lazy<TernarySearchTreeDictionary<bool>>(
         GenerateLookup,
         true                 // isThreadSafe
         );
 }
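The true flag maps to LazyThreadSafetyMode.ExecutionAndPublication, so GenerateLookup runs at most once even under concurrent access. A minimal sketch of the same pattern:

     // Minimal Lazy<T> sketch: the factory executes exactly once, on first access to .Value,
     // even if multiple threads race to it (isThreadSafe: true)
     var expensive = new Lazy<Dictionary<string, bool>>(
         () => new Dictionary<string, bool> { { "example", true } },
         isThreadSafe: true
         );
     var lookup = expensive.Value; // the factory runs here, once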
        private TernarySearchTreeDictionary(Node rootIfAny, IStringNormaliser keyNormaliser)
        {
            if (keyNormaliser == null)
            {
                throw new ArgumentNullException(nameof(keyNormaliser));
            }

            _root          = rootIfAny;
            _keyNormaliser = keyNormaliser;
        }
        public AuthUserBaseRepository(IOptions<MongoDbSettings> dbSettings, IStringNormaliser normaliser)
        {
            if (dbSettings == null)
            {
                throw new ArgumentNullException(nameof(dbSettings));
            }

            // The MongoClient constructor never returns null, so the database and collection
            // references can be taken unconditionally
            var client = new MongoClient(dbSettings.Value.ConnectionString);
            _database = client.GetDatabase(dbSettings.Value.Database);
            _users    = _database.GetCollection<TAuthUser>("AuthUsers");
            _normaliser = normaliser ?? throw new ArgumentNullException(nameof(normaliser));
        }
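For reference, a minimal sketch of the settings type this constructor binds against (only the two properties used above are shown; the real class may define more):

        // Hypothetical shape of MongoDbSettings, inferred from the usages above
        public class MongoDbSettings
        {
            public string ConnectionString { get; set; }
            public string Database { get; set; }
        }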
        public TernarySearchTreeDictionary(IEnumerable<KeyValuePair<string, TValue>> data, IStringNormaliser keyNormaliser)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }
            if (keyNormaliser == null)
            {
                throw new ArgumentNullException(nameof(keyNormaliser));
            }

            _root          = Add(null, keyNormaliser, data);
            _keyNormaliser = keyNormaliser;
        }
 public IndexGenerator(
     NonNullImmutableList<ContentRetriever<TSource, TKey>> contentRetrievers,
     IEqualityComparer<TKey> dataKeyComparer,
     IStringNormaliser sourceStringComparer,
     ITokenBreaker tokenBreaker,
     IndexGenerator.WeightedEntryCombiner weightedEntryCombiner,
     bool captureSourceLocations,
     ILogger logger)
 {
     _contentRetrievers      = contentRetrievers ?? throw new ArgumentNullException(nameof(contentRetrievers));
     _dataKeyComparer        = dataKeyComparer ?? throw new ArgumentNullException(nameof(dataKeyComparer));
     _sourceStringComparer   = sourceStringComparer ?? throw new ArgumentNullException(nameof(sourceStringComparer));
     _tokenBreaker           = tokenBreaker ?? throw new ArgumentNullException(nameof(tokenBreaker));
     _weightedEntryCombiner  = weightedEntryCombiner ?? throw new ArgumentNullException(nameof(weightedEntryCombiner));
     _captureSourceLocations = captureSourceLocations;
     _logger = logger ?? throw new ArgumentNullException(nameof(logger));
 }
        public PostIndexGenerator(ITokenBreaker tokenBreaker, IStringNormaliser sourceStringComparer, ILogger logger)
        {
            if (tokenBreaker == null)
            {
                throw new ArgumentNullException(nameof(tokenBreaker));
            }
            if (sourceStringComparer == null)
            {
                throw new ArgumentNullException(nameof(sourceStringComparer));
            }
            if (logger == null)
            {
                throw new ArgumentNullException(nameof(logger));
            }

            _tokenBreaker         = tokenBreaker;
            _sourceStringComparer = sourceStringComparer;
            _stopWordLookup       = new HashSet<string>(Constants.GetStopWords("en"), _sourceStringComparer);      // Using the string normaliser as the set's comparer means membership is tested against normalised forms, so already-normalised tokens can be looked up directly
            _logger = logger;
        }
 public AutomatedIndexGeneratorFactory(
     Func<TSource, TKey> keyRetriever,
     IEqualityComparer<TKey> keyComparer,
     IStringNormaliser stringNormaliser,
     ITokenBreaker tokenBreaker,
     IndexGenerator.WeightedEntryCombiner weightedEntryCombiner,
     WeightDeterminerGenerator brokenTokenWeightDeterminerGenerator,
     PropertyInfo optionalPropertyForFirstContentRetriever,
     bool captureSourceLocations,
     ILogger logger)
 {
     _keyRetriever          = keyRetriever ?? throw new ArgumentNullException(nameof(keyRetriever));
     _keyComparer           = keyComparer ?? throw new ArgumentNullException(nameof(keyComparer));
     _stringNormaliser      = stringNormaliser ?? throw new ArgumentNullException(nameof(stringNormaliser));
     _tokenBreaker          = tokenBreaker ?? throw new ArgumentNullException(nameof(tokenBreaker));
     _weightedEntryCombiner = weightedEntryCombiner ?? throw new ArgumentNullException(nameof(weightedEntryCombiner));
     _brokenTokenWeightDeterminerGenerator     = brokenTokenWeightDeterminerGenerator ?? throw new ArgumentNullException(nameof(brokenTokenWeightDeterminerGenerator));
     _optionalPropertyForFirstContentRetriever = optionalPropertyForFirstContentRetriever;
     _captureSourceLocations = captureSourceLocations;
     _logger = logger ?? throw new ArgumentNullException(nameof(logger));
 }
 private AutomatedIndexGeneratorFactoryBuilder(
     Func<TSource, TKey> keyRetrieverOverride,
     IEqualityComparer<TKey> keyComparerOverride,
     IStringNormaliser stringNormaliserOverride,
     ITokenBreaker tokenBreaker,
     IndexGenerator.WeightedEntryCombiner weightedEntryCombinerOverride,
     NonNullImmutableList<IModifyMatchWeights> propertyWeightAppliers,
     AutomatedIndexGeneratorFactory<TSource, TKey>.WeightDeterminerGenerator tokenWeightDeterminerGeneratorOverride,
     PropertyInfo optionalPropertyForFirstContentRetriever,
     bool captureSourceLocations,
     ILogger loggerOverride)
 {
     _keyRetrieverOverride                     = keyRetrieverOverride;
     _keyComparerOverride                      = keyComparerOverride;
     _stringNormaliserOverride                 = stringNormaliserOverride;
     _tokenBreaker                             = tokenBreaker ?? throw new ArgumentNullException(nameof(tokenBreaker));
     _weightedEntryCombinerOverride            = weightedEntryCombinerOverride;
     _propertyWeightAppliers                   = propertyWeightAppliers ?? throw new ArgumentNullException(nameof(propertyWeightAppliers));
     _tokenWeightDeterminerGeneratorOverride   = tokenWeightDeterminerGeneratorOverride;
     _optionalPropertyForFirstContentRetriever = optionalPropertyForFirstContentRetriever;
     _captureSourceLocations                   = captureSourceLocations;
     _loggerOverride                           = loggerOverride;
 }
        private static IndexData<TKey> RebuildIndexFromMatchDataAndReferencedKeys(
            BinaryReader reader,
            ImmutableList<TKey> keys,
            IStringNormaliser stringNormaliser,
            IEqualityComparer<TKey> keyComparer)
        {
            if (reader == null)
            {
                throw new ArgumentNullException(nameof(reader));
            }
            if (keys == null)
            {
                throw new ArgumentNullException(nameof(keys));
            }
            if (stringNormaliser == null)
            {
                throw new ArgumentNullException(nameof(stringNormaliser));
            }
            if (keyComparer == null)
            {
                throw new ArgumentNullException(nameof(keyComparer));
            }

            var numberOfTokens  = reader.ReadInt32();
            var matchDictionary = new Dictionary<string, NonNullImmutableList<WeightedEntry<TKey>>>(stringNormaliser);

            for (var tokenIndex = 0; tokenIndex < numberOfTokens; tokenIndex++)
            {
                var token = reader.ReadString();
                var numberOfMatchesForToken = reader.ReadInt32();
                var matches = NonNullImmutableList<WeightedEntry<TKey>>.Empty;
                for (var matchIndex = 0; matchIndex < numberOfMatchesForToken; matchIndex++)
                {
                    var keyIndex = reader.ReadInt32();
                    if ((keyIndex < 0) || (keyIndex >= keys.Count))
                    {
                        throw new Exception("Invalid keyIndex (" + keyIndex + ")");
                    }

                    var matchWeight = reader.ReadSingle();

                    var numberOfSourceLocations = reader.ReadInt32();
                    NonNullImmutableList<SourceFieldLocation> sourceLocationsIfRecorded;
                    if (numberOfSourceLocations == 0)
                    {
                        sourceLocationsIfRecorded = null;
                    }
                    else
                    {
                        sourceLocationsIfRecorded = NonNullImmutableList<SourceFieldLocation>.Empty;
                        for (var sourceLocationIndex = 0; sourceLocationIndex < numberOfSourceLocations; sourceLocationIndex++)
                        {
                            sourceLocationsIfRecorded = sourceLocationsIfRecorded.Add(
                                new SourceFieldLocation(
                                    reader.ReadInt32(),
                                    reader.ReadInt32(),
                                    reader.ReadInt32(),
                                    reader.ReadInt32(),
                                    reader.ReadSingle()
                                    )
                                );
                        }
                    }

                    matches = matches.Add(
                        new WeightedEntry<TKey>(
                            keys[keyIndex],
                            matchWeight,
                            sourceLocationsIfRecorded
                            )
                        );
                }
                matchDictionary.Add(token, matches);
            }

            return new IndexData<TKey>(
                new TernarySearchTreeDictionary<NonNullImmutableList<WeightedEntry<TKey>>>(
                    matchDictionary,
                    stringNormaliser
                    ),
                keyComparer
                );
        }
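The reader above implies a straightforward length-prefixed binary layout. A hedged sketch of a matching writer follows (the record types and all names are hypothetical; only the field order, taken from the reads above, is authoritative, and the meaning of the four Int32 location fields is assumed from the SourceFieldLocation constructor):

        // Hypothetical serialisation counterpart to the reader above: token count, then per
        // token its string and match count, then per match the key index, weight and source
        // location count followed by that many (four Int32 + one Single) location records
        private sealed record LocationRecord(int SourceFieldIndex, int TokenIndex, int SourceIndex, int SourceTokenLength, float WeightContribution);
        private sealed record MatchRecord(int KeyIndex, float Weight, IReadOnlyList<LocationRecord> Locations);

        private static void WriteMatchData(BinaryWriter writer, IReadOnlyDictionary<string, IReadOnlyList<MatchRecord>> matchesByToken)
        {
            writer.Write(matchesByToken.Count);              // numberOfTokens
            foreach (var tokenAndMatches in matchesByToken)
            {
                writer.Write(tokenAndMatches.Key);           // the token itself
                writer.Write(tokenAndMatches.Value.Count);   // numberOfMatchesForToken
                foreach (var match in tokenAndMatches.Value)
                {
                    writer.Write(match.KeyIndex);            // index into the shared keys list
                    writer.Write(match.Weight);              // matchWeight (Single)
                    writer.Write(match.Locations.Count);     // zero means "not recorded"
                    foreach (var location in match.Locations)
                    {
                        writer.Write(location.SourceFieldIndex);   // four Int32 values and one
                        writer.Write(location.TokenIndex);         // Single, in exactly the order
                        writer.Write(location.SourceIndex);        // the reader consumes them
                        writer.Write(location.SourceTokenLength);
                        writer.Write(location.WeightContribution);
                    }
                }
            }
        }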
        private static IIndexData<int> GenerateIndexData(NonNullImmutableList<Post> posts, IStringNormaliser sourceStringComparer, ITokenBreaker tokenBreaker)
        {
            if (posts == null)
            {
                throw new ArgumentNullException(nameof(posts));
            }
            if (sourceStringComparer == null)
            {
                throw new ArgumentNullException(nameof(sourceStringComparer));
            }
            if (tokenBreaker == null)
            {
                throw new ArgumentNullException(nameof(tokenBreaker));
            }

            // The Post (plain text) content is always the first field since its Content Retriever is first, which means that all source locations for the content
            // will have a SourceFieldIndex of zero
            var contentRetrievers = new List<ContentRetriever<Post, int>>
            {
                new ContentRetriever<Post, int>(
                    p => new PreBrokenContent<int>(p.Id, p.GetContentAsPlainText()),
                    GetTokenWeightDeterminer(1f, sourceStringComparer)
                    ),
                new ContentRetriever<Post, int>(
                    p => new PreBrokenContent<int>(p.Id, p.Title),
                    GetTokenWeightDeterminer(5f, sourceStringComparer)
                    ),
                new ContentRetriever<Post, int>(
                    p => new PreBrokenContent<int>(p.Id, new NonNullOrEmptyStringList(p.Tags.Select(tag => tag.Tag))),
                    GetTokenWeightDeterminer(3f, sourceStringComparer)
                    )
            };

            return new IndexGenerator<Post, int>(
                contentRetrievers.ToNonNullImmutableList(),
                new DefaultEqualityComparer<int>(),
                sourceStringComparer,
                tokenBreaker,
                weightedValues => weightedValues.Sum(),
                captureSourceLocations: true,
                new NullLogger()
                ).Generate(posts.ToNonNullImmutableList());
        }
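Once generated, the index can be queried per token. A hedged usage sketch, given a NonNullImmutableList<Post> posts (DefaultStringNormaliser, WhiteSpaceTokenBreaker, GetMatches and the Key/Weight property names are assumptions based on the FullTextIndexer API surface):

        // Usage sketch: generate an index over some posts and look a token up
        var index = GenerateIndexData(posts, new DefaultStringNormaliser(), new WhiteSpaceTokenBreaker());
        foreach (var match in index.GetMatches("immutable"))
        {
            Console.WriteLine("Post " + match.Key + " matched with weight " + match.Weight);
        }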
        private static ContentRetriever<Post, int>.BrokenTokenWeightDeterminer GetTokenWeightDeterminer(float multiplier, IStringNormaliser sourceStringComparer)
        {
            if (multiplier <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(multiplier), "must be greater than zero");
            }
            if (sourceStringComparer == null)
            {
                throw new ArgumentNullException(nameof(sourceStringComparer));
            }

            // Constructing a HashSet of the normalised versions of the stop words means that looking up whether normalised tokens are stop
            // words can be a lot faster (as neither the stop words nor the token need to be fed through the normaliser again)
            var hashSetOfNormalisedStopWords = new HashSet<string>(
                FullTextIndexer.Core.Constants.GetStopWords("en").Select(word => sourceStringComparer.GetNormalisedString(word))
                );

            return normalisedToken => multiplier * (hashSetOfNormalisedStopWords.Contains(normalisedToken) ? 0.01f : 1f);
        }
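To make the weighting concrete, a worked example, given some IStringNormaliser sourceStringComparer (and assuming "the" appears in the English stop word list):

        // An ordinary title token keeps the full multiplier; a stop word is scaled to 1/100th,
        // so it is still indexed but contributes almost nothing to a match
        var titleWeightDeterminer = GetTokenWeightDeterminer(5f, sourceStringComparer);
        var ordinary = titleWeightDeterminer(sourceStringComparer.GetNormalisedString("immutable")); // 5f * 1f    = 5f
        var stopWord = titleWeightDeterminer(sourceStringComparer.GetNormalisedString("the"));       // 5f * 0.01f = 0.05f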
        public TernarySearchTreeStructDictionary(IEnumerable<KeyValuePair<string, TValue>> data, IStringNormaliser keyNormaliser)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }
            if (keyNormaliser == null)
            {
                throw new ArgumentNullException(nameof(keyNormaliser));
            }

            var nodes = new List<Node> {
                GetUnintialisedNode()
            };
            var values = new List<TValue>();
            var keys   = new HashSet<string>(keyNormaliser);

            foreach (var entry in data)
            {
                var key = entry.Key;
                if (key == null)
                {
                    throw new ArgumentException("Null key encountered in data");
                }
                var normalisedKey = keyNormaliser.GetNormalisedString(key);
                if (normalisedKey == "")
                {
                    throw new ArgumentException("key value results in blank string when normalised: " + key);
                }
                if (keys.Contains(normalisedKey))
                {
                    throw new ArgumentException("key value results in duplicate normalised key:" + key);
                }
                keys.Add(key);

                if (nodes[0].Character == (char)0)
                {
                    nodes[0] = SetCharacter(nodes[0], normalisedKey[0]);
                }

                var nodeIndex = 0;
                var keyIndex  = 0;
                while (true)
                {
                    if (nodes[nodeIndex].Character == normalisedKey[keyIndex])
                    {
                        keyIndex++;
                        if (keyIndex == normalisedKey.Length)
                        {
                            var newValueIndex = values.Count;
                            values.Add(entry.Value);
                            nodes[nodeIndex] = SetValueIndex(nodes[nodeIndex], newValueIndex);
                            break;
                        }
                        if (nodes[nodeIndex].MiddleChildIndex == -1)
                        {
                            var newNode      = SetCharacter(GetUnintialisedNode(), normalisedKey[keyIndex]);
                            var newNodeIndex = nodes.Count;
                            nodes.Add(newNode);
                            nodes[nodeIndex] = SetMiddleChildIndex(nodes[nodeIndex], newNodeIndex);
                            nodeIndex        = newNodeIndex;
                        }
                        else
                        {
                            nodeIndex = nodes[nodeIndex].MiddleChildIndex;
                        }
                        continue;
                    }
                    else if (normalisedKey[keyIndex] < nodes[nodeIndex].Character)
                    {
                        if (nodes[nodeIndex].LeftChildIndex == -1)
                        {
                            var newNode      = SetCharacter(GetUnintialisedNode(), normalisedKey[keyIndex]);
                            var newNodeIndex = nodes.Count;
                            nodes.Add(newNode);
                            nodes[nodeIndex] = SetLeftChildIndex(nodes[nodeIndex], newNodeIndex);
                            nodeIndex        = newNodeIndex;
                        }
                        else
                        {
                            nodeIndex = nodes[nodeIndex].LeftChildIndex;
                        }
                    }
                    else
                    {
                        if (nodes[nodeIndex].RightChildIndex == -1)
                        {
                            var newNode      = SetCharacter(GetUnintialisedNode(), normalisedKey[keyIndex]);
                            var newNodeIndex = nodes.Count;
                            nodes.Add(newNode);
                            nodes[nodeIndex] = SetRightChildIndex(nodes[nodeIndex], newNodeIndex);
                            nodeIndex        = newNodeIndex;
                        }
                        else
                        {
                            nodeIndex = nodes[nodeIndex].RightChildIndex;
                        }
                    }
                }
            }

            _nodes         = nodes.ToArray();
            _values        = values.ToArray();
            _keyNormaliser = keyNormaliser;
            _keys          = keys.ToList().AsReadOnly();
        }
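The flat array representation keeps the tree contiguous in memory. A hypothetical sketch of the Node struct implied by the constructor above (member names beyond Character and the three child indexes are assumptions; -1 is the "not set" sentinel throughout):

        // Hypothetical Node layout, inferred from how the constructor reads and writes it
        private struct Node
        {
            public char Character;        // the character this node matches
            public int  LeftChildIndex;   // index into _nodes, or -1
            public int  MiddleChildIndex; // index into _nodes, or -1
            public int  RightChildIndex;  // index into _nodes, or -1
            public int  ValueIndex;       // index into _values, or -1 (name assumed; set via SetValueIndex)
        }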
        public AutomatedIndexGeneratorFactoryBuilder <TSource, TKey> SetStringNormaliser(IStringNormaliser stringNormaliser)
        {
            if (stringNormaliser == null)
            {
                throw new ArgumentNullException(nameof(stringNormaliser));
            }

            return new AutomatedIndexGeneratorFactoryBuilder<TSource, TKey>(
                _keyRetrieverOverride,
                _keyComparerOverride,
                stringNormaliser,
                _tokenBreaker,
                _weightedEntryCombinerOverride,
                _propertyWeightAppliers,
                _tokenWeightDeterminerGeneratorOverride,
                _optionalPropertyForFirstContentRetriever,
                _captureSourceLocations,
                _loggerOverride
                );
        }
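Because each Set* method returns a new builder, configuration chains without mutating the original instance. A hedged usage sketch (the parameterless constructor and the Get() factory method are assumptions and may differ by library version):

        // Usage sketch of the immutable builder pattern
        var factory = new AutomatedIndexGeneratorFactoryBuilder<Post, int>()
            .SetStringNormaliser(new DefaultStringNormaliser())
            .Get(); // assumed to produce an AutomatedIndexGeneratorFactory<Post, int>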
        private AutomatedIndexGeneratorFactory<TSource, TKey>.WeightDeterminerGenerator GetDefaultTokenWeightDeterminerGenerator(IStringNormaliser stringNormaliser)
        {
            if (stringNormaliser == null)
            {
                throw new ArgumentNullException(nameof(stringNormaliser));
            }

            // Constructing a HashSet of the normalised versions of the stop words means that looking up whether normalised tokens are stop
            // words can be a lot faster (as neither the stop words nor the token need to be fed through the normaliser again)
            var hashSetOfNormalisedStopWords = new HashSet<string>(
                Constants.GetStopWords("en").Select(word => stringNormaliser.GetNormalisedString(word))
                );

            return property =>
            {
                // Reverse the propertyWeightAppliers so that later values added to the set take precedence (eg. if, for some reason, a x5 weight is
                // given to a property and then later it's set to be ignored, then we want to ignore it - which this will achieve)
                var propertyWeightApplier = _propertyWeightAppliers.Reverse().FirstOrDefault(p => p.AppliesTo(property));
                if ((propertyWeightApplier != null) && (propertyWeightApplier.WeightMultiplier == 0))
                {
                    // A weight multiplier of zero means ignore this property, as does returning null from a WeightDeterminerGenerator call
                    return null;
                }

                var weightMultiplier = (propertyWeightApplier != null) ? propertyWeightApplier.WeightMultiplier : 1;
                return normalisedToken => weightMultiplier * (hashSetOfNormalisedStopWords.Contains(normalisedToken) ? 0.01f : 1f);
            };
        }
        /// <summary>
        /// Combine additional data with an existing root's content, returning a new Node (unless zero data entries were specified, in which case the original
        /// reference will be returned). This will throw an exception for a null keyNormaliser or data (or if any keys in the data return null, empty string
        /// or duplicates when run through the keyNormaliser). If a null root is specified then a new root will be generated.
        /// </summary>
        private static Node Add(Node root, IStringNormaliser keyNormaliser, IEnumerable<KeyValuePair<string, TValue>> data)
        {
            if (keyNormaliser == null)
            {
                throw new ArgumentNullException(nameof(keyNormaliser));
            }
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            if (!data.Any())
            {
                return root;
            }

            if (root != null)
            {
                root = root.Clone();
            }
            foreach (var entry in data)
            {
                var key = entry.Key;
                if (key == null)
                {
                    throw new ArgumentException("Null key encountered in data");
                }
                var normalisedKey = keyNormaliser.GetNormalisedString(key);
                if (normalisedKey == "")
                {
                    throw new ArgumentException("key value results in blank string when normalised: " + key);
                }

                if (root == null)
                {
                    root = new Node { Character = normalisedKey[0] };
                }

                var node  = root;
                var index = 0;
                while (true)
                {
                    if (node.Character == normalisedKey[index])
                    {
                        index++;
                        if (index == normalisedKey.Length)
                        {
                            node.Key   = normalisedKey;
                            node.Value = entry.Value;
                            break;
                        }
                        if (node.MiddleChild == null)
                        {
                            node.MiddleChild = new Node { Character = normalisedKey[index], Parent = node };
                        }
                        node = node.MiddleChild;
                    }
                    else if (normalisedKey[index] < node.Character)
                    {
                        if (node.LeftChild == null)
                        {
                            node.LeftChild = new Node { Character = normalisedKey[index], Parent = node };
                        }
                        node = node.LeftChild;
                    }
                    else
                    {
                        if (node.RightChild == null)
                        {
                            node.RightChild = new Node { Character = normalisedKey[index], Parent = node };
                        }
                        node = node.RightChild;
                    }
                }
            }
            return root;
        }
 public UserRepository(IOptions <MongoDbSettings> dbSettings, IStringNormaliser normaliser) : base(dbSettings, normaliser)
 {
 }
 public EnglishPluralityStringNormaliser(IStringNormaliser optionalPreNormaliser, PreNormaliserWorkOptions preNormaliserWork)
     : this(DefaultPlurals, optionalPreNormaliser, preNormaliserWork)
 {
 }
        private static Node Add(Node root, IStringNormaliser keyNormaliser, IEnumerable<KeyValuePair<string, TValue>> data, Combiner combine)
        {
            if (keyNormaliser == null)
            {
                throw new ArgumentNullException(nameof(keyNormaliser));
            }
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }
            if (combine == null)
            {
                throw new ArgumentNullException(nameof(combine));
            }

            if (!data.Any())
            {
                return root;
            }

            if (root != null)
            {
                root = root.Clone();
            }
            foreach (var entry in data)
            {
                var key = entry.Key;
                if (key == null)
                {
                    throw new ArgumentException("Null key encountered in data");
                }
                var normalisedKey = keyNormaliser.GetNormalisedString(key);
                if (normalisedKey == "")
                {
                    throw new ArgumentException("key value results in blank string when normalised: " + key);
                }

                if (root == null)
                {
                    root = new Node { Character = normalisedKey[0] };
                }

                var node  = root;
                var index = 0;
                while (true)
                {
                    if (node.Character == normalisedKey[index])
                    {
                        index++;
                        if (index == normalisedKey.Length)
                        {
                            node.Value = ((node.Value != null) && (entry.Value != null))
                                ? combine(node.Value, entry.Value)
                                : ((node.Value != null) ? node.Value : entry.Value);
                            node.Key = (node.Value == null) ? null : normalisedKey; // If we ended up with a null Value then the Combiner may have removed it, in which case we should set the Key to null as well
                            break;
                        }
                        if (node.MiddleChild == null)
                        {
                            node.MiddleChild = new Node { Character = normalisedKey[index], Parent = node };
                        }
                        node = node.MiddleChild;
                    }
                    else if (normalisedKey[index] < node.Character)
                    {
                        if (node.LeftChild == null)
                        {
                            node.LeftChild = new Node { Character = normalisedKey[index], Parent = node };
                        }
                        node = node.LeftChild;
                    }
                    else
                    {
                        if (node.RightChild == null)
                        {
                            node.RightChild = new Node { Character = normalisedKey[index], Parent = node };
                        }
                        node = node.RightChild;
                    }
                }
            }
            return root;
        }
 public TernarySearchTreeDictionary(IEnumerable<KeyValuePair<string, TValue>> data, IStringNormaliser keyNormaliser)
     : this(Add(root: null, keyNormaliser: keyNormaliser, data: data, combine: _takeNewValue), keyNormaliser)
 {
 }
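A hedged usage sketch of this constructor (DefaultStringNormaliser is an assumption; any IStringNormaliser will do):

     // Usage sketch: keys that normalise to the same string collide, with _takeNewValue
     // keeping the later entry's value. "cat" and "cats" stay distinct here only because
     // this plain normaliser does not collapse plurals; an EnglishPluralityStringNormaliser
     // would merge them
     var tst = new TernarySearchTreeDictionary<int>(
         new[]
         {
             new KeyValuePair<string, int>("cat", 1),
             new KeyValuePair<string, int>("cats", 2)
         },
         new DefaultStringNormaliser()
         );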