SearchAlgorithms

Overview

Project that defines a .NET search algorithms module. It currently supports the following:

String search through large dictionaries using Levenshtein Automata

The project will generate a Nuget package that can be downloaded as a 3rd party dll via Visual Studio.

Example

using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using NUnit.Framework;
using SearchAlgorithms.StringStrategies;

namespace SearchAlgorithms.Test.SearchStrategies
{
    public class LevenshteinAutomataStringStrategyTest
    {
        private IList<string> _dataset;
        private List<string> _largeDataset;
        private LevenshteinAutomataStringStrategy _strategy;
        private SortedList _sortedDataSet;

        [SetUp]
        public void Initialize()
        {
            _strategy = new LevenshteinAutomataStringStrategy();
            _SetupDataset();
            _SetupLargeDataset();
        }

        [Test]
        public void ShouldReturnStringsThatAreEqual()
        {
            IEnumerable<string> results = _strategy.Search("nice", _dataset);
            Assert.AreEqual(1, results.Count());
        }

        [Test]
        public void ShouldReturnStringsThatAreOnlyOffByAtMostOneCharacter()
        {
            IEnumerable<string> results = _strategy.Search("nice", _dataset.OrderBy(x => x).ToList(), 1);
            Assert.AreEqual(3, results.Count());
        }

        [Test]
        public void ShouldReturnStringsThatAreOnlyOffByAtMostTwoCharacters()
        {
            IEnumerable<string> results = _strategy.Search("nice", _dataset.OrderBy(x => x).ToList(), 2);
            Assert.AreEqual(5, results.Count());
        }

        [Test]
        public void ShouldReturnStringsFromLargeDatasetThatAreEqual()
        {
            IEnumerable<string> results = _strategy.Search("elephant", _largeDataset);
            Assert.AreEqual(1, results.Count());
        }

        [Test]
        public void ShouldReturnStringsFromLargeDatasetThatAreNotEqual()
        {
            string[] dataset = new string[_sortedDataSet.Count];
            _sortedDataSet.Values.CopyTo(dataset, 0);
            IEnumerable<string> results = _strategy.Search("Randy".ToLower(), dataset.ToList(), 1);

            List<string> finalResults = new List<string>();
            foreach (string item in _largeDataset)
            {
                foreach (string result in results)
                    if (item.ToLower().Contains(result))
                        finalResults.Add(item);
            }

            Assert.AreEqual(527, finalResults.Count);
        }

        private void _SetupDataset()
        {
            _dataset = new List<string>
                {
                    "applepie",
                    "nice",
                    "rice",
                    "niceso",
                    "plepie",
                    "applep",
                    "genes",
                    "beer",
                    "nici",
                    "wizened",
                    "great",
                    "bait",
                    "sonice",
                    "mate"
                };
        }

        private void _SetupLargeDataset()
        {
            _largeDataset = new List<string>();
            HashSet<SearchResult> searchResults = new HashSet<SearchResult>();
            StreamReader reader = new StreamReader(@"results.csv");
            reader.ReadLine(); //do not process header

            while (!reader.EndOfStream)
            {
                string line = reader.ReadLine();
                string[] entries = line.Split(new [] {','}, System.StringSplitOptions.None);
                SearchResult sr = new SearchResult(entries[0]);
                searchResults.Add(sr);
            }

            _largeDataset.AddRange(searchResults.Where(x => !string.IsNullOrEmpty(x.Name)).Select(x => x.Name));
            List<string> intermediateDataSet = new List<string>();
            intermediateDataSet.AddRange(_largeDataset.SelectMany(x => x.Split(new[] {' '})));

            _sortedDataSet = new SortedList(
                intermediateDataSet.Where(x => !string.IsNullOrWhiteSpace(x)).
                                    Select(x => x.ToLower().Trim()).
                                    Distinct().
                                    ToDictionary(str => str));
        }
    }
}

License

This code is released under the MIT license with no guarantees.

Name		Name	Last commit message	Last commit date
Latest commit History 25 Commits
.build		.build
.nuget		.nuget
SearchAlgorithm.Surveyors		SearchAlgorithm.Surveyors
SearchAlgorithms.Automata		SearchAlgorithms.Automata
SearchAlgorithms.Test		SearchAlgorithms.Test
SearchAlgorithms		SearchAlgorithms
lib		lib
.gitattributes		.gitattributes
.gitignore		.gitignore
Build.proj		Build.proj
README.md		README.md
SearchAlgorithms.dll.nuspec		SearchAlgorithms.dll.nuspec
SearchAlgorithms.sln		SearchAlgorithms.sln
SearchAlgorithms.sln.DotSettings		SearchAlgorithms.sln.DotSettings
SolutionInfo.cs		SolutionInfo.cs
SolutionVersionInfo.cs		SolutionVersionInfo.cs

method-inc/SearchAlgorithms

Folders and files

Latest commit

History

Repository files navigation

SearchAlgorithms

Overview

Example

License

About

Resources

Stars

Watchers

Forks

Languages