/// <summary>
/// Creates the index: widens every document's bytes to <c>int</c>, joins them into one sequence in which
/// document <c>i</c> is immediately followed by the symbol <c>256 + i</c>, and builds a suffix tree over
/// that sequence.
/// </summary>
/// <param name="documents">
/// Key/content pairs to index. The same list is also handed to <c>BuildDocsMapping</c>, whose result is
/// stored in <c>_docsMapping</c>.
/// </param>
public DocumentIndex(IReadOnlyList<KeyValuePair<string, byte[]>> documents)
{
    // The inner projections stay lazy; each one is enumerated exactly once by the loop below.
    var widenedDocuments = documents
        .Select(document => document.Value.Select(value => (int)value))
        .ToList();

    // Symbols 0..255 cover every byte value; one extra symbol per document follows.
    var symbols = Enumerable.Range(0, 256 + widenedDocuments.Count);

    var sequence = new List<int>();
    for (var position = 0; position < widenedDocuments.Count; position++)
    {
        sequence.AddRange(widenedDocuments[position]);

        // Close the document with its own symbol, which no byte value can equal.
        sequence.Add(256 + position);
    }

    input = sequence;
    tree = new SuffixTree<int>(input, symbols);
    _docsMapping = BuildDocsMapping(documents);
}
/// <summary>
/// Builds a suffix tree over <paramref name="text"/> and verifies that <c>FindAllOccurrences</c> for
/// <paramref name="pattern"/> returns exactly the expected integers, in any order.
/// </summary>
/// <param name="text">Text the suffix tree is built from.</param>
/// <param name="pattern">Pattern to search for in the tree.</param>
/// <param name="expected">
/// Comma-separated integers the search is expected to return; an empty string means no results.
/// </param>
public void SuffixTreeTests(string text, string pattern, string expected)
{
    // The alphabet is every distinct character that appears in either the text or the pattern.
    var symbols = new HashSet<char>((text + pattern).ToCharArray());

    int[] wanted;
    if (expected.Length > 0)
    {
        wanted = expected.Split(',').Select(item => int.Parse(item)).ToArray();
    }
    else
    {
        wanted = new int[] {};
    }

    var suffixTree = new SuffixTree<char>(text.ToCharArray(), symbols);
    var found = suffixTree.FindAllOccurrences(pattern.ToCharArray());

    // Order-insensitive comparison of the two collections.
    CollectionAssert.AreEquivalent(wanted, found);
}
/// <summary>
/// Builds an index over <paramref name="documents"/>: the bytes of all documents are widened to
/// <c>int</c> and concatenated into <c>input</c>, with document <c>i</c> followed by the terminator
/// symbol <c>256 + i</c>, and a suffix tree is built over the combined sequence.
/// </summary>
/// <remarks>
/// For each document the constructor records the offset of its first symbol in <c>input</c>
/// (<c>filesStartPositions</c>) and its key (<c>filesNames</c>), both indexed by document order.
/// </remarks>
/// <param name="documents">Key/content pairs to index, in the order they should appear in the sequence.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="documents"/> is <c>null</c>, or one of the documents has <c>null</c> content.
/// </exception>
public DocumentIndex(IReadOnlyList<KeyValuePair<string, byte[]>> documents)
{
    // Number of distinct byte values; symbols at or above this value are per-document terminators.
    const int ByteValueCount = 256;

    if (documents == null)
    {
        throw new System.ArgumentNullException(nameof(documents));
    }

    var inputAlphabet = Enumerable.Range(0, ByteValueCount + documents.Count);

    input = new List<int>();
    filesStartPositions = new int[documents.Count];
    filesNames = new string[documents.Count];

    for (var docIndex = 0; docIndex < documents.Count; docIndex++)
    {
        var document = documents[docIndex];
        var content = document.Value;
        if (content == null)
        {
            throw new System.ArgumentNullException(nameof(documents), "Document content must not be null.");
        }

        // The document starts where the sequence currently ends.
        filesStartPositions[docIndex] = input.Count;
        input.AddRange(content.Select(value => (int)value));

        // Unique terminator for this document; it cannot collide with any byte value.
        input.Add(ByteValueCount + docIndex);
        filesNames[docIndex] = document.Key;
    }

    // The two delegates map a terminator symbol to a 1-based document number and recognise terminator
    // symbols. NOTE(review): how SuffixTree uses them is not visible here - confirm against its constructor.
    tree = new SuffixTree<int>(
        input,
        inputAlphabet,
        symbol => symbol - (ByteValueCount - 1),
        symbol => symbol >= ByteValueCount);
}