/// <summary>
        /// Creates an index over <paramref name="documents"/> by flattening their byte
        /// contents into one integer sequence and building a suffix tree over it.
        /// </summary>
        /// <param name="documents">
        /// Key/value pairs to index. Only the <c>byte[]</c> values are read here; the whole
        /// list is also handed to <c>BuildDocsMapping</c>.
        /// </param>
        public DocumentIndex(IReadOnlyList <KeyValuePair <string, byte[]> > documents)
        {
            // Every byte becomes an int symbol in 0..255. The document at position i is
            // followed by the extra symbol 256 + i, so each document ends with a value
            // that no byte and no other document can produce.
            input = documents
                .SelectMany((document, position) =>
                    document.Value.Select(b => (int)b).Concat(new[] { 256 + position }))
                .ToList();

            // The alphabet covers the 256 byte values plus one extra symbol per document.
            var symbolRange = Enumerable.Range(0, 256 + documents.Count);
            tree = new SuffixTree<int>(input, symbolRange);

            _docsMapping = BuildDocsMapping(documents);
        }
Beispiel #2
0
        /// <summary>
        /// Builds a character suffix tree over <paramref name="text"/>, searches it for
        /// <paramref name="pattern"/>, and checks the reported occurrences against
        /// <paramref name="expected"/>.
        /// </summary>
        /// <param name="text">Text the suffix tree is built from.</param>
        /// <param name="pattern">Pattern passed to <c>FindAllOccurrences</c>.</param>
        /// <param name="expected">
        /// Comma-separated integers the search is expected to return, or an empty string
        /// when no result is expected.
        /// </param>
        public void SuffixTreeTests(string text, string pattern, string expected)
        {
            // Arrange: the alphabet is every distinct character of the text and the pattern.
            var symbols = new HashSet<char>(text + pattern);

            int[] wanted;
            if (expected.Length == 0)
            {
                wanted = new int[0];
            }
            else
            {
                wanted = expected.Split(',').Select(int.Parse).ToArray();
            }

            var suffixTree = new SuffixTree<char>(text.ToCharArray(), symbols);

            // Act
            var found = suffixTree.FindAllOccurrences(pattern.ToCharArray());

            // Assert: equivalence ignores the order in which results are reported.
            CollectionAssert.AreEquivalent(wanted, found);
        }
        /// <summary>
        /// Creates an index over <paramref name="documents"/>: their contents are
        /// concatenated into one integer sequence, a suffix tree is built over that
        /// sequence, and each document's name and start offset are recorded.
        /// </summary>
        /// <param name="documents">
        /// Documents to index; the key is stored in <c>filesNames</c> and the value is
        /// the document content in bytes.
        /// </param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="documents"/> is <c>null</c>.
        /// </exception>
        public DocumentIndex(IReadOnlyList <KeyValuePair <string, byte[]> > documents)
        {
            if (documents == null)
            {
                throw new System.ArgumentNullException(nameof(documents));
            }

            // The alphabet covers the 256 byte values plus one extra symbol per document.
            var inputAlphabet = Enumerable.Range(0, 256 + documents.Count);

            input               = new List <int>();
            filesStartPositions = new int[documents.Count];
            filesNames          = new string[documents.Count];
            for (var docIndex = 0; docIndex < documents.Count; docIndex++)
            {
                var document = documents[docIndex];

                // Offset in the concatenated input at which this document begins.
                filesStartPositions[docIndex] = input.Count;

                // Bytes are widened to int symbols 0..255, then followed by the extra
                // symbol 256 + docIndex, which no byte and no other document can produce.
                input.AddRange(document.Value.Select(b => (int)b));
                input.Add(256 + docIndex);

                filesNames[docIndex] = document.Key;
            }

            // NOTE(review): the two lambdas presumably tell the tree how to recognise a
            // per-document extra symbol (> 255) and map it to a 1-based document number
            // (symbol - 255) - confirm against the SuffixTree constructor.
            tree = new SuffixTree <int>(input, inputAlphabet,
                                        symbol => symbol - 255,
                                        symbol => symbol > 255);
        }