Beispiel #1
0
        /// <summary>
        /// Builds an inverted index over the files whose paths are given in
        /// <paramref name="documents"/>.
        /// </summary>
        /// <remarks>
        /// The array is sorted in place with a <c>NumericComparer</c> before indexing, and each
        /// file's position in the array is what gets passed to <c>InvertedIndex.Append</c>
        /// alongside its terms. Sorting is best-effort: if the comparison fails, indexing
        /// proceeds with whatever order the array is left in.
        /// </remarks>
        /// <param name="documents">Paths of the files to index. Reordered in place by the sort.</param>
        /// <returns>The index populated with every token the analyzer accepted.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="documents"/> is null.</exception>
        public InvertedIndex CreateIndex(params string[] documents)
        {
            if (documents == null)
            {
                throw new ArgumentNullException(nameof(documents));
            }

            var result = new InvertedIndex(documents.Length);

            try
            {
                Array.Sort(documents, new NumericComparer());
            }
            catch (InvalidOperationException)
            {
                // The comparer threw while comparing two entries. Ordering is only a
                // nicety here, so keep going with the current order.
            }
            catch (ArgumentException)
            {
                // The comparer returned inconsistent results. Same best-effort policy.
            }

            for (var i = 0; i < documents.Length; i++)
            {
                // StreamReader(string) opens the file at that path; disposed per document.
                using (var reader = new StreamReader(documents[i]))
                {
                    var tokenSource = new TokenSource(reader);

                    while (tokenSource.Next())
                    {
                        // The analyzer decides whether the current token is indexed.
                        if (_analyzer.Process(tokenSource))
                        {
                            result.Append(
                                new TermSegment(tokenSource.Buffer, tokenSource.Size),
                                i, tokenSource.Position);
                        }
                    }
                }
            }

            return result;
        }
Beispiel #2
0
        /// <summary>
        /// Builds an inverted index from in-memory document texts.
        /// </summary>
        /// <param name="documents">The text of each document; a document's position in this
        /// array is passed to <c>InvertedIndex.Append</c> together with its terms.</param>
        /// <returns>The index populated with every token the analyzer accepted.</returns>
        public InvertedIndex CreateIndex(params string[] documents)
        {
            var index = new InvertedIndex(documents.Length);
            var documentId = 0;

            foreach (var text in documents)
            {
                using (var textReader = new StringReader(text))
                {
                    var tokens = new TokenSource(textReader);

                    while (tokens.Next())
                    {
                        // Skip tokens the analyzer rejects.
                        if (!_analyzer.Process(tokens))
                        {
                            continue;
                        }

                        var key = new CharArraySegmentKey(tokens.Buffer, tokens.Size);
                        index.Append(key, documentId, tokens.Position);
                    }
                }

                documentId++;
            }

            return index;
        }
Beispiel #3
0
        /// <summary>
        /// Creates a <c>TermQuery</c> from the first token of <paramref name="term"/> after
        /// running it through <paramref name="analyzer"/>.
        /// </summary>
        /// <param name="term">The raw text to turn into a single query term.</param>
        /// <param name="analyzer">The analyzer applied to the token before it is used.</param>
        /// <returns>A query for the analyzed token.</returns>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="term"/> yields no token, or the analyzer rejects the token.
        /// </exception>
        public static Query From(string term, IAnalyzer analyzer)
        {
            using (var reader = new StringReader(term))
            {
                var tokenSource = new TokenSource(reader);

                // Next() reports whether a token was read. Without checking it, an input
                // with no tokens would be handed to the analyzer with an empty buffer and
                // could produce a query for an empty term.
                if (!tokenSource.Next() || !analyzer.Process(tokenSource))
                {
                    throw new InvalidOperationException($"Could not generate a term from: {term}");
                }

                return new TermQuery(tokenSource.ToString());
            }
        }
Beispiel #4
0
        /// <summary>
        /// Tokenizes the text from <paramref name="reader"/> and appends every token the
        /// analyzer accepts to <paramref name="result"/>.
        /// </summary>
        /// <param name="result">The index that receives the terms.</param>
        /// <param name="documentIndex">The document identifier passed to <c>Append</c> with each term.</param>
        /// <param name="reader">The source of the document text. Not disposed here.</param>
        private void ProcessDocument(InvertedIndex result, int documentIndex, TextReader reader)
        {
            var tokens = new TokenSource(reader);

            while (tokens.Next())
            {
                // Skip tokens the analyzer rejects.
                if (!_analyzer.Process(tokens))
                {
                    continue;
                }

                var key = new CharArraySegmentKey(tokens.Buffer, tokens.Size);
                result.Append(key, documentIndex, tokens.Position);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Tokenizes <paramref name="indexedValue"/> and queues each accepted term for
        /// <paramref name="field"/> in the current write batch.
        /// </summary>
        /// <param name="field">The field name; at most 256 characters.</param>
        /// <param name="indexedValue">The reader supplying the text to index.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="field"/> or <paramref name="indexedValue"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException"><paramref name="field"/> is longer than 256 characters.</exception>
        /// <exception cref="IOException">A term encodes to more than 256 UTF-8 bytes.</exception>
        public void Index(string field, TextReader indexedValue)
        {
            const int maxLength = 256;

            if (field == null)
            {
                throw new ArgumentNullException("field");
            }

            if (field.Length > maxLength)
            {
                throw new ArgumentException("field name cannot exceed 256 characters", "field");
            }

            if (indexedValue == null)
            {
                throw new ArgumentNullException("indexedValue");
            }

            var fieldTree = "@fld_" + field;

            // The previous token source is handed back to the analyzer when creating the next one.
            _source = _analyzer.CreateTokenSource(field, _source);
            _source.SetReader(indexedValue);

            while (_source.Next())
            {
                if (!_analyzer.Process(field, _source))
                {
                    continue;
                }

                var encodedLength = Encoding.UTF8.GetByteCount(_source.Buffer, 0, _source.Size);
                if (encodedLength > maxLength)
                {
                    throw new IOException(
                        "Cannot index a term that is greater than 256 bytes, but got a term with " +
                        encodedLength + " bytes");
                }

                // The buffer comes from the pool and is recorded in _toBeFreed before use.
                var termBytes = _bufferPool.Take(encodedLength);
                _toBeFreed.Add(termBytes);
                Encoding.UTF8.GetBytes(_source.Buffer, 0, _source.Size, termBytes, 0);

                var termSlice = new Slice(termBytes, (ushort)encodedLength);
                _writeBatch.MultiAdd(termSlice, _currentDocumentIdSlice, fieldTree);
            }
        }