Exemplo n.º 1
0
        /// <summary>
        /// Initializes a collector for the index described by <paramref name="ix"/>.
        /// </summary>
        /// <param name="directory">Directory value stored on the instance.</param>
        /// <param name="ix">Index info; also supplies the document count when none is given.</param>
        /// <param name="scorerFactory">Optional scoring scheme, stored as-is (may be null).</param>
        /// <param name="documentCount">Explicit document count, or -1 to use <c>ix.DocumentCount</c>.</param>
        public Collector(string directory, IxInfo ix, IScoringScheme scorerFactory = null, int documentCount = -1)
        {
            _directory = directory;
            _ix = ix;
            _scorerFactory = scorerFactory;

            // -1 is the "not supplied" sentinel: fall back to the count recorded in the index info.
            if (documentCount == -1)
            {
                _documentCount = ix.DocumentCount;
            }
            else
            {
                _documentCount = documentCount;
            }
        }
Exemplo n.º 2
0
 /// <summary>
 /// Fills in <c>Result</c> on the given query context and then, recursively,
 /// on each of its children.
 /// </summary>
 /// <param name="queryContext">Context node whose result is computed; its children are visited afterwards.</param>
 /// <param name="scorer">Scoring scheme passed unchanged to <c>GetScoredResult</c> for every node.</param>
 private void Scan(QueryContext queryContext, IScoringScheme scorer)
 {
     // Key this node's scored hits by document id; each hit is its own dictionary value.
     var scoredHits = GetScoredResult(queryContext, scorer);
     queryContext.Result = scoredHits.ToDictionary(hit => hit.DocId);

     // Children are processed the same way, with the same scorer.
     foreach (var childContext in queryContext.Children)
     {
         Scan(childContext, scorer);
     }
 }
Exemplo n.º 3
0
 /// <summary>
 /// Initializes a collector for the index described by <paramref name="ix"/>.
 /// </summary>
 /// <param name="directory">Directory value stored on the instance.</param>
 /// <param name="ix">Index info; also supplies the document count when none is given.</param>
 /// <param name="scorerFactory">Optional scoring scheme, stored as-is (may be null).</param>
 /// <param name="distanceResolver">Optional distance resolver; a new <c>Levenshtein</c> is used when null.</param>
 /// <param name="documentCount">Explicit document count, or -1 to use <c>ix.DocumentCount</c>.</param>
 public Collector(string directory, IxInfo ix, IScoringScheme scorerFactory = null, IDistanceResolver distanceResolver = null, int documentCount = -1)
 {
     _directory = directory;
     _ix = ix;
     _scorerFactory = scorerFactory;

     // No resolver supplied: default to Levenshtein.
     if (distanceResolver == null)
     {
         _distanceResolver = new Levenshtein();
     }
     else
     {
         _distanceResolver = distanceResolver;
     }

     // -1 is the "not supplied" sentinel: fall back to the count recorded in the index info.
     if (documentCount == -1)
     {
         _documentCount = ix.DocumentCount;
     }
     else
     {
         _documentCount = documentCount;
     }
 }
Exemplo n.º 4
0
 /// <summary>
 /// Initializes a collector for the index described by <paramref name="ix"/>
 /// and starts it with an empty score cache.
 /// </summary>
 /// <param name="directory">Directory value stored on the instance.</param>
 /// <param name="ix">Index info; also supplies the document count when none is given.</param>
 /// <param name="scorerFactory">Optional scoring scheme, stored as-is (may be null).</param>
 /// <param name="distanceResolver">Optional distance resolver; a new <c>Levenshtein</c> is used when null.</param>
 /// <param name="documentCount">Explicit document count, or -1 to use <c>ix.DocumentCount</c>.</param>
 public Collector(string directory, IxInfo ix, IScoringScheme scorerFactory = null, IDistanceResolver distanceResolver = null, int documentCount = -1)
 {
     _directory = directory;
     _ix = ix;
     _scorerFactory = scorerFactory;

     // No resolver supplied: default to Levenshtein.
     if (distanceResolver == null)
     {
         _distanceResolver = new Levenshtein();
     }
     else
     {
         _distanceResolver = distanceResolver;
     }

     // -1 is the "not supplied" sentinel: fall back to the count recorded in the index info.
     if (documentCount == -1)
     {
         _documentCount = ix.DocumentCount;
     }
     else
     {
         _documentCount = documentCount;
     }

     // Score lists are keyed by sub-query; the dictionary starts out empty.
     _scoreCache = new Dictionary<SubQuery, IList<DocumentScore>>();
 }
Exemplo n.º 5
0
        /// <summary>
        /// Initializes a searcher over <paramref name="directory"/> and loads the
        /// index info from the file named "0.ix" inside it.
        /// </summary>
        /// <param name="directory">Directory containing the index file.</param>
        /// <param name="parser">Query parser stored on the instance.</param>
        /// <param name="scorer">Scoring scheme stored on the instance.</param>
        public Searcher(string directory, QueryParser parser, IScoringScheme scorer)
        {
            _directory = directory;
            _parser = parser;
            _scorer = scorer;

            // String-keyed concurrent dictionaries, all empty at construction time.
            _trieFiles = new ConcurrentDictionary<string, LazyTrie>();
            _docContainers = new ConcurrentDictionary<string, DocContainer>();
            _postingContainers = new ConcurrentDictionary<string, PostingsContainer>();

            var ixFileName = Path.Combine(directory, "0.ix");
            _ix = IxInfo.Load(ixFileName);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Initializes a searcher over <paramref name="directory"/>: loads an
        /// <c>IxInfo</c> for every index file name returned by
        /// <c>Util.GetIndexFileNamesInChronologicalOrder</c>, then derives the
        /// document count from those infos and reads the serializer's block size.
        /// </summary>
        /// <param name="directory">Directory passed to the index file lookup.</param>
        /// <param name="parser">Query parser stored on the instance.</param>
        /// <param name="scorerFactory">Scoring scheme stored on the instance.</param>
        public Searcher(string directory, QueryParser parser, IScoringScheme scorerFactory)
        {
            _directory = directory;
            _parser = parser;
            _scorerFactory = scorerFactory;

            // Materialize the list so the infos are loaded once, here.
            var ixFileNames = Util.GetIndexFileNamesInChronologicalOrder(directory);
            _ixs = ixFileNames.Select(IxInfo.Load).ToList();

            _documentCount = Util.GetDocumentCount(_ixs);
            _blockSize = Serializer.SizeOfBlock();
        }
Exemplo n.º 7
0
        /// <summary>
        /// Initializes a collector for the index described by <paramref name="ix"/>,
        /// starts it with an empty score cache, records the path of the
        /// "&lt;VersionId&gt;.pos" file and creates a <c>DocHashReader</c> for the
        /// "&lt;VersionId&gt;.pk" file, both located in <paramref name="directory"/>.
        /// </summary>
        /// <param name="directory">Directory that holds the ".pk" and ".pos" files.</param>
        /// <param name="ix">Index info; supplies <c>VersionId</c> and the default document count.</param>
        /// <param name="scorerFactory">Optional scoring scheme, stored as-is (may be null).</param>
        /// <param name="documentCount">Explicit document count, or -1 to use <c>ix.DocumentCount</c>.</param>
        public Collector(string directory, IxInfo ix, IScoringScheme scorerFactory = null, int documentCount = -1)
        {
            _directory = directory;
            _ix = ix;
            _scorerFactory = scorerFactory;

            // -1 is the "not supplied" sentinel: fall back to the count recorded in the index info.
            if (documentCount == -1)
            {
                _documentCount = ix.DocumentCount;
            }
            else
            {
                _documentCount = documentCount;
            }

            _scoreCache = new Dictionary<Query, IList<DocumentScore>>();

            // Both file names share the "<VersionId>.<extension>" shape.
            var docHashesName = string.Format("{0}.{1}", ix.VersionId, "pk");
            var positionsName = string.Format("{0}.{1}", ix.VersionId, "pos");

            _posFileName = Path.Combine(directory, positionsName);
            _docHashReader = new DocHashReader(Path.Combine(directory, docHashesName));
        }
Exemplo n.º 8
0
        /// <summary>
        /// Initializes a searcher: loads an <c>IxInfo</c> for every index file,
        /// keyed by its <c>Name</c>, opens the document reader, and logs how long
        /// initialization took at debug level.
        /// </summary>
        /// <param name="directory">Directory stored on the instance.</param>
        /// <param name="parser">Query parser stored on the instance.</param>
        /// <param name="scorer">Scoring scheme stored on the instance.</param>
        public Searcher(string directory, QueryParser parser, IScoringScheme scorer)
        {
            // Fields are assigned first; the instance helpers called below run after this point.
            _directory = directory;
            _parser = parser;
            _scorer = scorer;

            var timer = Time();

            _indices = GetIndexFileNamesInChronologicalOrder()
                .Select(IxInfo.Load)
                .ToDictionary(info => info.Name);

            var docDb = OpenDocDb();
            _docReader = new DbDocumentReader(docDb);

            Log.DebugFormat("init searcher in {0}", timer.Elapsed);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Initializes an index writer for <paramref name="directory"/>. Loads the
        /// index file "1.ix" from that directory when it exists; otherwise starts
        /// from a new, empty <c>IxFile</c>.
        /// </summary>
        /// <param name="directory">Directory stored on the instance and probed for "1.ix".</param>
        /// <param name="analyzer">Analyzer stored on the instance.</param>
        /// <param name="scoringScheme">Scoring scheme stored on the instance.</param>
        public IndexWriter(string directory, IAnalyzer analyzer, IScoringScheme scoringScheme)
        {
            _directory = directory;
            _analyzer = analyzer;
            _scoringScheme = scoringScheme;

            // String-keyed lookups and the deletion list all start out empty.
            _postingsFiles = new Dictionary<string, PostingsFile>();
            _postingsContainers = new Dictionary<string, PostingsContainer>();
            _docContainers = new Dictionary<string, DocContainer>();
            _deletions = new List<string>();
            _trieFiles = new Dictionary<string, LazyTrie>();

            // Two task queues, each constructed with 1 as first argument and its handler method.
            _docWorker = new TaskQueue<Document>(1, PutDocInContainer);
            _postingsWorker = new TaskQueue<PostingsFile>(1, PutPostingsInContainer);

            var ixFileName = Path.Combine(directory, "1.ix");
            if (File.Exists(ixFileName))
            {
                _ix = IxFile.Load(ixFileName);
            }
            else
            {
                _ix = new IxFile();
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Initializes a collector and opens the postings B+ tree stored in
        /// "&lt;ix.Name&gt;-pos.db" under <paramref name="directory"/> with
        /// <c>ReadOnly</c> set. The time taken is logged at debug level.
        /// </summary>
        /// <param name="directory">Directory that holds the postings database file.</param>
        /// <param name="ix">Index info; its <c>Name</c> forms the database file name.</param>
        /// <param name="scorer">Scoring scheme stored on the instance.</param>
        public Collector(string directory, IxInfo ix, IScoringScheme scorer)
        {
            _directory = directory;
            _ix = ix;
            _scorer = scorer;

            var timer = Time();

            // Key/value serializers and the key comparer, created in the same order as they are passed below.
            var keySerializer = new TermSerializer();
            var valueSerializer = new ArraySerializer<DocumentPosting>(new PostingSerializer());
            var keyComparer = new TermComparer();

            var dbFileName = string.Format("{0}-{1}.{2}", ix.Name, "pos", "db");

            var treeOptions = new BPlusTree<Term, DocumentPosting[]>.OptionsV2(keySerializer, valueSerializer, keyComparer)
            {
                FileName = Path.Combine(directory, dbFileName),
                ReadOnly = true
            };

            _postingDb = new BPlusTree<Term, DocumentPosting[]>(treeOptions);

            Log.DebugFormat("init collector in {0}", timer.Elapsed);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Lazily yields one scored <c>DocumentScore</c> per posting of the given term.
        /// Yields nothing when there is no index (<c>_ix</c> is null) or when the
        /// field's trie does not contain the term's value.
        /// </summary>
        /// <param name="term">Term whose <c>Field</c> and <c>Value</c> select the trie and postings.</param>
        /// <param name="scoringScheme">Scheme used to create the scorer applied to each hit.</param>
        /// <returns>The scored hits; as an iterator, the body runs only when enumerated.</returns>
        private IEnumerable<DocumentScore> GetScoredResult(Term term, IScoringScheme scoringScheme)
        {
            // Note: the trie is fetched before the null check on _ix.
            var fieldTrie = GetTrie(term.Field);

            if (_ix == null)
            {
                yield break;
            }

            var docCount = _ix.DocCount[term.Field];

            if (!fieldTrie.ContainsToken(term.Value))
            {
                yield break;
            }

            var postingsFile = GetPostingsFile(term.Field, term.Value);

            // The scorer is created from the field's document count and the term's posting count.
            var termScorer = scoringScheme.CreateScorer(docCount, postingsFile.Postings.Count);

            foreach (var entry in postingsFile.Postings)
            {
                var score = new DocumentScore(entry.Key, entry.Value, docCount);
                termScorer.Score(score);
                yield return score;
            }
        }
Exemplo n.º 12
0
        /// <summary>
        /// Expands and scans the query context, resolves it, and returns the
        /// resolved document scores ordered by descending <c>Score</c>.
        /// </summary>
        /// <param name="queryContext">Query context to expand, scan and resolve.</param>
        /// <param name="page">Not referenced by this method.</param>
        /// <param name="size">Not referenced by this method.</param>
        /// <param name="scorer">Scoring scheme forwarded to <c>Scan</c>.</param>
        /// <returns>A materialized list of all resolved scores, highest first.</returns>
        public IEnumerable<DocumentScore> Collect(QueryContext queryContext, int page, int size, IScoringScheme scorer)
        {
            Expand(queryContext);
            Scan(queryContext, scorer);

            var resolved = queryContext.Resolve();

            return resolved.Values
                .OrderByDescending(hit => hit.Score)
                .ToList();
        }