/// <summary>
/// Creates a collector for the index described by <paramref name="ix"/>.
/// </summary>
/// <param name="directory">Directory value kept in <c>_directory</c>.</param>
/// <param name="ix">Index info; also supplies the document count when no explicit count is given.</param>
/// <param name="scorerFactory">Optional scoring scheme; stored as given (may be null).</param>
/// <param name="documentCount">
/// Explicit document count. The sentinel value -1 means "use <c>ix.DocumentCount</c>".
/// </param>
public Collector(string directory, IxInfo ix, IScoringScheme scorerFactory = null, int documentCount = -1)
{
    _directory = directory;
    _ix = ix;
    _scorerFactory = scorerFactory;

    // -1 is the "not specified" marker: fall back to the count recorded in the index info.
    if (documentCount == -1)
    {
        _documentCount = ix.DocumentCount;
    }
    else
    {
        _documentCount = documentCount;
    }
}
/// <summary>
/// Fills in <c>Result</c> on <paramref name="queryContext"/> and then, recursively,
/// on every context reachable through <c>Children</c>.
/// </summary>
/// <param name="queryContext">The query node to score; its <c>Result</c> is overwritten.</param>
/// <param name="scorer">Scoring scheme forwarded to <c>GetScoredResult</c> for every node.</param>
private void Scan(QueryContext queryContext, IScoringScheme scorer)
{
    // Score the current node and index the hits by document id.
    var scoredHits = GetScoredResult(queryContext, scorer);
    queryContext.Result = scoredHits.ToDictionary(hit => hit.DocId, hit => hit);

    // Parent first, then each child with the same scorer.
    foreach (var childContext in queryContext.Children)
    {
        Scan(childContext, scorer);
    }
}
/// <summary>
/// Creates a collector for the index described by <paramref name="ix"/>, with an
/// optional distance resolver.
/// </summary>
/// <param name="directory">Directory value kept in <c>_directory</c>.</param>
/// <param name="ix">Index info; also supplies the document count when no explicit count is given.</param>
/// <param name="scorerFactory">Optional scoring scheme; stored as given (may be null).</param>
/// <param name="distanceResolver">Optional distance resolver; a new <c>Levenshtein</c> is used when null.</param>
/// <param name="documentCount">
/// Explicit document count. The sentinel value -1 means "use <c>ix.DocumentCount</c>".
/// </param>
public Collector(string directory, IxInfo ix, IScoringScheme scorerFactory = null, IDistanceResolver distanceResolver = null, int documentCount = -1)
{
    _directory = directory;
    _ix = ix;
    _scorerFactory = scorerFactory;

    // Default the resolver only when the caller did not supply one.
    if (distanceResolver == null)
    {
        _distanceResolver = new Levenshtein();
    }
    else
    {
        _distanceResolver = distanceResolver;
    }

    // -1 is the "not specified" marker for the document count.
    var useIndexCount = documentCount == -1;
    _documentCount = useIndexCount ? ix.DocumentCount : documentCount;
}
/// <summary>
/// Creates a collector for the index described by <paramref name="ix"/>, with an
/// optional distance resolver and an initially empty per-sub-query score cache.
/// </summary>
/// <param name="directory">Directory value kept in <c>_directory</c>.</param>
/// <param name="ix">Index info; also supplies the document count when no explicit count is given.</param>
/// <param name="scorerFactory">Optional scoring scheme; stored as given (may be null).</param>
/// <param name="distanceResolver">Optional distance resolver; a new <c>Levenshtein</c> is used when null.</param>
/// <param name="documentCount">
/// Explicit document count. The sentinel value -1 means "use <c>ix.DocumentCount</c>".
/// </param>
public Collector(string directory, IxInfo ix, IScoringScheme scorerFactory = null, IDistanceResolver distanceResolver = null, int documentCount = -1)
{
    _directory = directory;
    _ix = ix;
    _scorerFactory = scorerFactory;
    _distanceResolver = distanceResolver ?? new Levenshtein();

    // -1 is the "not specified" marker: fall back to the count recorded in the index info.
    if (documentCount == -1)
    {
        _documentCount = ix.DocumentCount;
    }
    else
    {
        _documentCount = documentCount;
    }

    // Starts empty; keyed by sub-query.
    _scoreCache = new Dictionary<SubQuery, IList<DocumentScore>>();
}
/// <summary>
/// Creates a searcher over <paramref name="directory"/> and loads the index info
/// from the file named <c>0.ix</c> inside it.
/// </summary>
/// <param name="directory">Directory that contains <c>0.ix</c>.</param>
/// <param name="parser">Query parser stored for later use.</param>
/// <param name="scorer">Scoring scheme stored for later use.</param>
public Searcher(string directory, QueryParser parser, IScoringScheme scorer)
{
    _directory = directory;
    _parser = parser;
    _scorer = scorer;

    // All three lookups start empty.
    _trieFiles = new ConcurrentDictionary<string, LazyTrie>();
    _docContainers = new ConcurrentDictionary<string, DocContainer>();
    _postingContainers = new ConcurrentDictionary<string, PostingsContainer>();

    var ixFileName = Path.Combine(_directory, "0.ix");
    _ix = IxInfo.Load(ixFileName);
}
/// <summary>
/// Creates a searcher over every index file found in <paramref name="directory"/>.
/// </summary>
/// <param name="directory">Directory passed to <c>Util.GetIndexFileNamesInChronologicalOrder</c>.</param>
/// <param name="parser">Query parser stored for later use.</param>
/// <param name="scorerFactory">Scoring scheme stored for later use.</param>
public Searcher(string directory, QueryParser parser, IScoringScheme scorerFactory)
{
    _directory = directory;
    _parser = parser;
    _scorerFactory = scorerFactory;

    // Load one IxInfo per index file, keeping the order the helper returns them in.
    var ixFileNames = Util.GetIndexFileNamesInChronologicalOrder(directory);
    _ixs = ixFileNames
        .Select(IxInfo.Load)
        .ToList();

    // The document count is derived from the loaded index infos.
    _documentCount = Util.GetDocumentCount(_ixs);
    _blockSize = Serializer.SizeOfBlock();
}
/// <summary>
/// Creates a collector for the index described by <paramref name="ix"/>, prepares an
/// empty score cache, and opens a <c>DocHashReader</c> on the index version's
/// <c>.pk</c> file.
/// </summary>
/// <param name="directory">Directory that holds the <c>.pk</c> and <c>.pos</c> files.</param>
/// <param name="ix">Index info; supplies <c>VersionId</c> and the fallback document count.</param>
/// <param name="scorerFactory">Optional scoring scheme; stored as given (may be null).</param>
/// <param name="documentCount">
/// Explicit document count. The sentinel value -1 means "use <c>ix.DocumentCount</c>".
/// </param>
public Collector(string directory, IxInfo ix, IScoringScheme scorerFactory = null, int documentCount = -1)
{
    _directory = directory;
    _ix = ix;
    _scorerFactory = scorerFactory;

    // -1 is the "not specified" marker: fall back to the count recorded in the index info.
    if (documentCount == -1)
    {
        _documentCount = ix.DocumentCount;
    }
    else
    {
        _documentCount = documentCount;
    }

    _scoreCache = new Dictionary<Query, IList<DocumentScore>>();

    // Both file names have the form "<VersionId>.<extension>" inside the directory.
    var pkName = string.Format("{0}.{1}", _ix.VersionId, "pk");
    var docHashesFileName = Path.Combine(_directory, pkName);

    var posName = string.Format("{0}.{1}", ix.VersionId, "pos");
    _posFileName = Path.Combine(directory, posName);

    _docHashReader = new DocHashReader(docHashesFileName);
}
/// <summary>
/// Creates a searcher, loads every index info keyed by its <c>Name</c>, opens the
/// document reader, and logs how long that initialization took.
/// </summary>
/// <param name="directory">Directory value kept in <c>_directory</c>.</param>
/// <param name="parser">Query parser stored for later use.</param>
/// <param name="scorer">Scoring scheme stored for later use.</param>
public Searcher(string directory, QueryParser parser, IScoringScheme scorer)
{
    _directory = directory;
    _parser = parser;
    _scorer = scorer;

    // Timing covers index loading and opening the document reader.
    var stopwatch = Time();

    var indexFileNames = GetIndexFileNamesInChronologicalOrder();
    _indices = indexFileNames
        .Select(IxInfo.Load)
        .ToDictionary(info => info.Name);

    var docDb = OpenDocDb();
    _docReader = new DbDocumentReader(docDb);

    Log.DebugFormat("init searcher in {0}", stopwatch.Elapsed);
}
/// <summary>
/// Creates an index writer: sets up empty in-memory containers, two single-slot
/// task queues, and loads the existing <c>1.ix</c> file from
/// <paramref name="directory"/> when there is one.
/// </summary>
/// <param name="directory">Directory in which <c>1.ix</c> is looked up.</param>
/// <param name="analyzer">Analyzer stored for later use.</param>
/// <param name="scoringScheme">Scoring scheme stored for later use.</param>
public IndexWriter(string directory, IAnalyzer analyzer, IScoringScheme scoringScheme)
{
    _directory = directory;
    _analyzer = analyzer;
    _scoringScheme = scoringScheme;

    // Containers are created before the queues whose callbacks are registered below.
    _postingsFiles = new Dictionary<string, PostingsFile>();
    _postingsContainers = new Dictionary<string, PostingsContainer>();
    _docContainers = new Dictionary<string, DocContainer>();

    // Each queue is constructed with 1 as its first argument and a callback method.
    _docWorker = new TaskQueue<Document>(1, PutDocInContainer);
    _postingsWorker = new TaskQueue<PostingsFile>(1, PutPostingsInContainer);

    _deletions = new List<string>();
    _trieFiles = new Dictionary<string, LazyTrie>();

    // Reuse the index file on disk if present; otherwise start from a blank one.
    var ixFileName = Path.Combine(directory, "1.ix");
    if (File.Exists(ixFileName))
    {
        _ix = IxFile.Load(ixFileName);
    }
    else
    {
        _ix = new IxFile();
    }
}
/// <summary>
/// Creates a collector and opens the postings B+ tree for the given index in
/// read-only mode, logging how long that took.
/// </summary>
/// <param name="directory">Directory that holds the <c>{ix.Name}-pos.db</c> file.</param>
/// <param name="ix">Index info; its <c>Name</c> determines the postings file name.</param>
/// <param name="scorer">Scoring scheme stored for later use.</param>
public Collector(string directory, IxInfo ix, IScoringScheme scorer)
{
    _directory = directory;
    _ix = ix;
    _scorer = scorer;

    var stopwatch = Time();

    // Key, value and comparer plumbing for a Term -> DocumentPosting[] tree.
    var keySerializer = new TermSerializer();
    var valueSerializer = new ArraySerializer<DocumentPosting>(new PostingSerializer());
    var keyComparer = new TermComparer();

    var dbOptions = new BPlusTree<Term, DocumentPosting[]>.OptionsV2(keySerializer, valueSerializer, keyComparer)
    {
        FileName = Path.Combine(directory, string.Format("{0}-{1}.{2}", _ix.Name, "pos", "db")),
        ReadOnly = true
    };

    _postingDb = new BPlusTree<Term, DocumentPosting[]>(dbOptions);

    Log.DebugFormat("init collector in {0}", stopwatch.Elapsed);
}
/// <summary>
/// Lazily yields one scored hit per posting of <paramref name="term"/>.
/// Yields nothing when there is no index info or when the field's trie does not
/// contain the term's value.
/// </summary>
/// <param name="term">Field/value pair to look up.</param>
/// <param name="scoringScheme">Factory used to create the scorer for this term.</param>
/// <returns>A deferred sequence of scored hits; no work happens until it is enumerated.</returns>
private IEnumerable<DocumentScore> GetScoredResult(Term term, IScoringScheme scoringScheme)
{
    // The trie is fetched before the index check, matching the established call order.
    var fieldTrie = GetTrie(term.Field);

    if (_ix == null)
    {
        yield break;
    }

    var docTotal = _ix.DocCount[term.Field];

    if (!fieldTrie.ContainsToken(term.Value))
    {
        yield break;
    }

    var postingsFile = GetPostingsFile(term.Field, term.Value);
    var termScorer = scoringScheme.CreateScorer(docTotal, postingsFile.Postings.Count);

    foreach (var entry in postingsFile.Postings)
    {
        // Score() is applied to the hit before it is handed to the caller.
        var scoredHit = new DocumentScore(entry.Key, entry.Value, docTotal);
        termScorer.Score(scoredHit);
        yield return scoredHit;
    }
}
/// <summary>
/// Expands and scans the query tree, resolves it, and returns the resulting hits
/// ordered by descending score.
/// </summary>
/// <param name="queryContext">Root of the query tree; expanded and scanned in place.</param>
/// <param name="page">Not used by this method.</param>
/// <param name="size">Not used by this method.</param>
/// <param name="scorer">Scoring scheme forwarded to <c>Scan</c>.</param>
/// <returns>A materialized list of all resolved hits, highest score first.</returns>
public IEnumerable<DocumentScore> Collect(QueryContext queryContext, int page, int size, IScoringScheme scorer)
{
    Expand(queryContext);
    Scan(queryContext, scorer);

    // NOTE(review): page and size are accepted but no paging is applied here —
    // confirm whether the caller is expected to page the returned sequence.
    var resolved = queryContext.Resolve();
    var ranked = resolved.Values
        .OrderByDescending(hit => hit.Score)
        .ToList();

    return ranked;
}