private long Merge(string srcIxFileName)
{
    Log.InfoFormat("merging branch {0} with trunk {1}", _ixFilesToProcess[1], _ixFilesToProcess[0]);

    var ix = IxInfo.Load(srcIxFileName);
    var documentFileName = Path.Combine(_directory, ix.VersionId + ".rdoc");
    long version;

    using (var documentStream = new RDocStream(documentFileName, ix.PrimaryKeyFieldName))
    {
        using (var upsert = new UpsertTransaction(
            _directory, _analyzer, ix.Compression, documentStream))
        {
            version = upsert.Write();
            upsert.Commit();
        }

        Log.InfoFormat("{0} merged with {1} creating a segmented index", srcIxFileName, _ixFilesToProcess[0]);
    }

    Util.RemoveAll(srcIxFileName);

    return version;
}
public Collector(
    string directory,
    IxInfo ix,
    ConcurrentDictionary<string, LazyTrie> trieFiles,
    ConcurrentDictionary<string, PostingsContainer> postingContainers)
{
    _directory = directory;
    _trieFiles = trieFiles;
    _postingContainers = postingContainers;
    _ix = ix;
}
private long Truncate(string srcIxFileName)
{
    Log.InfoFormat("truncating {0}", srcIxFileName);

    var srcIx = IxInfo.Load(srcIxFileName);
    var documentFileName = Path.Combine(_directory, srcIx.VersionId + ".rdoc");
    long version;

    using (var documentStream = new RDocStream(documentFileName, srcIx.PrimaryKeyFieldName))
    {
        Util.TryAquireWriteLock(_directory);

        try
        {
            using (var upsert = new UpsertTransaction(
                _directory, _analyzer, srcIx.Compression, documentStream))
            {
                version = upsert.Write();
                upsert.Commit();
            }
        }
        finally
        {
            // Release the write lock even if the transaction throws.
            Util.ReleaseFileLock(_directory);
        }

        Log.InfoFormat("ix {0} fully truncated", _ixFilesToProcess[0]);
    }

    Util.RemoveAll(srcIxFileName);

    return version;
}
private IEnumerable<ScoredDocument> GetDocs(IList<DocumentScore> scores, IxInfo ix)
{
    var docAddressFileName = Path.Combine(_directory, ix.VersionId + ".da");
    IList<BlockInfo> docAdrs;

    using (var docAddressReader = new DocumentAddressReader(
        new FileStream(docAddressFileName, FileMode.Open, FileAccess.Read, FileShare.Read,
            4096, FileOptions.SequentialScan)))
    {
        // Address records are fixed-size, so a document's address lives at
        // offset documentId * _blockSize. Sort by position for a sequential read.
        var adrs = scores
            .Select(s => new BlockInfo(s.DocumentId * _blockSize, _blockSize))
            .OrderBy(b => b.Position)
            .ToList();

        docAdrs = docAddressReader.Get(adrs).ToList();
    }

    var docFileName = Path.Combine(_directory, ix.VersionId + ".rdoc");

    using (var docReader = new DocumentReader(
        new FileStream(docFileName, FileMode.Open, FileAccess.Read, FileShare.Read,
            4096 * 4, FileOptions.SequentialScan),
        (Compression)ix.Compression))
    {
        var dic = scores.ToDictionary(x => x.DocumentId, y => y.Score);

        foreach (var doc in docReader.Get(docAdrs))
        {
            var score = dic[doc.Id];

            yield return new ScoredDocument { Document = doc, Score = score };
        }
    }
}
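A worked example of the fixed-size address arithmetic GetDocs relies on (a sketch based on the snippet above; the concrete block size and standalone variables are assumptions):

// Hypothetical block size; the real value comes from the _blockSize field.
const int blockSize = 8;
var docId = 5;

// Document 5's address record starts at byte 40 (5 * 8) of the .da file.
var address = new BlockInfo(docId * blockSize, blockSize);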
public DocumentScore(int documentId, UInt64 docHash, double score, IxInfo ix)
{
    DocumentId = documentId;
    Score = score;
    Ix = ix;
    DocHash = docHash;
}
public Collector(string directory, IxInfo ix, IScoringScheme scorerFactory = null, int documentCount = -1)
{
    _directory = directory;
    _ix = ix;
    _scorerFactory = scorerFactory;
    _documentCount = documentCount == -1 ? ix.DocumentCount : documentCount;
}
public Collector(
    string directory,
    IxInfo ix,
    IScoringScheme scorerFactory = null,
    IDistanceResolver distanceResolver = null,
    int documentCount = -1)
{
    _directory = directory;
    _ix = ix;
    _scorerFactory = scorerFactory;
    _distanceResolver = distanceResolver ?? new Levenshtein();
    _documentCount = documentCount == -1 ? ix.DocumentCount : documentCount;
}
public Collector(
    string directory,
    IxInfo ix,
    IScoringScheme scorerFactory = null,
    IDistanceResolver distanceResolver = null,
    int documentCount = -1)
{
    _directory = directory;
    _ix = ix;
    _scorerFactory = scorerFactory;
    _distanceResolver = distanceResolver ?? new Levenshtein();
    _documentCount = documentCount == -1 ? ix.DocumentCount : documentCount;
    _scoreCache = new Dictionary<SubQuery, IList<DocumentScore>>();
}
public Searcher(string directory, QueryParser parser, IScoringScheme scorer)
{
    _directory = directory;
    _parser = parser;
    _scorer = scorer;

    _trieFiles = new ConcurrentDictionary<string, LazyTrie>();
    _docContainers = new ConcurrentDictionary<string, DocContainer>();
    _postingContainers = new ConcurrentDictionary<string, PostingsContainer>();

    _ix = IxInfo.Load(Path.Combine(_directory, "0.ix"));
}
public Collector(string directory, IxInfo ix, IScoringScheme scorerFactory = null, int documentCount = -1)
{
    _directory = directory;
    _ix = ix;
    _scorerFactory = scorerFactory;
    _documentCount = documentCount == -1 ? ix.DocumentCount : documentCount;
    _scoreCache = new Dictionary<Query, IList<DocumentScore>>();

    var docHashesFileName = Path.Combine(_directory, string.Format("{0}.{1}", _ix.VersionId, "pk"));

    _posFileName = Path.Combine(directory, string.Format("{0}.{1}", ix.VersionId, "pos"));
    _docHashReader = new DocHashReader(docHashesFileName);
}
public void Dispose()
{
    foreach (var docId in _deletions)
    {
        DoRemove(docId);
    }

    Parallel.ForEach(_trieFiles, kvp =>
    {
        var field = kvp.Key;
        var trie = kvp.Value;

        using (var container = new TrieWriter(field.ToTrieContainerId()))
        {
            trie.Save(container, _directory);
        }
    });

    _docWorker.Dispose();
    _postingsWorker.Dispose();

    Parallel.ForEach(_postingsContainers.Values, container =>
    {
        if (container.Count > 0)
        {
            container.Flush(_directory);
            container.Dispose();
        }
        else
        {
            // Empty container: dispose it and delete its file from disk.
            container.Dispose();
            File.Delete(Path.Combine(_directory, container.Id + ".pc"));
        }
    });

    Parallel.ForEach(_docContainers.Values, container => container.Dispose());

    _ix.Save(Path.Combine(_directory, "1.ix"));

    var ixInfo = new IxInfo();

    foreach (var field in _ix.Fields)
    {
        ixInfo.DocCount[field.Key] = field.Value.Count;
    }

    ixInfo.Save(Path.Combine(_directory, "0.ix"));
}
public Collector(string directory, IxInfo ix, IScoringScheme scorer)
{
    _directory = directory;
    _ix = ix;
    _scorer = scorer;

    var initTimer = Time();

    var dbOptions = new BPlusTree<Term, DocumentPosting[]>.OptionsV2(
        new TermSerializer(),
        new ArraySerializer<DocumentPosting>(new PostingSerializer()),
        new TermComparer());

    dbOptions.FileName = Path.Combine(directory, string.Format("{0}-{1}.{2}", _ix.Name, "pos", "db"));
    dbOptions.ReadOnly = true;

    _postingDb = new BPlusTree<Term, DocumentPosting[]>(dbOptions);

    Log.DebugFormat("init collector in {0}", initTimer.Elapsed);
}
private void GetDocs(IList<DocumentScore> scores, IxInfo ix, ConcurrentBag<ScoredDocument> result)
{
    var documentIds = scores.Select(s => s.DocumentId).ToList();
    var docAddressFileName = Path.Combine(_directory, ix.VersionId + ".da");
    var docFileName = Path.Combine(_directory, ix.VersionId + ".rdoc");

    using (var session = _sessionFactory.Create(docAddressFileName, docFileName, ix.Compression))
    {
        var dic = scores.ToDictionary(x => x.DocumentId, y => y.Score);

        foreach (var doc in session.Read(documentIds))
        {
            var score = dic[doc.Id];

            result.Add(new ScoredDocument(doc, score));
        }
    }
}
public long Commit()
{
    if (_ixFilesToProcess.Length == 1)
    {
        // One index file: if it is segmented, merge its segments by truncating it.
        if (Util.IsSegmented(_ixFilesToProcess[0]))
        {
            return Truncate(_ixFilesToProcess[0]);
        }

        // Nothing to do.
        return -1;
    }

    // Two index files: merge the branch into the trunk.
    return Merge(_ixFilesToProcess[1]);
}
public UpsertTransaction(
    string directory,
    IAnalyzer analyzer,
    Compression compression,
    DocumentStream documents,
    IDocumentStoreWriter storeWriter = null)
{
    _directory = directory;
    _analyzer = analyzer;
    _compression = compression;
    _documents = documents;

    var mainIndexVersion = Util.GetIndexFileNamesInChronologicalOrder(_directory)
        .FirstOrDefault();

    if (mainIndexVersion == null)
    {
        // No index exists yet: start a new version.
        _indexVersionId = Util.GetNextChronologicalFileId();
    }
    else if (Util.WriteLockExists(_directory) || !Util.TryAquireWriteLock(_directory))
    {
        // Another writer holds the lock: write a new branch version.
        _indexVersionId = Util.GetNextChronologicalFileId();
    }
    else
    {
        // We own the write lock: append to the main index version.
        _indexVersionId = long.Parse(Path.GetFileNameWithoutExtension(mainIndexVersion));

        var ix = IxInfo.Load(mainIndexVersion);
        _count = ix.DocumentCount;
    }

    _storeWriter = storeWriter ?? new DocumentStoreWriter(directory, _indexVersionId, _compression);
}
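A minimal usage sketch of the write path, assembled from the Merge and Truncate methods above; the directory and analyzer variables, and ixFileName, are assumptions:

var ix = IxInfo.Load(ixFileName); // ixFileName: path to an existing .ix file (hypothetical)
var documentFileName = Path.Combine(directory, ix.VersionId + ".rdoc");
long version;

using (var documents = new RDocStream(documentFileName, ix.PrimaryKeyFieldName))
using (var upsert = new UpsertTransaction(directory, analyzer, ix.Compression, documents))
{
    version = upsert.Write();
    upsert.Commit();
}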
public RDocStream(string fileName, string primaryKeyFieldName = null, int skip = 0, int take = int.MaxValue)
    : base(primaryKeyFieldName)
{
    var versionId = Path.GetFileNameWithoutExtension(fileName);
    var directory = Path.GetDirectoryName(fileName);
    var docFileName = Path.Combine(directory, versionId + ".rdoc");
    var docAddressFn = Path.Combine(directory, versionId + ".da");
    var docHashesFileName = Path.Combine(directory, string.Format("{0}.{1}", versionId, "pk"));
    var keyIndexFileName = Path.Combine(directory, versionId + ".kix");
    var keyIndex = Util.GetKeyIndex(keyIndexFileName);

    _ix = IxInfo.Load(Path.Combine(directory, versionId + ".ix"));
    _hashReader = new DocHashReader(docHashesFileName);
    _addressReader = new DocumentAddressReader(new FileStream(docAddressFn, FileMode.Open, FileAccess.Read));
    _documentReader = new DocumentReader(
        new FileStream(docFileName, FileMode.Open, FileAccess.Read),
        _ix.Compression,
        keyIndex);

    _skip = skip;
    _take = take;
    _directory = directory;
}
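For reference, the per-version file layout these snippets read and write (collected from the code above, not necessarily exhaustive):

// {versionId}.ix    index metadata (IxInfo)
// {versionId}.rdoc  document store (DocumentReader)
// {versionId}.da    document address table (DocumentAddressReader / BlockInfo)
// {versionId}.pk    primary-key document hashes (DocHashReader)
// {versionId}.kix   key index (Util.GetKeyIndex)
// {versionId}.pos   postings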