Esempio n. 1
0
        /// <summary>
        /// Feeds the documents of the index described by <paramref name="srcIxFileName"/>
        /// through an <see cref="UpsertTransaction"/> against <c>_directory</c>, then
        /// removes the source index via <c>Util.RemoveAll</c>.
        /// </summary>
        /// <param name="srcIxFileName">Path of the index info file of the branch to merge.</param>
        /// <returns>The value returned by the transaction's <c>Write</c> call.</returns>
        private long Merge(string srcIxFileName)
        {
            Log.InfoFormat("merging branch {0} with trunk {1}", _ixFilesToProcess[1], _ixFilesToProcess[0]);

            var branchInfo = IxInfo.Load(srcIxFileName);
            var branchDocPath = Path.Combine(_directory, branchInfo.VersionId + ".rdoc");
            long mergedVersion;

            using (var branchDocs = new RDocStream(branchDocPath, branchInfo.PrimaryKeyFieldName))
            {
                using (var transaction = new UpsertTransaction(_directory, _analyzer, branchInfo.Compression, branchDocs))
                {
                    mergedVersion = transaction.Write();
                    transaction.Commit();
                }

                // Logged while the source document stream is still open, after the transaction is disposed.
                Log.InfoFormat("{0} merged with {1} creating a segmented index", srcIxFileName, _ixFilesToProcess[0]);
            }

            // The source is only removed once its document stream has been closed.
            Util.RemoveAll(srcIxFileName);

            return mergedVersion;
        }
Esempio n. 2
0
 /// <summary>
 /// Creates a collector that keeps references to the supplied directory, index info
 /// and the caller-provided trie and postings container maps.
 /// </summary>
 /// <param name="directory">Directory stored in <c>_directory</c>.</param>
 /// <param name="ix">Index info stored in <c>_ix</c>.</param>
 /// <param name="trieFiles">Map stored in <c>_trieFiles</c>; the instance is kept, not copied.</param>
 /// <param name="postingContainers">Map stored in <c>_postingContainers</c>; the instance is kept, not copied.</param>
 public Collector(
     string directory,
     IxInfo ix,
     ConcurrentDictionary<string, LazyTrie> trieFiles,
     ConcurrentDictionary<string, PostingsContainer> postingContainers)
 {
     _directory = directory;
     _ix = ix;
     _trieFiles = trieFiles;
     _postingContainers = postingContainers;
 }
Esempio n. 3
0
        /// <summary>
        /// Re-writes the documents of the index described by <paramref name="srcIxFileName"/>
        /// through an <see cref="UpsertTransaction"/> while holding the directory write lock,
        /// then removes the source index via <c>Util.RemoveAll</c>.
        /// </summary>
        /// <param name="srcIxFileName">Path of the index info file to truncate.</param>
        /// <returns>The value returned by the transaction's <c>Write</c> call.</returns>
        private long Truncate(string srcIxFileName)
        {
            Log.InfoFormat("truncating {0}", srcIxFileName);

            var srcIx = IxInfo.Load(srcIxFileName);
            var documentFileName = Path.Combine(_directory, srcIx.VersionId + ".rdoc");
            long version;

            using (var documentStream = new RDocStream(documentFileName, srcIx.PrimaryKeyFieldName))
            {
                // NOTE(review): the result of TryAquireWriteLock is ignored, as before;
                // confirm whether a failed acquisition should abort the truncation.
                Util.TryAquireWriteLock(_directory);

                try
                {
                    using (var upsert = new UpsertTransaction(
                               _directory,
                               _analyzer,
                               srcIx.Compression,
                               documentStream))
                    {
                        version = upsert.Write();
                        upsert.Commit();
                    }
                }
                finally
                {
                    // Release the lock even when Write/Commit throws, so a failed
                    // truncation does not leave the directory locked.
                    Util.ReleaseFileLock(_directory);
                }

                Log.InfoFormat("ix {0} fully truncated", _ixFilesToProcess[0]);
            }

            Util.RemoveAll(srcIxFileName);
            return version;
        }
Esempio n. 4
0
        /// <summary>
        /// Lazily yields one <see cref="ScoredDocument"/> per document read for the given scores.
        /// </summary>
        /// <remarks>
        /// This is an iterator: no file is opened until the result is enumerated. The address
        /// file (<c>.da</c>) is read and closed first, then documents are streamed from the
        /// <c>.rdoc</c> file, which stays open while the caller enumerates.
        /// </remarks>
        /// <param name="scores">Scores whose <c>DocumentId</c> values select the documents to read.</param>
        /// <param name="ix">Index info supplying the version id (file names) and compression.</param>
        /// <returns>The documents returned by the reader, each paired with its score.</returns>
        private IEnumerable<ScoredDocument> GetDocs(IList<DocumentScore> scores, IxInfo ix)
        {
            var addressPath = Path.Combine(_directory, ix.VersionId + ".da");

            IList<BlockInfo> documentBlocks;

            using (var addressReader = new DocumentAddressReader(
                       new FileStream(addressPath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.SequentialScan)))
            {
                // Each document id maps to a fixed-size slot in the address file;
                // slots are requested in ascending position order.
                var requestedSlots = scores.Select(s => new BlockInfo(s.DocumentId * _blockSize, _blockSize));
                var orderedSlots = requestedSlots.OrderBy(slot => slot.Position).ToList();

                documentBlocks = addressReader.Get(orderedSlots).ToList();
            }

            var documentPath = Path.Combine(_directory, ix.VersionId + ".rdoc");

            using (var documentReader = new DocumentReader(
                       new FileStream(documentPath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096 * 4, FileOptions.SequentialScan),
                       (Compression)ix.Compression))
            {
                // ToDictionary throws if two scores share a DocumentId.
                var scoreById = scores.ToDictionary(s => s.DocumentId, s => s.Score);

                foreach (var document in documentReader.Get(documentBlocks))
                {
                    var documentScore = scoreById[document.Id];

                    yield return new ScoredDocument
                    {
                        Document = document,
                        Score = documentScore
                    };
                }
            }
        }
Esempio n. 5
0
 /// <summary>
 /// Creates a score entry by storing each argument in its matching property.
 /// </summary>
 /// <param name="documentId">Value stored in <c>DocumentId</c>.</param>
 /// <param name="docHash">Value stored in <c>DocHash</c>.</param>
 /// <param name="score">Value stored in <c>Score</c>.</param>
 /// <param name="ix">Index info stored in <c>Ix</c>.</param>
 public DocumentScore(int documentId, UInt64 docHash, double score, IxInfo ix)
 {
     DocumentId = documentId;
     DocHash = docHash;
     Score = score;
     Ix = ix;
 }
Esempio n. 6
0
        /// <summary>
        /// Creates a collector for the given directory and index.
        /// </summary>
        /// <param name="directory">Directory stored in <c>_directory</c>.</param>
        /// <param name="ix">Index info; also the source of the document count when none is given.</param>
        /// <param name="scorerFactory">Optional scoring scheme; stored as-is (may be null).</param>
        /// <param name="documentCount">
        /// Document count to use; the sentinel <c>-1</c> means "take <c>ix.DocumentCount</c>".
        /// </param>
        public Collector(string directory, IxInfo ix, IScoringScheme scorerFactory = null, int documentCount = -1)
        {
            _directory = directory;
            _ix = ix;
            _scorerFactory = scorerFactory;

            if (documentCount == -1)
            {
                _documentCount = ix.DocumentCount;
            }
            else
            {
                _documentCount = documentCount;
            }
        }
Esempio n. 7
0
 /// <summary>
 /// Creates a collector for the given directory and index, with optional scoring
 /// and distance-resolution strategies.
 /// </summary>
 /// <param name="directory">Directory stored in <c>_directory</c>.</param>
 /// <param name="ix">Index info; also the source of the document count when none is given.</param>
 /// <param name="scorerFactory">Optional scoring scheme; stored as-is (may be null).</param>
 /// <param name="distanceResolver">Optional distance resolver; a new <c>Levenshtein</c> is used when null.</param>
 /// <param name="documentCount">
 /// Document count to use; the sentinel <c>-1</c> means "take <c>ix.DocumentCount</c>".
 /// </param>
 public Collector(
     string directory,
     IxInfo ix,
     IScoringScheme scorerFactory = null,
     IDistanceResolver distanceResolver = null,
     int documentCount = -1)
 {
     _directory = directory;
     _ix = ix;
     _scorerFactory = scorerFactory;

     if (distanceResolver == null)
     {
         _distanceResolver = new Levenshtein();
     }
     else
     {
         _distanceResolver = distanceResolver;
     }

     var useIndexCount = documentCount == -1;
     _documentCount = useIndexCount ? ix.DocumentCount : documentCount;
 }
Esempio n. 8
0
 /// <summary>
 /// Creates a collector for the given directory and index, with optional scoring
 /// and distance-resolution strategies and an empty per-sub-query score cache.
 /// </summary>
 /// <param name="directory">Directory stored in <c>_directory</c>.</param>
 /// <param name="ix">Index info; also the source of the document count when none is given.</param>
 /// <param name="scorerFactory">Optional scoring scheme; stored as-is (may be null).</param>
 /// <param name="distanceResolver">Optional distance resolver; a new <c>Levenshtein</c> is used when null.</param>
 /// <param name="documentCount">
 /// Document count to use; the sentinel <c>-1</c> means "take <c>ix.DocumentCount</c>".
 /// </param>
 public Collector(
     string directory,
     IxInfo ix,
     IScoringScheme scorerFactory = null,
     IDistanceResolver distanceResolver = null,
     int documentCount = -1)
 {
     _directory = directory;
     _ix = ix;
     _scorerFactory = scorerFactory;
     _distanceResolver = distanceResolver ?? new Levenshtein();

     if (documentCount == -1)
     {
         _documentCount = ix.DocumentCount;
     }
     else
     {
         _documentCount = documentCount;
     }

     _scoreCache = new Dictionary<SubQuery, IList<DocumentScore>>();
 }
Esempio n. 9
0
        /// <summary>
        /// Creates a searcher over <paramref name="directory"/>, starting with empty trie,
        /// document-container and postings-container maps, and loads the index info
        /// from the file <c>0.ix</c> in that directory.
        /// </summary>
        /// <param name="directory">Directory containing the index files.</param>
        /// <param name="parser">Query parser stored in <c>_parser</c>.</param>
        /// <param name="scorer">Scoring scheme stored in <c>_scorer</c>.</param>
        public Searcher(string directory, QueryParser parser, IScoringScheme scorer)
        {
            _directory = directory;
            _parser = parser;
            _scorer = scorer;

            _trieFiles = new ConcurrentDictionary<string, LazyTrie>();
            _docContainers = new ConcurrentDictionary<string, DocContainer>();
            _postingContainers = new ConcurrentDictionary<string, PostingsContainer>();

            var indexInfoPath = Path.Combine(_directory, "0.ix");
            _ix = IxInfo.Load(indexInfoPath);
        }
Esempio n. 10
0
        /// <summary>
        /// Creates a collector for the given directory and index, with an empty per-query
        /// score cache, the path of the index's <c>.pos</c> file, and a
        /// <c>DocHashReader</c> over the index's <c>.pk</c> file.
        /// </summary>
        /// <param name="directory">Directory containing the index files.</param>
        /// <param name="ix">Index info; supplies the version id and the default document count.</param>
        /// <param name="scorerFactory">Optional scoring scheme; stored as-is (may be null).</param>
        /// <param name="documentCount">
        /// Document count to use; the sentinel <c>-1</c> means "take <c>ix.DocumentCount</c>".
        /// </param>
        public Collector(string directory, IxInfo ix, IScoringScheme scorerFactory = null, int documentCount = -1)
        {
            _directory = directory;
            _ix = ix;
            _scorerFactory = scorerFactory;

            if (documentCount == -1)
            {
                _documentCount = ix.DocumentCount;
            }
            else
            {
                _documentCount = documentCount;
            }

            _scoreCache = new Dictionary<Query, IList<DocumentScore>>();

            // Both file names are derived from the index version id.
            var primaryKeyFile = string.Format("{0}.{1}", _ix.VersionId, "pk");
            var primaryKeyPath = Path.Combine(_directory, primaryKeyFile);

            var positionsFile = string.Format("{0}.{1}", ix.VersionId, "pos");
            _posFileName = Path.Combine(directory, positionsFile);

            _docHashReader = new DocHashReader(primaryKeyPath);
        }
Esempio n. 11
0
        /// <summary>
        /// Finalizes the write session: applies pending deletions, saves every trie,
        /// disposes the document and postings workers, flushes or deletes each postings
        /// container, disposes the document containers, and writes the index info files
        /// <c>1.ix</c> and <c>0.ix</c>.
        /// </summary>
        public void Dispose()
        {
            foreach (var deletedId in _deletions)
            {
                DoRemove(deletedId);
            }

            // One trie per field, each saved through its own writer.
            Parallel.ForEach(_trieFiles, entry =>
            {
                using (var writer = new TrieWriter(entry.Key.ToTrieContainerId()))
                {
                    entry.Value.Save(writer, _directory);
                }
            });

            _docWorker.Dispose();
            _postingsWorker.Dispose();

            Parallel.ForEach(_postingsContainers.Values, postings =>
            {
                // Sampled once, before flushing or disposing.
                var hasPostings = postings.Count > 0;

                if (hasPostings)
                {
                    postings.Flush(_directory);
                }

                postings.Dispose();

                if (!hasPostings)
                {
                    // An empty container is disposed first, then its ".pc" file is deleted.
                    File.Delete(Path.Combine(_directory, postings.Id + ".pc"));
                }
            });

            Parallel.ForEach(_docContainers.Values, docs =>
            {
                docs.Dispose();
            });

            var detailedInfoPath = Path.Combine(_directory, "1.ix");
            _ix.Save(detailedInfoPath);

            // "0.ix" receives a fresh IxInfo holding only the per-field counts.
            var summary = new IxInfo();

            foreach (var fieldEntry in _ix.Fields)
            {
                summary.DocCount[fieldEntry.Key] = fieldEntry.Value.Count;
            }

            var summaryPath = Path.Combine(_directory, "0.ix");
            summary.Save(summaryPath);
        }
Esempio n. 12
0
        /// <summary>
        /// Creates a collector and opens the postings B+ tree for the index in read-only mode.
        /// </summary>
        /// <remarks>
        /// The database file is <c>{ix.Name}-pos.db</c> inside <paramref name="directory"/>.
        /// The time spent in this constructor is written to the debug log.
        /// </remarks>
        /// <param name="directory">Directory containing the postings database.</param>
        /// <param name="ix">Index info; its <c>Name</c> is part of the database file name.</param>
        /// <param name="scorer">Scoring scheme stored in <c>_scorer</c>.</param>
        public Collector(string directory, IxInfo ix, IScoringScheme scorer)
        {
            _directory = directory;
            _ix = ix;
            _scorer = scorer;

            var stopwatch = Time();

            var treeOptions = new BPlusTree<Term, DocumentPosting[]>.OptionsV2(
                new TermSerializer(),
                new ArraySerializer<DocumentPosting>(new PostingSerializer()),
                new TermComparer())
            {
                FileName = Path.Combine(directory, string.Format("{0}-{1}.{2}", _ix.Name, "pos", "db")),
                ReadOnly = true
            };

            _postingDb = new BPlusTree<Term, DocumentPosting[]>(treeOptions);

            Log.DebugFormat("init collector in {0}", stopwatch.Elapsed);
        }
Esempio n. 13
0
        /// <summary>
        /// Reads the documents referenced by <paramref name="scores"/> through a session
        /// created by <c>_sessionFactory</c> and adds each one, paired with its score,
        /// to <paramref name="result"/>.
        /// </summary>
        /// <param name="scores">Scores whose <c>DocumentId</c> values select the documents to read.</param>
        /// <param name="ix">Index info supplying the version id (file names) and compression.</param>
        /// <param name="result">Bag that receives one <see cref="ScoredDocument"/> per document read.</param>
        private void GetDocs(IList<DocumentScore> scores, IxInfo ix, ConcurrentBag<ScoredDocument> result)
        {
            var requestedIds = scores.Select(s => s.DocumentId).ToList();

            var versionId = ix.VersionId;
            var addressPath = Path.Combine(_directory, versionId + ".da");
            var documentPath = Path.Combine(_directory, versionId + ".rdoc");

            using (var readSession = _sessionFactory.Create(addressPath, documentPath, ix.Compression))
            {
                // ToDictionary throws if two scores share a DocumentId.
                var scoreById = scores.ToDictionary(s => s.DocumentId, s => s.Score);

                foreach (var document in readSession.Read(requestedIds))
                {
                    var documentScore = scoreById[document.Id];
                    var scored = new ScoredDocument(document, documentScore);

                    result.Add(scored);
                }
            }
        }
Esempio n. 14
0
        /// <summary>
        /// Runs the optimization appropriate for the files in <c>_ixFilesToProcess</c>.
        /// </summary>
        /// <remarks>
        /// With exactly one file, that index is truncated if <c>Util.IsSegmented</c> reports
        /// it as segmented; otherwise nothing is done. With any other number of files, the
        /// second entry is merged as a branch.
        /// </remarks>
        /// <returns>
        /// The result of <c>Truncate</c> or <c>Merge</c>, or <c>-1</c> when the single
        /// index is not segmented.
        /// </returns>
        public long Commit()
        {
            if (_ixFilesToProcess.Length != 1)
            {
                // merge branches
                return Merge(_ixFilesToProcess[1]);
            }

            // merge segments
            var trunkFileName = _ixFilesToProcess[0];

            // The loaded info is not used here; the call is kept so that any failure
            // to load the file still surfaces at this point.
            IxInfo.Load(trunkFileName);

            return Util.IsSegmented(trunkFileName)
                ? Truncate(trunkFileName)
                : -1;
        }
Esempio n. 15
0
        /// <summary>
        /// Creates an upsert transaction over <paramref name="directory"/> and decides which
        /// index version it will write to.
        /// </summary>
        /// <remarks>
        /// If the directory already has an index, no write lock exists and one can be acquired,
        /// the transaction reuses the first index file's version id and starts from its
        /// <c>DocumentCount</c>. In every other case a new chronological version id is used.
        /// A write lock acquired here is not released by this constructor.
        /// </remarks>
        /// <param name="directory">Directory containing the index files.</param>
        /// <param name="analyzer">Analyzer stored in <c>_analyzer</c>.</param>
        /// <param name="compression">Compression stored in <c>_compression</c> and passed to the default store writer.</param>
        /// <param name="documents">Document stream stored in <c>_documents</c>.</param>
        /// <param name="storeWriter">
        /// Optional store writer; when null a <c>DocumentStoreWriter</c> is created for the
        /// chosen version id.
        /// </param>
        public UpsertTransaction(
            string directory,
            IAnalyzer analyzer,
            Compression compression,
            DocumentStream documents,
            IDocumentStoreWriter storeWriter = null)
        {
            _directory = directory;
            _analyzer = analyzer;
            _compression = compression;
            _documents = documents;

            var existingIndexFile = Util.GetIndexFileNamesInChronologicalOrder(_directory)
                                        .FirstOrDefault();

            // Evaluated left to right with short-circuiting: the lock is only requested
            // when an index exists and no write lock is already present.
            var appendToExisting = existingIndexFile != null
                                   && !Util.WriteLockExists(_directory)
                                   && Util.TryAquireWriteLock(_directory);

            if (appendToExisting)
            {
                var existingVersion = Path.GetFileNameWithoutExtension(existingIndexFile);
                _indexVersionId = long.Parse(existingVersion);

                var existingInfo = IxInfo.Load(existingIndexFile);
                _count = existingInfo.DocumentCount;
            }
            else
            {
                _indexVersionId = Util.GetNextChronologicalFileId();
            }

            if (storeWriter != null)
            {
                _storeWriter = storeWriter;
            }
            else
            {
                _storeWriter = new DocumentStoreWriter(directory, _indexVersionId, _compression);
            }
        }
Esempio n. 16
0
        /// <summary>
        /// Opens the readers needed to stream documents from an existing index version.
        /// </summary>
        /// <remarks>
        /// The version id is the name of <paramref name="fileName"/> without its extension.
        /// The sibling files <c>.rdoc</c>, <c>.da</c>, <c>.pk</c>, <c>.kix</c> and <c>.ix</c>
        /// are looked up in the same directory.
        /// </remarks>
        /// <param name="fileName">Path of a file belonging to the index version.</param>
        /// <param name="primaryKeyFieldName">Primary key field name, forwarded to the base constructor.</param>
        /// <param name="skip">Value stored in <c>_skip</c>.</param>
        /// <param name="take">Value stored in <c>_take</c>.</param>
        public RDocStream(string fileName, string primaryKeyFieldName = null, int skip = 0, int take = int.MaxValue)
            : base(primaryKeyFieldName)
        {
            var folder = Path.GetDirectoryName(fileName);
            var version = Path.GetFileNameWithoutExtension(fileName);

            var documentPath = Path.Combine(folder, version + ".rdoc");
            var addressPath = Path.Combine(folder, version + ".da");
            var hashPath = Path.Combine(folder, string.Format("{0}.{1}", version, "pk"));
            var keyIndexPath = Path.Combine(folder, version + ".kix");
            var indexInfoPath = Path.Combine(folder, version + ".ix");

            // The key index is loaded before the index info and before any reader is opened.
            var keyIndex = Util.GetKeyIndex(keyIndexPath);

            _ix = IxInfo.Load(indexInfoPath);
            _hashReader = new DocHashReader(hashPath);

            var addressStream = new FileStream(addressPath, FileMode.Open, FileAccess.Read);
            _addressReader = new DocumentAddressReader(addressStream);

            var documentStream = new FileStream(documentPath, FileMode.Open, FileAccess.Read);
            _documentReader = new DocumentReader(documentStream, _ix.Compression, keyIndex);

            _skip = skip;
            _take = take;
            _directory = folder;
        }