Esempio n. 1
0
        //private void RebuildIndexes()
        //{
        //    try
        //    {
        //        var timer = new Stopwatch();
        //        var batchTimer = new Stopwatch();

        //        timer.Start();

        //        var files = Directory.GetFiles(Dir, "*.docs");

        //        _log.Log(string.Format("re-indexing process found {0} document files", files.Length));

        //        foreach (var docFileName in files)
        //        {
        //            var name = Path.GetFileNameWithoutExtension(docFileName)
        //                .Split(".", StringSplitOptions.RemoveEmptyEntries);

        //            var collectionId = ulong.Parse(name[0]);

        //            using (var readSession = new DocumentReadSession(collectionId, this))
        //            {
        //                foreach (var batch in readSession.ReadDocs().Batch(1000))
        //                {
        //                    batchTimer.Restart();

        //                    using (var writeSession = new LocalStorageSessionFactory(Dir, new LatinTokenizer()).CreateWriteSession(collectionId))
        //                    {
        //                        var job = new IndexJob(collectionId, batch);

        //                        writeSession.WriteToIndex(job);
        //                    }
        //                    Console.WriteLine("wrote batch to index {0} in {1}", collectionId, batchTimer.Elapsed);
        //                }
        //            }
        //        }

        //        _log.Log(string.Format("rebuilt {0} indexes in {1}", files.Length, timer.Elapsed));
        //    }
        //    catch (Exception ex)
        //    {
        //        _log.Log(ex.ToString());
        //        throw;
        //    }

        //}

        /// <summary>
        /// Rebuilds the in-memory index from every "*.ix" file found in <c>Dir</c>.
        /// </summary>
        /// <remarks>
        /// The extension-less file name is split on "." and its first two segments are
        /// parsed as the collection hash and the key id. Each index file is deserialized
        /// together with the collection's "{collectionHash}.vec" file, the resulting roots
        /// are grouped per collection, and the grouping is wrapped in a new
        /// <c>VectorTree</c> that replaces <c>_index</c>.
        /// </remarks>
        public void LoadIndex()
        {
            var collections = new SortedList<ulong, SortedList<long, VectorNode>>();
            var indexFiles  = Directory.GetFiles(Dir, "*.ix");

            for (var i = 0; i < indexFiles.Length; i++)
            {
                var indexPath = indexFiles[i];
                var segments  = Path.GetFileNameWithoutExtension(indexPath)
                                    .Split(".", StringSplitOptions.RemoveEmptyEntries);

                var collectionHash = ulong.Parse(segments[0]);
                var keyId          = long.Parse(segments[1]);
                var vectorPath     = Path.Combine(Dir, string.Format("{0}.vec", collectionHash));

                // Fetch the per-collection list, creating and registering it on first sight.
                if (!collections.TryGetValue(collectionHash, out var keyRoots))
                {
                    keyRoots = new SortedList<long, VectorNode>();
                    collections.Add(collectionHash, keyRoots);
                }

                keyRoots.Add(keyId, DeserializeIndex(indexPath, vectorPath));
            }

            _index = new VectorTree(collections);
        }
Esempio n. 2
0
 /// <summary>
 /// Creates a session factory over the storage directory <paramref name="dir"/>.
 /// </summary>
 /// <remarks>
 /// Loads the key map and the index tree from the directory, then opens the
 /// "_.val" and "_.vix" files twice each (once through
 /// <c>CreateReadWriteStream</c>, once through <c>CreateAppendStream</c>) and
 /// opens "_.kmap" for appending.
 /// </remarks>
 /// <param name="dir">Directory that holds the "_.val", "_.vix" and "_.kmap" files.</param>
 public SessionFactory(string dir)
 {
     _keys  = LoadKeyMap(dir);
     _index = DeserializeTree(dir);
     _dir   = dir;

     var valuePath      = Path.Combine(dir, "_.val");
     var valueIndexPath = Path.Combine(dir, "_.vix");
     var keyMapPath     = Path.Combine(dir, "_.kmap");

     _valueStream         = CreateReadWriteStream(valuePath);
     _writableValueStream = CreateAppendStream(valuePath);

     _valueIndexStream         = CreateReadWriteStream(valueIndexPath);
     _writableValueIndexStream = CreateAppendStream(valueIndexPath);

     // Append-only writer; the share mode lets other handles read and write the same file.
     _writableKeyMapStream = new FileStream(
         keyMapPath,
         FileMode.Append,
         FileAccess.Write,
         FileShare.ReadWrite);
 }
Esempio n. 3
0
        /// <summary>
        /// Loads every "*.ix" file in <c>Dir</c> into memory, publishes the result as
        /// <c>_index</c>, and then checks the loaded indexes against any "*.validate"
        /// files found in the same directory.
        /// </summary>
        /// <remarks>
        /// For both index and validation files, the extension-less file name is split
        /// on "." and its first two segments are parsed as the collection hash and the
        /// key id. Every line of a validation file is treated as a token that must have
        /// a match in the corresponding index whose score is at least
        /// <c>VectorNode.IdenticalAngle</c>. A validation file is deleted only after all
        /// of its tokens have passed, so a failed validation leaves the file in place
        /// for the next run.
        /// </remarks>
        /// <returns>A task that completes when loading and validation have finished.</returns>
        /// <exception cref="DataMisalignedException">
        /// A token from a validation file has no sufficiently close match in its index.
        /// </exception>
        private async Task LoadIndex()
        {
            try
            {
                var timer = new Stopwatch();
                timer.Start();

                _log.Log("begin loading index into memory");

                var ixs        = new ConcurrentDictionary<ulong, SortedList<long, VectorNode>>();
                var indexFiles = Directory.GetFiles(Dir, "*.ix");

                foreach (var ixFileName in indexFiles)
                {
                    var name = Path.GetFileNameWithoutExtension(ixFileName)
                               .Split(".", StringSplitOptions.RemoveEmptyEntries);

                    var collectionHash = ulong.Parse(name[0]);
                    var keyId          = long.Parse(name[1]);
                    var vecFileName    = Path.Combine(Dir, string.Format("{0}.{1}.vec", collectionHash, keyId));

                    // Single lookup: returns the existing per-collection list or registers a new one.
                    var collectionIndex = ixs.GetOrAdd(collectionHash, _ => new SortedList<long, VectorNode>());

                    var loaded = await DeserializeIndex(ixFileName, vecFileName);

                    collectionIndex.Add(keyId, loaded);

                    _log.Log(string.Format("loaded {0}.{1}. {2}",
                                           collectionHash, keyId, loaded.Size()));
                }

                _index = new VectorTree(ixs);

                if (indexFiles.Length == 0)
                {
                    _log.Log("found no index files in {0}. index is empty.", Dir);
                }
                else
                {
                    _log.Log("deserialized {0} index files in {1}", indexFiles.Length, timer.Elapsed);

                    // validate
                    foreach (var validateFn in Directory.GetFiles(Dir, "*.validate"))
                    {
                        _log.Log("validating {0}", validateFn);

                        var fi    = new FileInfo(validateFn);
                        var segs  = Path.GetFileNameWithoutExtension(fi.Name).Split('.');
                        var col   = ulong.Parse(segs[0]);
                        var key   = long.Parse(segs[1]);

                        // Both indexers throw KeyNotFoundException when the validation file
                        // refers to an index that was not loaded above.
                        var colIx = ixs[col];
                        var ix    = colIx[key];

                        foreach (var token in File.ReadAllLines(validateFn))
                        {
                            var closestMatch = ix.ClosestMatch(new VectorNode(token), skipDirtyNodes: false);

                            if (closestMatch.Score < VectorNode.IdenticalAngle)
                            {
                                throw new DataMisalignedException(string.Format(
                                    "token \"{0}\" from {1} has no identical match in index {2}.{3}",
                                    token, validateFn, col, key));
                            }
                        }

                        // Remove the file only once every token has passed. Deleting it
                        // inside the token loop would discard it after the first passing
                        // token, even when a later token fails.
                        File.Delete(validateFn);
                    }
                }
            }
            catch (Exception ex)
            {
                _log.Log(ex);

                throw;
            }
        }