// Disabled (commented-out) implementation of RebuildIndexes.
//private void RebuildIndexes()
//{
//    try
//    {
//        var timer = new Stopwatch();
//        var batchTimer = new Stopwatch();
//        timer.Start();
//
//        var files = Directory.GetFiles(Dir, "*.docs");
//
//        _log.Log(string.Format("re-indexing process found {0} document files", files.Length));
//
//        foreach (var docFileName in files)
//        {
//            var name = Path.GetFileNameWithoutExtension(docFileName)
//                .Split(".", StringSplitOptions.RemoveEmptyEntries);
//
//            var collectionId = ulong.Parse(name[0]);
//
//            using (var readSession = new DocumentReadSession(collectionId, this))
//            {
//                foreach (var batch in readSession.ReadDocs().Batch(1000))
//                {
//                    batchTimer.Restart();
//
//                    using (var writeSession = new LocalStorageSessionFactory(Dir, new LatinTokenizer()).CreateWriteSession(collectionId))
//                    {
//                        var job = new IndexJob(collectionId, batch);
//                        writeSession.WriteToIndex(job);
//                    }
//
//                    Console.WriteLine("wrote batch to index {0} in {1}", collectionId, batchTimer.Elapsed);
//                }
//            }
//        }
//
//        _log.Log(string.Format("rebuilt {0} indexes in {1}", files.Length, timer.Elapsed));
//    }
//    catch (Exception ex)
//    {
//        _log.Log(ex.ToString());
//        throw;
//    }
//}

/// <summary>
/// Deserializes every "*.ix" file found in <c>Dir</c> and assigns a new
/// <c>VectorTree</c> built from the results to <c>_index</c>.
/// </summary>
/// <remarks>
/// The first dot-separated segment of each file name is parsed as the
/// collection hash (<c>ulong</c>) and the second as the key id (<c>long</c>).
/// The vector file passed to <c>DeserializeIndex</c> is "{collectionHash}.vec" in <c>Dir</c>.
/// </remarks>
public void LoadIndex()
{
    var collections = new SortedList<ulong, SortedList<long, VectorNode>>();
    string[] indexFiles = Directory.GetFiles(Dir, "*.ix");

    for (int i = 0; i < indexFiles.Length; i++)
    {
        string ixFileName = indexFiles[i];

        string[] segments = Path.GetFileNameWithoutExtension(ixFileName)
            .Split(".", StringSplitOptions.RemoveEmptyEntries);

        ulong collectionHash = ulong.Parse(segments[0]);
        long keyId = long.Parse(segments[1]);
        string vecFileName = Path.Combine(Dir, string.Format("{0}.vec", collectionHash));

        // Fetch the per-collection list, creating and registering it on first sight.
        SortedList<long, VectorNode> keyedRoots;
        if (!collections.TryGetValue(collectionHash, out keyedRoots))
        {
            keyedRoots = new SortedList<long, VectorNode>();
            collections.Add(collectionHash, keyedRoots);
        }

        keyedRoots.Add(keyId, DeserializeIndex(ixFileName, vecFileName));
    }

    _index = new VectorTree(collections);
}
/// <summary>
/// Initializes the factory for the directory <paramref name="dir"/>: loads the key map
/// and the index tree, then opens the value ("_.val"), value-index ("_.vix") and
/// key-map ("_.kmap") streams.
/// </summary>
/// <param name="dir">Directory that holds the data files.</param>
public SessionFactory(string dir)
{
    _keys = LoadKeyMap(dir);
    _index = DeserializeTree(dir);
    _dir = dir;

    string valuePath = Path.Combine(dir, "_.val");
    string valueIndexPath = Path.Combine(dir, "_.vix");
    string keyMapPath = Path.Combine(dir, "_.kmap");

    // Each data file gets a read/write stream first, then a separate append stream.
    _valueStream = CreateReadWriteStream(valuePath);
    _writableValueStream = CreateAppendStream(valuePath);

    _valueIndexStream = CreateReadWriteStream(valueIndexPath);
    _writableValueIndexStream = CreateAppendStream(valueIndexPath);

    // The key map is opened append-only and shared for reading and writing.
    _writableKeyMapStream = new FileStream(
        keyMapPath,
        FileMode.Append,
        FileAccess.Write,
        FileShare.ReadWrite);
}
/// <summary>
/// Deserializes every "*.ix" file found in <c>Dir</c>, assigns a new <c>VectorTree</c>
/// built from the results to <c>_index</c>, and then checks the loaded indexes against
/// any "*.validate" files in <c>Dir</c>.
/// </summary>
/// <remarks>
/// The first dot-separated segment of each file name is parsed as the collection hash
/// (<c>ulong</c>) and the second as the key id (<c>long</c>). A "*.validate" file holds
/// one token per line; it is deleted only after every one of its tokens has validated.
/// Any exception is logged through <c>_log</c> and rethrown.
/// </remarks>
/// <exception cref="DataMisalignedException">
/// The closest match for a token in a "*.validate" file scores below
/// <c>VectorNode.IdenticalAngle</c>.
/// </exception>
private async Task LoadIndex()
{
    try
    {
        var timer = new Stopwatch();
        timer.Start();

        _log.Log("begin loading index into memory");

        var ixs = new ConcurrentDictionary<ulong, SortedList<long, VectorNode>>();
        var indexFiles = Directory.GetFiles(Dir, "*.ix");

        foreach (var ixFileName in indexFiles)
        {
            var name = Path.GetFileNameWithoutExtension(ixFileName)
                .Split(".", StringSplitOptions.RemoveEmptyEntries);

            var collectionHash = ulong.Parse(name[0]);
            var keyId = long.Parse(name[1]);
            var vecFileName = Path.Combine(Dir, string.Format("{0}.{1}.vec", collectionHash, keyId));

            // Single lookup: returns the existing per-collection list or registers a new one.
            var colIx = ixs.GetOrAdd(collectionHash, _ => new SortedList<long, VectorNode>());

            var ix = await DeserializeIndex(ixFileName, vecFileName);

            colIx.Add(keyId, ix);

            _log.Log(string.Format("loaded {0}.{1}. {2}", collectionHash, keyId, ix.Size()));
        }

        _index = new VectorTree(ixs);

        if (indexFiles.Length == 0)
        {
            _log.Log("found no index files in {0}. index is empty.", Dir);
            return;
        }

        _log.Log("deserialized {0} index files in {1}", indexFiles.Length, timer.Elapsed);

        // validate
        foreach (var validateFn in Directory.GetFiles(Dir, "*.validate"))
        {
            _log.Log("validating {0}", validateFn);

            var fi = new FileInfo(validateFn);
            var segs = Path.GetFileNameWithoutExtension(fi.Name).Split('.');
            var col = ulong.Parse(segs[0]);
            var key = long.Parse(segs[1]);
            var validatedIx = ixs[col][key];

            foreach (var token in File.ReadAllLines(validateFn))
            {
                var closestMatch = validatedIx.ClosestMatch(new VectorNode(token), skipDirtyNodes: false);

                if (closestMatch.Score < VectorNode.IdenticalAngle)
                {
                    throw new DataMisalignedException();
                }
            }

            // Delete only once ALL tokens have passed. Deleting inside the token loop
            // (as before) removed the file after the first passing token, so a later
            // failure threw with the validation file already gone.
            File.Delete(validateFn);
        }
    }
    catch (Exception ex)
    {
        _log.Log(ex);

        throw;
    }
}