/// <summary>
/// Creates a write session for the collection identified by <paramref name="collectionId"/>,
/// wiring up its job queues, streams, writers and postings reader.
/// </summary>
/// <param name="collectionId">Collection identifier; it is also formatted into every file name used by the session.</param>
/// <param name="sessionFactory">Factory that provides the storage directory, the streams and the collection index.</param>
/// <param name="tokenizer">Tokenizer stored in <c>_tokenizer</c>.</param>
public WriteSession(
    ulong collectionId,
    LocalStorageSessionFactory sessionFactory,
    ITokenizer tokenizer) : base(collectionId, sessionFactory)
{
    // Expands a file name pattern such as "{0}.val" with the collection id
    // and places the result inside the factory's directory.
    string FileOf(string pattern)
    {
        return Path.Combine(sessionFactory.Dir, string.Format(pattern, collectionId));
    }

    _tokenizer = tokenizer;
    _log = Logging.CreateLogWriter("writesession");

    // Both queues are handed the Write method group.
    _indexQueue = new ProducerConsumerQueue<IndexJob>(Write);
    _buildQueue = new ProducerConsumerQueue<BuildJob>(Write);

    // One stream per collection file; only the postings stream is requested as read/write.
    ValueStream = sessionFactory.CreateAppendStream(FileOf("{0}.val"));
    KeyStream = sessionFactory.CreateAppendStream(FileOf("{0}.key"));
    DocStream = sessionFactory.CreateAppendStream(FileOf("{0}.docs"));
    ValueIndexStream = sessionFactory.CreateAppendStream(FileOf("{0}.vix"));
    KeyIndexStream = sessionFactory.CreateAppendStream(FileOf("{0}.kix"));
    DocIndexStream = sessionFactory.CreateAppendStream(FileOf("{0}.dix"));
    PostingsStream = sessionFactory.CreateReadWriteStream(FileOf("{0}.pos"));
    VectorStream = sessionFactory.CreateAppendStream(FileOf("{0}.vec"));

    Index = sessionFactory.GetCollectionIndex(collectionId);

    // Writers and the postings reader wrap the streams opened above.
    _vals = new ValueWriter(ValueStream);
    _keys = new ValueWriter(KeyStream);
    _docs = new DocWriter(DocStream);
    _valIx = new ValueIndexWriter(ValueIndexStream);
    _keyIx = new ValueIndexWriter(KeyIndexStream);
    _docIx = new DocIndexWriter(DocIndexStream);
    _postingsReader = new PagedPostingsReader(PostingsStream);

    _dirty = new Dictionary<long, VectorNode>();
}
/// <summary>
/// Creates a write session for the collection named <paramref name="collectionId"/>,
/// opening its value, key and document streams and the writers on top of them.
/// </summary>
/// <param name="collectionId">Collection name; its <c>ToHash()</c> result is formatted into every file name.</param>
/// <param name="sessionFactory">Factory that provides the storage directory and the append streams.</param>
public WriteSession(
    string collectionId,
    SessionFactory sessionFactory) : base(collectionId, sessionFactory)
{
    _log = Logging.CreateWriter("writesession");

    var collection = collectionId.ToHash();

    // Expands a file name pattern such as "{0}.val" with the hashed collection id
    // and places the result inside the factory's directory.
    string FileOf(string pattern)
    {
        return Path.Combine(sessionFactory.Dir, string.Format(pattern, collection));
    }

    // Data streams followed by their matching index streams.
    ValueStream = sessionFactory.CreateAppendStream(FileOf("{0}.val"));
    KeyStream = sessionFactory.CreateAppendStream(FileOf("{0}.key"));
    DocStream = sessionFactory.CreateAppendStream(FileOf("{0}.docs"));
    ValueIndexStream = sessionFactory.CreateAppendStream(FileOf("{0}.vix"));
    KeyIndexStream = sessionFactory.CreateAppendStream(FileOf("{0}.kix"));
    DocIndexStream = sessionFactory.CreateAppendStream(FileOf("{0}.dix"));

    // Writers wrap the streams opened above.
    _vals = new ValueWriter(ValueStream);
    _keys = new ValueWriter(KeyStream);
    _docs = new DocWriter(DocStream);
    _valIx = new ValueIndexWriter(ValueIndexStream);
    _keyIx = new ValueIndexWriter(KeyIndexStream);
    _docIx = new DocIndexWriter(DocIndexStream);
}
/// <summary>
/// Creates a write session for the collection identified by <paramref name="collectionId"/>.
/// The value and value-index streams are taken from the factory's writable stream properties;
/// the remaining streams are created per collection file.
/// </summary>
/// <param name="collectionId">Collection identifier; it is also formatted into the per-collection file names.</param>
/// <param name="sessionFactory">Factory that provides the storage directory, the streams and the index.</param>
public WriteSession(ulong collectionId, LocalStorageSessionFactory sessionFactory)
    : base(collectionId, sessionFactory)
{
    // Expands a file name pattern such as "{0}.key" with the collection id
    // and places the result inside the factory's directory.
    string FileOf(string pattern)
    {
        return Path.Combine(sessionFactory.Dir, string.Format(pattern, collectionId));
    }

    _dirty = new Dictionary<long, VectorNode>();

    // Value and value-index streams come straight from the factory rather than from a file path.
    ValueStream = sessionFactory.WritableValueStream;
    KeyStream = sessionFactory.CreateAppendStream(FileOf("{0}.key"));
    DocStream = sessionFactory.CreateAppendStream(FileOf("{0}.docs"));
    ValueIndexStream = sessionFactory.WritableValueIndexStream;
    KeyIndexStream = sessionFactory.CreateAppendStream(FileOf("{0}.kix"));
    DocIndexStream = sessionFactory.CreateAppendStream(FileOf("{0}.dix"));
    PostingsStream = sessionFactory.CreateReadWriteStream(FileOf("{0}.pos"));
    VectorStream = sessionFactory.CreateAppendStream(FileOf("{0}.vec"));

    Index = sessionFactory.GetIndex(collectionId);

    // Writers and the postings reader wrap the streams assigned above.
    _vals = new ValueWriter(ValueStream);
    _keys = new ValueWriter(KeyStream);
    _docs = new DocWriter(DocStream);
    _valIx = new ValueIndexWriter(ValueIndexStream);
    _keyIx = new ValueIndexWriter(KeyIndexStream);
    _docIx = new DocIndexWriter(DocIndexStream);
    _postingsReader = new PagedPostingsReader(PostingsStream);
}
/// <summary>
/// Writes every document in <paramref name="data"/>: tokenizes its string fields, adds the
/// tokens to the per-key index, and appends a document map of (key id, value id) pairs.
/// </summary>
/// <remarks>
/// Only string values yield tokens, and values are appended and referenced only from the
/// token loop, so a non-string value contributes nothing to the document map. Its key is
/// still registered and its index is still marked dirty.
/// </remarks>
/// <param name="data">Documents to write; each one is a key/value map.</param>
/// <param name="tokenizer">Tokenizer applied to every string value.</param>
public void Write(IEnumerable<IDictionary> data, ITokenizer tokenizer)
{
    var valueWriter = new ValueWriter(ValueStream);
    var keyWriter = new ValueWriter(KeyStream);
    var docWriter = new DocWriter(DocStream);
    var valueIndexWriter = new ValueIndexWriter(ValueIndexStream);
    var keyIndexWriter = new ValueIndexWriter(KeyIndexStream);
    var docIndexWriter = new DocIndexWriter(DocIndexStream);

    foreach (var document in data)
    {
        var docId = docIndexWriter.GetNextDocId();
        var keyValueRefs = new List<(uint keyId, uint valId)>();

        foreach (var field in document.Keys)
        {
            var fieldName = field.ToString();
            var fieldHash = fieldName.ToHash();
            var fieldIndex = GetIndex(fieldHash);
            var fieldValue = (IComparable)document[field];
            var terms = new List<Term>();
            uint keyId;

            // TODO: implement numeric index
            if (fieldValue is string text)
            {
                foreach (var token in tokenizer.Tokenize(text))
                {
                    terms.Add(new Term(fieldName, token));
                }
            }

            if (fieldIndex == null)
            {
                // First time this key is seen: persist its name and register its id.
                var storedKey = keyWriter.Append(fieldName);
                keyId = keyIndexWriter.Append(storedKey.offset, storedKey.len, storedKey.dataType);
                SessionFactory.AddKey(fieldHash, keyId);

                // Start a fresh index tree for the key and publish it in the in-memory index.
                fieldIndex = new VectorNode();
                Index.Add(keyId, fieldIndex);
            }
            else
            {
                keyId = SessionFactory.GetKey(fieldHash);
            }

            foreach (var term in terms)
            {
                var termText = (string)term.Value;
                var closest = fieldIndex.ClosestMatch(termText);
                uint valId;

                if (closest.Highscore < VectorNode.IdenticalAngle)
                {
                    // No identical entry in the index yet: append the whole field value
                    // and take the id of the new value-index entry.
                    var storedValue = valueWriter.Append(fieldValue);
                    valId = valueIndexWriter.Append(storedValue.offset, storedValue.len, storedValue.dataType);
                }
                else
                {
                    // Reuse the value id of the identical match.
                    valId = closest.ValueId;
                }

                // Record the posting, then remember which key/value pair the document refers to.
                fieldIndex.Add(termText, valId, docId);
                keyValueRefs.Add((keyId, valId));
            }

            // Track the touched index under "<collection>.<key>" unless it is already tracked.
            var dirtyName = string.Format("{0}.{1}", CollectionId, keyId);
            if (!_dirty.ContainsKey(dirtyName))
            {
                _dirty.Add(dirtyName, fieldIndex);
            }
        }

        // Persist the document map and index its location.
        var storedDoc = docWriter.Append(keyValueRefs);
        docIndexWriter.Append(storedDoc.offset, storedDoc.length);
    }
}