Beispiel #1
0
        /// <summary>
        /// Creates a write session for one collection. Requests a stream from the
        /// session factory for each of the collection's files (named after
        /// <paramref name="collectionId"/> and located in <c>sessionFactory.Dir</c>),
        /// then attaches a writer or reader to each stream.
        /// </summary>
        /// <param name="collectionId">Id of the collection; used as the base name of every file.</param>
        /// <param name="sessionFactory">Supplies the directory, the streams and the collection index.</param>
        /// <param name="tokenizer">Tokenizer stored on the session.</param>
        public WriteSession(
            ulong collectionId,
            LocalStorageSessionFactory sessionFactory,
            ITokenizer tokenizer) : base(collectionId, sessionFactory)
        {
            // Expands a file name pattern such as "{0}.val" with the collection id
            // and roots the result in the factory's directory.
            string PathOf(string fileNamePattern)
            {
                return Path.Combine(sessionFactory.Dir, string.Format(fileNamePattern, collectionId));
            }

            _tokenizer = tokenizer;
            _log = Logging.CreateLogWriter("writesession");

            // Both queues are handed Write as their callback.
            _indexQueue = new ProducerConsumerQueue <IndexJob>(Write);
            _buildQueue = new ProducerConsumerQueue <BuildJob>(Write);

            // Data files and their index files.
            ValueStream = sessionFactory.CreateAppendStream(PathOf("{0}.val"));
            KeyStream = sessionFactory.CreateAppendStream(PathOf("{0}.key"));
            DocStream = sessionFactory.CreateAppendStream(PathOf("{0}.docs"));
            ValueIndexStream = sessionFactory.CreateAppendStream(PathOf("{0}.vix"));
            KeyIndexStream = sessionFactory.CreateAppendStream(PathOf("{0}.kix"));
            DocIndexStream = sessionFactory.CreateAppendStream(PathOf("{0}.dix"));

            // The postings file is the only one requested as a read/write stream.
            PostingsStream = sessionFactory.CreateReadWriteStream(PathOf("{0}.pos"));
            VectorStream = sessionFactory.CreateAppendStream(PathOf("{0}.vec"));
            Index = sessionFactory.GetCollectionIndex(collectionId);

            _vals = new ValueWriter(ValueStream);
            _keys = new ValueWriter(KeyStream);
            _docs = new DocWriter(DocStream);
            _valIx = new ValueIndexWriter(ValueIndexStream);
            _keyIx = new ValueIndexWriter(KeyIndexStream);
            _docIx = new DocIndexWriter(DocIndexStream);
            _postingsReader = new PagedPostingsReader(PostingsStream);
            _dirty = new Dictionary <long, VectorNode>();
        }
Beispiel #2
0
        /// <summary>
        /// Creates a write session for the named collection. The collection name is
        /// hashed via <c>ToHash()</c>; the hash is the base name of the value, key and
        /// document files (and their index files) requested from the session factory.
        /// </summary>
        /// <param name="collectionId">Name of the collection.</param>
        /// <param name="sessionFactory">Supplies the directory and the append streams.</param>
        public WriteSession(
            string collectionId,
            SessionFactory sessionFactory) : base(collectionId, sessionFactory)
        {
            _log = Logging.CreateWriter("writesession");

            var collectionHash = collectionId.ToHash();

            // Expands a file name pattern such as "{0}.val" with the collection hash
            // and roots the result in the factory's directory.
            string PathOf(string fileNamePattern)
            {
                return Path.Combine(sessionFactory.Dir, string.Format(fileNamePattern, collectionHash));
            }

            ValueStream = sessionFactory.CreateAppendStream(PathOf("{0}.val"));
            KeyStream = sessionFactory.CreateAppendStream(PathOf("{0}.key"));
            DocStream = sessionFactory.CreateAppendStream(PathOf("{0}.docs"));
            ValueIndexStream = sessionFactory.CreateAppendStream(PathOf("{0}.vix"));
            KeyIndexStream = sessionFactory.CreateAppendStream(PathOf("{0}.kix"));
            DocIndexStream = sessionFactory.CreateAppendStream(PathOf("{0}.dix"));

            // One writer per stream.
            _vals = new ValueWriter(ValueStream);
            _keys = new ValueWriter(KeyStream);
            _docs = new DocWriter(DocStream);
            _valIx = new ValueIndexWriter(ValueIndexStream);
            _keyIx = new ValueIndexWriter(KeyIndexStream);
            _docIx = new DocIndexWriter(DocIndexStream);
        }
Beispiel #3
0
        /// <summary>
        /// Creates a write session for one collection. The value stream and value
        /// index stream are taken from the session factory's writable stream
        /// properties; all other streams are requested per collection, with files
        /// named after <paramref name="collectionId"/> in <c>sessionFactory.Dir</c>.
        /// </summary>
        /// <param name="collectionId">Id of the collection; used as the base name of its files.</param>
        /// <param name="sessionFactory">Supplies the directory, the streams and the index.</param>
        public WriteSession(ulong collectionId, LocalStorageSessionFactory sessionFactory)
            : base(collectionId, sessionFactory)
        {
            // Expands a file name pattern such as "{0}.key" with the collection id
            // and roots the result in the factory's directory.
            string PathOf(string fileNamePattern)
            {
                return Path.Combine(sessionFactory.Dir, string.Format(fileNamePattern, collectionId));
            }

            _dirty = new Dictionary <long, VectorNode>();

            // Value data and its index come from the factory's own properties
            // rather than from per-collection files.
            ValueStream = sessionFactory.WritableValueStream;
            KeyStream = sessionFactory.CreateAppendStream(PathOf("{0}.key"));
            DocStream = sessionFactory.CreateAppendStream(PathOf("{0}.docs"));
            ValueIndexStream = sessionFactory.WritableValueIndexStream;
            KeyIndexStream = sessionFactory.CreateAppendStream(PathOf("{0}.kix"));
            DocIndexStream = sessionFactory.CreateAppendStream(PathOf("{0}.dix"));

            // The postings file is the only one requested as a read/write stream.
            PostingsStream = sessionFactory.CreateReadWriteStream(PathOf("{0}.pos"));
            VectorStream = sessionFactory.CreateAppendStream(PathOf("{0}.vec"));
            Index = sessionFactory.GetIndex(collectionId);

            _vals = new ValueWriter(ValueStream);
            _keys = new ValueWriter(KeyStream);
            _docs = new DocWriter(DocStream);
            _valIx = new ValueIndexWriter(ValueIndexStream);
            _keyIx = new ValueIndexWriter(KeyIndexStream);
            _docIx = new DocIndexWriter(DocIndexStream);
            _postingsReader = new PagedPostingsReader(PostingsStream);
        }
Beispiel #4
0
        /// <summary>
        /// Writes every model in <paramref name="data"/> as a document. For each field
        /// the key is stored if no index exists for it yet, string values are
        /// tokenized, each token is matched against the field's index, and a posting
        /// is added for it. The (key id, value id) pairs collected for the document
        /// are then appended to the document store and the document index.
        /// </summary>
        /// <remarks>
        /// Only string values are tokenized. A non-string value produces no tokens and
        /// therefore adds no entry to the document's key/value map. When a token has
        /// no identical match in the index, the whole field value (not the token) is
        /// appended to the value store.
        /// </remarks>
        /// <param name="data">Models to write; each entry's key is turned into a field name via <c>ToString()</c>.</param>
        /// <param name="tokenizer">Splits string values into the tokens that get indexed.</param>
        public void Write(IEnumerable <IDictionary> data, ITokenizer tokenizer)
        {
            var valueWriter      = new ValueWriter(ValueStream);
            var keyWriter        = new ValueWriter(KeyStream);
            var docWriter        = new DocWriter(DocStream);
            var valueIndexWriter = new ValueIndexWriter(ValueIndexStream);
            var keyIndexWriter   = new ValueIndexWriter(KeyIndexStream);
            var docIndexWriter   = new DocIndexWriter(DocIndexStream);

            foreach (var document in data)
            {
                var docId = docIndexWriter.GetNextDocId();
                var keyValueRefs = new List <(uint keyId, uint valId)>();

                foreach (var field in document.Keys)
                {
                    var fieldName  = field.ToString();
                    var fieldHash  = fieldName.ToHash();
                    var fieldTree  = GetIndex(fieldHash);
                    var fieldValue = (IComparable)document[field];
                    var terms      = new List <Term>();
                    uint keyId;

                    // Tokenize up front, before anything is written for this field.
                    if (fieldValue is string text) //TODO: implement numeric index
                    {
                        foreach (var token in tokenizer.Tokenize(text))
                        {
                            terms.Add(new Term(fieldName, token));
                        }
                    }

                    if (fieldTree != null)
                    {
                        // Known key: reuse the id registered with the session factory.
                        keyId = SessionFactory.GetKey(fieldHash);
                    }
                    else
                    {
                        // New key: persist its name and register the resulting id.
                        var storedKey = keyWriter.Append(fieldName);
                        keyId = keyIndexWriter.Append(storedKey.offset, storedKey.len, storedKey.dataType);
                        SessionFactory.AddKey(fieldHash, keyId);

                        // Start an empty index for the key and add it to Index.
                        fieldTree = new VectorNode();
                        Index.Add(keyId, fieldTree);
                    }

                    foreach (var term in terms)
                    {
                        var closest = fieldTree.ClosestMatch((string)term.Value);
                        uint valId;

                        if (closest.Highscore < VectorNode.IdenticalAngle)
                        {
                            // No identical entry in the index yet: store the field
                            // value and use the id assigned by the value index.
                            var storedValue = valueWriter.Append(fieldValue);
                            valId = valueIndexWriter.Append(storedValue.offset, storedValue.len, storedValue.dataType);
                        }
                        else
                        {
                            // Identical entry found: reuse its value id.
                            valId = closest.ValueId;
                        }

                        // Add the posting for this token to the field's index.
                        fieldTree.Add((string)term.Value, valId, docId);

                        // Remember which key/value pair this document refers to.
                        keyValueRefs.Add((keyId, valId));
                    }

                    // Record the field's index in _dirty once per "collection.key" name.
                    var dirtyName = string.Format("{0}.{1}", CollectionId, keyId);
                    if (!_dirty.ContainsKey(dirtyName))
                    {
                        _dirty.Add(dirtyName, fieldTree);
                    }
                }

                // Persist the document's key/value map, then index where it was written.
                var storedDoc = docWriter.Append(keyValueRefs);
                docIndexWriter.Append(storedDoc.offset, storedDoc.length);
            }
        }