Ejemplo n.º 1
0
        /// <summary>
        /// Persists a single document and returns the ID it was assigned.
        /// Fields whose key starts with "__" will not be written.
        /// If the model has a "__docid" field but no "_original" field, the "__docid" value is
        /// copied into "_original" (which is written like any other field).
        /// The reason a model may already have a "__docid" field even before it has been persisted
        /// is that it originates from another collection.
        /// </summary>
        /// <param name="model">
        /// Document to write. It is mutated: "_original" may be added and "__docid" is set to the new ID.
        /// Every written value is cast to <see cref="IComparable"/>.
        /// </param>
        /// <returns>Document ID</returns>
        public async Task <long> Write(IDictionary model)
        {
            var timer = Stopwatch.StartNew();

            var docId  = _docIx.GetNextDocId();
            var docMap = new List <(long keyId, long valId)>();

            // Must happen before the loop below: the dictionary may not be modified while its keys are enumerated.
            if (model.Contains("__docid") && !model.Contains("_original"))
            {
                model.Add("_original", model["__docid"]);
            }

            foreach (var key in model.Keys)
            {
                var keyStr = key.ToString();

                // Ordinal comparison: field names are identifiers, not linguistic text.
                // A culture-sensitive prefix test can ignore certain characters and would
                // then skip keys that do not literally start with "__".
                if (keyStr.StartsWith("__", StringComparison.Ordinal))
                {
                    continue;
                }

                var keyHash = keyStr.ToHash();
                var val     = (IComparable)model[key];

                if (!SessionFactory.TryGetKeyId(CollectionId, keyHash, out long keyId))
                {
                    // We have a new key!

                    // store key
                    var keyInfo = await _keys.Append(keyStr);

                    keyId = await _keyIx.Append(keyInfo.offset, keyInfo.len, keyInfo.dataType);

                    SessionFactory.PersistKeyMapping(CollectionId, keyHash, keyId);
                }

                // store value
                var valInfo = await _vals.Append(val);

                long valId = await _valIx.Append(valInfo.offset, valInfo.len, valInfo.dataType);

                // store refs to keys and values
                docMap.Add((keyId, valId));
            }

            var docMeta = await _docs.Append(docMap);

            await _docIx.Append(docMeta.offset, docMeta.length);

            // Indexer assignment (not Add) because the model may already carry a "__docid".
            model["__docid"] = docId;

            this.Log(string.Format("processed document {0} in {1}", docId, timer.Elapsed));

            return(docId);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Persists every document in <c>job.Documents</c> and returns the IDs they were assigned,
        /// in the order the documents were written.
        /// Each written model has its "__docid" entry set to the assigned ID.
        /// </summary>
        /// <param name="job">Write job whose <c>Documents</c> are stored. Every field value is cast to <see cref="IComparable"/>.</param>
        /// <returns>IDs of the written documents.</returns>
        public async Task <IList <ulong> > Write(WriteJob job)
        {
            var docIds = new List <ulong>();
            var timer  = Stopwatch.StartNew();

            foreach (var model in job.Documents)
            {
                var docId  = _docIx.GetNextDocId();
                var docMap = new List <(long keyId, long valId)>();

                foreach (var key in model.Keys)
                {
                    var keyStr  = key.ToString();
                    var keyHash = keyStr.ToHash();
                    var val     = (IComparable)model[key];

                    if (!SessionFactory.TryGetKeyId(keyHash, out long keyId))
                    {
                        // We have a new key!

                        // store key
                        var keyInfo = await _keys.Append(keyStr);

                        keyId = await _keyIx.Append(keyInfo.offset, keyInfo.len, keyInfo.dataType);

                        await SessionFactory.PersistKeyMapping(keyHash, keyId);
                    }

                    // store value
                    var valInfo = await _vals.Append(val);

                    long valId = await _valIx.Append(valInfo.offset, valInfo.len, valInfo.dataType);

                    // store refs to keys and values
                    docMap.Add((keyId, valId));
                }

                var docMeta = await _docs.Append(docMap);

                await _docIx.Append(docMeta.offset, docMeta.length);

                // Assign through the indexer instead of Add: Add throws when the model
                // already carries a "__docid", and at this point the document has
                // already been persisted, so throwing would lose the ID of a stored document.
                model["__docid"] = docId;

                docIds.Add(docId);
            }

            _log.Log(string.Format("processed {0} documents in {1}", docIds.Count, timer.Elapsed));

            return(docIds);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Stores a document and passes its indexable fields to the index session.
        /// A "__created" field holding <c>DateTime.Now.ToBinary()</c> is set on the document before it is written.
        /// Fields with a null value and fields prefixed with "___" will not be stored.
        /// Stored fields whose key does not start with "_" and whose stored data type is
        /// <c>DataType.STRING</c> are also put into the index session.
        /// </summary>
        /// <param name="document">Document to store. It is mutated: "__created" is set.</param>
        public void Write(IDictionary <string, object> document)
        {
            // Set before enumerating: the dictionary may not be modified while it is enumerated.
            // NOTE(review): this records local time; confirm whether readers expect UTC before changing it.
            document["__created"] = DateTime.Now.ToBinary();

            var docMap = new List <(long keyId, long valId)>();
            var docId  = _docIx.GetNextDocId();

            foreach (var field in document)
            {
                var keyStr = field.Key;
                var val    = field.Value;

                if (val == null)
                {
                    continue;
                }

                // Ordinal comparison: field names are identifiers, not linguistic text.
                if (keyStr.StartsWith("___", StringComparison.Ordinal))
                {
                    continue;
                }

                var keyHash = keyStr.ToHash();

                if (!SessionFactory.TryGetKeyId(CollectionId, keyHash, out long keyId))
                {
                    // We have a new key!

                    // store key
                    var keyInfo = _keys.Append(keyStr);

                    keyId = _keyIx.Append(keyInfo.offset, keyInfo.len, keyInfo.dataType);
                    SessionFactory.PersistKeyMapping(CollectionId, keyHash, keyId);
                }

                // store value
                var valInfo = _vals.Append(val);
                var valId   = _valIx.Append(valInfo.offset, valInfo.len, valInfo.dataType);

                // store refs to keys and values
                docMap.Add((keyId, valId));

                // index: keys starting with "_" are stored but never indexed
                if (!keyStr.StartsWith("_", StringComparison.Ordinal) && valInfo.dataType == DataType.STRING)
                {
                    _indexSession.Put(docId, keyId, (string)val);
                }
            }

            var docMeta = _docs.Append(docMap);

            _docIx.Append(docMeta.offset, docMeta.length);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Stores every document in <paramref name="models"/> and optionally hands the same
        /// documents to <c>WriteToIndex</c>.
        /// Each stored model has its "__docid" entry set to the ID it was assigned.
        /// </summary>
        /// <param name="models">Documents to store. Every field value is cast to <see cref="IComparable"/>.</param>
        /// <param name="writeToIndex">
        /// When true, the stored documents are wrapped in an <c>IndexJob</c> for this collection
        /// and passed to <c>WriteToIndex</c> after all of them have been stored.
        /// </param>
        public void Write(IEnumerable <IDictionary> models, bool writeToIndex = false)
        {
            var documents = models;

            // When indexing, the sequence is consumed here and then handed to the index job.
            // A lazily evaluated sequence could be evaluated a second time and produce fresh
            // instances that lack the "__docid" assigned below, so it is materialized once.
            // Inputs that already are collections are used as they are.
            if (writeToIndex && !(models is ICollection <IDictionary>))
            {
                documents = new List <IDictionary>(models);
            }

            foreach (var model in documents)
            {
                var docId  = _docIx.GetNextDocId();
                var docMap = new List <(long keyId, long valId)>();

                foreach (var key in model.Keys)
                {
                    var keyStr  = key.ToString();
                    var keyHash = keyStr.ToHash();
                    var val     = (IComparable)model[key];

                    if (!SessionFactory.TryGetKeyId(keyHash, out long keyId))
                    {
                        // We have a new key!

                        // store key
                        var keyInfo = _keys.Append(keyStr);
                        keyId = _keyIx.Append(keyInfo.offset, keyInfo.len, keyInfo.dataType);
                        SessionFactory.PersistKeyMapping(keyHash, keyId);
                    }

                    // store value
                    var valInfo = _vals.Append(val);
                    long valId = _valIx.Append(valInfo.offset, valInfo.len, valInfo.dataType);

                    // store refs to keys and values
                    docMap.Add((keyId, valId));
                }

                var docMeta = _docs.Append(docMap);
                _docIx.Append(docMeta.offset, docMeta.length);

                // Assign through the indexer instead of Add: Add throws when the model
                // already carries a "__docid", which would abort the batch after this
                // document has already been stored.
                model["__docid"] = docId;
            }

            if (writeToIndex)
            {
                WriteToIndex(new IndexJob(CollectionId, documents));
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Stores every document in <paramref name="data"/> and adds its field tokens to the
        /// per-field index obtained from <c>CloneIndex</c>.
        /// String values are split by <paramref name="tokenizer"/>; any other value contributes
        /// its <c>ToString()</c> result as a single token.
        /// Every field index that was touched is registered in <c>_dirty</c> under its key ID.
        /// </summary>
        /// <param name="data">Documents to store. Every field value is cast to <see cref="IComparable"/>.</param>
        /// <param name="tokenizer">Tokenizer applied to string field values.</param>
        public void Write(IEnumerable <IDictionary> data, ITokenizer tokenizer)
        {
            foreach (var document in data)
            {
                var documentId = _docIx.GetNextDocId();
                var fieldRefs  = new List <(long keyId, long valId)>();

                foreach (var field in document.Keys)
                {
                    var fieldName  = field.ToString();
                    var nameHash   = fieldName.ToHash();
                    var fieldIndex = CloneIndex(nameHash);
                    var fieldValue = (IComparable)document[field];

                    // Collect the distinct tokens of this field before anything is stored.
                    var distinctTokens = new HashSet <string>();

                    if (fieldValue is string text)
                    {
                        foreach (var token in tokenizer.Tokenize(text).ToList())
                        {
                            distinctTokens.Add(token);
                        }
                    }
                    else
                    {
                        //TODO: implement numeric index

                        distinctTokens.Add(fieldValue.ToString());
                    }

                    long keyId;

                    if (fieldIndex != null)
                    {
                        // Known key: look up its ID.
                        keyId = SessionFactory.GetKey(nameHash);
                    }
                    else
                    {
                        // We have a new key!

                        // store key
                        var storedKey = _keys.Append(fieldName);
                        keyId = _keyIx.Append(storedKey.offset, storedKey.len, storedKey.dataType);
                        SessionFactory.AddKey(nameHash, keyId);

                        // start a fresh index for the new key
                        fieldIndex = new VectorNode();
                        //Index.Add(keyId, fieldIndex);
                    }

                    // store value
                    var storedValue = _vals.Append(fieldValue);
                    long valId = _valIx.Append(storedValue.offset, storedValue.len, storedValue.dataType);

                    // store refs to keys and values
                    fieldRefs.Add((keyId, valId));

                    // add tokens and postings to index
                    foreach (var token in distinctTokens)
                    {
                        fieldIndex.Add(token, documentId);
                    }

                    // remember the index as modified; the first registration for a key wins
                    if (!_dirty.ContainsKey(keyId))
                    {
                        _dirty.Add(keyId, fieldIndex);
                    }
                }

                var storedDoc = _docs.Append(fieldRefs);
                _docIx.Append(storedDoc.offset, storedDoc.length);
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Stores every document in <paramref name="data"/> using writers created over this
        /// session's streams, and adds the tokens of string fields to the per-field index.
        /// For each token the closest match in the field index is looked up: when its score is
        /// below <c>VectorNode.IdenticalAngle</c> the field value is stored and gets a new value ID,
        /// otherwise the matched value ID is reused. One (keyId, valId) reference is recorded per token,
        /// so non-string values, which produce no tokens, add no reference to the document map.
        /// Every field index that was touched is registered in <c>_dirty</c> under "{CollectionId}.{keyId}".
        /// </summary>
        /// <param name="data">Documents to store. Every field value is cast to <see cref="IComparable"/>.</param>
        /// <param name="tokenizer">Tokenizer applied to string field values.</param>
        public void Write(IEnumerable <IDictionary> data, ITokenizer tokenizer)
        {
            var valueWriter      = new ValueWriter(ValueStream);
            var keyWriter        = new ValueWriter(KeyStream);
            var docWriter        = new DocWriter(DocStream);
            var valueIndexWriter = new ValueIndexWriter(ValueIndexStream);
            var keyIndexWriter   = new ValueIndexWriter(KeyIndexStream);
            var docIndexWriter   = new DocIndexWriter(DocIndexStream);

            foreach (var document in data)
            {
                var documentId = docIndexWriter.GetNextDocId();
                var fieldRefs  = new List <(uint keyId, uint valId)>();

                foreach (var field in document.Keys)
                {
                    var fieldName  = field.ToString();
                    var nameHash   = fieldName.ToHash();
                    var fieldIndex = GetIndex(nameHash);
                    var fieldValue = (IComparable)document[field];
                    var terms      = new List <Term>();

                    //TODO: implement numeric index
                    if (fieldValue is string text)
                    {
                        foreach (var token in tokenizer.Tokenize(text))
                        {
                            terms.Add(new Term(fieldName, token));
                        }
                    }

                    uint keyId;

                    if (fieldIndex == null)
                    {
                        // We have a new key!

                        // store key
                        var storedKey = keyWriter.Append(fieldName);
                        keyId = keyIndexWriter.Append(storedKey.offset, storedKey.len, storedKey.dataType);
                        SessionFactory.AddKey(nameHash, keyId);

                        // add new index to global in-memory tree
                        fieldIndex = new VectorNode();
                        Index.Add(keyId, fieldIndex);
                    }
                    else
                    {
                        keyId = SessionFactory.GetKey(nameHash);
                    }

                    foreach (var term in terms)
                    {
                        var termText = (string)term.Value;
                        var closest  = fieldIndex.ClosestMatch(termText);
                        uint valId;

                        if (closest.Highscore < VectorNode.IdenticalAngle)
                        {
                            // We have a new unique value!

                            // store value
                            var storedValue = valueWriter.Append(fieldValue);
                            valId = valueIndexWriter.Append(storedValue.offset, storedValue.len, storedValue.dataType);
                        }
                        else
                        {
                            // reuse the value ID of the matching entry
                            valId = closest.ValueId;
                        }

                        // add posting to index
                        fieldIndex.Add(termText, valId, documentId);

                        // store refs to keys and values
                        fieldRefs.Add((keyId, valId));
                    }

                    // remember the index as modified; the first registration for a name wins
                    var dirtyName = string.Format("{0}.{1}", CollectionId, keyId);

                    if (!_dirty.ContainsKey(dirtyName))
                    {
                        _dirty.Add(dirtyName, fieldIndex);
                    }
                }

                var storedDoc = docWriter.Append(fieldRefs);
                docIndexWriter.Append(storedDoc.offset, storedDoc.length);
            }
        }