Example #1
        public async Task<IList<ulong>> Write(WriteJob job)
        {
            var docIds   = new List<ulong>();
            var docCount = 0;
            var timer    = new Stopwatch();

            timer.Start();

            foreach (var model in job.Documents)
            {
                var docId  = _docIx.GetNextDocId();
                var docMap = new List<(long keyId, long valId)>();

                foreach (var key in model.Keys)
                {
                    var  keyStr = key.ToString();
                    var  keyHash = keyStr.ToHash();
                    var  val = (IComparable)model[key];
                    long keyId, valId;

                    if (!SessionFactory.TryGetKeyId(keyHash, out keyId))
                    {
                        // We have a new key!

                        // store key
                        var keyInfo = await _keys.Append(keyStr);

                        keyId = await _keyIx.Append(keyInfo.offset, keyInfo.len, keyInfo.dataType);

                        await SessionFactory.PersistKeyMapping(keyHash, keyId);
                    }

                    // store value
                    var valInfo = await _vals.Append(val);

                    valId = await _valIx.Append(valInfo.offset, valInfo.len, valInfo.dataType);

                    // store refs to keys and values
                    docMap.Add((keyId, valId));
                }

                // persist the document's key/value map and record its location in the doc index
                var docMeta = await _docs.Append(docMap);

                await _docIx.Append(docMeta.offset, docMeta.length);

                model.Add("__docid", docId);

                docIds.Add(docId);
                docCount++;
            }

            _log.Log(string.Format("processed {0} documents in {1}", docCount, timer.Elapsed));

            return docIds;
        }
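
The pattern above hinges on the key lookup: hash the key string, reuse an existing keyId when the hash is known, otherwise append the key and persist the new mapping. Below is a minimal, self-contained sketch of that idea; the FNV-1a hash and the in-memory dictionary are hypothetical stand-ins for ToHash() and the SessionFactory key map.

    using System;
    using System.Collections.Generic;

    public static class KeyMappingSketch
    {
        // Hypothetical stand-in for the repo's ToHash() extension (FNV-1a).
        private static ulong Fnv1aHash(string s)
        {
            var hash = 14695981039346656037UL;

            foreach (var c in s)
            {
                hash ^= c;
                hash *= 1099511628211UL;
            }

            return hash;
        }

        // In-memory stand-in for SessionFactory.TryGetKeyId / PersistKeyMapping.
        private static readonly Dictionary<ulong, long> _keyIdsByHash = new Dictionary<ulong, long>();
        private static long _nextKeyId;

        public static long GetOrAddKeyId(string keyStr)
        {
            var keyHash = Fnv1aHash(keyStr);

            if (_keyIdsByHash.TryGetValue(keyHash, out var keyId))
            {
                return keyId; // known key: reuse its id
            }

            keyId = _nextKeyId++;              // "append" the key
            _keyIdsByHash.Add(keyHash, keyId); // persist the hash -> id mapping

            return keyId;
        }
    }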
Example #2
        public void Write(IEnumerable<IDictionary> models, bool writeToIndex = false)
        {
            foreach (var model in models)
            {
                var docId  = _docIx.GetNextDocId();
                var docMap = new List<(long keyId, long valId)>();

                foreach (var key in model.Keys)
                {
                    var  keyStr = key.ToString();
                    var  keyHash = keyStr.ToHash();
                    var  val = (IComparable)model[key];
                    long keyId, valId;

                    if (!SessionFactory.TryGetKeyId(keyHash, out keyId))
                    {
                        // We have a new key!

                        // store key
                        var keyInfo = _keys.Append(keyStr);
                        keyId = _keyIx.Append(keyInfo.offset, keyInfo.len, keyInfo.dataType);
                        SessionFactory.PersistKeyMapping(keyHash, keyId);
                    }

                    // store value
                    var valInfo = _vals.Append(val);
                    valId = _valIx.Append(valInfo.offset, valInfo.len, valInfo.dataType);

                    // store refs to keys and values
                    docMap.Add((keyId, valId));
                }

                var docMeta = _docs.Append(docMap);
                _docIx.Append(docMeta.offset, docMeta.length);

                model.Add("__docid", docId);
            }

            if (writeToIndex)
            {
                WriteToIndex(new IndexJob(CollectionId, models));
            }
        }
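
Note that models is enumerated twice in this overload: once by the write loop, which also mutates each dictionary by adding "__docid", and presumably again by IndexJob when writeToIndex is true. If the caller passes a lazily evaluated sequence, the second pass can yield fresh dictionaries that never received an id. A hedged sketch of how a caller might guard against that by materializing the sequence first (the names below are illustrative, not part of the API):

    using System.Collections;
    using System.Collections.Generic;
    using System.Linq;

    public static class WriteAndIndexSketch
    {
        // A lazy source: every enumeration yields brand-new dictionaries,
        // so a "__docid" added on the first pass would be missing on the second.
        public static IEnumerable<IDictionary> LazyDocs()
        {
            for (var i = 0; i < 3; i++)
            {
                yield return new Dictionary<string, object> { ["title"] = "doc " + i };
            }
        }

        // Materializing once means the write pass and the index pass
        // operate on the same dictionary instances.
        public static IList<IDictionary> Materialize(IEnumerable<IDictionary> models)
        {
            return models as IList<IDictionary> ?? models.ToList();
        }
    }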
Example #3
        public void Write(IEnumerable<IDictionary> data, ITokenizer tokenizer)
        {
            foreach (var model in data)
            {
                var docId  = _docIx.GetNextDocId();
                var docMap = new List<(long keyId, long valId)>();

                foreach (var key in model.Keys)
                {
                    var  keyStr = key.ToString();
                    var  keyHash = keyStr.ToHash();
                    var  fieldIndex = CloneIndex(keyHash);
                    var  val = (IComparable)model[key];
                    var  str = val as string;
                    var  tokens = new HashSet<string>();
                    long keyId, valId;

                    if (str != null)
                    {
                        foreach (var token in tokenizer.Tokenize(str))
                        {
                            tokens.Add(token);
                        }
                    }
                    else
                    {
                        //TODO: implement numeric index

                        tokens.Add(val.ToString());
                    }

                    if (fieldIndex == null)
                    {
                        // We have a new key!

                        // store key
                        var keyInfo = _keys.Append(keyStr);
                        keyId = _keyIx.Append(keyInfo.offset, keyInfo.len, keyInfo.dataType);
                        SessionFactory.AddKey(keyHash, keyId);

                        // add new index to global in-memory tree
                        fieldIndex = new VectorNode();
                        //Index.Add(keyId, fieldIndex);
                    }
                    else
                    {
                        keyId = SessionFactory.GetKey(keyHash);
                    }

                    // store value
                    var valInfo = _vals.Append(val);
                    valId = _valIx.Append(valInfo.offset, valInfo.len, valInfo.dataType);

                    // store refs to keys and values
                    docMap.Add((keyId, valId));

                    foreach (var token in tokens)
                    {
                        // add token and postings to index
                        fieldIndex.Add(token, docId);
                    }

                    // remember that this field's in-memory index was modified (dirty)
                    if (!_dirty.ContainsKey(keyId))
                    {
                        _dirty.Add(keyId, fieldIndex);
                    }
                }

                var docMeta = _docs.Append(docMap);
                _docIx.Append(docMeta.offset, docMeta.length);
            }
        }
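
A minimal sketch of the tokenize-and-deduplicate step above: string values are split into tokens and collected in a HashSet so each distinct token is added to the field index once, while non-string values fall back to their string form (the numeric index is still a TODO upstream). The splitter below is a hypothetical stand-in for the repo's ITokenizer.

    using System;
    using System.Collections.Generic;

    public static class TokenizeSketch
    {
        // Hypothetical stand-in for ITokenizer.Tokenize: lower-case the text
        // and split on whitespace and common punctuation.
        public static IEnumerable<string> Tokenize(string text)
        {
            return text.ToLowerInvariant().Split(
                new[] { ' ', '\t', '\r', '\n', '.', ',', ';', ':', '!', '?' },
                StringSplitOptions.RemoveEmptyEntries);
        }

        public static HashSet<string> DistinctTokens(IComparable val)
        {
            var tokens = new HashSet<string>();
            var str = val as string;

            if (str != null)
            {
                foreach (var token in Tokenize(str))
                {
                    tokens.Add(token); // HashSet drops duplicates
                }
            }
            else
            {
                // Non-string values are indexed verbatim for now.
                tokens.Add(val.ToString());
            }

            return tokens;
        }
    }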
Example #4
        public void Write(IEnumerable<IDictionary> data, ITokenizer tokenizer)
        {
            var vals  = new ValueWriter(ValueStream);
            var keys  = new ValueWriter(KeyStream);
            var docs  = new DocWriter(DocStream);
            var valIx = new ValueIndexWriter(ValueIndexStream);
            var keyIx = new ValueIndexWriter(KeyIndexStream);
            var docIx = new DocIndexWriter(DocIndexStream);

            foreach (var model in data)
            {
                var docId  = docIx.GetNextDocId();
                var docMap = new List<(uint keyId, uint valId)>();

                foreach (var key in model.Keys)
                {
                    var  keyStr = key.ToString();
                    var  keyHash = keyStr.ToHash();
                    var  fieldIndex = GetIndex(keyHash);
                    var  val = (IComparable)model[key];
                    var  str = val as string;
                    var  indexTokens = new List<Term>();
                    uint keyId, valId;

                    if (str != null) //TODO: implement numeric index
                    {
                        foreach (var token in tokenizer.Tokenize(str))
                        {
                            indexTokens.Add(new Term(keyStr, token));
                        }
                    }

                    if (fieldIndex == null)
                    {
                        // We have a new key!

                        // store key
                        var keyInfo = keys.Append(keyStr);
                        keyId = keyIx.Append(keyInfo.offset, keyInfo.len, keyInfo.dataType);
                        SessionFactory.AddKey(keyHash, keyId);

                        // add new index to global in-memory tree
                        fieldIndex = new VectorNode();
                        Index.Add(keyId, fieldIndex);
                    }
                    else
                    {
                        keyId = SessionFactory.GetKey(keyHash);
                    }

                    foreach (var token in indexTokens)
                    {
                        var match = fieldIndex.ClosestMatch((string)token.Value);

                        if (match.Highscore < VectorNode.IdenticalAngle)
                        {
                            // We have a new unique value!

                            // store value
                            var valInfo = vals.Append(val);
                            valId = valIx.Append(valInfo.offset, valInfo.len, valInfo.dataType);
                        }
                        else
                        {
                            valId = match.ValueId;
                        }

                        // add posting to index
                        fieldIndex.Add((string)token.Value, valId, docId);

                        // store refs to keys and values
                        docMap.Add((keyId, valId));
                    }

                    // remember that this field's in-memory index was modified (dirty)
                    var indexName = string.Format("{0}.{1}", CollectionId, keyId);
                    if (!_dirty.ContainsKey(indexName))
                    {
                        _dirty.Add(indexName, fieldIndex);
                    }
                }

                var docMeta = docs.Append(docMap);
                docIx.Append(docMeta.offset, docMeta.length);
            }
        }
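
Example #4 only appends a value when the field index's closest match scores below VectorNode.IdenticalAngle; otherwise it reuses the matched valId. The sketch below illustrates that reuse-or-append decision with a toy character-frequency vector and cosine similarity; the threshold and the vectorization are assumptions, not the repo's actual VectorNode scoring.

    using System;
    using System.Collections.Generic;
    using System.Linq;

    public static class ValueDedupSketch
    {
        private const double IdenticalThreshold = 0.999d; // assumed, not the repo's IdenticalAngle

        private static readonly List<(string value, long valId)> _stored = new List<(string value, long valId)>();
        private static long _nextValId;

        // Reuse the id of the closest stored value if it is "identical enough",
        // otherwise store the value under a new id.
        public static long GetOrAddValueId(string value)
        {
            long bestId = -1;
            var bestScore = double.MinValue;

            foreach (var (stored, valId) in _stored)
            {
                var score = CosineSimilarity(stored, value);

                if (score > bestScore)
                {
                    bestScore = score;
                    bestId = valId;
                }
            }

            if (bestScore >= IdenticalThreshold)
            {
                return bestId; // close enough: reuse the existing value id
            }

            var newId = _nextValId++;
            _stored.Add((value, newId));

            return newId;
        }

        // Character-frequency cosine similarity (a toy stand-in for vector-space scoring).
        private static double CosineSimilarity(string a, string b)
        {
            var va = a.GroupBy(c => c).ToDictionary(g => g.Key, g => (double)g.Count());
            var vb = b.GroupBy(c => c).ToDictionary(g => g.Key, g => (double)g.Count());

            var dot = va.Keys.Intersect(vb.Keys).Sum(c => va[c] * vb[c]);
            var magA = Math.Sqrt(va.Values.Sum(x => x * x));
            var magB = Math.Sqrt(vb.Values.Sum(x => x * x));

            return magA == 0 || magB == 0 ? 0 : dot / (magA * magB);
        }
    }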