/// <summary>
/// Enqueues an update job for the named collection. An empty <paramref name="old"/>
/// sequence is normalized to null, signalling "no previous version" to the writer.
/// </summary>
/// <param name="collectionName">Collection name; hashed to a collection id.</param>
/// <param name="data">The new documents.</param>
/// <param name="old">The previous documents, or null/empty when there are none.</param>
public void Update(string collectionName, IEnumerable<IDictionary> data, IEnumerable<IDictionary> old)
{
    // The previous implementation hard-cast old to IList to test for emptiness,
    // which throws InvalidCastException for lazily produced sequences and
    // NullReferenceException when old is null. Any() handles both safely.
    if (old != null && !old.Any())
    {
        old = null;
    }

    _writeQueue.Enqueue(new WriteJob(collectionName.ToHash(), data, old));
}
/// <summary>
/// Creates a write transaction for the given collection and hands it to the
/// write queue for asynchronous processing.
/// </summary>
/// <param name="collectionId">Collection name; hashed to a collection id.</param>
/// <param name="data">The documents to write.</param>
public void Write(string collectionId, IEnumerable<IDictionary> data)
{
    // Ownership of the transaction transfers to the queue consumer, which is
    // responsible for disposing it after the write completes. The previous
    // version wrapped the transaction in a using block, disposing it the
    // moment this method returned — while it was still pending in the queue.
    var tx = new WriteTransaction(collectionId.ToHash(), data);

    _writeQueue.Enqueue(tx);
}
/// <summary>
/// Tokenizes every non-system field of each document and enqueues the
/// resulting embedding tokens (as strings) onto the HTTP queue.
/// </summary>
/// <param name="documents">Documents whose fields should be warmed up.</param>
/// <param name="excludeKeyIds">Key ids whose fields are skipped.</param>
public void Warmup(IEnumerable<IDictionary> documents, params long[] excludeKeyIds)
{
    foreach (var document in documents)
    {
        foreach (var fieldName in document.Keys)
        {
            var field = fieldName.ToString();

            // Fields prefixed with "__" are internal and never warmed up.
            if (field.StartsWith("__"))
            {
                continue;
            }

            var keyId = SessionFactory.GetKeyId(CollectionId, field.ToHash());

            if (excludeKeyIds.Contains(keyId))
            {
                continue;
            }

            var tokenized = _tokenizer.Tokenize(document[fieldName].ToString());

            foreach (var embedding in tokenized.Embeddings)
            {
                _httpQueue.Enqueue(embedding.ToString());
            }
        }
    }
}
/// <summary>
/// Tokenizes every non-system field of each document and enqueues each
/// non-blank token (sliced from the original text) onto the HTTP queue.
/// NOTE(review): this overload has the same signature as the Embeddings-based
/// Warmup — confirm the two live in different classes.
/// </summary>
/// <param name="documents">Documents whose fields should be warmed up.</param>
/// <param name="excludeKeyIds">Key ids whose fields are skipped.</param>
public void Warmup(IEnumerable<IDictionary> documents, params long[] excludeKeyIds)
{
    foreach (var document in documents)
    {
        foreach (var fieldName in document.Keys)
        {
            var field = fieldName.ToString();

            // Fields prefixed with "__" are internal and never warmed up.
            if (field.StartsWith("__"))
            {
                continue;
            }

            var keyId = SessionFactory.GetKeyId(CollectionId, field.ToHash());

            if (excludeKeyIds.Contains(keyId))
            {
                continue;
            }

            var tokenized = _tokenizer.Tokenize(document[fieldName].ToString());

            // Materialize each token from its (offset, length) slice of the
            // original text; blank slices are dropped.
            foreach (var span in tokenized.Tokens)
            {
                var token = tokenized.Original.Substring(span.offset, span.length);

                if (string.IsNullOrWhiteSpace(token))
                {
                    continue;
                }

                _httpQueue.Enqueue(token);
            }
        }
    }
}
/// <summary>
/// Enqueues a write job for the named collection, logging success or failure.
/// Failures are logged and rethrown to the caller.
/// </summary>
/// <param name="collectionName">Collection name; hashed to a collection id.</param>
/// <param name="data">The documents to write.</param>
public void Write(string collectionName, IEnumerable<IDictionary> data)
{
    try
    {
        var job = new WriteJob(collectionName.ToHash(), data);

        _writeQueue.Enqueue(job);

        _log.Log(string.Format("enqueued job {0} to be written to {1}", job.Id, collectionName));
    }
    catch (Exception ex)
    {
        _log.Log(string.Format("enqueue failed: {0}", ex));

        throw;
    }
}
/// <summary>
/// Analyzes every document in an index job: tokenizes each non-system field
/// and enqueues a build job per (document, field) pair so the field's
/// in-memory index can be updated. Failures are logged and rethrown.
/// </summary>
/// <param name="job">The index job whose documents are analyzed.</param>
private void Write(IndexJob job)
{
    try
    {
        var docCount = 0;
        var timer = new Stopwatch();
        timer.Start();

        foreach (var doc in job.Documents)
        {
            var docId = (ulong)doc["__docid"];

            // Fields prefixed with "__" are system fields and are not indexed.
            var keys = doc.Keys
                .Cast<string>()
                .Where(x => !x.StartsWith("__"));

            foreach (var key in keys)
            {
                var keyHash = key.ToHash();
                // NOTE(review): this overload takes only the key hash, while the
                // Warmup methods call GetKeyId(CollectionId, hash) — confirm the
                // two resolve to the same key space.
                var keyId = SessionFactory.GetKeyId(keyHash);

                // Cache the per-key index in _dirty so repeated fields across
                // documents reuse (and mutate) the same VectorNode.
                VectorNode ix;

                if (!_dirty.TryGetValue(keyId, out ix))
                {
                    ix = GetIndex(keyHash) ?? new VectorNode();

                    _dirty.Add(keyId, ix);
                }

                var val = (IComparable)doc[key];
                var str = val as string;
                var tokens = new HashSet<string>();

                // Non-string values, and fields whose name starts with a single
                // underscore, are indexed as one verbatim token; everything else
                // is tokenized.
                if (str == null || key[0] == '_')
                {
                    tokens.Add(val.ToString());
                }
                else
                {
                    var tokenlist = _tokenizer.Tokenize(str);

                    foreach (var token in tokenlist)
                    {
                        tokens.Add(token);
                    }
                }

                _buildQueue.Enqueue(new BuildJob(CollectionId, docId, tokens, ix));
            }

            // Progress log every 100th document; the counter then resets.
            if (++docCount == 100)
            {
                _log.Log(string.Format("analyzed doc {0}", doc["__docid"]));

                docCount = 0;
            }
        }

        _log.Log(string.Format("executed {0} analyze job in {1}", job.CollectionId, timer.Elapsed));
    }
    catch (Exception ex)
    {
        _log.Log(ex.ToString());

        throw;
    }
}
/// <summary>
/// Hands an index job to the index queue for asynchronous processing.
/// </summary>
/// <param name="job">The index job to enqueue.</param>
public void WriteToIndex(IndexJob job) => _indexQueue.Enqueue(job);