// Read subfingerprints using the separate "hashes" table.
// Returns every SubFingerprintData for which at least <paramref name="thresholdVotes"/>
// of the supplied hash bins match stored hashes, ordered best-match first.
// Reference implementation:
// https://github.com/AddictedCS/soundfingerprinting.mongodb/blob/release/2.3.x/src/SoundFingerprinting.MongoDb/HashBinDao.cs
public IList<SubFingerprintData> ReadSubFingerprintDataByHashBucketsWithThresholdSeparateTable(int[] hashBins, int thresholdVotes, bool ignoreHashTableIndex)
{
    // build the match query; optionally ignore which hash table each bin came from
    var query = ignoreHashTableIndex
        ? GetQueryForHashBinsIgnoreIndex(hashBins, true)
        : GetQueryForHashBins(hashBins);

    // get hash collection
    var col = db.GetCollection<Hash>("hashes");

    // ensure indexes (no-op when they already exist)
    col.EnsureIndex(x => x.HashBin);
    col.EnsureIndex(x => x.HashTable);

    // find the subfingerprints that have at least the threshold number of
    // matching hashes, best matches first.
    // NOTE: the previous version also projected an unused ordered hash list
    // per group ("Hashes = s.OrderBy(f => f.HashTable)") - removed as dead code.
    var matches = col.Find(query)
        .GroupBy(g => g.SubFingerprintId)
        .Select(s => new { s.Key, MatchedCount = s.Count() })
        .Where(e => e.MatchedCount >= thresholdVotes)
        .OrderByDescending(o => o.MatchedCount)
        .Select(s => new ModelReference<string>(s.Key))
        .ToList();

    if (!matches.Any())
    {
        return new List<SubFingerprintData>();
    }

    // resolve each hit into its full SubFingerprintData
    return ReadSubFingerprintDataByReference(matches);
}
/// <summary>
/// Filters a sequence of documents based on a predicate expression
/// </summary>
public ILiteQueryable<T> Where(string predicate, params BsonValue[] args)
{
    var expr = BsonExpression.Create(predicate, args);

    _query.Where.Add(expr);

    return this;
}
/// <summary>
/// Builds the exception thrown when an expression cannot be used as a predicate.
/// </summary>
internal static LiteException InvalidExpressionTypePredicate(BsonExpression expr)
{
    // fix: message grammar - a single expression "is not supported" (was "are not supported")
    return new LiteException(INVALID_EXPRESSION_TYPE, "Expression '{0}' is not supported as predicate expression.", expr.Source);
}
/// <summary>
/// Returns all values from array according index. If index are MaxValue, return all values
/// </summary>
public static IEnumerable<BsonValue> Array(IEnumerable<BsonValue> values, int index, BsonExpression expr, BsonDocument root)
{
    foreach (var value in values)
    {
        // only array values can be indexed/filtered; anything else is skipped
        if (!value.IsArray) continue;

        var arr = value.AsArray;

        if (expr.Source != null)
        {
            // [expression] - yield each child for which the expression's first
            // returned value is boolean true
            foreach (var item in arr)
            {
                var result = expr.Execute(root, item, true).First();

                if (result.IsBoolean && result.AsBoolean == true)
                {
                    yield return item;
                }
            }
        }
        else if (index == int.MaxValue)
        {
            // [all] - yield every element
            foreach (var item in arr)
            {
                yield return item;
            }
        }
        else
        {
            // [fixed_index] - negative index counts from the end
            var idx = index < 0 ? arr.Count + index : index;

            if (idx < arr.Count)
            {
                yield return arr[idx];
            }
        }
    }
}
/// <summary>
/// Delete all documents based on predicate expression. Returns how many documents was deleted
/// </summary>
public Task<int> DeleteManyAsync(string predicate, params BsonValue[] args)
{
    var expr = BsonExpression.Create(predicate, args);

    return this.DeleteManyAsync(expr);
}
/// <summary>
/// Returns all values from array according filter expression or all values (index = MaxValue)
/// </summary>
public static IEnumerable<BsonValue> ARRAY_FILTER(BsonValue value, int index, BsonExpression filterExpr, BsonDocument root, Collation collation, BsonDocument parameters)
{
    // non-array values produce no output
    if (!value.IsArray) yield break;

    var arr = value.AsArray;

    if (index == int.MaxValue)
    {
        // [*] - index means "all values"
        foreach (var item in arr)
        {
            yield return item;
        }

        yield break;
    }

    // [<expr>] - index is an expression: propagate current parameters into it
    parameters.CopyTo(filterExpr.Parameters);

    foreach (var item in arr)
    {
        // evaluate the filter for each child; keep it only on boolean true
        var match = filterExpr.ExecuteScalar(new BsonDocument[] { root }, root, item, collation);

        if (match.IsBoolean && match.AsBoolean == true)
        {
            yield return item;
        }
    }
}
/// <summary>
/// Find the first document using predicate expression. Returns null if not found
/// </summary>
public T FindOne(BsonExpression predicate, params BsonValue[] args)
{
    var expr = BsonExpression.Create(predicate, args);

    return this.FindOne(expr);
}
/// <summary>
/// Find the first document using predicate expression. Returns null if not found
/// </summary>
public T FindOne(BsonExpression predicate)
{
    return this.Find(predicate).FirstOrDefault();
}
/// <summary>
/// Returns all values from array according index. If index are MaxValue, return all values
/// </summary>
public static IEnumerable<BsonValue> Array(IEnumerable<BsonValue> values, int index, BsonExpression expr, BsonDocument root)
{
    foreach (var value in values)
    {
        // only array values can be indexed/filtered
        if (!value.IsArray) continue;

        var arr = value.AsArray;

        // every yielded item gets a Destroy action that removes it from its
        // parent array (used by update/remove pipelines)
        BsonValue WithDestroy(BsonValue item)
        {
            item.Destroy = () => arr.Remove(item);
            return item;
        }

        if (expr.Source != null)
        {
            // [<expr>] - yield children for which the expression's first
            // returned value is boolean true
            foreach (var item in arr)
            {
                var result = expr.Execute(root, item, true).First();

                if (result.IsBoolean && result.AsBoolean == true)
                {
                    yield return WithDestroy(item);
                }
            }
        }
        else if (index == int.MaxValue)
        {
            // [*] - all values
            foreach (var item in arr)
            {
                yield return WithDestroy(item);
            }
        }
        else
        {
            // [n] - fixed index (negative counts from the end)
            var idx = index < 0 ? arr.Count + index : index;

            if (idx < arr.Count)
            {
                yield return WithDestroy(arr[idx]);
            }
        }
    }
}
/// <summary>
/// Delete all documents based on predicate expression. Returns how many documents was deleted
/// </summary>
public int DeleteMany(string predicate, params BsonValue[] args)
{
    var expr = BsonExpression.Create(predicate, args);

    return this.DeleteMany(expr);
}
/// <summary>
/// Delete all documents based on predicate expression. Returns how many documents was deleted
/// </summary>
public int DeleteMany(string predicate, BsonDocument parameters)
{
    var expr = BsonExpression.Create(predicate, parameters);

    return this.DeleteMany(expr);
}
/// <summary>
/// Execute Query[T].Where(predicate).SingleOrDefault();
/// </summary>
public T SingleOrDefault<T>(BsonExpression predicate, string collectionName = null)
{
    var queryable = this.Query<T>(collectionName);

    return queryable.Where(predicate).SingleOrDefault();
}
/// <summary>
/// Execute Query[T].Where(predicate).First();
/// </summary>
public T First<T>(BsonExpression predicate, string collectionName = null)
{
    var queryable = this.Query<T>(collectionName);

    return queryable.Where(predicate).First();
}
/// <summary>
/// Delete entity based on Query
/// </summary>
public int DeleteMany<T>(BsonExpression predicate, string collectionName = null)
{
    var collection = _db.GetCollection<T>(collectionName);

    return collection.DeleteMany(predicate);
}
/// <summary>
/// Create a new permanent index in all documents inside this collections if index not exists already. Returns true if index was created or false if already exits
/// </summary>
/// <param name="expression">Create a custom expression function to be indexed</param>
/// <param name="unique">If is a unique index</param>
/// <param name="collectionName">Collection Name</param>
public bool EnsureIndex<T>(BsonExpression expression, bool unique = false, string collectionName = null)
{
    var collection = _db.GetCollection<T>(collectionName);

    return collection.EnsureIndex(expression, unique);
}
/// <summary>
/// Count documents matching a query. This method does not deserialize any documents. Needs indexes on query expression
/// </summary>
public long LongCount(string predicate, BsonDocument parameters)
{
    var expr = BsonExpression.Create(predicate, parameters);

    return this.LongCount(expr);
}
/// <summary>
/// Count documents matching a query. This method does not deserialize any documents. Needs indexes on query expression
/// </summary>
public long LongCount(string predicate, params BsonValue[] args)
{
    var expr = BsonExpression.Create(predicate, args);

    return this.LongCount(expr);
}
/// <summary>
/// Find all files that match with predicate expression.
/// </summary>
public IEnumerable<LiteFileInfo<TFileId>> Find(string predicate, BsonDocument parameters)
{
    var expr = BsonExpression.Create(predicate, parameters);

    return this.Find(expr);
}
/// <summary>
/// Find the first document using predicate expression. Returns null if not found
/// </summary>
public T FindOne(string predicate, BsonDocument parameters)
{
    var expr = BsonExpression.Create(predicate, parameters);

    return this.FindOne(expr);
}
/// <summary>
/// Find all files that match with predicate expression.
/// </summary>
public IEnumerable<LiteFileInfo<TFileId>> Find(string predicate, params BsonValue[] args)
{
    var expr = BsonExpression.Create(predicate, args);

    return this.Find(expr);
}
/// <summary>
/// Get an IEnumerable of values from a json-like path inside document. Use BsonExpression to parse this path
/// </summary>
public IEnumerable<BsonValue> Get(string path, bool includeNullIfEmpty = false)
{
    var scanner = new StringScanner(path);
    var expr = new BsonExpression(scanner, true, true);

    return expr.Execute(this, includeNullIfEmpty);
}
// upper bound of dirty cache pages in the temp sort engine before its
// pages are flushed to the temp disk file
private const int MAX_SORT_PAGES = 5000; // ~ 20Mb?

/// <summary>
/// EXPERIMENTAL Find with sort operation - use memory or disk (temp file) to sort
/// </summary>
/// <param name="collection">Collection name (must be non-empty)</param>
/// <param name="query">Filter query executed against the collection indexes</param>
/// <param name="orderBy">Path/expression evaluated on each document to produce its sort key</param>
/// <param name="order">Query.Ascending or Query.Descending</param>
/// <param name="skip">Number of sorted documents to skip</param>
/// <param name="limit">Maximum number of documents to return</param>
public List<BsonDocument> FindSort(string collection, Query query, string orderBy, int order = Query.Ascending, int skip = 0, int limit = int.MaxValue)
{
    if (collection.IsNullOrWhiteSpace()) { throw new ArgumentNullException(nameof(collection)); }
    if (query == null) { throw new ArgumentNullException(nameof(query)); }

    _log.Write(Logger.COMMAND, "query-sort documents in '{0}' => {1}", collection, query);

    // evaluate orderBy path/expression
    var expr = new BsonExpression(orderBy);

    // lock database for read access
    using (_locker.Read())
    {
        // "last" tracks the worst key currently kept in the sort index;
        // start at the extreme value so every key is initially accepted
        var last = order == Query.Ascending ? BsonValue.MaxValue : BsonValue.MinValue;
        // only skip+limit entries ever need to be retained while sorting
        var total = limit == int.MaxValue ? int.MaxValue : skip + limit;
        var indexCounter = 0;

        var disk = new TempDiskService();

        // create memory database (backed by the temp disk) used purely for sorting
        using (var engine = new LiteEngine(disk))
        {
            // get collection page
            var col = this.GetCollectionPage(collection, false);

            if (col == null) { return (new List<BsonDocument>()); }

            // create a temp collection in new memory database
            var tmp = engine._collections.Add("tmp");

            // create index pointer
            var index = engine._indexer.CreateIndex(tmp);

            // get head/tail index node
            var head = engine._indexer.GetNode(index.HeadNode);
            var tail = engine._indexer.GetNode(index.TailNode);

            // first lets works only with index in query
            var nodes = query.Run(col, _indexer);

            foreach (var node in nodes)
            {
                var buffer = _data.Read(node.DataBlock);
                var doc = _bsonReader.Deserialize(buffer).AsDocument;

                // if needs use filter
                if (query.UseFilter && query.FilterDocument(doc) == false) { continue; }

                // get key to be sorted
                var key = expr.Execute(doc, true).First();
                var diff = key.CompareTo(last);

                // add to list only if lower than last space
                if ((order == Query.Ascending && diff < 1) || (order == Query.Descending && diff > -1))
                {
                    var tmpNode = engine._indexer.AddNode(index, key, null);

                    // link temp index node back to the original data block and
                    // cache the deserialized document to avoid a re-read later
                    tmpNode.DataBlock = node.DataBlock;
                    tmpNode.CacheDocument = doc;

                    indexCounter++;

                    // exceeded limit
                    if (indexCounter > total)
                    {
                        // drop the current worst node from the keep-set...
                        var exceeded = (order == Query.Ascending) ? tail.Prev[0] : head.Next[0];

                        engine._indexer.Delete(index, exceeded);

                        // ...then re-read tail.Prev/head.Next to refresh "last":
                        // the link changed after the Delete above, so this is
                        // intentionally NOT the same address as "exceeded"
                        var lnode = (order == Query.Ascending) ? tail.Prev[0] : head.Next[0];

                        last = engine._indexer.GetNode(lnode).Key;

                        indexCounter--;
                    }

                    // if memory pages excedded limit size, flush to disk
                    if (engine._cache.DirtyUsed > MAX_SORT_PAGES)
                    {
                        engine._trans.PersistDirtyPages();
                        engine._trans.CheckPoint();
                    }
                }
            }

            var result = new List<BsonDocument>();

            // if skip is lower than limit, take nodes from skip from begin
            // if skip is higher than limit, take nodes from end and revert order (avoid lots of skip)
            var find = skip < limit ?
                engine._indexer.FindAll(index, order).Skip(skip).Take(limit) : // get from original order
                engine._indexer.FindAll(index, -order).Take(limit).Reverse(); // avoid long skips, take from end and revert

            // --- foreach (var node in engine._indexer.FindAll(index, order).Skip(skip).Take(limit))
            foreach (var node in find)
            {
                // if document are in cache, use it. if not, get from disk again
                var doc = node.CacheDocument;

                if (doc == null)
                {
                    var buffer = _data.Read(node.DataBlock);
                    doc = _bsonReader.Deserialize(buffer).AsDocument;
                }

                result.Add(doc);
            }

            return (result);
        }
    }
}
/// <summary>
/// Delete all documents based on predicate expression. Returns how many documents was deleted
/// </summary>
public Task<int> DeleteManyAsync(string predicate, BsonDocument parameters)
{
    var expr = BsonExpression.Create(predicate, parameters);

    return this.DeleteManyAsync(expr);
}
/// <summary>
/// Parses and executes a SQL-like SELECT statement. Grammar:
/// [ EXPLAIN ]
///    SELECT {selectExpr}
///    [ INTO {newcollection|$function} [ : {autoId} ] ]
///    [ FROM {collection|$function} ]
/// [ INCLUDE {pathExpr0} [, {pathExprN} ]
///   [ WHERE {filterExpr} ]
///   [ GROUP BY {groupByExpr} ]
///  [ HAVING {filterExpr} ]
///   [ ORDER BY {orderByExpr} [ ASC | DESC ] ]
///    [ LIMIT {number} ]
///   [ OFFSET {number} ]
///     [ FOR UPDATE ]
/// </summary>
private IBsonDataReader ParseSelect()
{
    // initialize query definition
    var query = new Query();

    var token = _tokenizer.ReadToken();

    // optional EXPLAIN prefix: produce the query plan instead of results
    query.ExplainPlan = token.Is("EXPLAIN");

    if (query.ExplainPlan) { token = _tokenizer.ReadToken(); }

    token.Expect("SELECT");

    // read required SELECT <expr> and convert into single expression
    query.Select = BsonExpression.Create(_tokenizer, BsonExpressionParserMode.SelectDocument, _parameters);

    // read FROM|INTO
    var from = _tokenizer.ReadToken();

    if (from.Type == TokenType.EOF || from.Type == TokenType.SemiColon)
    {
        // select with no FROM - just run expression (avoid DUAL table, Mr. Oracle)
        //TODO: i think will be better add all sql into engine
        var result = query.Select.Execute(_collation.Value);

        var defaultName = "expr";

        // non-document results get wrapped as { "expr": <value> }
        return (new BsonDataReader(result.Select(x => x.IsDocument ? x.AsDocument : new BsonDocument { [defaultName] = x }), null));
    }
    else if (from.Is("INTO"))
    {
        query.Into = ParseCollection(_tokenizer);
        query.IntoAutoId = this.ParseWithAutoId();

        _tokenizer.ReadToken().Expect("FROM");
    }
    else
    {
        from.Expect("FROM");
    }

    // read FROM <name>
    var collection = ParseCollection(_tokenizer);

    // each optional clause below is detected via LookAhead so the keyword is
    // only consumed when the clause is actually present
    var ahead = _tokenizer.LookAhead().Expect(TokenType.Word, TokenType.EOF, TokenType.SemiColon);

    if (ahead.Is("INCLUDE"))
    {
        // read first INCLUDE (before)
        _tokenizer.ReadToken();

        foreach (var path in this.ParseListOfExpressions())
        {
            query.Includes.Add(path);
        }
    }

    ahead = _tokenizer.LookAhead().Expect(TokenType.Word, TokenType.EOF, TokenType.SemiColon);

    if (ahead.Is("WHERE"))
    {
        // read WHERE keyword
        _tokenizer.ReadToken();

        var where = BsonExpression.Create(_tokenizer, BsonExpressionParserMode.Full, _parameters);

        query.Where.Add(where);
    }

    ahead = _tokenizer.LookAhead().Expect(TokenType.Word, TokenType.EOF, TokenType.SemiColon);

    if (ahead.Is("GROUP"))
    {
        // read GROUP BY keyword
        _tokenizer.ReadToken();
        _tokenizer.ReadToken().Expect("BY");

        var groupBy = BsonExpression.Create(_tokenizer, BsonExpressionParserMode.Full, _parameters);

        query.GroupBy = groupBy;

        ahead = _tokenizer.LookAhead().Expect(TokenType.Word, TokenType.EOF, TokenType.SemiColon);

        // HAVING is only valid (and only parsed) after a GROUP BY clause
        if (ahead.Is("HAVING"))
        {
            // read HAVING keyword
            _tokenizer.ReadToken();

            var having = BsonExpression.Create(_tokenizer, BsonExpressionParserMode.Full, _parameters);

            query.Having = having;
        }
    }

    ahead = _tokenizer.LookAhead().Expect(TokenType.Word, TokenType.EOF, TokenType.SemiColon);

    if (ahead.Is("ORDER"))
    {
        // read ORDER BY keyword
        _tokenizer.ReadToken();
        _tokenizer.ReadToken().Expect("BY");

        var orderBy = BsonExpression.Create(_tokenizer, BsonExpressionParserMode.Full, _parameters);

        var orderByOrder = Query.Ascending;
        var orderByToken = _tokenizer.LookAhead();

        // optional ASC/DESC suffix (defaults to ascending)
        if (orderByToken.Is("ASC") || orderByToken.Is("DESC"))
        {
            orderByOrder = _tokenizer.ReadToken().Is("ASC") ? Query.Ascending : Query.Descending;
        }

        query.OrderBy = orderBy;
        query.Order = orderByOrder;
    }

    ahead = _tokenizer.LookAhead().Expect(TokenType.Word, TokenType.EOF, TokenType.SemiColon);

    if (ahead.Is("LIMIT"))
    {
        // read LIMIT keyword
        _tokenizer.ReadToken();
        var limit = _tokenizer.ReadToken().Expect(TokenType.Int).Value;

        query.Limit = Convert.ToInt32(limit);
    }

    ahead = _tokenizer.LookAhead().Expect(TokenType.Word, TokenType.EOF, TokenType.SemiColon);

    if (ahead.Is("OFFSET"))
    {
        // read OFFSET keyword
        _tokenizer.ReadToken();
        var offset = _tokenizer.ReadToken().Expect(TokenType.Int).Value;

        query.Offset = Convert.ToInt32(offset);
    }

    ahead = _tokenizer.LookAhead().Expect(TokenType.Word, TokenType.EOF, TokenType.SemiColon);

    if (ahead.Is("FOR"))
    {
        // read FOR keyword
        _tokenizer.ReadToken();
        _tokenizer.ReadToken().Expect("UPDATE");

        query.ForUpdate = true;
    }

    // read eof/;
    _tokenizer.ReadToken().Expect(TokenType.EOF, TokenType.SemiColon);

    return (_engine.Query(collection, query));
}
/// <summary>
/// Internal implementation of insert a document
/// </summary>
/// <param name="col">Target collection page (mutated: Sequence is advanced)</param>
/// <param name="doc">Document to insert; an "_id" is generated if missing</param>
/// <param name="autoId">Which BsonType to use when generating a new _id</param>
private void InsertDocument(CollectionPage col, BsonDocument doc, BsonType autoId)
{
    // collection Sequence was created after release current datafile version.
    // In this case, Sequence will be 0 but already has documents. Let's fix this
    // ** this code can be removed when datafile change from 7 (HeaderPage.FILE_VERSION) **
    if (col.Sequence == 0 && col.DocumentCount > 0)
    {
        var max = this.Max(col.CollectionName, "_id");

        // if max value is a number, convert to Sequence last value
        // if not, just set sequence as document count
        col.Sequence = (max.IsInt32 || max.IsInt64 || max.IsDouble || max.IsDecimal) ?
            Convert.ToInt64(max.RawValue) :
            Convert.ToInt64(col.DocumentCount);
    }

    // increase collection sequence _id
    col.Sequence++;

    _pager.SetDirty(col);

    // if no _id, add one
    if (!doc.RawValue.TryGetValue("_id", out var id))
    {
        // generate a new _id according to the collection's autoId strategy
        doc["_id"] = id = autoId == BsonType.ObjectId ? new BsonValue(ObjectId.NewObjectId()) :
            autoId == BsonType.Guid ? new BsonValue(Guid.NewGuid()) :
            autoId == BsonType.DateTime ? new BsonValue(DateTime.Now) :
            autoId == BsonType.Int32 ? new BsonValue((Int32)col.Sequence) :
            autoId == BsonType.Int64 ? new BsonValue(col.Sequence) : BsonValue.Null;
    }
    // create bubble in sequence number if _id is bigger than current sequence
    else if (autoId == BsonType.Int32 || autoId == BsonType.Int64)
    {
        var current = id.AsInt64;

        // if current id is bigger than sequence, jump sequence to this number. Other was, do not increse sequnce
        col.Sequence = current >= col.Sequence ? current : col.Sequence - 1;
    }

    // test if _id is a valid type
    if (id.IsNull || id.IsMinValue || id.IsMaxValue)
    {
        throw LiteException.InvalidDataType("_id", id);
    }

    _log.Write(Logger.COMMAND, "insert document on '{0}' :: _id = {1}", col.CollectionName, id.RawValue);

    // serialize object
    var bytes = _bsonWriter.Serialize(doc);

    // storage in data pages - returns dataBlock address
    var dataBlock = _data.Insert(col, bytes);

    // store id in a PK index [0 array]
    var pk = _indexer.AddNode(col.PK, id, null);

    // do link between index <-> data block
    pk.DataBlock = dataBlock.Position;

    // for each index, insert new IndexNode
    foreach (var index in col.GetIndexes(false))
    {
        // for each index, get all keys (support now multi-key) - gets distinct values only
        // if index are unique, get single key only
        var expr = new BsonExpression(index.Expression);
        var keys = expr.Execute(doc, true);

        // do a loop with all keys (multi-key supported)
        foreach (var key in keys)
        {
            // insert node
            var node = _indexer.AddNode(index, key, pk);

            // link my index node to data block address
            node.DataBlock = dataBlock.Position;
        }
    }
}
/// <summary>
/// Implement internal update document
/// </summary>
/// <param name="col">Collection page the document belongs to</param>
/// <param name="doc">Full replacement document; must carry a valid "_id"</param>
/// <returns>true when the document was found and updated; false when no document with that _id exists</returns>
private bool UpdateDocument(CollectionPage col, BsonDocument doc)
{
    // normalize id before find
    var id = doc["_id"];

    // validate id for null, min/max values
    if (id.IsNull || id.IsMinValue || id.IsMaxValue)
    {
        throw LiteException.InvalidDataType("_id", id);
    }

    _log.Write(Logger.COMMAND, "update document on '{0}' :: _id = {1}", col.CollectionName, id.RawValue);

    // find indexNode from pk index
    var pkNode = _indexer.Find(col.PK, id, false, Query.Ascending);

    // if not found document, no updates
    if (pkNode == null) { return (false); }

    // serialize document in bytes
    var bytes = _bsonWriter.Serialize(doc);

    // update data storage
    var dataBlock = _data.Update(col, pkNode.DataBlock, bytes);

    // get all non-pk index nodes from this data block
    // (snapshotted with ToArray BEFORE any index mutation below)
    var allNodes = _indexer.GetNodeList(pkNode, false).ToArray();

    // delete/insert indexes - do not touch on PK
    foreach (var index in col.GetIndexes(false))
    {
        var expr = new BsonExpression(index.Expression);

        // getting all keys do check
        var keys = expr.Execute(doc).ToArray();

        // get a list of to delete nodes (using ToArray to resolve now)
        // nodes of this index slot whose key no longer appears in the document
        var toDelete = allNodes
            .Where(x => x.Slot == index.Slot && !keys.Any(k => k == x.Key))
            .ToArray();

        // get a list of to insert nodes (using ToArray to resolve now)
        // keys of the document that have no node in this index slot yet
        var toInsert = keys
            .Where(x => !allNodes.Any(k => k.Slot == index.Slot && k.Key == x))
            .ToArray();

        // delete changed index nodes
        foreach (var node in toDelete)
        {
            _indexer.Delete(index, node.Position);
        }

        // insert new nodes
        foreach (var key in toInsert)
        {
            // and add a new one
            var node = _indexer.AddNode(index, key, pkNode);

            // link my node to data block
            node.DataBlock = dataBlock.Position;
        }
    }

    return (true);
}
/// <summary>
/// Builds the exception thrown when an expression is not of the required type.
/// </summary>
internal static LiteException InvalidExpressionType(BsonExpression expr, BsonExpressionType type)
{
    const string message = "Expression '{0}' must be a {1} type.";

    return new LiteException(INVALID_EXPRESSION_TYPE, message, expr.Source, type);
}
/// <summary>
/// Returns true if query returns any document. This method does not deserialize any document. Needs indexes on query expression
/// </summary>
public bool Exists(string predicate, BsonDocument parameters)
{
    var expr = BsonExpression.Create(predicate, parameters);

    return this.Exists(expr);
}
/// <summary>
/// Returns true if query returns any document. This method does not deserialize any document. Needs indexes on query expression
/// </summary>
public bool Exists(string predicate, params BsonValue[] args)
{
    var expr = BsonExpression.Create(predicate, args);

    return this.Exists(expr);
}
/// <summary>
/// Filters a sequence of documents based on a predicate expression
/// </summary>
public ILiteQueryable<T> Where(string predicate, BsonDocument parameters)
{
    var expr = BsonExpression.Create(predicate, parameters);

    _query.Where.Add(expr);

    return this;
}