/// <summary>
/// Replaces the value of every target field in <paramref name="doc"/> with the key of the
/// category that represents that value.
/// </summary>
/// <remarks>
/// Categories are created on demand in <c>FieldCache</c>. Once a field already holds
/// <c>Limit</c> categories, values that have no category yet are mapped to
/// <c>EmptyFieldExtra</c>; values that already own a category keep it, so a given raw value
/// is always encoded the same way no matter when the document is processed.
/// </remarks>
/// <param name="doc">Document to encode; it is modified in place.</param>
public override void Apply(BsonDocument doc)
{
    foreach (var field in TargetFields)
    {
        var docFieldVal = doc[field.Name].ToString();
        var categories = FieldCache[field.Name];
        IFieldExtra extrasCategory;

        // The cap only applies to values without a category. Previously every value,
        // known or not, fell back to the placeholder as soon as the cap was reached.
        if (categories.Count >= Limit && !categories.ContainsKey(docFieldVal))
        {
            extrasCategory = EmptyFieldExtra;
        }
        else
        {
            extrasCategory = categories.GetOrAdd(docFieldVal, (key) =>
            {
                // NOTE(review): the new key is derived from the current category count; if
                // Apply can run concurrently for the same field, two values could be handed
                // the same key — confirm callers are single-threaded per field.
                var newExtra = new FieldExtra()
                {
                    Field = field,
                    Key = EncodeKey(categories.Count + 1),
                    Value = docFieldVal
                };
                if (field.Extras == null)
                {
                    field.Extras = new FieldExtras();
                }
                // Register the category on the field as well as in the cache.
                field.Extras.Extra.Add(newExtra);
                return newExtra;
            });
        }

        doc[field.Name] = extrasCategory.Key;
    }
}
/// <summary>
/// Expands every target field of <paramref name="doc"/> into indicator entries: for each
/// category registered on the field, an element named after the category key is written
/// with 1 when it is the category of the document's value and 0 otherwise.
/// </summary>
/// <remarks>
/// A category is created (and registered both in <c>FieldCache</c> and on the field's
/// extras) the first time a value is seen. Its key is the field name followed by the
/// one-based position of the new category.
/// </remarks>
/// <param name="doc">Document to encode; it is modified in place.</param>
public override void Apply(BsonDocument doc)
{
    foreach (var field in TargetFields)
    {
        var rawValue = doc[field.Name].ToString();
        var knownCategories = FieldCache[field.Name];

        // Look up the category for this value, creating it on first sight.
        IFieldExtra matched = knownCategories.GetOrAdd(rawValue, _ =>
        {
            var created = new FieldExtra()
            {
                Field = field,
                Key = $"{field.Name}{knownCategories.Count + 1}",
                Value = rawValue
            };

            if (field.Extras == null)
            {
                field.Extras = new FieldExtras();
            }

            field.Extras.Extra.Add(created);
            return created;
        });

        var matchedKey = matched.Key;

        // One indicator element per registered category of the field.
        foreach (var candidate in field.Extras.Extra)
        {
            if (candidate.Key == matchedKey)
            {
                doc[candidate.Key] = 1;
            }
            else
            {
                doc[candidate.Key] = 0;
            }
        }
    }
}
/// <summary>
/// Creates the encoding with the <c>FieldDataEncoding.BinaryIntId</c> mode, a category
/// limit of 20 and a placeholder extra whose value is "none".
/// </summary>
/// <param name="options">Encoding options forwarded to the base class.</param>
public BinaryCategoryEncoding(FieldEncodingOptions options)
    : base(options, FieldDataEncoding.BinaryIntId)
{
    const int defaultCategoryLimit = 20;
    const int placeholderId = 9;

    // Limit is assigned before EncodeKey is called, in case the key encoding reads it.
    Limit = defaultCategoryLimit;

    // NOTE(review): the placeholder uses id 9; any real category that is also encoded
    // from id 9 would share this key — confirm that cannot happen.
    var placeholder = new FieldExtra
    {
        Key = EncodeKey(placeholderId),
        Value = "none"
    };
    EmptyFieldExtra = placeholder;
}
/// <summary>
/// Registers one <c>FieldExtraType.Dummy</c> extra on every target field for each distinct
/// value of that field found in the integration's collection, then returns the integration.
/// </summary>
/// <remarks>
/// Distinct values are obtained with a <c>$group</c> aggregation keyed on the field. Values
/// that already have an extra on the field are skipped, so running the method again does
/// not append duplicates.
/// </remarks>
/// <param name="truncateDestination">When true, the integration's collection is dropped before it is read.</param>
/// <returns>The <c>Integration</c> instance of this encoder.</returns>
public override IIntegration GetEncodedIntegration(bool truncateDestination = false)
{
    var collection = Integration.Collection;
    var db = MongoHelper.GetDatabase();
    if (truncateDestination)
    {
        // NOTE(review): this drops the same collection that is aggregated below, which
        // would leave nothing to group — confirm this is the intended destination.
        db.DropCollection(collection);
    }

    var records = db.GetCollection<BsonDocument>(collection);
    foreach (var fld in TargetFields)
    {
        // Group on the field: each result document carries one distinct value in "_id".
        var group = new BsonDocument();
        group["$group"] = new BsonDocument() { { "_id", $"${fld.Name}" } };
        var pipeline = new List<BsonDocument> { group };
        var uniqueColumnResults = records.Aggregate<BsonDocument>(pipeline).ToList();

        // Make sure the extras container exists before it is read. The original only
        // checked for null after it had already dereferenced fld.Extras.
        if (fld.Extras == null)
        {
            fld.Extras = new FieldExtras();
        }

        var extras = fld.Extras.Extra;
        // Continue numbering after the extras that are already registered.
        int iVariation = extras == null ? 1 : extras.Count + 1;

        foreach (var uniqueValue in uniqueColumnResults)
        {
            var columnVal = uniqueValue["_id"].ToString();

            // Extras store the raw column value in Value and the encoded id in Key, so
            // an already-registered value has to be detected through Value.
            if (extras.Any(y => y.Value == columnVal))
            {
                continue;
            }

            var fieldExtra = new FieldExtra()
            {
                Field = fld,
                Key = EncodeKey(iVariation++),
                Value = columnVal,
                Type = FieldExtraType.Dummy
            };
            extras.Add(fieldExtra);
        }
    }

    return Integration;
}