public void TestRdd()
{
    // Checks that DataFrame.Rdd is built through the proxies on first access
    // and that a second access does not call the proxies again.

    // arrange
    const string peopleSchemaJson = @" { ""type"" : ""struct"", ""fields"" : [{ ""name"" : ""age"", ""type"" : ""long"", ""nullable"" : true, ""metadata"" : { } }, { ""name"" : ""id"", ""type"" : ""string"", ""nullable"" : true, ""metadata"" : { } }, { ""name"" : ""name"", ""type"" : ""string"", ""nullable"" : true, ""metadata"" : { } } ] }";

    var structTypeProxyMock = new Mock<IStructTypeProxy>();
    structTypeProxyMock.Setup(proxy => proxy.ToJson()).Returns(peopleSchemaJson);
    mockDataFrameProxy.Setup(proxy => proxy.GetSchema()).Returns(structTypeProxyMock.Object);

    var parsedSchema = RowSchema.ParseRowSchemaFromJson(peopleSchemaJson);
    var singleRow = new RowImpl(new object[] { 34, "123", "Bill" }, parsedSchema);
    var rows = new object[] { singleRow };
    mockDataFrameProxy.Setup(proxy => proxy.JavaToCSharp()).Returns(new MockRddProxy(rows));

    var sparkContext = new SparkContext(null);
    var dataFrame = new DataFrame(mockDataFrameProxy.Object, sparkContext);

    // act: first access goes through the proxies
    var firstRdd = dataFrame.Rdd;

    // assert
    Assert.IsNotNull(firstRdd);
    mockDataFrameProxy.Verify(proxy => proxy.JavaToCSharp(), Times.Once);
    structTypeProxyMock.Verify(proxy => proxy.ToJson(), Times.Once);

    // Clear recorded calls so the second access is verified in isolation.
    mockDataFrameProxy.Reset();
    structTypeProxyMock.Reset();

    // act: second access
    var secondRdd = dataFrame.Rdd;

    // assert: no proxy call happened this time
    Assert.IsNotNull(secondRdd);
    mockDataFrameProxy.Verify(proxy => proxy.JavaToCSharp(), Times.Never);
    structTypeProxyMock.Verify(proxy => proxy.ToJson(), Times.Never);
}
/// <summary>
/// Reads the source table, requests movie recommendations for the distinct ids found in each
/// row's "LastMoviesId" column, and writes at most <c>_limit</c> recommendations per source row
/// to the target table.
/// </summary>
/// <param name="token">
/// Cancellation token. It is forwarded to the table store calls and checked once per source row.
/// </param>
public async Task RunAsync(CancellationToken token)
{
    var sourceRows = await _tableStore.GetRowsAsync(_sourceTableName, token);
    var targetRows = new List<DataRow>();
    var targetSchema = new RowSchema(
        new FieldDefinition("ContactId", FieldKind.Key, FieldDataType.Guid),
        new FieldDefinition("MovieId", FieldKind.Key, FieldDataType.String),
        new FieldDefinition("Title", FieldKind.Attribute, FieldDataType.String),
        new FieldDefinition("Overview", FieldKind.Attribute, FieldDataType.String),
        new FieldDefinition("Image", FieldKind.Attribute, FieldDataType.String),
        new FieldDefinition("ImdbRating", FieldKind.Attribute, FieldDataType.String)
    );

    while (await sourceRows.MoveNext())
    {
        foreach (var row in sourceRows.Current)
        {
            token.ThrowIfCancellationRequested();

            // Comma-separated ids; only unique, non-empty entries are kept.
            var movieIds = row["LastMoviesId"].ToString()
                .Split(new[] { "," }, StringSplitOptions.RemoveEmptyEntries)
                .Distinct()
                .ToArray();
            if (movieIds.Length == 0)
            {
                continue;
            }

            // Keep the first recommendation for each netflixid so there are no duplicates.
            var recommendations = _movieRecommendationsProvider.GetRecommendations(movieIds)
                .GroupBy(m => m.netflixid)
                .Select(g => g.First())
                .ToList();

            // Emit at most _limit rows, and never more than what was returned.
            for (var i = 0; i < _limit && i < recommendations.Count; i++)
            {
                var recommendation = recommendations[i];
                var targetRow = new DataRow(targetSchema);
                targetRow.SetGuid(0, row.GetGuid(0));
                targetRow.SetString(1, recommendation.netflixid);
                targetRow.SetString(2, recommendation.title);
                targetRow.SetString(3, recommendation.synopsis);
                targetRow.SetString(4, recommendation.image);
                targetRow.SetString(5, recommendation.rating);
                targetRows.Add(targetRow);
            }
        }
    }

    var tableDefinition = new TableDefinition(_targetTableName, targetSchema);
    var targetTable = new InMemoryTableData(tableDefinition, targetRows);
    await _tableStore.PutTableAsync(targetTable, TimeSpan.FromMinutes(30), token);
}
/// <summary>
/// Reads the source table, requests recommendations for each row's "LastMovieId", scores every
/// recommendation with the ML model loaded from <c>MODEL_FILEPATH</c>, and writes at most
/// <c>_limit</c> recommendations per source row to the target table. The predicted score is
/// appended to the stored title.
/// </summary>
/// <param name="token">
/// Cancellation token. It is forwarded to the table store calls and checked once per source row.
/// </param>
public async Task RunAsync(CancellationToken token)
{
    var sourceRows = await _tableStore.GetRowsAsync(_sourceTableName, token);
    var targetRows = new List<DataRow>();
    var targetSchema = new RowSchema(
        new FieldDefinition("ContactId", FieldKind.Key, FieldDataType.Guid),
        new FieldDefinition("MovieId", FieldKind.Key, FieldDataType.Int64),
        new FieldDefinition("Title", FieldKind.Attribute, FieldDataType.String),
        new FieldDefinition("Overview", FieldKind.Attribute, FieldDataType.String),
        new FieldDefinition("PosterPath", FieldKind.Attribute, FieldDataType.String)
    );

    // ML context; the input schema returned by Load is not needed here.
    MLContext mlContext = new MLContext();
    ITransformer mlModel = mlContext.Model.Load(GetAbsolutePath(MODEL_FILEPATH), out DataViewSchema _);

    // The prediction engine is disposable; release it once all rows are scored.
    using (var predEngine = mlContext.Model.CreatePredictionEngine<TacoFlixML.Model.DataModels.ModelInput, ModelOutput>(mlModel))
    {
        while (await sourceRows.MoveNext())
        {
            foreach (var row in sourceRows.Current)
            {
                token.ThrowIfCancellationRequested();

                var movieId = int.Parse(row["LastMovieId"].ToString());
                var recommendations = _movieRecommendationsProvider.GetRecommendations(movieId);

                // Emit at most _limit rows, and never more than what was returned.
                for (var i = 0; i < _limit && i < recommendations.Count; i++)
                {
                    var recommendation = recommendations[i];

                    // ML: predict a score for the recommended movie.
                    ModelInput input = new ModelInput();
                    input.Movieid = recommendation.Id;
                    ModelOutput predictionResult = predEngine.Predict(input);

                    var targetRow = new DataRow(targetSchema);
                    targetRow.SetGuid(0, row.GetGuid(0));
                    targetRow.SetInt64(1, recommendation.Id);
                    targetRow.SetString(2, recommendation.Title + "score=" + predictionResult.Score);
                    targetRow.SetString(3, recommendation.Overview);
                    targetRow.SetString(4, recommendation.PosterPath);
                    targetRows.Add(targetRow);
                }
            }
        }
    }

    var tableDefinition = new TableDefinition(_targetTableName, targetSchema);
    var targetTable = new InMemoryTableData(tableDefinition, targetRows);
    await _tableStore.PutTableAsync(targetTable, TimeSpan.FromMinutes(30), token);
}
/// <summary>
/// Initialize a new instance of <see cref="TableObserver"/> with caller-supplied primary key columns.
/// </summary>
/// <param name="db">The database whose configuration supplies the connection string and trigger name template</param>
/// <param name="sqlObjectName">The name of the observed object</param>
/// <param name="primaryKeyColumns">Primary key columns handed to the row schema</param>
/// <param name="memoryTrigger">Passed through unchanged to <c>SetupDatabaseSchema</c></param>
public TableObserver(Database db, SqlObjectName sqlObjectName, string[] primaryKeyColumns, bool memoryTrigger)
{
    _db = db;
    SqlObjectName = sqlObjectName;

    var config = db.Config;
    Schema = new RowSchema(config.DatabaseConnectionString, sqlObjectName, primaryKeyColumns);

    // One trigger name per operation, all built from the same template.
    var template = config.TriggerNameTemplate;
    var schemaName = sqlObjectName.Schema;
    var objectName = sqlObjectName.Name;
    _insertTriggerName = string.Format(template, schemaName, "insert", objectName);
    _deleteTriggerName = string.Format(template, schemaName, "delete", objectName);
    _updateTriggerName = string.Format(template, schemaName, "update", objectName);

    SetupDatabaseSchema(memoryTrigger);
}
/// <summary>
/// Executes <c>Query</c>, builds the row schema from the resulting reader and loads every
/// returned row into the internal dictionary, keyed by the row's key.
/// </summary>
/// <returns>An array containing the rows that were loaded.</returns>
public override IReadOnlyCollection<Row> Initialize()
{
    var connectionString = Db.Config.DatabaseConnectionString;

    connectionString.WithReader(Query, dataReader =>
    {
        // The schema is built from the reader, so it must exist before any row is read.
        Schema = new RowSchema(dataReader, PrimaryKeyColumns);
        _data = new ReducableDictionary<object[], Row>(Schema.RowKeyEqualityComparer);

        while (dataReader.Read())
        {
            var loaded = Schema.ReadRow(dataReader);
            _data.Add(loaded.Key, loaded);
        }
    });

    var snapshot = _data.Values.ToArray();
    return snapshot;
}
/// <summary>
/// Groups <c>_columns</c> into rows and builds a schema holding one cell per column, produced by
/// <paramref name="cellTemplateSelector"/>. A column whose <c>ColSpanBehavior</c> is
/// <c>ColSpanBehavior.Row</c> closes the current group: the columns already in the group get
/// their row span raised, the closing column gets a column span, and the next column starts a
/// new group.
/// </summary>
/// <typeparam name="TCellType">Cell type stored in the schema.</typeparam>
/// <param name="cellTemplateSelector">Selects the cell to store for a column.</param>
/// <returns>The schema with one list of cells per column group.</returns>
private RowSchema<TCellType> CreateRowSchema<TCellType>(Func<DataTableColumn, TCellType> cellTemplateSelector)
    where TCellType : TableCellBase<TCellType>
{
    var columnGroups = new List<List<DataTableColumn>>();
    List<DataTableColumn> currentGroup = null;
    bool openNewGroup = true;

    foreach (var column in _columns)
    {
        // Spans are recomputed from scratch on every call.
        column.ItemTemplate.RowSpan = 0;
        column.ItemTemplate.ColSpan = 0;
        column.HeaderTemplate.RowSpan = 0;
        column.HeaderTemplate.ColSpan = 0;

        if (openNewGroup)
        {
            openNewGroup = false;
            currentGroup = new List<DataTableColumn>();
            columnGroups.Add(currentGroup);
        }

        if (column.ColSpanBehavior == ColSpanBehavior.Row)
        {
            // Columns already in this group grow by one row (at least 2).
            foreach (var earlier in currentGroup)
            {
                earlier.HeaderTemplate.RowSpan = Math.Max(2, earlier.HeaderTemplate.RowSpan + 1);
                earlier.ItemTemplate.RowSpan = Math.Max(2, earlier.ItemTemplate.RowSpan + 1);
            }

            var span = _columns.Count - currentGroup.Count - 1;
            column.HeaderTemplate.ColSpan = span;
            column.ItemTemplate.ColSpan = span;

            // The closing column still joins the current group; the next column opens a new one.
            openNewGroup = true;
        }

        currentGroup.Add(column);
    }

    var schema = new RowSchema<TCellType>();
    foreach (var columnGroup in columnGroups)
    {
        var cells = new List<TCellType>();
        schema.Add(cells);

        foreach (var member in columnGroup)
        {
            cells.Add(cellTemplateSelector(member));
        }
    }

    return schema;
}
/// <summary>
/// Reads the source table, asks the music recommender for albums based on the artist ids in each
/// row's "Artists" column, and writes one target row per recommended album.
/// </summary>
/// <param name="token">
/// Cancellation token. It is forwarded to the table store calls and checked once per source row.
/// </param>
public async Task RunAsync(CancellationToken token)
{
    // Retrieve the source data from the projection.
    var sourceRows = await _tableStore.GetRowsAsync(_sourceTableName, token);

    // Define the target table schema we'll be populating.
    var targetRows = new List<DataRow>();
    var targetSchema = new RowSchema(
        new FieldDefinition("ContactID", FieldKind.Key, FieldDataType.Guid),
        new FieldDefinition("AlbumID", FieldKind.Key, FieldDataType.String),
        new FieldDefinition("AlbumName", FieldKind.Attribute, FieldDataType.String),
        new FieldDefinition("ArtistID", FieldKind.Attribute, FieldDataType.String),
        new FieldDefinition("ArtistName", FieldKind.Attribute, FieldDataType.String)
    );

    // Iterate the source data.
    while (await sourceRows.MoveNext())
    {
        foreach (var row in sourceRows.Current)
        {
            token.ThrowIfCancellationRequested();

            // Retrieve the IDs of the album artists from the projected data (comma-separated).
            var artistIds = row["Artists"].ToString().Split(new[] { "," }, StringSplitOptions.RemoveEmptyEntries);

            // Call the recommender service.
            var recommendations = await _musicRecommender.GetRecommendationsAsync(artistIds, _limit);

            // Add a target data row for each recommendation.
            foreach (var album in recommendations)
            {
                var targetRow = new DataRow(targetSchema);
                targetRow.SetGuid(0, row.GetGuid(0));
                targetRow.SetString(1, album.AlbumId);
                targetRow.SetString(2, album.AlbumName);
                targetRow.SetString(3, album.ArtistId);
                targetRow.SetString(4, album.ArtistName);
                targetRows.Add(targetRow);
            }
        }
    }

    // Populate the rows into the target table.
    var tableDefinition = new TableDefinition(_targetTableName, targetSchema);
    var targetTable = new InMemoryTableData(tableDefinition, targetRows);
    await _tableStore.PutTableAsync(targetTable, TimeSpan.FromMinutes(30), token);
}
/// <summary>
/// Verify the schema of people dataframe: a struct with four nullable columns
/// (name: string, id: string, age: long, address: struct containing state and city).
/// </summary>
/// <param name="schema"> RowSchema of people DataFrame </param>
internal static void VerifySchemaOfPeopleDataFrame(RowSchema schema)
{
    Assert.IsNotNull(schema);
    Assert.AreEqual("struct", schema.type);
    Assert.IsNotNull(schema.columns);
    Assert.AreEqual(4, schema.columns.Count);

    // name
    var nameColSchema = schema.columns.Find(c => c.name.Equals("name"));
    Assert.IsNotNull(nameColSchema);
    Assert.AreEqual("name", nameColSchema.name);
    Assert.IsTrue(nameColSchema.nullable);
    Assert.AreEqual("string", nameColSchema.type.ToString());

    // id
    var idColSchema = schema.columns.Find(c => c.name.Equals("id"));
    Assert.IsNotNull(idColSchema);
    Assert.AreEqual("id", idColSchema.name);
    Assert.IsTrue(idColSchema.nullable);
    // Check the id column's own type (this previously re-checked the name column).
    Assert.AreEqual("string", idColSchema.type.ToString());

    // age
    var ageColSchema = schema.columns.Find(c => c.name.Equals("age"));
    Assert.IsNotNull(ageColSchema);
    Assert.AreEqual("age", ageColSchema.name);
    Assert.IsTrue(ageColSchema.nullable);
    Assert.AreEqual("long", ageColSchema.type.ToString());

    // address
    var addressColSchema = schema.columns.Find(c => c.name.Equals("address"));
    Assert.IsNotNull(addressColSchema);
    Assert.AreEqual("address", addressColSchema.name);
    Assert.IsTrue(addressColSchema.nullable);
    Assert.IsNotNull(addressColSchema.type);
    Assert.AreEqual("struct", addressColSchema.type.type);
    Assert.IsNotNull(addressColSchema.type.columns.Find(c => c.name.Equals("state")));
    Assert.IsNotNull(addressColSchema.type.columns.Find(c => c.name.Equals("city")));
}
/// <summary>
/// Print given rows from people dataframe, and verify schema and contents if validation is enabled.
/// The schema and the first row are captured from the first element of <paramref name="rows"/>.
/// </summary>
/// <param name="rows"> Rows from people DataFrame </param>
/// <param name="num"> Expected number of rows from people DataFrame </param>
internal static void PrintAndVerifyPeopleDataFrameRows(IEnumerable<Row> rows, int num)
{
    Console.WriteLine("peopleDataFrame:");

    var count = 0;
    RowSchema schema = null;
    Row firstRow = null;

    foreach (var row in rows)
    {
        if (count == 0)
        {
            firstRow = row;
            schema = row.GetSchema();
            Console.WriteLine("schema: {0}", schema);
        }

        // output each row
        Console.WriteLine(row);
        Console.Write("id: {0}, name: {1}, age: {2}", row.GetAs<string>("id"), row.GetAs<string>("name"), row.GetAs<int>("age"));

        var address = row.GetAs<Row>("address");
        if (address != null)
        {
            Console.WriteLine(", state: {0}, city: {1}", address.GetAs<string>("state"), address.GetAs<string>("city"));
        }
        else
        {
            // No address: just terminate the line started by Console.Write above.
            Console.WriteLine();
        }

        count++;
    }

    if (SparkCLRSamples.Configuration.IsValidationEnabled)
    {
        // Expected value first, actual second, so a failure message reads correctly.
        Assert.AreEqual(num, count);
        VerifySchemaOfPeopleDataFrame(schema);
        VerifyFirstRowOfPeopleDataFrame(firstRow);
    }
}
/// <summary>
/// Reads the source table, looks up product recommendations for each row's "ProductId", and
/// writes at most <c>_limit</c> recommendations (product id and score) per source row to the
/// target table.
/// </summary>
/// <param name="token">
/// Cancellation token. It is forwarded to the table store calls and checked once per source row.
/// </param>
public async Task RunAsync(CancellationToken token)
{
    var sourceRows = await _tableStore.GetRowsAsync(_sourceTableName, token);
    var targetRows = new List<DataRow>();
    var targetSchema = new RowSchema(
        new FieldDefinition("ContactId", FieldKind.Key, FieldDataType.Guid),
        new FieldDefinition("ProductId", FieldKind.Key, FieldDataType.Int64),
        new FieldDefinition("Score", FieldKind.Attribute, FieldDataType.Double)
    );

    while (await sourceRows.MoveNext())
    {
        foreach (var row in sourceRows.Current)
        {
            token.ThrowIfCancellationRequested();

            var purchasedProductId = uint.Parse(row["ProductId"].ToString());
            var recommendations = GetRecommendList(purchasedProductId, _limit);

            // Fewer than _limit recommendations may come back; never index past what exists.
            var available = recommendations.Count();
            for (var i = 0; i < _limit && i < available; i++)
            {
                var targetRow = new DataRow(targetSchema);
                targetRow.SetGuid(0, row.GetGuid(0));
                targetRow.SetInt64(1, recommendations[i].RecommendedItemId);
                targetRow.SetDouble(2, recommendations[i].Score);
                targetRows.Add(targetRow);
            }
        }
    }

    var tableDefinition = new TableDefinition(_targetTableName, targetSchema);
    var targetTable = new InMemoryTableData(tableDefinition, targetRows);
    await _tableStore.PutTableAsync(targetTable, TimeSpan.FromMinutes(30), token);
}
/// <summary>
/// Builds the row schema for the view, creates the cache table for it, registers a dispose
/// action that drops the cache table, prepares the merge helper and returns every row
/// currently in the cache table.
/// </summary>
/// <returns>The rows read from the cache table.</returns>
public override IReadOnlyCollection<Row> Initialize()
{
    var connectionString = Db.Config.DatabaseConnectionString;
    var viewName = View.ViewName;

    Schema = new RowSchema(connectionString, viewName, PrimaryKeyColumns);
    CreateCacheTable(connectionString, viewName, _cacheTableName, PrimaryKeyColumns);

    // Drop the cache table again when the attached dispose action runs.
    DisposeHelper.Attach(() =>
        connectionString.WithConnection(connection =>
            connection.ExecuteNonQuery($"DROP TABLE {_cacheTableName}")));

    var columnNames = Schema.ColumnNames.Values.ToArray();
    _mergeSelectViewChanges = new MergeSelectViewChanges(_cacheTableName, viewName, columnNames, PrimaryKeyColumns, PrimaryKeyColumns);

    var loadedRows = new List<Row>();
    connectionString.WithReader($"SELECT * FROM {_cacheTableName}", reader =>
    {
        while (reader.Read())
        {
            var current = Schema.ReadRow(reader);
            loadedRows.Add(current);
        }
    });

    return loadedRows;
}
/// <summary>
/// Initialize a new instance of <see cref="TableObserver"/>. Whether the table is memory
/// optimized is looked up in <c>sys.tables</c> and passed to <c>SetupDatabaseSchema</c>.
/// </summary>
/// <param name="db">The database that contains table</param>
/// <param name="sqlObjectName">The name of the table</param>
/// <exception cref="System.InvalidOperationException">
/// The <c>sys.tables</c> lookup returned no boolean value for the given table.
/// </exception>
public TableObserver(Database db, SqlObjectName sqlObjectName)
{
    _db = db;
    SqlObjectName = sqlObjectName;
    Schema = new RowSchema(db.Config.DatabaseConnectionString, sqlObjectName);
    _insertTriggerName = string.Format(db.Config.TriggerNameTemplate, sqlObjectName.Schema, "insert", sqlObjectName.Name);
    _deleteTriggerName = string.Format(db.Config.TriggerNameTemplate, sqlObjectName.Schema, "delete", sqlObjectName.Name);
    _updateTriggerName = string.Format(db.Config.TriggerNameTemplate, sqlObjectName.Schema, "update", sqlObjectName.Name);

    bool isMemoryTable = false;
    db.Config.DatabaseConnectionString.WithConnection(con =>
    {
        using (var command = con.CreateCommand())
        {
            command.CommandText = "SELECT is_memory_optimized FROM sys.tables WHERE name = @name And schema_id = schema_id(@schema)";
            command.Parameters.AddWithValue("name", sqlObjectName.Name);
            command.Parameters.AddWithValue("schema", sqlObjectName.Schema);

            // ExecuteScalar returns null when no row matches; unboxing that directly would
            // surface as an uninformative NullReferenceException.
            object result = command.ExecuteScalar();
            if (!(result is bool))
            {
                throw new System.InvalidOperationException(string.Format(
                    "Could not determine whether table [{0}].[{1}] is memory optimized: no matching entry in sys.tables.",
                    sqlObjectName.Schema,
                    sqlObjectName.Name));
            }

            isMemoryTable = (bool)result;
        }
    });

    SetupDatabaseSchema(isMemoryTable);
}
/// <summary>
/// Returns the row schema, parsing it from the JSON form of <c>Schema</c> on the first call
/// and reusing the stored instance afterwards.
/// </summary>
/// <returns>The stored <see cref="RowSchema"/> instance.</returns>
private RowSchema GetRowSchema()
{
    // Already parsed: hand back the stored instance.
    if (rowSchema != null)
    {
        return rowSchema;
    }

    rowSchema = RowSchema.ParseRowSchemaFromJson(Schema.ToJson());
    return rowSchema;
}
/// <summary>
/// Connects to 127.0.0.1 on <paramref name="port"/> and reads length-prefixed pickled batches
/// until the stream ends, turning every unpickled item into a <see cref="Row"/>.
/// </summary>
/// <param name="port">Local TCP port to connect to.</param>
/// <param name="dataType">Schema attached to every row that is created.</param>
/// <returns>All rows read from the socket, in arrival order.</returns>
private List<Row> Collect(int port, RowSchema dataType)
{
    List<Row> items = new List<Row>();
    Unpickler unpickler = new Unpickler();

    // NetworkStream(Socket) does not own the socket, so the socket is disposed explicitly here.
    using (Socket sock = new Socket(SocketType.Stream, ProtocolType.Tcp))
    {
        sock.Connect("127.0.0.1", port);

        using (NetworkStream ns = new NetworkStream(sock))
        using (BinaryReader br = new BinaryReader(ns))
        {
            while (true)
            {
                // Each batch starts with a 4-byte length; an empty read means end of stream.
                byte[] buffer = br.ReadBytes(4);
                if (buffer.Length == 0)
                {
                    break;
                }

                //In JVM, Multibyte data items are always stored in big-endian order, where the high bytes come first
                //http://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html
                //So we should reverse the buffer if our system is little-endian
                //https://msdn.microsoft.com/en-us/library/system.bitconverter(v=vs.110).aspx
                if (BitConverter.IsLittleEndian)
                {
                    Array.Reverse(buffer);
                }

                int len = BitConverter.ToInt32(buffer, 0);
                byte[] data = br.ReadBytes(len);

                // NOTE(review): data comes from the local socket and is unpickled as-is; this
                // presumes the peer is trusted — confirm.
                // A direct cast makes an unexpected payload type fail with a clear cast error.
                foreach (var item in (object[])unpickler.loads(data))
                {
                    RowImpl row = new RowImpl(item, dataType);
                    items.Add(row);
                }
            }
        }
    }

    return items;
}