コード例 #1
0
        // Checks that DataFrame.Rdd calls JavaToCSharp() on the proxy and ToJson() on the schema
        // proxy exactly once on first access, and calls neither of them on a second access.
        // mockDataFrameProxy is not declared in this method; it comes from the enclosing scope.
        public void TestRdd()
        {
            // arrange: schema JSON describing three nullable fields (age: long, id: string, name: string)
            const string jsonSchema = @"
                {
                  ""type"" : ""struct"",
                  ""fields"" : [{
                    ""name"" : ""age"",
                    ""type"" : ""long"",
                    ""nullable"" : true,
                    ""metadata"" : { }
                  }, {
                    ""name"" : ""id"",
                    ""type"" : ""string"",
                    ""nullable"" : true,
                    ""metadata"" : { }
                  }, {
                    ""name"" : ""name"",
                    ""type"" : ""string"",
                    ""nullable"" : true,
                    ""metadata"" : { }
                  } ]
                }";

            Mock <IStructTypeProxy> mockStructTypeProxy = new Mock <IStructTypeProxy>();

            // The schema proxy hands out the JSON above; the data frame proxy hands out the schema proxy.
            mockStructTypeProxy.Setup(m => m.ToJson()).Returns(jsonSchema);
            mockDataFrameProxy.Setup(m => m.GetSchema()).Returns(mockStructTypeProxy.Object);

            // A single row whose values line up with the age / id / name fields of the schema.
            var rows = new object[]
            {
                new RowImpl(new object[]
                {
                    34,
                    "123",
                    "Bill"
                },
                            RowSchema.ParseRowSchemaFromJson(jsonSchema))
            };

            mockDataFrameProxy.Setup(m => m.JavaToCSharp()).Returns(new MockRddProxy(rows));

            var sc        = new SparkContext(null);
            var dataFrame = new DataFrame(mockDataFrameProxy.Object, sc);

            // act
            var rdd = dataFrame.Rdd;

            // assert: the first access goes through both proxies exactly once
            Assert.IsNotNull(rdd);
            mockDataFrameProxy.Verify(m => m.JavaToCSharp(), Times.Once);
            mockStructTypeProxy.Verify(m => m.ToJson(), Times.Once);

            // Reset both mocks so the verifications below only see calls made from here on.
            mockDataFrameProxy.Reset();
            mockStructTypeProxy.Reset();

            // act + assert: the second access still yields an RDD without calling either proxy
            rdd = dataFrame.Rdd;
            Assert.IsNotNull(rdd);
            mockDataFrameProxy.Verify(m => m.JavaToCSharp(), Times.Never);
            mockStructTypeProxy.Verify(m => m.ToJson(), Times.Never);
        }
コード例 #2
0
        /// <summary>
        /// Reads the source table, asks the recommendations provider for movies based on each
        /// row's "LastMoviesId" list, and stores at most <c>_limit</c> recommendations per row
        /// in the target table.
        /// </summary>
        /// <param name="token">
        /// Cancellation token. It is forwarded to the table store calls and checked once per
        /// batch of source rows.
        /// </param>
        /// <returns>A task that completes when the target table has been written.</returns>
        public async Task RunAsync(CancellationToken token)
        {
            // Forward the caller's token instead of CancellationToken.None so the run can be cancelled.
            var sourceRows = await _tableStore.GetRowsAsync(_sourceTableName, token);

            var targetRows   = new List <DataRow>();
            var targetSchema = new RowSchema(
                new FieldDefinition("ContactId", FieldKind.Key, FieldDataType.Guid),
                new FieldDefinition("MovieId", FieldKind.Key, FieldDataType.String),
                new FieldDefinition("Title", FieldKind.Attribute, FieldDataType.String),
                new FieldDefinition("Overview", FieldKind.Attribute, FieldDataType.String),
                new FieldDefinition("Image", FieldKind.Attribute, FieldDataType.String),
                new FieldDefinition("ImdbRating", FieldKind.Attribute, FieldDataType.String)
                );

            while (await sourceRows.MoveNext())
            {
                token.ThrowIfCancellationRequested();

                foreach (var row in sourceRows.Current)
                {
                    // Comma-separated movie IDs; empty entries and duplicates are dropped.
                    var movieIds = row["LastMoviesId"].ToString()
                                   .Split(new[] { "," }, StringSplitOptions.RemoveEmptyEntries)
                                   .Distinct()
                                   .ToArray();

                    if (movieIds.Length == 0)
                    {
                        continue;
                    }

                    // Keep the first recommendation per netflixid, then cap the result at _limit.
                    var recommendations = _movieRecommendationsProvider.GetRecommendations(movieIds)
                                          .GroupBy(m => m.netflixid)
                                          .Select(g => g.FirstOrDefault())
                                          .Take(_limit);

                    foreach (var recommendation in recommendations)
                    {
                        var targetRow = new DataRow(targetSchema);
                        targetRow.SetGuid(0, row.GetGuid(0));
                        targetRow.SetString(1, recommendation.netflixid);
                        targetRow.SetString(2, recommendation.title);
                        targetRow.SetString(3, recommendation.synopsis);
                        targetRow.SetString(4, recommendation.image);
                        targetRow.SetString(5, recommendation.rating);
                        targetRows.Add(targetRow);
                    }
                }
            }

            var tableDefinition = new TableDefinition(_targetTableName, targetSchema);
            var targetTable     = new InMemoryTableData(tableDefinition, targetRows);
            await _tableStore.PutTableAsync(targetTable, TimeSpan.FromMinutes(30), token);
        }
コード例 #3
0
        /// <summary>
        /// Reads the source table, fetches recommendations for each row's "LastMovieId", scores
        /// each recommendation with the loaded ML model and stores at most <c>_limit</c>
        /// recommendations per row in the target table. The predicted score is appended to the
        /// stored title.
        /// </summary>
        /// <param name="token">
        /// Cancellation token. It is forwarded to the table store calls and checked once per
        /// batch of source rows.
        /// </param>
        /// <returns>A task that completes when the target table has been written.</returns>
        public async Task RunAsync(CancellationToken token)
        {
            // Forward the caller's token instead of CancellationToken.None so the run can be cancelled.
            var sourceRows = await _tableStore.GetRowsAsync(_sourceTableName, token);

            var targetRows   = new List <DataRow>();
            var targetSchema = new RowSchema(
                new FieldDefinition("ContactId", FieldKind.Key, FieldDataType.Guid),
                new FieldDefinition("MovieId", FieldKind.Key, FieldDataType.Int64),
                new FieldDefinition("Title", FieldKind.Attribute, FieldDataType.String),
                new FieldDefinition("Overview", FieldKind.Attribute, FieldDataType.String),
                new FieldDefinition("PosterPath", FieldKind.Attribute, FieldDataType.String)
                );

            // Load the model once and reuse a single prediction engine for every row.
            // The input schema returned by Load is not used by this method.
            MLContext    mlContext  = new MLContext();
            ITransformer mlModel    = mlContext.Model.Load(GetAbsolutePath(MODEL_FILEPATH), out DataViewSchema inputSchema);
            var          predEngine = mlContext.Model.CreatePredictionEngine <TacoFlixML.Model.DataModels.ModelInput, ModelOutput>(mlModel);

            while (await sourceRows.MoveNext())
            {
                token.ThrowIfCancellationRequested();

                foreach (var row in sourceRows.Current)
                {
                    var movieId         = int.Parse(row["LastMovieId"].ToString());
                    var recommendations = _movieRecommendationsProvider.GetRecommendations(movieId);

                    // Take caps the output at _limit and also copes with fewer recommendations than that.
                    foreach (var recommendation in recommendations.Take(_limit))
                    {
                        // Score the recommended movie with the model.
                        ModelInput input = new ModelInput();
                        input.Movieid = recommendation.Id;
                        ModelOutput predictionResult = predEngine.Predict(input);

                        var targetRow = new DataRow(targetSchema);
                        targetRow.SetGuid(0, row.GetGuid(0));
                        targetRow.SetInt64(1, recommendation.Id);
                        targetRow.SetString(2, recommendation.Title + "score=" + predictionResult.Score);
                        targetRow.SetString(3, recommendation.Overview);
                        targetRow.SetString(4, recommendation.PosterPath);

                        targetRows.Add(targetRow);
                    }
                }
            }

            var tableDefinition = new TableDefinition(_targetTableName, targetSchema);
            var targetTable     = new InMemoryTableData(tableDefinition, targetRows);
            await _tableStore.PutTableAsync(targetTable, TimeSpan.FromMinutes(30), token);
        }
コード例 #4
0
        /// <summary>
        /// Creates an observer for a database object using an explicit list of primary key columns.
        /// </summary>
        /// <param name="db">Database whose configuration supplies the connection string and the trigger name template.</param>
        /// <param name="sqlObjectName">Name of the observed object; its schema and name are formatted into the trigger names.</param>
        /// <param name="primaryKeyColumns">Primary key columns handed to the row schema.</param>
        /// <param name="memoryTrigger">Passed unchanged to <c>SetupDatabaseSchema</c>.</param>
        public TableObserver(Database db, SqlObjectName sqlObjectName, string[] primaryKeyColumns, bool memoryTrigger)
        {
            _db           = db;
            SqlObjectName = sqlObjectName;

            var config = db.Config;
            Schema = new RowSchema(config.DatabaseConnectionString, sqlObjectName, primaryKeyColumns);

            // One trigger name per operation, all built from the same configured template.
            var nameTemplate = config.TriggerNameTemplate;
            var schemaName   = sqlObjectName.Schema;
            var objectName   = sqlObjectName.Name;

            _insertTriggerName = string.Format(nameTemplate, schemaName, "insert", objectName);
            _deleteTriggerName = string.Format(nameTemplate, schemaName, "delete", objectName);
            _updateTriggerName = string.Format(nameTemplate, schemaName, "update", objectName);

            SetupDatabaseSchema(memoryTrigger);
        }
コード例 #5
0
 /// <summary>
 /// Runs <c>Query</c>, derives the row schema from the resulting reader, loads every row into
 /// <c>_data</c> keyed by the row key, and returns the loaded rows.
 /// </summary>
 /// <returns>An array holding the rows that were read.</returns>
 public override IReadOnlyCollection <Row> Initialize()
 {
     Db.Config.DatabaseConnectionString.WithReader(Query, dataReader =>
     {
         // The schema is taken from the reader itself; the dictionary compares keys with the schema's comparer.
         Schema = new RowSchema(dataReader, PrimaryKeyColumns);
         _data  = new ReducableDictionary <object[], Row>(Schema.RowKeyEqualityComparer);

         for (var hasRow = dataReader.Read(); hasRow; hasRow = dataReader.Read())
         {
             var current = Schema.ReadRow(dataReader);
             _data.Add(current.Key, current);
         }
     });

     return _data.Values.ToArray();
 }
コード例 #6
0
        /// <summary>
        /// Groups <c>_columns</c> into rows, recomputes the row/column spans of their header and
        /// item templates, and builds a schema with one cell per column.
        /// </summary>
        /// <typeparam name="TCellType">Cell type stored in the schema.</typeparam>
        /// <param name="cellTemplateSelector">Picks the cell to store for a given column.</param>
        /// <returns>The schema holding one cell list per group of columns.</returns>
        private RowSchema <TCellType> CreateRowSchema <TCellType>(Func <DataTableColumn, TCellType> cellTemplateSelector)
            where TCellType : TableCellBase <TCellType>
        {
            var schema = new RowSchema <TCellType>();

            var groups = new List <List <DataTableColumn> >();

            // null means the next column has to open a new group.
            List <DataTableColumn> currentGroup = null;

            foreach (var column in _columns)
            {
                // Spans are recomputed from scratch on every call.
                column.ItemTemplate.RowSpan   = 0;
                column.ItemTemplate.ColSpan   = 0;
                column.HeaderTemplate.RowSpan = 0;
                column.HeaderTemplate.ColSpan = 0;

                if (currentGroup == null)
                {
                    currentGroup = new List <DataTableColumn>();
                    groups.Add(currentGroup);
                }

                var closesGroup = column.ColSpanBehavior == ColSpanBehavior.Row;
                if (closesGroup)
                {
                    // Columns already in the group get a row span of at least 2.
                    foreach (var earlier in currentGroup)
                    {
                        earlier.HeaderTemplate.RowSpan = Math.Max(2, earlier.HeaderTemplate.RowSpan + 1);
                        earlier.ItemTemplate.RowSpan   = Math.Max(2, earlier.ItemTemplate.RowSpan + 1);
                    }

                    var span = _columns.Count - currentGroup.Count - 1;
                    column.HeaderTemplate.ColSpan = span;
                    column.ItemTemplate.ColSpan   = span;
                }

                // The closing column still belongs to the group it closes.
                currentGroup.Add(column);

                if (closesGroup)
                {
                    currentGroup = null;
                }
            }

            foreach (var group in groups)
            {
                // The cell list is registered with the schema first and filled afterwards.
                var cells = new List <TCellType>();
                schema.Add(cells);

                foreach (var groupColumn in group)
                {
                    cells.Add(cellTemplateSelector(groupColumn));
                }
            }

            return schema;
        }
コード例 #7
0
        /// <summary>
        /// Reads the source table, asks the music recommender for albums based on each row's
        /// "Artists" list, and stores one target row per recommended album.
        /// </summary>
        /// <param name="token">
        /// Cancellation token. It is forwarded to the table store calls and checked once per
        /// batch of source rows.
        /// </param>
        /// <returns>A task that completes when the target table has been written.</returns>
        public async Task RunAsync(CancellationToken token)
        {
            // Retrieve the source data from the projection.
            // The caller's token is forwarded (instead of CancellationToken.None) so the run can be cancelled.
            var sourceRows = await _tableStore.GetRowsAsync(_sourceTableName, token);

            // Define the target table schema we'll be populating into.
            var targetRows   = new List <DataRow>();
            var targetSchema = new RowSchema(
                new FieldDefinition("ContactID", FieldKind.Key, FieldDataType.Guid),
                new FieldDefinition("AlbumID", FieldKind.Key, FieldDataType.String),
                new FieldDefinition("AlbumName", FieldKind.Attribute, FieldDataType.String),
                new FieldDefinition("ArtistID", FieldKind.Attribute, FieldDataType.String),
                new FieldDefinition("ArtistName", FieldKind.Attribute, FieldDataType.String)
                );

            // Iterate the source data.
            while (await sourceRows.MoveNext())
            {
                token.ThrowIfCancellationRequested();

                foreach (var row in sourceRows.Current)
                {
                    // Retrieve the IDs of the album artists from the projected data (comma-separated, empty entries dropped).
                    var artistIds = row["Artists"].ToString().Split(new[] { "," }, StringSplitOptions.RemoveEmptyEntries);

                    // Call the recommender service.
                    var recommendations = await _musicRecommender.GetRecommendationsAsync(artistIds, _limit);

                    // Add a target data row for each recommendation.
                    foreach (var album in recommendations)
                    {
                        var targetRow = new DataRow(targetSchema);
                        targetRow.SetGuid(0, row.GetGuid(0));
                        targetRow.SetString(1, album.AlbumId);
                        targetRow.SetString(2, album.AlbumName);
                        targetRow.SetString(3, album.ArtistId);
                        targetRow.SetString(4, album.ArtistName);

                        targetRows.Add(targetRow);
                    }
                }
            }

            // Populate the rows into the target table.
            var tableDefinition = new TableDefinition(_targetTableName, targetSchema);
            var targetTable     = new InMemoryTableData(tableDefinition, targetRows);
            await _tableStore.PutTableAsync(targetTable, TimeSpan.FromMinutes(30), token);
        }
コード例 #8
0
ファイル: DataFrameSamples.cs プロジェクト: hhland/SparkCLR
        /// <summary>
        /// Verify the schema of people dataframe: a "struct" with exactly four nullable columns
        /// (name: string, id: string, age: long, address: struct containing state and city).
        /// </summary>
        /// <param name="schema"> RowSchema of people DataFrame </param>
        internal static void VerifySchemaOfPeopleDataFrame(RowSchema schema)
        {
            Assert.IsNotNull(schema);
            Assert.AreEqual("struct", schema.type);
            Assert.IsNotNull(schema.columns);
            Assert.AreEqual(4, schema.columns.Count);

            // name
            var nameColSchema = schema.columns.Find(c => c.name.Equals("name"));

            Assert.IsNotNull(nameColSchema);
            Assert.AreEqual("name", nameColSchema.name);
            Assert.IsTrue(nameColSchema.nullable);
            Assert.AreEqual("string", nameColSchema.type.ToString());

            // id
            var idColSchema = schema.columns.Find(c => c.name.Equals("id"));

            Assert.IsNotNull(idColSchema);
            Assert.AreEqual("id", idColSchema.name);
            Assert.IsTrue(idColSchema.nullable);
            // Check the id column's own type; this used to re-check the name column by mistake.
            Assert.AreEqual("string", idColSchema.type.ToString());

            // age
            var ageColSchema = schema.columns.Find(c => c.name.Equals("age"));

            Assert.IsNotNull(ageColSchema);
            Assert.AreEqual("age", ageColSchema.name);
            Assert.IsTrue(ageColSchema.nullable);
            Assert.AreEqual("long", ageColSchema.type.ToString());

            // address: a nested struct that must expose the state and city columns
            var addressColSchema = schema.columns.Find(c => c.name.Equals("address"));

            Assert.IsNotNull(addressColSchema);
            Assert.AreEqual("address", addressColSchema.name);
            Assert.IsTrue(addressColSchema.nullable);
            Assert.IsNotNull(addressColSchema.type);
            Assert.AreEqual("struct", addressColSchema.type.type);
            Assert.IsNotNull(addressColSchema.type.columns.Find(c => c.name.Equals("state")));
            Assert.IsNotNull(addressColSchema.type.columns.Find(c => c.name.Equals("city")));
        }
コード例 #9
0
ファイル: DataFrameSamples.cs プロジェクト: hhland/SparkCLR
        /// <summary>
        /// Print given rows from people dataframe, and verify the row count, the schema and the
        /// first row if validation is enabled.
        /// </summary>
        /// <param name="rows"> Rows from people DataFrame </param>
        /// <param name="num"> Expected number of rows from people DataFrame </param>
        internal static void PrintAndVerifyPeopleDataFrameRows(IEnumerable <Row> rows, int num)
        {
            Console.WriteLine("peopleDataFrame:");

            var       count    = 0;
            RowSchema schema   = null;
            Row       firstRow = null;

            foreach (var row in rows)
            {
                // Remember the first row and its schema for the verification after the loop.
                if (count == 0)
                {
                    firstRow = row;

                    schema = row.GetSchema();
                    Console.WriteLine("schema: {0}", schema);
                }

                // output each row
                Console.WriteLine(row);
                Console.Write("id: {0}, name: {1}, age: {2}", row.GetAs <string>("id"), row.GetAs <string>("name"), row.GetAs <int>("age"));

                // The address part is only printed when present; otherwise just end the line.
                var address = row.GetAs <Row>("address");
                if (address != null)
                {
                    Console.WriteLine(", state: {0}, city: {1}", address.GetAs <string>("state"), address.GetAs <string>("city"));
                }
                else
                {
                    Console.WriteLine();
                }

                count++;
            }

            if (SparkCLRSamples.Configuration.IsValidationEnabled)
            {
                // Expected value goes first so a failure message reports the two numbers the right way round.
                Assert.AreEqual(num, count);
                VerifySchemaOfPeopleDataFrame(schema);
                VerifyFirstRowOfPeopleDataFrame(firstRow);
            }
        }
コード例 #10
0
        /// <summary>
        /// Reads the source table, builds a recommendation list for each row's "ProductId" and
        /// stores at most <c>_limit</c> scored recommendations per row in the target table.
        /// </summary>
        /// <param name="token">
        /// Cancellation token. It is forwarded to the table store calls and checked once per
        /// batch of source rows.
        /// </param>
        /// <returns>A task that completes when the target table has been written.</returns>
        public async Task RunAsync(CancellationToken token)
        {
            // Forward the caller's token instead of CancellationToken.None so the run can be cancelled.
            var sourceRows = await _tableStore.GetRowsAsync(_sourceTableName, token);

            var targetRows   = new List <DataRow>();
            var targetSchema = new RowSchema(
                new FieldDefinition("ContactId", FieldKind.Key, FieldDataType.Guid),
                new FieldDefinition("ProductId", FieldKind.Key, FieldDataType.Int64),
                new FieldDefinition("Score", FieldKind.Attribute, FieldDataType.Double)
                );

            while (await sourceRows.MoveNext())
            {
                token.ThrowIfCancellationRequested();

                foreach (var row in sourceRows.Current)
                {
                    var purchasedProductId = uint.Parse(row["ProductId"].ToString());

                    var recommendations = GetRecommendList(purchasedProductId, _limit);

                    // Take(_limit) caps the output and stays in range when fewer than _limit
                    // recommendations come back; indexing 0.._limit-1 directly would throw then.
                    foreach (var recommendation in recommendations.Take(_limit))
                    {
                        var targetRow = new DataRow(targetSchema);
                        targetRow.SetGuid(0, row.GetGuid(0));
                        targetRow.SetInt64(1, recommendation.RecommendedItemId);
                        targetRow.SetDouble(2, recommendation.Score);

                        targetRows.Add(targetRow);
                    }
                }
            }

            var tableDefinition = new TableDefinition(_targetTableName, targetSchema);
            var targetTable     = new InMemoryTableData(tableDefinition, targetRows);
            await _tableStore.PutTableAsync(targetTable, TimeSpan.FromMinutes(30), token);
        }
コード例 #11
0
        /// <summary>
        /// Builds the row schema for the view, creates the cache table, registers a
        /// <c>DROP TABLE</c> for it with <c>DisposeHelper</c>, prepares the merge statement
        /// helper and returns the rows currently in the cache table.
        /// </summary>
        /// <returns>The rows read from the cache table.</returns>
        public override IReadOnlyCollection <Row> Initialize()
        {
            var connectionString = Db.Config.DatabaseConnectionString;
            var viewName         = View.ViewName;

            Schema = new RowSchema(connectionString, viewName, PrimaryKeyColumns);

            CreateCacheTable(connectionString, viewName, _cacheTableName, PrimaryKeyColumns);

            // Hand the drop statement for the cache table to DisposeHelper.
            DisposeHelper.Attach(
                () => connectionString.WithConnection(
                    connection => connection.ExecuteNonQuery($"DROP TABLE {_cacheTableName}")));

            var columnNames = Schema.ColumnNames.Values.ToArray();
            _mergeSelectViewChanges = new MergeSelectViewChanges(_cacheTableName, viewName, columnNames, PrimaryKeyColumns, PrimaryKeyColumns);

            var cachedRows = new List <Row>();

            connectionString.WithReader($"SELECT * FROM {_cacheTableName}", cacheReader =>
            {
                for (var hasRow = cacheReader.Read(); hasRow; hasRow = cacheReader.Read())
                {
                    cachedRows.Add(Schema.ReadRow(cacheReader));
                }
            });

            return cachedRows;
        }
コード例 #12
0
        /// <summary>
        /// Initialize a new instance of <see cref="TableObserver"/>. Whether the table is
        /// memory-optimized is looked up in <c>sys.tables</c> and passed to
        /// <c>SetupDatabaseSchema</c>.
        /// </summary>
        /// <param name="db">The database that contains the table</param>
        /// <param name="sqlObjectName">The name of the table</param>
        public TableObserver(Database db, SqlObjectName sqlObjectName)
        {
            _db           = db;
            SqlObjectName = sqlObjectName;

            var config = db.Config;
            Schema = new RowSchema(config.DatabaseConnectionString, sqlObjectName);

            // One trigger name per operation, all built from the same configured template.
            var nameTemplate = config.TriggerNameTemplate;
            var schemaName   = sqlObjectName.Schema;
            var objectName   = sqlObjectName.Name;

            _insertTriggerName = string.Format(nameTemplate, schemaName, "insert", objectName);
            _deleteTriggerName = string.Format(nameTemplate, schemaName, "delete", objectName);
            _updateTriggerName = string.Format(nameTemplate, schemaName, "update", objectName);

            var memoryOptimized = false;

            db.Config.DatabaseConnectionString.WithConnection(connection =>
            {
                using (var cmd = connection.CreateCommand())
                {
                    cmd.CommandText = "SELECT is_memory_optimized FROM sys.tables WHERE name = @name And schema_id = schema_id(@schema)";
                    cmd.Parameters.AddWithValue("name", sqlObjectName.Name);
                    cmd.Parameters.AddWithValue("schema", sqlObjectName.Schema);

                    // The scalar result is unboxed directly as a bool.
                    memoryOptimized = (bool)cmd.ExecuteScalar();
                }
            });

            SetupDatabaseSchema(memoryOptimized);
        }
コード例 #13
0
        /// <summary>
        /// Verify the schema of people dataframe: a "struct" with exactly four nullable columns
        /// (name: string, id: string, age: long, address: struct containing state and city).
        /// </summary>
        /// <param name="schema"> RowSchema of people DataFrame </param>
        internal static void VerifySchemaOfPeopleDataFrame(RowSchema schema)
        {
            Assert.IsNotNull(schema);
            Assert.AreEqual("struct", schema.type);
            Assert.IsNotNull(schema.columns);
            Assert.AreEqual(4, schema.columns.Count);

            // name
            var nameColSchema = schema.columns.Find(c => c.name.Equals("name"));
            Assert.IsNotNull(nameColSchema);
            Assert.AreEqual("name", nameColSchema.name);
            Assert.IsTrue(nameColSchema.nullable);
            Assert.AreEqual("string", nameColSchema.type.ToString());

            // id
            var idColSchema = schema.columns.Find(c => c.name.Equals("id"));
            Assert.IsNotNull(idColSchema);
            Assert.AreEqual("id", idColSchema.name);
            Assert.IsTrue(idColSchema.nullable);
            // Check the id column's own type; this used to re-check the name column by mistake.
            Assert.AreEqual("string", idColSchema.type.ToString());

            // age
            var ageColSchema = schema.columns.Find(c => c.name.Equals("age"));
            Assert.IsNotNull(ageColSchema);
            Assert.AreEqual("age", ageColSchema.name);
            Assert.IsTrue(ageColSchema.nullable);
            Assert.AreEqual("long", ageColSchema.type.ToString());

            // address: a nested struct that must expose the state and city columns
            var addressColSchema = schema.columns.Find(c => c.name.Equals("address"));
            Assert.IsNotNull(addressColSchema);
            Assert.AreEqual("address", addressColSchema.name);
            Assert.IsTrue(addressColSchema.nullable);
            Assert.IsNotNull(addressColSchema.type);
            Assert.AreEqual("struct", addressColSchema.type.type);
            Assert.IsNotNull(addressColSchema.type.columns.Find(c => c.name.Equals("state")));
            Assert.IsNotNull(addressColSchema.type.columns.Find(c => c.name.Equals("city")));
        }
コード例 #14
0
ファイル: DataFrame.cs プロジェクト: jango2015/SparkCLR
 // Returns the row schema, parsing it from the schema's JSON on the first call and
 // reusing the stored result on later calls.
 private RowSchema GetRowSchema()
 {
     if (rowSchema != null)
     {
         return rowSchema;
     }

     rowSchema = RowSchema.ParseRowSchemaFromJson(Schema.ToJson());
     return rowSchema;
 }
コード例 #15
0
ファイル: DataFrame.cs プロジェクト: jango2015/SparkCLR
        /// <summary>
        /// Connects to 127.0.0.1 on <paramref name="port"/>, reads length-prefixed pickled
        /// batches until the stream ends, and wraps every unpickled item in a
        /// <see cref="RowImpl"/> that uses <paramref name="dataType"/> as its schema.
        /// </summary>
        /// <param name="port">Local TCP port to read the data from.</param>
        /// <param name="dataType">Schema attached to every row that is created.</param>
        /// <returns>All rows read from the socket, in the order they were received.</returns>
        private List<Row> Collect(int port, RowSchema dataType)
        {
            List<Row> items = new List<Row>();
            Unpickler unpickler = new Unpickler();

            // NetworkStream(Socket) does not own the socket, so the socket needs its own using
            // block; otherwise it is never closed, including when Connect throws.
            using (Socket sock = new Socket(SocketType.Stream, ProtocolType.Tcp))
            {
                sock.Connect("127.0.0.1", port);

                using (NetworkStream ns = new NetworkStream(sock))
                using (BinaryReader br = new BinaryReader(ns))
                {
                    // Each batch is a 4-byte length header followed by that many payload bytes.
                    // An empty header read means the end of the stream was reached.
                    byte[] buffer = br.ReadBytes(4);
                    while (buffer.Length > 0)
                    {
                        //In JVM, Multibyte data items are always stored in big-endian order, where the high bytes come first
                        //http://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html
                        //So we should reverse the buffer if our system is little-endian
                        //https://msdn.microsoft.com/en-us/library/system.bitconverter(v=vs.110).aspx
                        if (BitConverter.IsLittleEndian)
                        {
                            Array.Reverse(buffer);
                        }

                        int len = BitConverter.ToInt32(buffer, 0);
                        byte[] data = br.ReadBytes(len);
                        foreach (var item in (unpickler.loads(data) as object[]))
                        {
                            RowImpl row = new RowImpl(item, dataType);
                            items.Add(row);
                        }

                        buffer = br.ReadBytes(4);
                    }
                }
            }

            return items;
        }