Ejemplo n.º 1
0
        /// <summary>
        /// Loads the "stage" activity of the sample pipeline through <c>Wizard.InitParameters</c>,
        /// checks the resulting linked services and datasets, and then executes a
        /// <c>TestActivity</c> with the loaded parameters.
        /// </summary>
        public void TestInitParameters()
        {
            // Arrange: sample pipeline on disk and the activity to extract from it.
            string samplePipeline = Path.GetFullPath(@"..\..\Data\logs-etl-pipeline.json");
            string targetActivity = "stage";
            Assert.IsTrue(File.Exists(samplePipeline));

            // Act: the wizard fills all three out-parameters.
            List<DfModels.LinkedService> linkedServices;
            List<DfModels.Dataset> datasets;
            DfModels.Activity activity;
            Wizard.InitParameters(samplePipeline, targetActivity,
                                  out linkedServices, out datasets, out activity);

            // Assert: expected number of services and datasets.
            Assert.AreEqual(2, linkedServices.Count);
            Assert.AreEqual(3, datasets.Count);

            // Both month datasets must be blob datasets.
            foreach (string blobDatasetName in new[] { "import-month-dataset", "fact-month-dataset" })
            {
                DfModels.Dataset blobDataset = datasets.Single(candidate => candidate.Name == blobDatasetName);
                Assert.IsInstanceOfType(blobDataset.Properties.TypeProperties, typeof(DfModels.AzureBlobDataset));
            }

            // The fact table must be a table dataset.
            DfModels.Dataset tableDataset = datasets.Single(candidate => candidate.Name == "fact-month-table");
            Assert.IsInstanceOfType(tableDataset.Properties.TypeProperties, typeof(DfModels.AzureTableDataset));

            // Finally run the activity with the loaded parameters.
            DfRuntime.IActivityLogger activityLogger = new ActivityLogger();
            DfRuntime.IDotNetActivity activityUnderTest = new TestActivity();
            activityUnderTest.Execute(linkedServices, datasets, activity, activityLogger);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates a blob location from a linked service, a dataset and the three date
        /// components of a slice. The arguments are stored as given; nothing is validated here.
        /// </summary>
        /// <param name="linkedService">Linked service to keep for this location.</param>
        /// <param name="dataset">Dataset to keep for this location.</param>
        /// <param name="sliceYear">Year component of the slice.</param>
        /// <param name="sliceMonth">Month component of the slice.</param>
        /// <param name="sliceDay">Day component of the slice.</param>
        public BlobLocation(LinkedService linkedService, Dataset dataset, string sliceYear, string sliceMonth,
            string sliceDay)
        {
            // Slice date components.
            _sliceDay = sliceDay;
            _sliceMonth = sliceMonth;
            _sliceYear = sliceYear;

            // Data Factory objects the location is resolved against.
            _dataset = dataset;
            _linkedService = linkedService;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Wraps an existing dataset. If the dataset has no properties object yet, an empty
        /// <c>DatasetProperties</c> is attached to it.
        /// </summary>
        /// <param name="dataset">The dataset to wrap; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="dataset"/> is null.</exception>
        public PSDataset(Dataset dataset)
        {
            if (dataset == null)
            {
                throw new ArgumentNullException("dataset");
            }

            this.dataset = dataset;

            // Guarantee a non-null Properties object on the wrapped dataset.
            if (this.dataset.Properties == null)
            {
                this.dataset.Properties = new DatasetProperties();
            }
        }
Ejemplo n.º 4
0
        // ToDo: enable the tests when we can set readonly provisioning state in test
        //[Fact]
        //[Trait(Category.AcceptanceType, Category.CheckIn)]
        public void CanCreateDataset()
        {
            // Arrange: the dataset the mocked client returns from CreateOrUpdateDataset.
            var expected = new Dataset
            {
                Name = datasetName,
                Properties = new DatasetProperties()
            };

            // Reading any file yields the canned JSON content.
            dataFactoriesClientMock
                .Setup(client => client.ReadJsonFileContent(It.IsAny<string>()))
                .Returns(rawJsonContent)
                .Verifiable();

            // CreatePSDataset runs its real implementation when called with the expected parameters.
            dataFactoriesClientMock
                .Setup(client => client.CreatePSDataset(
                    It.Is<CreatePSDatasetParameters>(p =>
                        p.Name == datasetName &&
                        p.ResourceGroupName == ResourceGroupName &&
                        p.DataFactoryName == DataFactoryName)))
                .CallBase()
                .Verifiable();

            // The service call itself is stubbed to return the expected dataset.
            dataFactoriesClientMock
                .Setup(client => client.CreateOrUpdateDataset(ResourceGroupName, DataFactoryName, datasetName, rawJsonContent))
                .Returns(expected)
                .Verifiable();

            // Act
            cmdlet.File = filePath;
            cmdlet.Force = true;
            cmdlet.ExecuteCmdlet();

            // Assert: every setup was used and exactly one matching PSDataset was written.
            dataFactoriesClientMock.VerifyAll();

            commandRuntimeMock.Verify(
                runtime => runtime.WriteObject(
                    It.Is<PSDataset>(written =>
                        ResourceGroupName == written.ResourceGroupName &&
                        DataFactoryName == written.DataFactoryName &&
                        expected.Name == written.DatasetName)),
                Times.Once());
        }
        /// <summary>
        /// Builds a provider for a custom database dataset. Reads the dataset's extra properties,
        /// downloads the assembly named by "assemblyFile" from the storage resolved through
        /// <paramref name="linkedServiceResolver"/>, loads it, and creates the DbContext named by
        /// "dbContextName" with the connection string of <paramref name="linkedService"/>.
        /// </summary>
        /// <param name="dataset">Dataset whose type properties must be a <c>CustomDataset</c> that carries the
        /// "packageLinkedService", "instanceName", "dbContextName" and "assemblyFile" extra properties.</param>
        /// <param name="linkedService">Linked service whose type properties must be an
        /// <c>AzureSqlDatabaseLinkedService</c>.</param>
        /// <param name="linkedServiceResolver">Resolves a linked service by name; used to locate the
        /// storage that holds the assembly.</param>
        /// <exception cref="ArgumentNullException">One of the arguments is null.</exception>
        /// <exception cref="ArgumentException">The dataset or linked service is not of the expected kind.</exception>
        public CustomDbDatasetProvider(Dataset dataset, LinkedService linkedService, Func<string, LinkedService> linkedServiceResolver)
        {
            if (dataset == null)
            {
                throw new ArgumentNullException(nameof(dataset));
            }

            if (linkedService == null)
            {
                throw new ArgumentNullException(nameof(linkedService));
            }

            if (linkedServiceResolver == null)
            {
                throw new ArgumentNullException(nameof(linkedServiceResolver));
            }

            var props = dataset.Properties?.TypeProperties as CustomDataset;
            if (props == null)
            {
                throw new ArgumentException("The dataset's type properties must be a CustomDataset.", nameof(dataset));
            }

            // Validate the SQL linked service up front so a misconfiguration fails before
            // anything is downloaded or loaded.
            var sqlService = linkedService.Properties?.TypeProperties as AzureSqlDatabaseLinkedService;
            if (sqlService == null)
            {
                throw new ArgumentException(
                    "The linked service's type properties must be an AzureSqlDatabaseLinkedService.",
                    nameof(linkedService));
            }

            var packageLinkedServiceName = props.ServiceExtraProperties["packageLinkedService"].ToString();
            Dataset = new CustomDbDataset
            {
                InstanceName = props.ServiceExtraProperties["instanceName"].ToString(),
                DbContextName = props.ServiceExtraProperties["dbContextName"].ToString(),
                AssemblyFile = props.ServiceExtraProperties["assemblyFile"].ToString()
            };
            InstanceName = Dataset.InstanceName;

            // Download the assembly into the current directory (overwriting any previous copy) and load it.
            var blob = Helpers.GetBlob(linkedServiceResolver(packageLinkedServiceName), Dataset.AssemblyFile);
            var path = Path.Combine(Environment.CurrentDirectory, Path.GetFileName(Dataset.AssemblyFile));
            blob.DownloadToFile(path, FileMode.Create);
            var assembly = Assembly.LoadFrom(path);

            // throwOnError: a missing type fails here with a descriptive exception instead of
            // reaching Activator.CreateInstance as a null type.
            var contextType = assembly.GetType(Dataset.DbContextName, true);
            Context = (DbContext)Activator.CreateInstance(contextType, sqlService.ConnectionString);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds a provider for an Azure blob dataset: resolves the blob path from the dataset's
        /// folder path, file name and partitions for the given slice, opens the blob through
        /// <c>Helpers.GetBlob</c>, and prepares the CSV configuration from the dataset's text format.
        /// </summary>
        /// <param name="dataset">Dataset whose type properties must be an <c>AzureBlobDataset</c>.</param>
        /// <param name="linkedService">Linked service passed to <c>Helpers.GetBlob</c>.</param>
        /// <param name="slice">Slice used to substitute the partition placeholders in the path.</param>
        /// <exception cref="ArgumentNullException"><paramref name="dataset"/> is null.</exception>
        /// <exception cref="ArgumentException">The dataset is not an Azure blob dataset.</exception>
        public AzureBlobProvider(Dataset dataset, LinkedService linkedService, Slice slice)
        {
            if (dataset == null)
            {
                throw new ArgumentNullException(nameof(dataset));
            }

            var properties = dataset.Properties as DatasetProperties;
            if (properties == null)
            {
                throw new ArgumentException("The dataset has no DatasetProperties.", nameof(dataset));
            }

            var azblobDataset = properties.TypeProperties as AzureBlobDataset;
            if (azblobDataset == null)
            {
                throw new ArgumentException("The dataset's type properties must be an AzureBlobDataset.", nameof(dataset));
            }

            InstanceName = dataset.Name;
            Structure = properties.Structure;

            var filepath = Path.Combine(azblobDataset.FolderPath, azblobDataset.FileName);
            filepath = Helpers.ReplaceByPatition(filepath, azblobDataset.PartitionedBy, slice);
            Blob = Helpers.GetBlob(linkedService, filepath);

            // Start from the default CSV configuration and override only what the text format
            // actually specifies; an unset delimiter or encoding name keeps the default instead
            // of throwing (Encoding.GetEncoding rejects null).
            var configuration = new CsvConfiguration { };
            var format = azblobDataset.Format as TextFormat;
            if (format != null)
            {
                if (!string.IsNullOrEmpty(format.ColumnDelimiter))
                {
                    configuration.Delimiter = format.ColumnDelimiter;
                }

                if (!string.IsNullOrEmpty(format.EncodingName))
                {
                    configuration.Encoding = Encoding.GetEncoding(format.EncodingName);
                }
            }

            Configuration = configuration;
        }
        /// <summary>
        /// Builds a provider for a custom Azure blob dataset from the dataset's extra properties.
        /// </summary>
        /// <remarks>
        /// Extra properties read here:
        /// "instanceName" (optional), "encoding" (optional; unknown names are ignored),
        /// "filePath" (required), "format" (optional; "json" selects JSON, anything else is Unknown)
        /// and "test" (optional; written to the console).
        /// Each <c>{key}</c> placeholder in "filePath" that has an extra property of the same name
        /// with the value <c>"dateFormat, SliceStart"</c> or <c>"dateFormat, SliceEnd"</c> is replaced
        /// by the formatted slice start or end; any other name after the comma yields an empty string.
        /// </remarks>
        /// <param name="dataset">Dataset whose type properties must be a <c>CustomDataset</c>.</param>
        /// <param name="linkedService">Linked service passed to <c>Helpers.GetBlob</c>.</param>
        /// <param name="slice">Slice whose start and end fill the path placeholders.</param>
        /// <exception cref="ArgumentNullException"><paramref name="dataset"/> is null.</exception>
        /// <exception cref="ArgumentException">The dataset is not a custom dataset.</exception>
        public CustomAzureBlobProvider(Dataset dataset, LinkedService linkedService, Slice slice)
        {
            if (dataset == null)
            {
                throw new ArgumentNullException(nameof(dataset));
            }

            var props = dataset.Properties?.TypeProperties as CustomDataset;
            if (props == null)
            {
                throw new ArgumentException("The dataset's type properties must be a CustomDataset.", nameof(dataset));
            }

            var extra = props.ServiceExtraProperties;

            if (extra.ContainsKey("instanceName"))
            {
                InstanceName = extra["instanceName"].ToString();
            }

            var enc = extra.ContainsKey("encoding") ? extra["encoding"].ToString() : null;
            if (!string.IsNullOrEmpty(enc))
            {
                try
                {
                    Encoding = Encoding.GetEncoding(enc);
                }
                catch (ArgumentException)
                {
                    // Deliberate best effort: an unrecognised encoding name leaves Encoding unset.
                }
            }

            var filepath = extra["filePath"].ToString();
            var regex = new Regex(@"\{(.+?)\}");
            foreach (Match m in regex.Matches(filepath))
            {
                var key = m.Groups[1].Value;
                if (!extra.ContainsKey(key))
                {
                    continue;
                }

                var formatAndName = extra[key].ToString().Split(',').Select(part => part.Trim()).ToArray();
                if (formatAndName.Length < 2)
                {
                    // Not a "format, name" definition (e.g. the placeholder happens to share its
                    // name with an unrelated property); leave the placeholder untouched.
                    continue;
                }

                var dateFormat = formatAndName[0];
                var dateName = formatAndName[1];
                var value = string.Equals(dateName, "slicestart", StringComparison.OrdinalIgnoreCase) ? slice.Start.ToString(dateFormat)
                          : string.Equals(dateName, "sliceend", StringComparison.OrdinalIgnoreCase) ? slice.End.ToString(dateFormat)
                          : "";

                // Replace the placeholder that was actually matched ("{key}"), not "{name}":
                // the two only coincide when the property is named after the date it refers to.
                filepath = filepath.Replace(m.Value, value);
            }
            Blob = Helpers.GetBlob(linkedService, filepath);

            // A missing "format" is treated like a null one: Unknown.
            var format = extra.ContainsKey("format") ? extra["format"] : null;
            Format = format != null && string.Equals(format.ToString(), "json", StringComparison.OrdinalIgnoreCase)
                ? CustomAzureBlobFormat.Json
                : CustomAzureBlobFormat.Unknown;

            if (extra.ContainsKey("test"))
            {
                Console.Write(extra["test"].ToString());
            }
        }
 /// <summary>
 /// Returns true when the dataset type is "CustomDataset" and the linked service type is
 /// "AzureSqlDatabase".
 /// </summary>
 public static bool IsMatch(Dataset dataset, LinkedService linkedService)
 {
     // The linked service is only inspected when the dataset type already matches.
     if (dataset.Properties.Type != "CustomDataset")
     {
         return false;
     }

     return linkedService.Properties.Type == "AzureSqlDatabase";
 }
Ejemplo n.º 9
0
        public void CanThrowIfDatasetProvisioningFailed()
        {
            // Arrange: the mocked service returns a dataset with default (empty) properties.
            var expected = new Dataset
            {
                Name = datasetName,
                Properties = new DatasetProperties()
            };

            // Reading any file yields the canned JSON content.
            dataFactoriesClientMock
                .Setup(client => client.ReadJsonFileContent(It.IsAny<string>()))
                .Returns(rawJsonContent)
                .Verifiable();

            // CreatePSDataset runs its real implementation when called with the expected parameters.
            dataFactoriesClientMock
                .Setup(client => client.CreatePSDataset(
                    It.Is<CreatePSDatasetParameters>(p =>
                        p.Name == datasetName &&
                        p.ResourceGroupName == ResourceGroupName &&
                        p.DataFactoryName == DataFactoryName)))
                .CallBase()
                .Verifiable();

            // The service call itself is stubbed to return the dataset above.
            dataFactoriesClientMock
                .Setup(client => client.CreateOrUpdateDataset(ResourceGroupName, DataFactoryName, datasetName, rawJsonContent))
                .Returns(expected)
                .Verifiable();

            // Act
            cmdlet.File = filePath;
            cmdlet.Force = true;

            // Assert: executing the cmdlet must surface a provisioning failure.
            Assert.Throws<ProvisioningFailedException>(() => cmdlet.ExecuteCmdlet());
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Local debugging entry point: builds the linked services, datasets and activity definition
        /// in memory from the local settings file and runs <c>MongoDbDumpTransformActivity</c> with them.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var customActivity = new MongoDbDumpTransformActivity();

            var config =
                JObject.Parse(
                    File.ReadAllText(@"..\..\..\..\..\LocalSettings-DataFactory.json"));
            var ghTorrentSasUri = config["GHTorrentAzureStorage"][0]["value"].Value<string>();
            var ghInsightsConnectionString = config["GHInsightsAzureStorage"][0]["value"].Value<string>();

            var linkedServices = new List<LinkedService>
            {
                // Build the JSON object directly instead of formatting a JSON string, so that a
                // quote or backslash in the SAS URI cannot produce invalid JSON.
                new LinkedService("GHTorrentAzureStorage",
                    new LinkedServiceProperties(
                        new CustomDataSourceLinkedService(new JObject(new JProperty("sasUri", ghTorrentSasUri))))),
                new LinkedService("GHInsightsAzureStorage",
                    new LinkedServiceProperties(new AzureStorageLinkedService(ghInsightsConnectionString)))
            };

            var mongoDbDump = new Dataset("MongoDbDump",
                new DatasetProperties(
                    new AzureBlobDataset
                    {
                        FolderPath = "downloads/",
                        FileName = "mongo-dump-{Year}-{Month}-{Day}.tar.gz",
                        PartitionedBy = CreateSliceStartDatePartitions()
                    },
                    new Availability("Daily", 1), "GHTorrentAzureStorage"));

            var eventDetailRawFilesBlob = new Dataset("EventDetail",
                new DatasetProperties(
                    new AzureBlobDataset
                    {
                        FolderPath = @"test/{EventName}/v1/{Year}/{Month}",
                        FileName = "{EventName}_{Year}_{Month}_{Day}.json.gz",
                        PartitionedBy = CreateSliceStartDatePartitions()
                    },
                    new Availability("Daily", 1), "GHInsightsAzureStorage"));

            var datasets = new List<Dataset> { mongoDbDump, eventDetailRawFilesBlob };

            var activity = new Activity
            {
                Description = "Fake Activity",
                Inputs = new ActivityInput[] { new ActivityInput("MongoDbDump") },
                LinkedServiceName = "BatchProcessor",
                Name = "GHTorrentEventDetailPipeline",
                Outputs = new ActivityOutput[] { new ActivityOutput("EventDetail") },
                Policy =
                    new ActivityPolicy
                    {
                        Concurrency = 4,
                        ExecutionPriorityOrder = "NewestFirst",
                        Retry = 3,
                        Timeout = TimeSpan.FromHours(4)
                    },
                Scheduler = new Scheduler("Day", 1),
                TypeProperties = new DotNetActivity("DataFactoryLib.dll"
                    , "DataFactoryLib.MongoDbDumpTransformActivity"
                    , "datafactory/DataFactoryLib.zip"
                    , "GHInsightsAzureStorage")
                {
                    ExtendedProperties = new Dictionary<string, string>
                    {
                        {"Year", "2015"},
                        {"Month", "12"},
                        {"Day", "02"}
                    }
                }
            };

            IActivityLogger logger = new DebugLogger();

            customActivity.Execute(linkedServices, datasets, activity, logger);
        }

        /// <summary>
        /// Creates a fresh Year/Month/Day partition list in which every partition formats the
        /// "SliceStart" date ("yyyy", "MM" and "dd" respectively).
        /// </summary>
        private static List<Partition> CreateSliceStartDatePartitions()
        {
            return new List<Partition>
            {
                new Partition
                {
                    Name = "Year",
                    Value = new DateTimePartitionValue { Date = "SliceStart", Format = "yyyy" }
                },
                new Partition
                {
                    Name = "Month",
                    Value = new DateTimePartitionValue { Date = "SliceStart", Format = "MM" }
                },
                new Partition
                {
                    Name = "Day",
                    Value = new DateTimePartitionValue { Date = "SliceStart", Format = "dd" }
                }
            };
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Returns the folder path of a blob dataset, or null when the dataset is null,
        /// has no properties, or is not an Azure blob dataset.
        /// </summary>
        private static string GetFolderPath(Dataset dataArtifact)
        {
            AzureBlobDataset blobTypeProperties = null;

            // Only look at the type properties when the dataset and its properties exist.
            if (dataArtifact != null && dataArtifact.Properties != null)
            {
                blobTypeProperties = dataArtifact.Properties.TypeProperties as AzureBlobDataset;
            }

            return blobTypeProperties == null ? null : blobTypeProperties.FolderPath;
        }
Ejemplo n.º 12
0
 /// <summary>
 /// Returns true when the dataset is a "CustomDataset" on an "AzureStorage" linked service
 /// whose "type" extra property is "AzureBlob".
 /// </summary>
 /// <remarks>
 /// A dataset without a "type" extra property, or whose type properties are not a
 /// <c>CustomDataset</c>, is reported as a non-match instead of throwing.
 /// </remarks>
 public static bool IsMatch(Dataset dataset, LinkedService linkedService)
 {
     if (dataset.Properties.Type != "CustomDataset" || linkedService.Properties.Type != "AzureStorage")
     {
         return false;
     }

     var custom = dataset.Properties.TypeProperties as CustomDataset;
     if (custom == null
         || custom.ServiceExtraProperties == null
         || !custom.ServiceExtraProperties.ContainsKey("type"))
     {
         return false;
     }

     var type = custom.ServiceExtraProperties["type"];
     return type != null && type.ToString() == "AzureBlob";
 }
Ejemplo n.º 13
0
 /// <summary>
 /// Creates a wrapper around a new, empty dataset that already has an empty
 /// <c>DatasetProperties</c> attached.
 /// </summary>
 public PSDataset()
 {
     var emptyDataset = new Dataset();
     emptyDataset.Properties = new DatasetProperties();
     this.dataset = emptyDataset;
 }
Ejemplo n.º 14
0
        /// <summary>
        /// Loads the activity named <paramref name="activityName"/> from a pipeline JSON file and
        /// builds the datasets and linked services it references. Each dataset and linked service
        /// is read from a "&lt;name&gt;.json" file in the same directory as the pipeline file.
        /// </summary>
        /// <param name="pipelinePath">Path of the pipeline JSON file.</param>
        /// <param name="activityName">Name of the activity to load from the pipeline.</param>
        /// <param name="linkedServices">Receives the storage services of the activity's datasets and
        /// the compute service of the activity itself.</param>
        /// <param name="datasets">Receives the activity's input and output datasets.</param>
        /// <param name="activity">Receives the activity with its name, inputs and outputs set.</param>
        /// <exception cref="ArgumentException">No activity with the given name exists in the pipeline.</exception>
        /// <exception cref="NotSupportedException">A dataset is neither "AzureBlob" nor "AzureTable",
        /// or a linked service is of an unsupported kind.</exception>
        /// <remarks>
        /// Both exception types derive from <see cref="Exception"/> and carry the same messages
        /// as before, so existing <c>catch (Exception)</c> handlers keep working.
        /// </remarks>
        public static void InitParameters(
            string pipelinePath,
            string activityName,
            out List <Models.LinkedService> linkedServices,
            out List <Models.Dataset> datasets,
            out Models.Activity activity)
        {
            // init the parameters
            linkedServices = new List <Models.LinkedService>();
            datasets       = new List <Models.Dataset>();
            activity       = new Models.Activity();

            // parse the pipeline json source
            var pipelineJson  = File.ReadAllText(pipelinePath);
            var dummyPipeline = JsonConvert.DeserializeObject <Dummy.Pipeline>(pipelineJson);

            // dataset and linked service definitions are looked up next to the pipeline file
            var definitionDirectory = Path.GetDirectoryName(pipelinePath);

            foreach (var dummyActivity in dummyPipeline.Properties.Activities)
            {
                // find the relevant activity in the pipeline
                if (dummyActivity.Name != activityName)
                {
                    continue;
                }

                activity.Name = dummyActivity.Name;

                // get the input and output tables
                var dummyDatasets = new HashSet <Dummy.ActivityData>();
                dummyDatasets.UnionWith(dummyActivity.Inputs);
                dummyDatasets.UnionWith(dummyActivity.Outputs);

                var dummyServices = new HashSet <Dummy.LinkedService>();

                // init the data tables
                foreach (var dummyDataset in dummyDatasets)
                {
                    var dummyTable = ReadDefinition <Dummy.Table>(definitionDirectory, dummyDataset.Name);

                    var dataDataset = new Models.Dataset(
                        dummyDataset.Name,
                        new Models.DatasetProperties(
                            CreateDatasetTypeProperties(dummyTable),
                            new CommonModels.Availability(),
                            ""
                            )
                        );
                    dataDataset.Properties.LinkedServiceName = dummyTable.Properties.LinkedServiceName;
                    datasets.Add(dataDataset);

                    // register the input or output in the activity
                    if (dummyDataset is Dummy.ActivityInput)
                    {
                        activity.Inputs.Add(new CommonModels.ActivityInput(dummyDataset.Name));
                    }

                    if (dummyDataset is Dummy.ActivityOutput)
                    {
                        activity.Outputs.Add(new CommonModels.ActivityOutput(dummyDataset.Name));
                    }

                    // remember the dataset's storage service for later use
                    dummyServices.Add(
                        ReadDefinition <Dummy.StorageService>(definitionDirectory, dummyTable.Properties.LinkedServiceName));
                }

                // the activity's own (compute) service
                dummyServices.Add(
                    ReadDefinition <Dummy.ComputeService>(definitionDirectory, dummyActivity.LinkedServiceName));

                // init the services
                foreach (var dummyService in dummyServices)
                {
                    linkedServices.Add(CreateLinkedService(dummyService));
                }
            }

            if (activity.Name == null)
            {
                throw new ArgumentException(string.Format("Activity {0} not found.", activityName));
            }
        }

        /// <summary>
        /// Reads "&lt;name&gt;.json" from <paramref name="directory"/> and deserializes it as <typeparamref name="T"/>.
        /// </summary>
        private static T ReadDefinition <T>(string directory, string name)
        {
            var definitionPath = Path.Combine(directory, name + ".json");
            return JsonConvert.DeserializeObject <T>(File.ReadAllText(definitionPath));
        }

        /// <summary>
        /// Maps a parsed table definition to the matching dataset type properties
        /// ("AzureBlob" or "AzureTable").
        /// </summary>
        private static Models.DatasetTypeProperties CreateDatasetTypeProperties(Dummy.Table dummyTable)
        {
            switch (dummyTable.Properties.Type)
            {
            case "AzureBlob":
                var blobDataset = new Models.AzureBlobDataset();
                blobDataset.FolderPath = dummyTable.Properties.TypeProperties.FolderPath;
                blobDataset.FileName   = dummyTable.Properties.TypeProperties.FileName;
                return blobDataset;

            case "AzureTable":
                var tableDataset = new Models.AzureTableDataset();
                tableDataset.TableName = dummyTable.Properties.TypeProperties.TableName;
                return tableDataset;

            default:
                throw new NotSupportedException(string.Format("Unexpected Dataset.Type {0}", dummyTable.Properties.Type));
            }
        }

        /// <summary>
        /// Converts a parsed storage or compute service definition into a linked service.
        /// </summary>
        private static Models.LinkedService CreateLinkedService(Dummy.LinkedService dummyService)
        {
            Models.LinkedService linkedService = null;

            // init if it is a storage service
            var dummyStorageService = dummyService as Dummy.StorageService;
            if (dummyStorageService != null)
            {
                var service = new Models.AzureStorageLinkedService();
                service.ConnectionString = dummyStorageService.Properties.TypeProperties.ConnectionString;
                linkedService = new Models.LinkedService(
                    dummyService.Name,
                    new Models.LinkedServiceProperties(service)
                    );
            }

            // init if it is a hd insight service
            if (dummyService is Dummy.ComputeService)
            {
                var service = new Models.HDInsightLinkedService();
                linkedService = new Models.LinkedService(
                    dummyService.Name,
                    new Models.LinkedServiceProperties(service)
                    );
            }

            if (linkedService == null)
            {
                // Never hand a null entry to the caller's list.
                throw new NotSupportedException("Unexpected linked service kind.");
            }

            return linkedService;
        }