Ejemplo n.º 1
0
        /// <summary>
        /// Execute method is the only method of IDotNetActivity interface you must implement.
        /// In this sample, the method invokes the Calculate method to perform the core logic.
        /// </summary>

        public IDictionary <string, string> Execute(
            IEnumerable <LinkedService> linkedServices,
            IEnumerable <Dataset> datasets,
            Activity activity,
            IActivityLogger logger)
        {
            /////////////////
            // Log input parameters

            // to get extended properties (for example: SliceStart)
            foreach (LinkedService ls in linkedServices)
            {
                logger.Write("linkedServices: {0}, {1}, {2}, {3}", ls.Name, ls.Properties.Type,
                             ls.Properties.Description, ls.Properties.ErrorMessage);
            }



            var sliceYear  = ((DotNetActivity)activity.TypeProperties).ExtendedProperties["Year"];
            var sliceMonth = ((DotNetActivity)activity.TypeProperties).ExtendedProperties["Month"];
            var sliceDay   = ((DotNetActivity)activity.TypeProperties).ExtendedProperties["Day"];

            logger.Write("dataSlice: {0}-{1}-{2}", sliceYear, sliceMonth, sliceDay);


            /////////////////
            // Open up input Blob


            var inputDataset       = datasets.Single(dataset => dataset.Name == activity.Inputs.Single().Name);
            var inputLinkedService = linkedServices.Single(
                linkedService =>
                linkedService.Name ==
                inputDataset.Properties.LinkedServiceName);

            var inputLocation = new BlobLocation(inputLinkedService, inputDataset, sliceYear, sliceMonth, sliceDay);


            var inputContainer = new CloudBlobContainer(inputLocation.ConnectionSasUri);
            var sourceBlob     = inputContainer.GetBlobReferenceFromServer(inputLocation.BlobFullPath);

            ////////////////
            // Get output location

            var outputDataset = datasets.Single(dataset => dataset.Name == activity.Outputs.Single().Name);

            var outputLinkedService = linkedServices.Single(
                linkedService =>
                linkedService.Name ==
                outputDataset.Properties.LinkedServiceName);

            var outputLocation = new BlobLocation(outputLinkedService, outputDataset, sliceYear, sliceMonth, sliceDay);

            CloudStorageAccount outputStorageAccount = CloudStorageAccount.Parse(outputLocation.ConnectionString);
            CloudBlobClient     outputClient         = outputStorageAccount.CreateCloudBlobClient();
            var outContainer = outputClient.GetContainerReference(outputLocation.ContainerName);

            outContainer.CreateIfNotExists();

            //format output path string
            var outputFilenameFormatString = outputLocation.BlobFullPath;


            using (var sourceBlobStream = sourceBlob.OpenRead())
                using (var unZipStream = new System.IO.Compression.GZipStream(sourceBlobStream, System.IO.Compression.CompressionMode.Decompress))
                    using (var tarStream = new TarStream(unZipStream))
                    {
                        logger.Write("BlobRead: {0}/{1}", inputLocation.ContainerName, inputLocation.BlobFullPath);
                        while (tarStream.NextFile())
                        {
                            var tableName          = Path.GetFileNameWithoutExtension(tarStream.CurrentFilename);
                            var taredFileExtention = Path.GetExtension(tarStream.CurrentFilename);

                            if (taredFileExtention == ".bson")
                            {
                                int             fileNumber    = 0;
                                var             outputBlob    = outContainer.GetBlockBlobReference(outputFilenameFormatString.Replace("{EventName}", tableName).Replace("{Number}", fileNumber.ToString("D4")));
                                CloudBlobStream outBlobStream = null;
                                GZipStream      gzipOut       = null;
                                StreamWriter    outText       = null;

                                try
                                {
                                    outBlobStream = outputBlob.OpenWrite();
                                    gzipOut       = new GZipStream(outBlobStream, System.IO.Compression.CompressionLevel.Optimal);
                                    outText       = new StreamWriter(gzipOut, Encoding.UTF8);
                                    using (var reader = new BsonReader(tarStream))
                                    {
                                        logger.Write("BlobWrite: {0}/{1}", outputLocation.ContainerName, outputBlob.Name);

                                        reader.CloseInput = false;

                                        var jsonSerializer = new JsonSerializer();


                                        reader.ReadRootValueAsArray   = false;
                                        reader.SupportMultipleContent = true;

                                        while (reader.Read())
                                        {
                                            var row = (JObject)jsonSerializer.Deserialize(reader);

                                            var outString = row.ToString(Formatting.None);

                                            outText.WriteLine(outString);

                                            if (outBlobStream.Position > 1024 * 1024 * 1024)
                                            {
                                                outText.Close();
                                                gzipOut.Close();
                                                outBlobStream.Close();

                                                fileNumber++;

                                                outputBlob = outContainer.GetBlockBlobReference(outputFilenameFormatString.Replace("{EventName}", tableName).Replace("{Number}", fileNumber.ToString("D4")));
                                                logger.Write("BlobWrite: {0}/{1}", outputLocation.ContainerName, outputBlob.Name);

                                                outBlobStream = outputBlob.OpenWrite();
                                                gzipOut       = new GZipStream(outBlobStream, System.IO.Compression.CompressionLevel.Optimal);
                                                outText       = new StreamWriter(gzipOut, Encoding.UTF8);
                                            }
                                        }
                                    }
                                } finally
                                {
                                    if (outText != null)
                                    {
                                        outText.Close();
                                    }
                                    if (gzipOut != null)
                                    {
                                        gzipOut.Close();
                                    }
                                    if (outBlobStream != null)
                                    {
                                        outBlobStream.Close();
                                    }
                                }
                            }
                        }
                        ;
                    }



            // return a new Dictionary object
            return(new Dictionary <string, string>());
        }
        /// <summary>
        /// Execute method is the only method of IDotNetActivity interface you must implement. 
        /// In this sample, the method invokes the Calculate method to perform the core logic.  
        /// </summary>

        public IDictionary<string, string> Execute(
            IEnumerable<LinkedService> linkedServices,
            IEnumerable<Dataset> datasets,
            Activity activity,
            IActivityLogger logger)
        {
            
            /////////////////
            // Log input parameters

            // to get extended properties (for example: SliceStart)
            foreach (LinkedService ls in linkedServices)
            {
                logger.Write("linkedServices: {0}, {1}, {2}, {3}", ls.Name, ls.Properties.Type,
                    ls.Properties.Description, ls.Properties.ErrorMessage);
            }



            var sliceYear = ((DotNetActivity)activity.TypeProperties).ExtendedProperties["Year"];
            var sliceMonth = ((DotNetActivity)activity.TypeProperties).ExtendedProperties["Month"];
            var sliceDay = ((DotNetActivity)activity.TypeProperties).ExtendedProperties["Day"];

            logger.Write("dataSlice: {0}-{1}-{2}", sliceYear, sliceMonth, sliceDay);


            /////////////////
            // Open up input Blob


            var inputDataset = datasets.Single(dataset => dataset.Name == activity.Inputs.Single().Name);
            var inputLinkedService = linkedServices.Single(
                linkedService =>
                linkedService.Name ==
                inputDataset.Properties.LinkedServiceName);

            var inputLocation = new BlobLocation(inputLinkedService, inputDataset, sliceYear, sliceMonth, sliceDay);


            var inputContainer = new CloudBlobContainer(inputLocation.ConnectionSasUri);
            var sourceBlob = inputContainer.GetBlobReferenceFromServer(inputLocation.BlobFullPath);

            ////////////////
            // Get output location

            var outputDataset = datasets.Single(dataset => dataset.Name == activity.Outputs.Single().Name);

            var outputLinkedService = linkedServices.Single(
                linkedService =>
                linkedService.Name ==
                outputDataset.Properties.LinkedServiceName);

            var outputLocation = new BlobLocation(outputLinkedService, outputDataset, sliceYear, sliceMonth, sliceDay);
            
            CloudStorageAccount outputStorageAccount = CloudStorageAccount.Parse(outputLocation.ConnectionString);
            CloudBlobClient outputClient = outputStorageAccount.CreateCloudBlobClient();
            var outContainer = outputClient.GetContainerReference(outputLocation.ContainerName);

            outContainer.CreateIfNotExists();

            //format output path string
            var outputFilenameFormatString = outputLocation.BlobFullPath;


            using (var sourceBlobStream = sourceBlob.OpenRead())
            using (var unZipStream = new System.IO.Compression.GZipStream(sourceBlobStream, System.IO.Compression.CompressionMode.Decompress))
            using (var tarStream = new TarStream(unZipStream))
            {

                logger.Write("BlobRead: {0}/{1}", inputLocation.ContainerName, inputLocation.BlobFullPath);
                while (tarStream.NextFile())
                {
                    var tableName = Path.GetFileNameWithoutExtension(tarStream.CurrentFilename);
                    var taredFileExtention = Path.GetExtension(tarStream.CurrentFilename);
                    
                    if (taredFileExtention == ".bson")
                    {
                        
                        var outputBlob = outContainer.GetBlockBlobReference(outputFilenameFormatString.Replace("{EventName}", tableName));

                        using (var outBlobStream = outputBlob.OpenWrite())
                        using (var gzipOut = new GZipStream(outBlobStream, System.IO.Compression.CompressionLevel.Optimal))
                        using (var outText = new StreamWriter(gzipOut, Encoding.UTF8))
                        using (var reader = new BsonReader(tarStream))
                        {

                            logger.Write("BlobWrite: {0}/{1}", outputLocation.ContainerName, outputBlob.Name);

                            reader.CloseInput = false;

                            var jsonSerializer = new JsonSerializer();


                            reader.ReadRootValueAsArray = false;
                            reader.SupportMultipleContent = true;

                            while (reader.Read())
                            {
                                var row = (JObject)jsonSerializer.Deserialize(reader);

                                var outString = row.ToString(Formatting.None);

                                outText.WriteLine(outString);
                            }
                        }
                    }
                } ;
            }



            // return a new Dictionary object
            return new Dictionary<string, string>();
        }