public static DataFrame ToDataFrame(this IEnumerable<SimpleCheckResultOutput> simpleChecks)
{
    // Flatten each check result into a row of plain string columns.
    List<GenericRow> elements = new List<GenericRow>();
    foreach (SimpleCheckResultOutput check in simpleChecks)
    {
        elements.Add(new GenericRow(new[]
        {
            check.CheckDescription,
            check.CheckLevel,
            check.CheckStatus,
            check.Constraint,
            check.ConstraintStatus,
            check.ConstraintMessage
        }));
    }

    StructType schema = new StructType(new List<StructField>
    {
        new StructField("check", new StringType()),
        new StructField("check_level", new StringType()),
        new StructField("check_status", new StringType()),
        new StructField("constraint", new StringType()),
        new StructField("constraint_status", new StringType()),
        new StructField("constraint_message", new StringType())
    });

    return SparkSession.Active().CreateDataFrame(elements, schema);
}
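Once converted, the check results can be queried like any other DataFrame. A minimal usage sketch of the extension above; the method name PrintFailedConstraints and the "Success" status literal are assumptions, not part of the original code:

// Hypothetical usage of the ToDataFrame extension above; checkOutputs is assumed
// to come from a Deequ verification run, and "Success" is an assumed status value.
static void PrintFailedConstraints(IEnumerable<SimpleCheckResultOutput> checkOutputs)
{
    DataFrame results = checkOutputs.ToDataFrame();

    // Keep only the rows whose constraint did not pass, then print them.
    results
        .Filter(Functions.Col("constraint_status").NotEqual("Success"))
        .Show();
}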
static DataFrame toDF(List<Document> docs)
{
    var rows = new List<GenericRow>();
    var spark = SparkSession.Active();

    // One row per document: its path and its raw content.
    foreach (var doc in docs)
    {
        rows.Add(new GenericRow(new object[] { doc.Path, doc.Content }));
    }

    var schema = new StructType(new List<StructField>
    {
        new StructField("Path", new StringType()),
        new StructField("Content", new StringType())
    });

    return spark.CreateDataFrame(rows, schema);
}
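A quick usage sketch for toDF; the object-initializer syntax assumes Document exposes settable Path and Content properties, and the sample paths are made up:

// Hypothetical usage of toDF above.
var docs = new List<Document>
{
    new Document { Path = "docs/intro.md", Content = "hello spark" },
    new Document { Path = "docs/usage.md", Content = "create a dataframe" }
};
toDF(docs).Show(); // prints a two-column DataFrame: Path, Content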
public void TestSignaturesV2_4_X()
{
    Assert.IsType<SparkContext>(_spark.SparkContext);
    Assert.IsType<Builder>(SparkSession.Builder());

    SparkSession.ClearActiveSession();
    SparkSession.SetActiveSession(_spark);
    Assert.IsType<SparkSession>(SparkSession.GetActiveSession());

    SparkSession.ClearDefaultSession();
    SparkSession.SetDefaultSession(_spark);
    Assert.IsType<SparkSession>(SparkSession.GetDefaultSession());

    Assert.IsType<RuntimeConfig>(_spark.Conf());
    Assert.IsType<StreamingQueryManager>(_spark.Streams());
    Assert.IsType<SparkSession>(_spark.NewSession());
    Assert.IsType<DataFrameReader>(_spark.Read());

    Assert.IsType<DataFrame>(_spark.Range(10));
    Assert.IsType<DataFrame>(_spark.Range(10, 100));
    Assert.IsType<DataFrame>(_spark.Range(10, 100, 10));
    Assert.IsType<DataFrame>(_spark.Range(10, 100, 10, 5));

    _spark.Range(10).CreateOrReplaceTempView("testView");
    Assert.IsType<DataFrame>(_spark.Table("testView"));

    Assert.IsType<DataStreamReader>(_spark.ReadStream());
    Assert.IsType<UdfRegistration>(_spark.Udf());
    Assert.IsType<Catalog>(_spark.Catalog);
    Assert.NotNull(_spark.Version());
    Assert.IsType<SparkSession>(SparkSession.Active());
}
public void TestSignaturesV2_4_X()
{
    Assert.IsType<SparkSession>(SparkSession.Active());
}
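Both tests bottom out in SparkSession.Active(). A minimal sketch of the fallback behavior the first test exercises, assuming the .NET binding mirrors Spark's session semantics (Active() prefers the thread's active session and otherwise falls back to the default):

SparkSession spark = SparkSession.Builder().GetOrCreate();

// With no active session set, Active() is expected to resolve to the default session.
SparkSession.ClearActiveSession();
SparkSession.SetDefaultSession(spark);
Console.WriteLine(SparkSession.Active().Version());

// Once an active session is set, Active() returns that one instead.
SparkSession.SetActiveSession(spark.NewSession());
Console.WriteLine(SparkSession.Active().Version());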
public static void AnomalyDetectionExample()
{
    // Anomaly detection operates on metrics stored in a metric repository, so let's create one.
    IMetricsRepository metricsRepository = new InMemoryMetricsRepository();

    // The key under which we store the metrics for yesterday's dataset (one day of ticks ago).
    ResultKey yesterdaysKey = new ResultKey(DateTime.UtcNow.Ticks - TimeSpan.TicksPerDay);

    /* In this simple example, we assume that we compute metrics on a dataset every day and we
     * want to ensure that they don't change drastically. For the sake of simplicity, we just
     * look at the size of the data. */

    /* Yesterday, the data had only two rows. */
    var yesterdaysDataset = LoadData(new List<object[]>
    {
        new object[] { 1, "Thingy A", "awesome thing.", "high", 0 },
        new object[] { 2, "Thingy B", "available at http://thingb.com", null, 0 }
    });

    /* We test for anomalies in the size of the data: it should not increase by more than 2x.
     * Note that we store the resulting metrics in our repository. */
    new VerificationSuite()
        .OnData(yesterdaysDataset)
        .UseRepository(metricsRepository)
        .SaveOrAppendResult(yesterdaysKey)
        .AddAnomalyCheck(
            new RelativeRateOfChangeStrategy(maxRateIncrease: 2.0),
            Size())
        .Run();

    /* Today's data has five rows, so the data size more than doubled and our anomaly check
     * should catch this. */
    var todaysDataset = LoadData(new List<object[]>
    {
        new object[] { 1, "Thingy A", "awesome thing.", "high", 0 },
        new object[] { 2, "Thingy B", "available at http://thingb.com", null, 0 },
        new object[] { 3, null, null, "low", 5 },
        new object[] { 4, "Thingy D", "checkout https://thingd.ca", "low", 10 },
        new object[] { 5, "Thingy W", null, "high", 12 }
    });

    /* The key for today's result. */
    var todaysKey = new ResultKey(DateTime.UtcNow.Ticks);

    /* Repeat the anomaly check for today's data. */
    var verificationResult = new VerificationSuite()
        .OnData(todaysDataset)
        .UseRepository(metricsRepository)
        .SaveOrAppendResult(todaysKey)
        .AddAnomalyCheck(
            new RelativeRateOfChangeStrategy(maxRateIncrease: 2.0),
            Size())
        .Run()
        .Debug();

    /* Did we find an anomaly? */
    if (verificationResult.Status != CheckStatus.Success)
    {
        Console.WriteLine("Anomaly detected in the Size() metric!");

        /* Let's have a look at the actual metrics. */
        metricsRepository
            .Load()
            .ForAnalyzers(new[] { Size() })
            .GetSuccessMetricsAsDataFrame(SparkSession.Active())
            .Show();
    }
}
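The example relies on a LoadData helper that is not shown here. A minimal stand-in under assumptions: the column names and types are illustrative, chosen only to match the five-element rows above:

// Hypothetical stand-in for the LoadData helper used above; column names are assumptions.
private static DataFrame LoadData(List<object[]> rows)
{
    var schema = new StructType(new List<StructField>
    {
        new StructField("id", new IntegerType()),
        new StructField("name", new StringType()),
        new StructField("description", new StringType()),
        new StructField("priority", new StringType()),
        new StructField("numViews", new IntegerType())
    });

    return SparkSession.Active().CreateDataFrame(
        rows.Select(row => new GenericRow(row)).ToList(),
        schema);
}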
/// <summary>
/// Runs the Spark job.
/// </summary>
private static void RunJob()
{
    const string wordList = nameof(wordList);
    const string word = nameof(word);
    const string count = nameof(count);
    const string docFrequency = nameof(docFrequency);
    const string total = nameof(total);
    const string inverseDocFrequency = nameof(inverseDocFrequency);
    const string termFreq_inverseDocFreq = nameof(termFreq_inverseDocFreq);

    Console.WriteLine("Starting Spark job to analyze words...");

    var spark = SparkSession.Active();
    filesHelper.NewModelSession();

    // everything
    var docs = spark.Read().HasHeader().Csv(filesHelper.TempDataFile);
    docs.CreateOrReplaceTempView(nameof(docs));

    // all docs in corpus
    var totalDocs = docs.Count();

    // easy reference
    var fileCol = nameof(FileDataParse.File).AsColumn();

    // split words and group by count
    var words = docs
        // transform words into an array of words
        .Select(
            fileCol,
            Functions.Split(
                nameof(FileDataParse.Words).AsColumn(), " ")
                .Alias(wordList))
        // flatten into one row per word
        .Select(
            fileCol,
            Functions.Explode(wordList.AsColumn())
                .Alias(word));

    // get frequency of word per document
    var termFrequency = words
        // group by attributes of file plus word
        .GroupBy(fileCol, Functions.Lower(word.AsColumn()).Alias(word))
        // generate count
        .Count()
        // order by word count per file descending
        .OrderBy(fileCol, count.AsColumn().Desc());

    // count by word
    termFrequency.CreateOrReplaceTempView(nameof(termFrequency));

    // now count frequency of word across all documents
    var documentFrequency = words
        .GroupBy(Functions.Lower(word.AsColumn()).Alias(word))
        .Agg(Functions.CountDistinct(fileCol).Alias(docFrequency));
    documentFrequency.CreateOrReplaceTempView(nameof(documentFrequency));
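    // --- Not the original continuation: a minimal sketch of the TF-IDF step that the
    // constants above (inverseDocFrequency, termFreq_inverseDocFreq) point at. It joins
    // per-document term counts with corpus-wide document frequency and computes
    // tf-idf = count * log(totalDocs / docFrequency); it reuses the same AsColumn
    // helper as the code above.
    var termFreqInverseDocFreq = termFrequency
        .Join(documentFrequency, word)
        .WithColumn(
            inverseDocFrequency,
            Functions.Log(Functions.Lit((double)totalDocs) / docFrequency.AsColumn()))
        .WithColumn(
            termFreq_inverseDocFreq,
            count.AsColumn() * inverseDocFrequency.AsColumn());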