Example #1
0
 /// <name>Structure</name>
 /// <type>Constructor</type>
 /// <summary>
 /// Initializes a structure object and remembers the struct type it was created for.
 /// </summary>
 /// <param name="type">Struct type stored in this instance's <c>type</c> member.</param>
 public Structure(StructType type)
 {
     // No explicit initializer is needed: the parameterless base constructor runs implicitly.
     this.type = type;
 }
Example #2
0
        /// <summary>
        /// Builds a synthetic tuple struct for the given template symbol and returns it wrapped
        /// in a <see cref="StructType"/>.
        /// </summary>
        /// <remarks>
        /// The template arguments are read as a sequence of types, each optionally followed by
        /// a string value naming the field. A named field is added to the synthetic struct as a
        /// variable whose type refers to a generated template parameter called "_" plus the
        /// zero-based index of the type. All collected types are additionally exposed through an
        /// alias-this member bound to a template parameter called "Types". Reading stops at the
        /// first argument for which <c>AbstractType.Get</c> returns <c>null</c>.
        /// The created struct is also registered in <c>resultStore</c>.
        /// </remarks>
        /// <param name="ds">Symbol to deduce from; anything that is not a <c>TemplateType</c> yields <c>null</c>.</param>
        /// <param name="templateArguments">Template arguments to read; may be <c>null</c>, in which case no members are added.</param>
        /// <returns>The created struct type, or <c>null</c> if <paramref name="ds"/> is not a <c>TemplateType</c>.</returns>
        public AbstractType TryDeduce(DSymbol ds, IEnumerable<ISemantic> templateArguments)
        {
            TemplateTypeParameter tp;
            if (!(ds is TemplateType))
                return null;

            // The synthetic struct borrows its name and source locations from the original definition.
            var orig = ds.Definition;
            var tupleStruct = new DClassLike(DTokens.Struct) {
                NameHash = ds.NameHash,
                Parent = orig.Parent,
                Location = orig.Location,
                EndLocation = orig.EndLocation,
                NameLocation = orig.NameLocation
            };

            var ded = new Templates.DeducedTypeDictionary(tupleStruct);

            if (templateArguments != null)
            {
                var typeList = new List<AbstractType>();

                // IEnumerator<T> is IDisposable; 'using' releases it on every exit path of the loop.
                using (var en = templateArguments.GetEnumerator())
                {
                    if (en.MoveNext())
                    {
                        var next = en.Current;
                        for (int i = 0; ; i++)
                        {
                            var fieldType = AbstractType.Get(next);

                            // Stop at the first argument that yields no type.
                            if (fieldType == null)
                                break;

                            fieldType.NonStaticAccess = true;

                            typeList.Add(fieldType);

                            if (!en.MoveNext())
                                break;

                            next = en.Current;

                            // A string value directly after a type names the field of that type.
                            var fieldNameValue = next as ArrayValue;
                            if (fieldNameValue != null && fieldNameValue.IsString)
                            {
                                var name = fieldNameValue.StringValue;
                                var templateParamName = "_" + i.ToString();
                                tp = new TemplateTypeParameter(templateParamName, CodeLocation.Empty, tupleStruct);
                                ded[tp] = new TemplateParameterSymbol(tp, fieldType);

                                tupleStruct.Add(new DVariable { Name = name, Type = new IdentifierDeclaration(templateParamName) });

                                if (!en.MoveNext())
                                    break;

                                next = en.Current;
                            }
                        }
                    }
                }

                // Expose all collected types as a tuple through an alias-this member.
                var tupleName = "Types";
                tp = new TemplateTypeParameter(tupleName, CodeLocation.Empty, tupleStruct);
                ded[tp] = new TemplateParameterSymbol(tp, new DTuple(null, typeList));

                tupleStruct.Add(new DVariable { NameHash = DVariable.AliasThisIdentifierHash, IsAlias = true, IsAliasThis = true, Type = new IdentifierDeclaration(tupleName) });
            }

            var res = new StructType(tupleStruct, ds.DeclarationOrExpressionBase, ded.Count != 0 ? ded.Values : null);

            resultStore.Add(res, tupleStruct);

            //TODO: Ensure renaming and other AST-based things run properly

            return res;
        }
Example #3
0
        /// <summary>
        /// Runs a sequence of small feature checks, comparing each result against a hard-coded
        /// expected value through <c>AssertEqual</c> / <c>AssertTrue</c>.
        /// </summary>
        public void Test()
        {
            // Instance state: constructed with 5, one Increment() is expected to give 6.
            StructType s2 = new StructType(5);
            s2.Increment();
            AssertEqual(new StackFrame(), 6, s2.Value);

            // Static state: set to 5, one StaticIncrement() is expected to give 6.
            StructType.StaticValue = 5;
            StructType.StaticIncrement();
            AssertEqual(new StackFrame(), 6, StructType.StaticValue);

            // Delegates: create, combine (+=), remove (-=), pass through ReturnDMethod and
            // invoke nested on an array holding s2. StaticValue is expected to still be 6.
            d = new D(DMethod);
            d+=new D(DMethod);
            d-=new D(DMethod);
            d = ReturnDMethod(d);
            s2 = d(d(new StructType[]{s2}))[0];
            AssertEqual(new StackFrame(), 6, StructType.StaticValue);

            // Assigning 'this' to a uint: presumably relies on a user-defined implicit
            // conversion on the enclosing type (not visible here); expected value is 1.
            uint i = this;
            AssertEqual(new StackFrame(), 1u, i);

            // 'this + 2u' is expected to evaluate to 3; which operator/conversion is used
            // cannot be determined from this method alone.
            i = this + 2u;
            AssertEqual(new StackFrame(), 3u, i);

            // Throw and swallow an exception; the empty catch appears deliberate here, as the
            // check is only that execution continues afterwards.
            try{ throw new MyException();}
            catch{}

            // One-dimensional array round trip: element 1 of {1, 2, 3} doubled is expected to be 4.
            int[] results = DoubleValues(new int[]{1, 2, 3});
            AssertEqual(new StackFrame(), 4, results[1]);

            // Two-dimensional input, one-dimensional result: entry 1 is expected to be 7 (3 + 4).
            results = Sums(new int[,]{{1,2}, {3,4}});
            AssertEqual(new StackFrame(), 7, results[1]);

            // Two-dimensional array round trip: element [0, 1] doubled is expected to be 4.
            int[,] otherResults = DoubleValues(new int[,]{{1,2}, {3,4}});
            AssertEqual(new StackFrame(), 4, otherResults[0, 1]);

            // Enum value passed through Echo is expected to come back unchanged.
            AssertEqual(new StackFrame(), Values.One, Echo (Values.One));

            // ref parameters: after swapping 3 and 4, 'second' is expected to hold 3.
            int first = 3, second = 4;
            Swap(ref first, ref second);
            AssertEqual(new StackFrame(), 3, second);

            // Call through a BaseType reference to a ChildType instance; expected result is 4
            // (presumably exercises virtual dispatch -- Compute is not visible here).
            BaseType bt = new ChildType();
            AssertEqual(new StackFrame(), 4, bt.Compute(1));

            AssertEqual(new StackFrame(), 4, LockingMethod());

            // LockingThrowingMethod is expected to throw: reaching AssertTrue(false) means it
            // did not. Any exception raised inside the try is swallowed by the bare catch.
            try{
                LockingThrowingMethod();
                AssertTrue(new StackFrame(), false);
            }
            catch{}
        }
 /// <summary>
 /// Sets the input schema from a <see cref="StructType"/>.
 /// </summary>
 /// <remarks>
 /// Some data sources (e.g. JSON) can infer the input schema automatically
 /// from data. Supplying the schema up front lets the underlying data source
 /// skip that inference step, which speeds up data loading.
 /// </remarks>
 /// <param name="schema">The input schema</param>
 /// <returns>This DataFrameReader object</returns>
 public DataFrameReader Schema(StructType schema)
 {
     // The schema crosses to the JVM side as its JSON representation.
     var jvmSchema = DataType.FromJson(Reference.Jvm, schema.Json);
     Reference.Invoke("schema", jvmSchema);
     return this;
 }
Example #5
0
        /// <summary>
        /// Exercises the <c>CreateDataFrame</c> overloads: once with an explicit schema over
        /// <see cref="GenericRow"/>s, then once per supported element type without a schema.
        /// Each resulting frame is checked with <c>ValidateDataFrame</c>.
        /// </summary>
        public void TestCreateDataFrame()
        {
            // Explicit schema over generic rows.
            {
                List<GenericRow> rows = new List<GenericRow>
                {
                    new GenericRow(new object[] { "Alice", 20, new Date(2020, 1, 1) }),
                    new GenericRow(new object[] { "Bob", 30, new Date(2020, 1, 2) })
                };

                StructType rowSchema = new StructType(new List<StructField>
                {
                    new StructField("Name", new StringType()),
                    new StructField("Age", new IntegerType()),
                    new StructField("Date", new DateType())
                });

                DataFrame frame = _spark.CreateDataFrame(rows, rowSchema);
                ValidateDataFrame(frame, rows.Select(row => row.Values), rowSchema);
            }

            // IEnumerable<string>, no schema supplied.
            {
                string[] values = { "Alice", "Bob", null };
                StructType expectedSchema = SchemaWithSingleColumn(new StringType());

                DataFrame frame = _spark.CreateDataFrame(values);
                ValidateDataFrame(frame, values.Select(value => new object[] { value }), expectedSchema);
            }

            // IEnumerable<int>, no schema supplied.
            {
                int[] values = { 1, 2 };
                StructType expectedSchema = SchemaWithSingleColumn(new IntegerType(), false);

                DataFrame frame = _spark.CreateDataFrame(values);
                ValidateDataFrame(frame, values.Select(value => new object[] { value }), expectedSchema);
            }

            // IEnumerable<int?>, no schema supplied.
            {
                int?[] values = { 1, 2, null };
                StructType expectedSchema = SchemaWithSingleColumn(new IntegerType());

                DataFrame frame = _spark.CreateDataFrame(values);
                ValidateDataFrame(frame, values.Select(value => new object[] { value }), expectedSchema);
            }

            // IEnumerable<double>, no schema supplied.
            {
                double[] values = { 1.2, 2.3 };
                StructType expectedSchema = SchemaWithSingleColumn(new DoubleType(), false);

                DataFrame frame = _spark.CreateDataFrame(values);
                ValidateDataFrame(frame, values.Select(value => new object[] { value }), expectedSchema);
            }

            // IEnumerable<double?>, no schema supplied.
            {
                double?[] values = { 1.2, 2.3, null };
                StructType expectedSchema = SchemaWithSingleColumn(new DoubleType());

                DataFrame frame = _spark.CreateDataFrame(values);
                ValidateDataFrame(frame, values.Select(value => new object[] { value }), expectedSchema);
            }

            // IEnumerable<bool>, no schema supplied.
            {
                bool[] values = { true, false };
                StructType expectedSchema = SchemaWithSingleColumn(new BooleanType(), false);

                DataFrame frame = _spark.CreateDataFrame(values);
                ValidateDataFrame(frame, values.Select(value => new object[] { value }), expectedSchema);
            }

            // IEnumerable<bool?>, no schema supplied.
            {
                bool?[] values = { true, false, null };
                StructType expectedSchema = SchemaWithSingleColumn(new BooleanType());

                DataFrame frame = _spark.CreateDataFrame(values);
                ValidateDataFrame(frame, values.Select(value => new object[] { value }), expectedSchema);
            }

            // IEnumerable<Date>, no schema supplied.
            {
                Date[] values =
                {
                    new Date(2020, 1, 1),
                    new Date(2020, 1, 2),
                    null
                };
                StructType expectedSchema = SchemaWithSingleColumn(new DateType());

                DataFrame frame = _spark.CreateDataFrame(values);
                ValidateDataFrame(frame, values.Select(value => new object[] { value }), expectedSchema);
            }
        }
Example #6
0
 /// <summary>
 /// Write instruction operands into bytecode stream.
 /// </summary>
 /// <remarks>
 /// The operands are emitted in a fixed order: <c>StructType</c>, then <c>Member</c>,
 /// then <c>Decoration</c>. Each one writes itself to <paramref name="writer"/>.
 /// </remarks>
 /// <param name="writer">Bytecode writer.</param>
 public override void WriteOperands(WordWriter writer)
 {
     StructType.Write(writer);
     Member.Write(writer);
     Decoration.Write(writer);
 }
Example #7
0
 /// <summary>
 /// Visits a struct type and wraps it in a <c>TypeValue</c>.
 /// </summary>
 /// <param name="t">Struct type being visited.</param>
 /// <returns>A new <c>TypeValue</c> created from <paramref name="t"/>.</returns>
 public ISymbolValue VisitStructType(StructType t)
 {
     ISymbolValue wrapped = new TypeValue(t);
     return wrapped;
 }
Example #8
0
        /// <summary>
        /// Runs a streaming job that reads card transactions from the Kafka topic
        /// "transactions", pairs transactions of the same card number that occur within five
        /// minutes of each other, and writes to the console those pairs whose computed speed
        /// exceeds the given limit. Blocks until the streaming query terminates.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c>: Kafka bootstrap servers; <c>args[1]</c>: maximum speed, parsed with
        /// <c>double.Parse</c>.
        /// </param>
        public void Run(string[] args)
        {
            string brokers = args[0];
            double speedLimit = double.Parse(args[1]);

            // Get the reference to the Spark execution context
            SparkSession session = SparkSession.Builder()
                .AppName("Credit Card Fraud")
                .GetOrCreate();

            // Without this setting each stage ended up with 200 tasks, which made every batch
            // take around 4 minutes to run
            session.Conf().Set("spark.sql.shuffle.partitions", "1");

            // Create a dataframe that receives data from Kafka
            var kafkaReader = session.ReadStream()
                .Format("kafka")
                .Option("kafka.bootstrap.servers", brokers)
                .Option("subscribe", "transactions");
            DataFrame rawMessages = kafkaReader.Load().SelectExpr("CAST(value AS STRING)");

            /* Schema used to validate the JSON carried by the Kafka messages.
             * JSON example:
             * {
             *      "transaction":"431",
             *      "number":"0015-0000-0000-0000",
             *      "lat":-23.1618,
             *      "lng":-46.47201,
             *      "amount":91.01487,
             *      "category":"pets",
             *      "eventTime":"2021-01-05T19:07:19.3888"
             * }
             */
            var messageSchema = new StructType(new[]
            {
                new StructField("transaction", new StringType()),
                new StructField("number", new StringType()),
                new StructField("lat", new DoubleType()),
                new StructField("lng", new DoubleType()),
                new StructField("amount", new DoubleType()),
                new StructField("category", new StringType()),
                new StructField("eventTime", new TimestampType())
            });

            // Parse the JSON into a struct column, then return all of its fields as a new dataframe
            var jsonColumn = FromJson(rawMessages.Col("value"), messageSchema.SimpleString);
            DataFrame transactions = rawMessages
                .WithColumn("json", jsonColumn)
                .Select("json.*");

            // Build two distinct dataframes so they can be joined to analyse the correlation
            // between transactions
            DataFrame left = transactions.WithWatermark("eventTime", "7 minutes");
            DataFrame right = transactions
                .WithColumnRenamed("transaction", "transaction2")
                .WithColumnRenamed("lat", "lat2")
                .WithColumnRenamed("lng", "lng2")
                .WithColumnRenamed("eventTime", "eventTime2")
                .WithWatermark("eventTime2", "7 minutes");

            // Join to check the correlation of credit card transactions: same card number,
            // different transaction, second event within 5 minutes after the first
            var joinCondition = left.Col("number").EqualTo(right.Col("number"))
                .And(Col("transaction").NotEqual(Col("transaction2")))
                .And(Col("eventTime2").Between(Col("eventTime"), Col("eventTime") + Expr("interval 5 minutes")));
            DataFrame pairs = left.Join(right, joinCondition);

            // Register custom functions to be used on the dataframe
            session.Udf().Register<double, double, double, double, double>(
                "CalculateDistance",
                (lat1, lng1, lat2, lng2) => CalculateDistance(lat1, lng1, lat2, lng2));
            session.Udf().Register<double, Timestamp, Timestamp, double>(
                "CalculateSpeed",
                (dist, eventTime, eventTime2) => CalculateSpeed(dist, eventTime, eventTime2));

            // Add columns holding the UDF results, then keep only the transactions whose
            // speed is above the expected limit (the "maxSpeed" argument)
            DataFrame suspicious = pairs
                .WithColumn("dist", CallUDF("CalculateDistance", Col("lat"), Col("lng"), Col("lat2"), Col("lng2")))
                .WithColumn("speed", CallUDF("CalculateSpeed", Col("dist"), Col("eventTime"), Col("eventTime2")))
                .Where(Col("speed").Gt(speedLimit));

            // Start the streaming query and wait for it to finish
            StreamingQuery running = suspicious
                .WriteStream()
                .Format("console")
                .Option("truncate", "false")
                .OutputMode(OutputMode.Append)
                .Start();

            running.AwaitTermination();
        }
 /// <summary>
 /// Asks a new <c>Applicability</c> instance whether <paramref name="check"/> is applicable
 /// to data with the given schema.
 /// </summary>
 /// <param name="check">Check to evaluate.</param>
 /// <param name="schema">Schema of the data.</param>
 /// <param name="sparkSession">Session handed to the <c>Applicability</c> constructor.</param>
 /// <returns>The result of <c>Applicability.IsApplicable</c>.</returns>
 private CheckApplicability IsCheckApplicableToData(Check check, StructType schema, SparkSession sparkSession)
 {
     var applicability = new Applicability(sparkSession);
     return applicability.IsApplicable(check, schema);
 }
 /// <summary>
 /// Asks a new <c>Applicability</c> instance whether the given analyzers are applicable
 /// to data with the given schema.
 /// </summary>
 /// <param name="analyzers">Analyzers to evaluate.</param>
 /// <param name="schema">Schema of the data.</param>
 /// <param name="sparkSession">Session handed to the <c>Applicability</c> constructor.</param>
 /// <returns>The result of <c>Applicability.IsApplicable</c>.</returns>
 private AnalyzersApplicability AreCheckApplicableToData(
     IEnumerable <IAnalyzer <IMetric> > analyzers,
     StructType schema,
     SparkSession sparkSession)
 {
     var applicability = new Applicability(sparkSession);
     return applicability.IsApplicable(analyzers, schema);
 }
Example #11
0
        /// <summary>
        /// Builds the AST sketched below by hand (two struct declarations plus two array
        /// allocations inside one function) and passes it, together with the expected set of
        /// data types, to <c>CheckAnswer</c>.
        /// </summary>
        public void TestComplex()
        {
            /*
             * def kju() : Unit {
             *  struct P {
             *      x : [Int];
             *  };
             *
             *  struct S {
             *    x : Int;
             *    y : P;
             *  };
             *
             *  var s : [S] = new(S, 10);
             *  var arr : [[Bool]] = new([Bool], 20);
             * }
             */

            // Every node shares the same placeholder source range.
            var noRange = new Core.Lexer.Range(new StringLocation(0), new StringLocation(0));

            // struct P { x : [Int]; }
            var structP = new StructDeclaration(
                noRange,
                "P",
                new List <StructField>
                {
                    new StructField(noRange, "x", new ArrayType(IntType.Instance))
                });
            var typeP = StructType.GetInstance(structP);

            // struct S { x : Int; y : P; }
            var structS = new StructDeclaration(
                noRange,
                "S",
                new List <StructField>
                {
                    new StructField(noRange, "x", IntType.Instance),
                    new StructField(noRange, "y", typeP)
                });
            var typeS = StructType.GetInstance(structS);

            // var s : [S] = new(S, 10);
            var allocS = new ArrayAlloc(noRange, typeS, new IntegerLiteral(noRange, 10));
            var declareS = new VariableDeclaration(noRange, new ArrayType(typeS), "s", allocS);

            // var arr : [[Bool]] = new([Bool], 20);
            var allocArr = new ArrayAlloc(
                noRange,
                new ArrayType(BoolType.Instance),
                new IntegerLiteral(noRange, 20));
            var declareArr = new VariableDeclaration(
                noRange,
                new ArrayType(new ArrayType(BoolType.Instance)),
                "arr",
                allocArr);

            // NOTE(review): the function is declared with return type [Unit] although the
            // sketch above says Unit -- confirm whether this is intended.
            var kju = new FunctionDeclaration(
                noRange,
                "kju",
                new ArrayType(UnitType.Instance),
                new List <VariableDeclaration>(),
                new InstructionBlock(
                    noRange,
                    new List <Expression> { structP, structS, declareS, declareArr }),
                false);

            var program = new Program(
                noRange,
                new List <StructDeclaration>(),
                new List <FunctionDeclaration> { kju });

            var expected = new HashSet <DataType>
            {
                typeP,
                typeS,
                new ArrayType(BoolType.Instance),
                new ArrayType(IntType.Instance),
                new ArrayType(typeS),
                new ArrayType(new ArrayType(BoolType.Instance))
            };

            this.CheckAnswer(program, expected);
        }
Example #12
0
 /// <summary>
 /// Creates the context for descending into <paramref name="type"/>.
 /// </summary>
 /// <param name="type">Struct type passed to the new context.</param>
 /// <returns>A new <c>Context</c> built from <c>Level + 1</c> and <paramref name="type"/>.</returns>
 public Context Enter(StructType type)
 {
     var nestedLevel = Level + 1;
     return new Context(nestedLevel, type);
 }
Example #13
0
 /// <summary>
 /// Creates a context from a level and the current struct type.
 /// </summary>
 /// <param name="level">Value stored in <c>Level</c>.</param>
 /// <param name="current">Value stored in <c>Current</c>.</param>
 public Context(int level, StructType current)
 {
     this.Level = level;
     this.Current = current;
 }
Example #14
0
 /// <summary>
 /// Creates a decoder for the given struct type by capturing its field names and building
 /// one decoder per field.
 /// </summary>
 /// <param name="structType">Struct type whose <c>Fields</c> are read.</param>
 public StructDecoder(StructType structType)
 {
     // Field names, in the order in which Fields enumerates them.
     _names = (from field in structType.Fields
               select field.Name).ToArray();

     _decoders = SchemaToDecoder.FieldsToDecoders(structType.Fields);
 }
Example #15
0
 /// <name>Structure</name>
 /// <type>Constructor</type>
 /// <summary>
 /// Creates a structure object bound to the given struct type.
 /// </summary>
 /// <param name="type">Struct type kept in the <c>type</c> member of the new object.</param>
 public Structure(StructType type)
 {
     // The base class's parameterless constructor is called implicitly.
     this.type = type;
 }
Example #16
0
        /// <summary>
        /// Creates the <c>DataType</c> (struct, class or interface) for an AST class
        /// declaration, registers it in its parent scope, and queues the remaining
        /// compilation steps (attributes, base types, member population).
        /// </summary>
        /// <param name="astClass">AST node of the class, struct or interface to create.</param>
        /// <param name="parent">Scope that will contain the new type.</param>
        /// <param name="parentItems">Members of the enclosing block; passed to <c>FlattenClass</c> for partial classes.</param>
        public void CreateClass(AstClass astClass, Namescope parent, IEnumerable <AstBlockMember> parentItems)
        {
            var sources = new List <string>();

            // Partial declarations are replaced by the result of FlattenClass, which also
            // receives 'sources' (presumably to collect the files of all parts -- verify).
            if (astClass.Modifiers.HasFlag(Modifiers.Partial))
            {
                astClass = FlattenClass(astClass, sources, parentItems);
            }

            DataType result;

            // Nothing to do if a type was already created for this AST node.
            if (_cachedClasses.TryGetValue(astClass, out result))
            {
                return;
            }

            var src       = astClass.Name.Source;
            var modifiers = GetTypeModifiers(parent, astClass.Modifiers);

            // Instantiate the concrete type object matching the declaration kind.
            switch (astClass.Type)
            {
            case AstClassType.Struct:
                result = new StructType(src, parent, astClass.DocComment, modifiers, astClass.Name.Symbol);
                break;

            case AstClassType.Class:
                result = new ClassType(src, parent, astClass.DocComment, modifiers, astClass.Name.Symbol);
                break;

            case AstClassType.Interface:
                // An explicit 'abstract' is reported as an error; the interface type itself
                // is always created with the Abstract modifier added.
                if (modifiers.HasFlag(Modifiers.Abstract))
                {
                    Log.Error(src, ErrorCode.E0000, "'abstract' is not valid for interface");
                }

                result = new InterfaceType(src, parent, astClass.DocComment, modifiers | Modifiers.Abstract, astClass.Name.Symbol);
                break;

            default:
                Log.Error(src, ErrorCode.I3045, "<" + astClass.Type + "> is not a class, struct or interface type");
                return;
            }

            // Register the new type with its parent. An unsupported parent kind is only
            // logged; processing continues afterwards.
            if (parent is DataType)
            {
                (parent as DataType).NestedTypes.Add(result);
            }
            else if (parent is Namespace)
            {
                (parent as Namespace).Types.Add(result);
            }
            else
            {
                Log.Error(result.Source, ErrorCode.I3046, "<" + astClass.Type + "> is not allowed in this context");
            }

            // Cache before processing members so later calls for the same node return early.
            _cachedClasses.Add(astClass, result);

            if (astClass.OptionalGeneric != null)
            {
                CreateGenericSignature(result, astClass.OptionalGeneric, false);
            }

            result.SetBlock(new Block(src, result, null, result.Modifiers & Modifiers.ProtectionModifiers, ".block"));

            foreach (var s in sources)
            {
                result.SourceFiles.Add(s);
            }

            // Block members declared inside the class are created with the new type as parent.
            foreach (var b in astClass.Members)
            {
                if (b is AstBlockBase)
                {
                    _compiler.AstProcessor.CreateBlock(b as AstBlockBase, result, astClass.Members);
                }
            }

            // Attribute compilation is deferred through EnqueueAttributes.
            if (astClass.Attributes.Count > 0)
            {
                EnqueueAttributes(result, x =>
                {
                    result.SetAttributes(_compiler.CompileAttributes(result.Parent, astClass.Attributes));

                    // Remove default constructor if TargetSpecificType
                    if (result.HasAttribute(_ilf.Essentials.TargetSpecificTypeAttribute) &&
                        result.Constructors.Count == 1 && result.Constructors[0].IsGenerated)
                    {
                        result.Constructors.Clear();
                    }
                });
            }

            // Queue the two follow-up steps for this type: compiling its base types and
            // populating its members.
            EnqueueType(result,
                        x => CompileBaseTypes(x, astClass.Bases),
                        x => PopulateClass(astClass, x));
        }
Example #17
0
 /// <summary>
 /// Checks that the transform is valid and derives the output schema from the input schema.
 ///
 /// Interactions between parameters during Transform are validated, and an exception is
 /// raised if any parameter value is invalid.
 ///
 /// A typical implementation first verifies the schema change and parameter validity,
 /// including complex parameter interaction checks.
 /// </summary>
 /// <param name="value">
 /// The <see cref="StructType"/> of the <see cref="DataFrame"/> which will be transformed.
 /// </param>
 /// <returns>
 /// The <see cref="StructType"/> of the output schema that would have been derived from the
 /// input schema, if Transform had been called.
 /// </returns>
 public override StructType TransformSchema(StructType value)
 {
     // The input schema is handed to the JVM side as its JSON representation.
     var jvmInputSchema = DataType.FromJson(Reference.Jvm, value.Json);
     var jvmOutputSchema = (JvmObjectReference)Reference.Invoke("transformSchema", jvmInputSchema);
     return new StructType(jvmOutputSchema);
 }
Example #18
0
 /// <summary>
 /// Sets the input schema from a <see cref="StructType"/>.
 /// </summary>
 /// <remarks>
 /// Some data sources (e.g. JSON) can infer the input schema automatically
 /// from data. Providing the schema here allows the underlying data source to
 /// skip the inference step, which speeds up data loading.
 /// </remarks>
 /// <param name="schema">The input schema</param>
 /// <returns>This DataFrameReader object</returns>
 public DataFrameReader Schema(StructType schema)
 {
     // The schema is passed to the JVM object as its JSON representation.
     var jvmSchema = DataType.FromJson(_jvmObject.Jvm, schema.Json);
     _jvmObject.Invoke("schema", jvmSchema);
     return this;
 }
Example #19
0
 /// <summary>
 /// Creates an external table based on a data source, a schema and a set of options,
 /// then returns the corresponding DataFrame. The call is forwarded to the catalog proxy.
 /// </summary>
 /// <param name="tableName">Name of the table</param>
 /// <param name="source">Data source</param>
 /// <param name="schema">Schema of the table</param>
 /// <param name="options">Options to create table</param>
 /// <returns>The DataFrame returned by the catalog proxy for the created table</returns>
 public DataFrame CreateExternalTable(string tableName, string source, StructType schema, Dictionary<string, string> options)
 {
     DataFrame table = catalogProxy.CreateExternalTable(tableName, source, schema, options);
     return table;
 }
Example #20
0
        /// <summary>
        /// Prepares the dialog for adding, editing or viewing a property that belongs to an
        /// agent type or a struct type.
        /// </summary>
        /// <param name="canBeEdit">Whether the input controls are enabled; also selects the window caption.</param>
        /// <param name="agent">Owning agent type; may be null if <paramref name="structType"/> is given.</param>
        /// <param name="structType">Owning struct type; may be null if <paramref name="agent"/> is given.</param>
        /// <param name="prop">Property to display, or null to start a new property.</param>
        /// <param name="canBePar">Whether the "is local" checkbox may be shown.</param>
        public void Initialize(bool canBeEdit, AgentType agent, StructType structType, PropertyDef prop, bool canBePar)
        {
            // At least one owner (agent or struct) must be supplied.
            Debug.Check(agent != null || structType != null);

            // Cleared for the duration of the setup; set again on the last line.
            _initialized = false;

            _isModified = false;
            _shouldCheckMembersInWorkspace = false;
            _isNew            = (prop == null);
            _agent            = agent;
            _structType       = structType;
            _originalProperty = prop;

            setTypes();

            if (_isNew)
            {
                if (canBeEdit)
                {
                    this.Text = Resources.AddProperty;
                }
                else
                {
                    this.Text = Resources.ViewProperty;
                }

                // A new property is owned by the struct when one is given, otherwise by the agent.
                if (_structType != null)
                {
                    _property = new PropertyDef(null, null, _structType.Name, "", "", "");
                }
                else if (agent != null)
                {
                    _property = new PropertyDef(agent, null, agent.Name, "", "", "");
                }

                _property.IsPublic = false;

                resetProperty(_property, _property.IsPar);
            }
            else
            {
                if (canBeEdit)
                {
                    this.Text = Resources.EditProperty;
                }
                else
                {
                    this.Text = Resources.ViewProperty;
                }

                // _property is not assigned in this branch; presumably resetProperty does so -- verify.
                resetProperty(prop, prop.IsPar);
            }

            //this.customizedCheckBox.Visible = canBeEdit && !_property.IsInherited && agent != null;
            this.customizedCheckBox.Visible = false;
            this.isLocalCheckBox.Checked    = (_structType == null) && _property.IsPar;
            this.isLocalCheckBox.Visible    = canBePar && (_structType == null) && !_property.IsMember;
            this.isLocalCheckBox.Enabled    = canBeEdit;
            this.nameTextBox.Enabled        = canBeEdit;

            // '&&' binds tighter than '||': the type stays changeable in view mode only when
            // there is no struct (or it is customized) and the property's type is changeable.
            this.arrayCheckBox.Enabled      = canBeEdit || ((_structType == null || _structType.IsCustomized) && _property.IsChangeableType);
            this.typeComboBox.Enabled       = canBeEdit || ((_structType == null || _structType.IsCustomized) && _property.IsChangeableType);
            this.isStaticCheckBox.Enabled   = canBeEdit;
            this.isPublicCheckBox.Enabled   = canBeEdit;
            this.isConstCheckBox.Enabled    = canBeEdit;
            this.dispTextBox.Enabled        = canBeEdit;
            this.descTextBox.Enabled        = canBeEdit;

            this.nameTextBox.Focus();

            // Put the caret after any existing name; otherwise just activate the text box.
            if (this.nameTextBox.TextLength <= 0)
            {
                this.nameTextBox.Select();
            }
            else
            {
                this.nameTextBox.SelectionStart = this.nameTextBox.TextLength;
            }

            _initialized = true;
        }
Example #21
0
 /// <summary>
 /// Creates a <see cref="DataFrame"/> from an <see cref="IEnumerable"/> of
 /// <see cref="GenericRow"/>s using the given schema.
 /// The structure of every <see cref="GenericRow"/> in the provided
 /// <see cref="IEnumerable"/> must match the provided schema; otherwise a
 /// runtime exception occurs.
 /// </summary>
 /// <param name="data">List of Row objects</param>
 /// <param name="schema">Schema as StructType</param>
 /// <returns>DataFrame object</returns>
 public DataFrame CreateDataFrame(IEnumerable <GenericRow> data, StructType schema)
 {
     // The schema is handed to the JVM side as its JSON representation.
     var jvmSchema = DataType.FromJson(_jvmObject.Jvm, schema.Json);
     var jvmFrame = (JvmObjectReference)_jvmObject.Invoke("createDataFrame", data, jvmSchema);
     return new DataFrame(jvmFrame);
 }
Example #22
0
        public void TestSignaturesV2_3_X()
        {
            Assert.IsType <Column>(_df["name"]);
            Assert.IsType <Column>(_df["age"]);

            Assert.IsType <DataFrame>(_df.ToDF());
            Assert.IsType <DataFrame>(_df.ToDF("name2", "age2"));

            StructType schema = _df.Schema();

            Assert.NotNull(schema);

            _df.PrintSchema();

            _df.Explain();
            _df.Explain(true);
            _df.Explain(false);

            Assert.Equal(2, _df.Columns().ToArray().Length);

            var expected = new List <Tuple <string, string> >
            {
                new Tuple <string, string>("age", "integer"),
                new Tuple <string, string>("name", "string")
            };

            Assert.Equal(expected, _df.DTypes());

            Assert.IsType <bool>(_df.IsLocal());

            Assert.IsType <bool>(_df.IsStreaming());

            using (var tempDir = new TemporaryDirectory())
            {
                // The following is required for *CheckPoint().
                _spark.SparkContext.SetCheckpointDir(tempDir.Path);

                Assert.IsType <DataFrame>(_df.Checkpoint());
                Assert.IsType <DataFrame>(_df.Checkpoint(false));

                Assert.IsType <DataFrame>(_df.LocalCheckpoint());
                Assert.IsType <DataFrame>(_df.LocalCheckpoint(false));
            }

            Assert.IsType <DataFrame>(_df.WithWatermark("time", "10 minutes"));

            _df.Show();
            _df.Show(10);
            _df.Show(10, 10);
            _df.Show(10, 10, true);

            Assert.IsType <DataFrame>(_df.Join(_df));
            Assert.IsType <DataFrame>(_df.Join(_df, "name"));
            Assert.IsType <DataFrame>(_df.Join(_df, new[] { "name" }));
            Assert.IsType <DataFrame>(_df.Join(_df, new[] { "name" }, "outer"));
            Assert.IsType <DataFrame>(_df.Join(_df, _df["age"] == _df["age"]));
            Assert.IsType <DataFrame>(_df.Join(_df, _df["age"] == _df["age"], "outer"));

            Assert.IsType <DataFrame>(_df.CrossJoin(_df));

            Assert.IsType <DataFrame>(_df.SortWithinPartitions("age"));
            Assert.IsType <DataFrame>(_df.SortWithinPartitions("age", "name"));
            Assert.IsType <DataFrame>(_df.SortWithinPartitions());
            Assert.IsType <DataFrame>(_df.SortWithinPartitions(_df["age"]));
            Assert.IsType <DataFrame>(_df.SortWithinPartitions(_df["age"], _df["name"]));

            Assert.IsType <DataFrame>(_df.Sort("age"));
            Assert.IsType <DataFrame>(_df.Sort("age", "name"));
            Assert.IsType <DataFrame>(_df.Sort());
            Assert.IsType <DataFrame>(_df.Sort(_df["age"]));
            Assert.IsType <DataFrame>(_df.Sort(_df["age"], _df["name"]));

            Assert.IsType <DataFrame>(_df.OrderBy("age"));
            Assert.IsType <DataFrame>(_df.OrderBy("age", "name"));
            Assert.IsType <DataFrame>(_df.OrderBy());
            Assert.IsType <DataFrame>(_df.OrderBy(_df["age"]));
            Assert.IsType <DataFrame>(_df.OrderBy(_df["age"], _df["name"]));

            Assert.IsType <DataFrame>(_df.Hint("broadcast"));
            Assert.IsType <DataFrame>(_df.Hint("broadcast", new[] { "hello", "world" }));

            Assert.IsType <Column>(_df.Col("age"));

            Assert.IsType <Column>(_df.ColRegex("age"));

            Assert.IsType <DataFrame>(_df.As("alias"));

            Assert.IsType <DataFrame>(_df.Alias("alias"));

            Assert.IsType <DataFrame>(_df.Select("age"));
            Assert.IsType <DataFrame>(_df.Select("age", "name"));
            Assert.IsType <DataFrame>(_df.Select());
            Assert.IsType <DataFrame>(_df.Select(_df["age"]));
            Assert.IsType <DataFrame>(_df.Select(_df["age"], _df["name"]));

            Assert.IsType <DataFrame>(_df.SelectExpr());
            Assert.IsType <DataFrame>(_df.SelectExpr("age * 2"));
            Assert.IsType <DataFrame>(_df.SelectExpr("age * 2", "abs(age)"));

            Assert.IsType <DataFrame>(_df.Filter(_df["age"] > 21));
            Assert.IsType <DataFrame>(_df.Filter("age > 21"));

            Assert.IsType <DataFrame>(_df.Where(_df["age"] > 21));
            Assert.IsType <DataFrame>(_df.Where("age > 21"));

            Assert.IsType <RelationalGroupedDataset>(_df.GroupBy("age"));
            Assert.IsType <RelationalGroupedDataset>(_df.GroupBy("age", "name"));
            Assert.IsType <RelationalGroupedDataset>(_df.GroupBy());
            Assert.IsType <RelationalGroupedDataset>(_df.GroupBy(_df["age"]));
            Assert.IsType <RelationalGroupedDataset>(_df.GroupBy(_df["age"], _df["name"]));

            {
                RelationalGroupedDataset df =
                    _df.WithColumn("tempAge", _df["age"]).GroupBy("name");

                Assert.IsType <DataFrame>(df.Mean("age"));
                Assert.IsType <DataFrame>(df.Mean("age", "tempAge"));

                Assert.IsType <DataFrame>(df.Max("age"));
                Assert.IsType <DataFrame>(df.Max("age", "tempAge"));

                Assert.IsType <DataFrame>(df.Avg("age"));
                Assert.IsType <DataFrame>(df.Avg("age", "tempAge"));

                Assert.IsType <DataFrame>(df.Min("age"));
                Assert.IsType <DataFrame>(df.Min("age", "tempAge"));

                Assert.IsType <DataFrame>(df.Sum("age"));
                Assert.IsType <DataFrame>(df.Sum("age", "tempAge"));
            }

            Assert.IsType <RelationalGroupedDataset>(_df.Rollup("age"));
            Assert.IsType <RelationalGroupedDataset>(_df.Rollup("age", "name"));
            Assert.IsType <RelationalGroupedDataset>(_df.Rollup());
            Assert.IsType <RelationalGroupedDataset>(_df.Rollup(_df["age"]));
            Assert.IsType <RelationalGroupedDataset>(_df.Rollup(_df["age"], _df["name"]));

            Assert.IsType <RelationalGroupedDataset>(_df.Cube("age"));
            Assert.IsType <RelationalGroupedDataset>(_df.Cube("age", "name"));
            Assert.IsType <RelationalGroupedDataset>(_df.Cube());
            Assert.IsType <RelationalGroupedDataset>(_df.Cube(_df["age"]));
            Assert.IsType <RelationalGroupedDataset>(_df.Cube(_df["age"], _df["name"]));

            Assert.IsType <DataFrame>(_df.Agg(Avg(_df["age"])));
            Assert.IsType <DataFrame>(_df.Agg(Avg(_df["age"]), Avg(_df["name"])));

            Assert.IsType <DataFrame>(_df.Limit(10));

            Assert.IsType <DataFrame>(_df.Union(_df));

            Assert.IsType <DataFrame>(_df.UnionByName(_df));

            Assert.IsType <DataFrame>(_df.Intersect(_df));

            Assert.IsType <DataFrame>(_df.Except(_df));

            Assert.IsType <DataFrame>(_df.Sample(0.5));
            Assert.IsType <DataFrame>(_df.Sample(0.5, true));
            Assert.IsType <DataFrame>(_df.Sample(0.5, false, 12345));

            Assert.IsType <DataFrame[]>(_df.RandomSplit(new[] { 0.2, 0.8 }));
            Assert.IsType <DataFrame[]>(_df.RandomSplit(new[] { 0.2, 0.8 }, 12345));

            Assert.IsType <DataFrame>(_df.WithColumn("age2", _df["age"]));

            Assert.IsType <DataFrame>(_df.WithColumnRenamed("age", "age2"));

            Assert.IsType <DataFrame>(_df.Drop());
            Assert.IsType <DataFrame>(_df.Drop("age"));
            Assert.IsType <DataFrame>(_df.Drop("age", "name"));

            Assert.IsType <DataFrame>(_df.Drop(_df["age"]));

            Assert.IsType <DataFrame>(_df.DropDuplicates());
            Assert.IsType <DataFrame>(_df.DropDuplicates("age"));
            Assert.IsType <DataFrame>(_df.DropDuplicates("age", "name"));

            Assert.IsType <DataFrame>(_df.Describe());
            Assert.IsType <DataFrame>(_df.Describe("age"));
            Assert.IsType <DataFrame>(_df.Describe("age", "name"));

            Assert.IsType <DataFrame>(_df.Summary());
            Assert.IsType <DataFrame>(_df.Summary("count"));
            Assert.IsType <DataFrame>(_df.Summary("count", "mean"));

            Assert.IsType <Row[]>(_df.Head(2).ToArray());
            Assert.IsType <Row>(_df.Head());

            Assert.IsType <Row>(_df.First());

            Assert.IsType <Row[]>(_df.Take(3).ToArray());

            Assert.IsType <Row[]>(_df.Collect().ToArray());

            Assert.IsType <Row[]>(_df.ToLocalIterator().ToArray());

            Assert.IsType <long>(_df.Count());

            Assert.IsType <DataFrame>(_df.Repartition(2));
            Assert.IsType <DataFrame>(_df.Repartition(2, _df["age"]));
            Assert.IsType <DataFrame>(_df.Repartition(_df["age"]));
            Assert.IsType <DataFrame>(_df.Repartition());

            Assert.IsType <DataFrame>(_df.RepartitionByRange(2, _df["age"]));
            Assert.IsType <DataFrame>(_df.RepartitionByRange(_df["age"]));

            Assert.IsType <DataFrame>(_df.Coalesce(1));

            Assert.IsType <DataFrame>(_df.Distinct());

            Assert.IsType <DataFrame>(_df.Persist());

            Assert.IsType <DataFrame>(_df.Persist(StorageLevel.DISK_ONLY));

            Assert.IsType <DataFrame>(_df.Cache());

            Assert.IsType <StorageLevel>(_df.StorageLevel());

            Assert.IsType <DataFrame>(_df.Unpersist());

            _df.CreateTempView("view");
            _df.CreateOrReplaceTempView("view");

            _df.CreateGlobalTempView("global_view");
            _df.CreateOrReplaceGlobalTempView("global_view");
        }
Example #23
0
 /// <summary>
 /// Not implemented: every call throws <see cref="NotImplementedException"/>.
 /// </summary>
 /// <param name="path">Unused.</param>
 /// <param name="schema">Unused.</param>
 /// <param name="options">Unused.</param>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public IDataFrameProxy ReadDataFrame(string path, StructType schema, System.Collections.Generic.Dictionary<string, string> options)
     => throw new NotImplementedException();
Example #24
0
 /// <summary>
 /// Hands the given array back to the caller unchanged (same reference, no copy).
 /// </summary>
 /// <param name="s">Array to return.</param>
 /// <returns>The very same array instance that was passed in.</returns>
 public StructType[] DMethod(StructType[] s) => s;
Example #25
0
 /// <summary>
 /// Builds a <see cref="MockDataFrameProxy"/> that records the arguments of this call.
 /// </summary>
 /// <param name="path">Stored as the first recorded argument.</param>
 /// <param name="schema">Stored as the second recorded argument.</param>
 /// <param name="delimiter">Stored as the third recorded argument.</param>
 /// <returns>A new mock proxy created from the recorded arguments and this instance.</returns>
 public IDataFrameProxy TextFile(string path, StructType schema, string delimiter)
 {
     // Pack the call arguments in declaration order before handing them to the mock.
     var recordedArguments = new object[] { path, schema, delimiter };

     return new MockDataFrameProxy(recordedArguments, this);
 }
Example #26
0
 /// <summary>
 /// Creates a struct model and stores its ID type, parent model and compilation unit.
 /// </summary>
 /// <param name="intIDType">Value assigned to <c>IDType</c>.</param>
 /// <param name="objParent">Value assigned to <c>Parent</c>.</param>
 /// <param name="objCompilationUnit">Value assigned to <c>CompilationUnit</c>; defaults to null.</param>
 public LanguageStructModel(StructType intIDType, LanguageStructModel objParent, CompilationUnitModel objCompilationUnit = null)
 {
     this.IDType = intIDType;
     this.Parent = objParent;
     this.CompilationUnit = objCompilationUnit;
 }
Example #27
0
        /// <summary>
        /// Smoke test that calls the DataFrame methods and overloads listed below on the
        /// shared <c>_df</c> fixture. Only the schema (non-null) and the column count (2)
        /// are asserted; every other return value is discarded, so the remaining calls
        /// only check that the invocation completes without throwing.
        /// NOTE(review): the "V2_3_X" suffix presumably refers to the Spark 2.3.x API
        /// surface - confirm against the test fixture.
        /// </summary>
        public void TestSignaturesV2_3_X()
        {
            // Column access through the indexer.
            Column col = _df["name"];

            col = _df["age"];

            DataFrame df = _df.ToDF();

            df = df.ToDF("name2", "age2");

            StructType schema = _df.Schema();

            Assert.NotNull(schema);

            _df.PrintSchema();

            _df.Explain();
            _df.Explain(true);
            _df.Explain(false);

            Assert.Equal(2, _df.Columns().ToArray().Length);

            _df.IsLocal();

            _df.IsStreaming();

            using (var tempDir = new TemporaryDirectory())
            {
                // The following is required for *CheckPoint().
                _spark.SparkContext.SetCheckpointDir(tempDir.Path);

                _df.Checkpoint();
                _df.Checkpoint(false);

                _df.LocalCheckpoint();
                _df.LocalCheckpoint(false);
            }

            _df.WithWatermark("time", "10 minutes");

            _df.Show();
            _df.Show(10);
            _df.Show(10, 10);
            _df.Show(10, 10, true);

            // Joins: the DataFrame is joined with itself in every overload.
            _df.Join(_df);
            _df.Join(_df, "name");
            _df.Join(_df, new[] { "name" });
            _df.Join(_df, new[] { "name" }, "outer");
            _df.Join(_df, _df["age"] == _df["age"]);
            _df.Join(_df, _df["age"] == _df["age"], "outer");

            _df.CrossJoin(_df);

            // Sorting overloads: by column name(s), with no arguments, and by Column object(s).
            _df.SortWithinPartitions("age");
            _df.SortWithinPartitions("age", "name");
            _df.SortWithinPartitions();
            _df.SortWithinPartitions(_df["age"]);
            _df.SortWithinPartitions(_df["age"], _df["name"]);

            _df.Sort("age");
            _df.Sort("age", "name");
            _df.Sort();
            _df.Sort(_df["age"]);
            _df.Sort(_df["age"], _df["name"]);

            _df.OrderBy("age");
            _df.OrderBy("age", "name");
            _df.OrderBy();
            _df.OrderBy(_df["age"]);
            _df.OrderBy(_df["age"], _df["name"]);

            _df.Hint("broadcast");
            _df.Hint("broadcast", new[] { "hello", "world" });

            _df.Col("age");

            _df.ColRegex("age");

            _df.As("alias");

            _df.Alias("alias");

            // Projection and filtering.
            _df.Select("age");
            _df.Select("age", "name");
            _df.Select();
            _df.Select(_df["age"]);
            _df.Select(_df["age"], _df["name"]);

            _df.SelectExpr();
            _df.SelectExpr("age * 2");
            _df.SelectExpr("age * 2", "abs(age)");

            _df.Filter(_df["age"] > 21);
            _df.Filter("age > 21");

            _df.Where(_df["age"] > 21);
            _df.Where("age > 21");

            // Grouping overloads: by column name(s), with no arguments, and by Column object(s).
            _df.GroupBy("age");
            _df.GroupBy("age", "name");
            _df.GroupBy();
            _df.GroupBy(_df["age"]);
            _df.GroupBy(_df["age"], _df["name"]);

            _df.Rollup("age");
            _df.Rollup("age", "name");
            _df.Rollup();
            _df.Rollup(_df["age"]);
            _df.Rollup(_df["age"], _df["name"]);

            _df.Cube("age");
            _df.Cube("age", "name");
            _df.Cube();
            _df.Cube(_df["age"]);
            _df.Cube(_df["age"], _df["name"]);

            _df.Agg(Avg(_df["age"]));
            _df.Agg(Avg(_df["age"]), Avg(_df["name"]));

            _df.Limit(10);

            // Set-style operations, each applied to the DataFrame and itself.
            _df.Union(_df);

            _df.UnionByName(_df);

            _df.Intersect(_df);

            _df.Except(_df);

            _df.Sample(0.5);
            _df.Sample(0.5, true);
            _df.Sample(0.5, false, 12345);

            _df.RandomSplit(new[] { 0.2, 0.8 });
            _df.RandomSplit(new[] { 0.2, 0.8 }, 12345);

            _df.WithColumn("age2", _df["age"]);

            _df.WithColumnRenamed("age", "age2");

            _df.Drop();
            _df.Drop("age");
            _df.Drop("age", "name");

            _df.Drop(_df["age"]);

            _df.DropDuplicates();
            _df.DropDuplicates("age");
            _df.DropDuplicates("age", "name");

            _df.Describe();
            _df.Describe("age");
            _df.Describe("age", "name");

            _df.Summary();
            _df.Summary("count");
            _df.Summary("count", "mean");

            // Row retrieval; results are discarded.
            _df.Head(2);
            _df.Head();

            _df.First();

            _df.Take(3).ToArray();

            _df.Collect().ToArray();

            _df.ToLocalIterator().ToArray();

            _df.Count();

            _df.Repartition(2);
            _df.Repartition(2, _df["age"]);
            _df.Repartition(_df["age"]);
            _df.Repartition();

            _df.RepartitionByRange(2, _df["age"]);
            _df.RepartitionByRange(_df["age"]);

            _df.Coalesce(1);

            _df.Distinct();

            _df.Persist();

            _df.Cache();

            _df.Unpersist();

            // Each Create* call is followed by its CreateOrReplace* counterpart using the same view name.
            _df.CreateTempView("view");
            _df.CreateOrReplaceTempView("view");

            _df.CreateGlobalTempView("global_view");
            _df.CreateOrReplaceGlobalTempView("global_view");
        }
        /// <summary>
        /// Wraps a matched syntax node into its semantic representation.
        /// The variable's or method's base type will be resolved (if auto type, the initializer's type will be taken).
        /// A class' base class will be searched.
        /// etc..
        /// </summary>
        /// <param name="m">The matched node. Its parent block is pushed as a new scope if it differs from the context's current scoped block.</param>
        /// <param name="ctxt">The resolution context used for lookups, error logging and scope handling.</param>
        /// <param name="resultBase">
        /// Optional base result. If it is a <c>DSymbol</c>, its deduced template parameters are introduced
        /// to the current context for the duration of this call.
        /// </param>
        /// <param name="typeBase">
        /// Optional object that is handed to the created symbols as <c>ISyntaxRegion</c>; for module nodes
        /// its string form is compared with the module name to decide between a package and a module symbol.
        /// </param>
        /// <returns>
        /// The symbol created for <paramref name="m"/>, or null if the node kind is not handled
        /// or nothing could be built for it.
        /// </returns>
        public static AbstractType HandleNodeMatch(
            INode m,
            ResolverContextStack ctxt,
            AbstractType resultBase = null,
            object typeBase = null)
        {
            stackNum_HandleNodeMatch++;

            // Track what has actually been set up so the finally block undoes exactly that,
            // even if resolution throws half-way through.
            var pushedScope = false;
            var introducedTemplateParams = false;

            try
            {
                if (m.Parent != ctxt.ScopedBlock && m.Parent is IBlockNode)
                {
                    ctxt.PushNewScope((IBlockNode)m.Parent);
                    pushedScope = true;
                }

                //HACK: Really dirty stack overflow prevention via manually counting call depth
                var canResolveBaseGenerally = stackNum_HandleNodeMatch < 6;

                // Base types are not resolved if the depth limit is hit, if the options forbid it,
                // or if the node's type is spelled exactly like the node's own name.
                var DoResolveBaseType = canResolveBaseGenerally &&
                    !ctxt.Options.HasFlag(ResolutionOptions.DontResolveBaseClasses) &&
                    (m.Type == null || m.Type.ToString(false) != m.Name);

                AbstractType ret = null;

                // To support resolving type parameters to concrete types if the context allows this, introduce all deduced parameters to the current context
                if (canResolveBaseGenerally && resultBase is DSymbol)
                {
                    ctxt.CurrentContext.IntroduceTemplateParameterTypes((DSymbol)resultBase);
                    introducedTemplateParams = true;
                }

                // Only import symbol aliases are allowed to search in the parse cache
                if (m is ImportSymbolAlias)
                {
                    var isa = (ImportSymbolAlias)m;

                    // Guard against a missing type before looking at its inner declaration.
                    if (isa.Type != null && (isa.IsModuleAlias || isa.Type.InnerDeclaration != null))
                    {
                        var mods = new List<DModule>();
                        var td = isa.IsModuleAlias ? isa.Type : isa.Type.InnerDeclaration;
                        foreach (var mod in ctxt.ParseCache.LookupModuleName(td.ToString()))
                            mods.Add(mod as DModule);

                        if (mods.Count == 0)
                            ctxt.LogError(new NothingFoundError(isa.Type));
                        else if (mods.Count > 1)
                        {
                            var m__ = new List<ISemantic>();

                            foreach (var mod in mods)
                                m__.Add(new ModuleSymbol(mod, isa.Type));

                            ctxt.LogError(new AmbiguityError(isa.Type, m__));
                        }

                        // On ambiguity the first module found is used.
                        var bt = mods.Count != 0 ? (AbstractType)new ModuleSymbol(mods[0], td) : null;

                        //TODO: Is this correct behaviour?
                        if (!isa.IsModuleAlias)
                        {
                            var furtherId = ResolveFurtherTypeIdentifier(isa.Type.ToString(false), new[] { bt }, ctxt, isa.Type);

                            ctxt.CheckForSingleResult(furtherId, isa.Type);

                            if (furtherId != null && furtherId.Length != 0)
                                bt = furtherId[0];
                            else
                                bt = null;
                        }

                        ret = new AliasedType(isa, bt, isa.Type);
                    }
                }
                else if (m is DVariable)
                {
                    var v = (DVariable)m;
                    AbstractType bt = null;

                    if (DoResolveBaseType)
                    {
                        var bts = TypeDeclarationResolver.Resolve(v.Type, ctxt);

                        if (bts != null && bts.Length != 0 && ctxt.CheckForSingleResult(bts, v.Type))
                            bt = bts[0];

                        // For auto variables, use the initializer to get its type
                        else if (v.Initializer != null)
                            bt = ExpressionSemantics.Evaluation.EvaluateType(v.Initializer, ctxt);

                        // Check if inside an foreach statement header
                        if (bt == null && ctxt.ScopedStatement != null)
                            bt = GetForeachIteratorType(v, ctxt);
                    }

                    // Note: Also works for aliases! In this case, we simply try to resolve the aliased type, otherwise the variable's base type
                    ret = v.IsAlias ?
                        (DSymbol)new AliasedType(v, bt, typeBase as ISyntaxRegion) :
                        new MemberSymbol(v, bt, typeBase as ISyntaxRegion);
                }
                else if (m is DMethod)
                {
                    ret = new MemberSymbol((DNode)m,
                        DoResolveBaseType ? GetMethodReturnType((DMethod)m, ctxt) : null,
                        typeBase as ISyntaxRegion);
                }
                else if (m is DClassLike)
                {
                    UserDefinedType udt = null;
                    var dc = (DClassLike)m;

                    switch (dc.ClassType)
                    {
                        case DTokens.Struct:
                            udt = new StructType(dc, typeBase as ISyntaxRegion);
                            break;
                        case DTokens.Union:
                            udt = new UnionType(dc, typeBase as ISyntaxRegion);
                            break;
                        case DTokens.Class:
                            udt = new ClassType(dc, typeBase as ISyntaxRegion, null);
                            break;
                        case DTokens.Template:
                            udt = new TemplateType(dc, typeBase as ISyntaxRegion);
                            break;
                        case DTokens.Interface:
                            udt = new InterfaceType(dc, typeBase as ISyntaxRegion);
                            break;
                        default:
                            ctxt.LogError(new ResolutionError(m, "Unknown type (" + DTokens.GetTokenString(dc.ClassType) + ")"));
                            break;
                    }

                    // udt stays null for an unknown class type; never hand null to the base class resolver.
                    if (udt != null && canResolveBaseGenerally && !ctxt.Options.HasFlag(ResolutionOptions.DontResolveBaseClasses))
                        ret = DResolver.ResolveBaseClasses(udt, ctxt);
                    else
                        ret = udt;
                }
                else if (m is IAbstractSyntaxTree)
                {
                    var mod = (IAbstractSyntaxTree)m;
                    if (typeBase != null && typeBase.ToString() != mod.ModuleName)
                    {
                        // FirstOrDefault: an empty lookup result must yield null instead of throwing.
                        var pack = ctxt.ParseCache.LookupPackage(typeBase.ToString()).FirstOrDefault();
                        if (pack != null)
                            ret = new PackageSymbol(pack, typeBase as ISyntaxRegion);
                    }
                    else
                        ret = new ModuleSymbol(m as DModule, typeBase as ISyntaxRegion);
                }
                else if (m is DEnum)
                    ret = new EnumType((DEnum)m, typeBase as ISyntaxRegion);
                else if (m is TemplateParameterNode)
                {
                    //TODO: Resolve the specialization type
                    //var templateParameterType = TemplateInstanceHandler.ResolveTypeSpecialization(tmp, ctxt);

                    ret = new MemberSymbol((DNode)m, null, typeBase as ISyntaxRegion);
                }

                return ret;
            }
            finally
            {
                // Undo in the same order as the regular exit path: template parameters, scope, depth counter.
                if (introducedTemplateParams)
                    ctxt.CurrentContext.RemoveParamTypesFromPreferredLocals((DSymbol)resultBase);

                if (pushedScope)
                    ctxt.Pop();

                stackNum_HandleNodeMatch--;
            }
        }
Example #29
0
 /// <summary>
 /// Not implemented: every call throws <see cref="NotImplementedException"/>.
 /// </summary>
 /// <param name="t">Unused.</param>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public ISymbolValue VisitStructType(StructType t)
     => throw new NotImplementedException();
		/// <summary>
		/// Creates the semantic type for a class-like declaration, depending on its class type token.
		/// Structs, unions and (mixin) templates are returned directly; classes and interfaces
		/// additionally get their base classes resolved if <paramref name="canResolveBase"/> is set.
		/// An unknown class type is logged as a resolution error and yields null.
		/// </summary>
		static AbstractType HandleClassLikeMatch (DClassLike dc, ResolutionContext ctxt, object typeBase, bool canResolveBase)
		{
			var hiddenTypeParams = GetInvisibleTypeParameters (dc, ctxt);
			var region = typeBase as ISyntaxRegion;

			// Only classes and interfaces fall through the switch; everything else returns right away.
			UserDefinedType inheritable;

			switch (dc.ClassType) {
				case DTokens.Struct:
					return new StructType (dc, region, hiddenTypeParams);

				case DTokens.Union:
					return new UnionType (dc, region, hiddenTypeParams);

				case DTokens.Template:
					if (dc.ContainsAttribute (DTokens.Mixin))
						return new MixinTemplateType (dc, region, hiddenTypeParams);
					return new TemplateType (dc, region, hiddenTypeParams);

				case DTokens.Class:
					inheritable = new ClassType (dc, region, null, null, hiddenTypeParams);
					break;

				case DTokens.Interface:
					inheritable = new InterfaceType (dc, region, null, hiddenTypeParams);
					break;

				default:
					ctxt.LogError (new ResolutionError (dc, "Unknown type (" + DTokens.GetTokenString (dc.ClassType) + ")"));
					return null;
			}

			return canResolveBase ? DResolver.ResolveBaseClasses (inheritable, ctxt) : inheritable;
		}
Example #31
0
 /// <summary>
 /// Validates the transform and derives the output schema for a given input schema by
 /// invoking "transformSchema" on the underlying JVM object.
 ///
 /// Interactions between parameters are checked for validity as they would be during
 /// Transform; an exception is raised if any parameter value is invalid.
 ///
 /// A typical implementation first verifies the schema change and the parameter validity,
 /// including complex parameter interaction checks.
 /// </summary>
 /// <param name="value">
 /// The <see cref="StructType"/> of the <see cref="DataFrame"/> that is going to be transformed.
 /// </param>
 /// <returns>
 /// The <see cref="StructType"/> of the output schema that Transform would have derived from
 /// the input schema.
 /// </returns>
 public StructType TransformSchema(StructType value)
 {
     // Rebuild the input schema on the JVM side from its JSON form.
     var jvmInputSchema = DataType.FromJson(_jvmObject.Jvm, value.Json);

     var jvmOutputSchema = (JvmObjectReference)_jvmObject.Invoke(
         "transformSchema",
         jvmInputSchema);

     return new StructType(jvmOutputSchema);
 }