/// <summary>
/// Initializes a new structure instance of the given structure kind.
/// </summary>
/// <param name="type">Type of the structure.</param>
public Structure(StructType type)
    : base()
{
    this.type = type;
}
/// <summary>
/// Deduces a tuple-like struct type from a template symbol and its template arguments.
/// Arguments are consumed as an alternating sequence of field types optionally followed
/// by a string value naming the preceding field; all field types are also collected into
/// a "Types" tuple exposed via an alias-this member.
/// </summary>
/// <param name="ds">Symbol to deduce from; must be a <c>TemplateType</c>, otherwise null is returned.</param>
/// <param name="templateArguments">Template argument sequence; may be null, in which case no members are synthesized.</param>
/// <returns>The synthesized <c>StructType</c>, or null when <paramref name="ds"/> is not a template type.</returns>
public AbstractType TryDeduce(DSymbol ds, IEnumerable<ISemantic> templateArguments)
{
    TemplateTypeParameter tp;
    var t = ds as TemplateType;
    if (t == null)
        return null;
    var orig = ds.Definition;
    // Synthesize an anonymous struct node mirroring the original definition's identity/locations.
    var tupleStruct = new DClassLike(DTokens.Struct) { NameHash = ds.NameHash, Parent = orig.Parent, Location = orig.Location, EndLocation = orig.EndLocation, NameLocation = orig.NameLocation };
    var ded = new Templates.DeducedTypeDictionary(tupleStruct);
    if (templateArguments != null)
    {
        var typeList = new List<AbstractType>();
        var en = templateArguments.GetEnumerator();
        if(en.MoveNext())
        {
            var next = en.Current;
            int i = 0;
            // Walk the argument stream: each iteration consumes one type, and — if the
            // following argument is a string value — a field name for it as well.
            for (; ; i++)
            {
                var fieldType = AbstractType.Get(next);
                if (fieldType == null)
                    break; // Non-type argument terminates the sequence.
                fieldType.NonStaticAccess = true;
                typeList.Add(fieldType);
                if (!en.MoveNext())
                    break;
                next = en.Current;
                if (next is ArrayValue && (next as ArrayValue).IsString)
                {
                    var name = (next as ArrayValue).StringValue;
                    // Each named field gets its own synthetic template parameter "_<i>"
                    // bound to the field's type, referenced by the member's declaration.
                    var templateParamName = "_" + i.ToString();
                    tp = new TemplateTypeParameter(templateParamName, CodeLocation.Empty, tupleStruct);
                    ded[tp] = new TemplateParameterSymbol(tp, fieldType);
                    tupleStruct.Add(new DVariable { Name = name, Type = new IdentifierDeclaration(templateParamName) });
                    if (!en.MoveNext())
                        break;
                    next = en.Current;
                }
            }
        }
        // Expose all collected field types as a tuple through an alias-this member "Types".
        var tupleName = "Types";
        tp = new TemplateTypeParameter(tupleName, CodeLocation.Empty, tupleStruct);
        ded[tp] = new TemplateParameterSymbol(tp, new DTuple(null, typeList));
        tupleStruct.Add(new DVariable { NameHash = DVariable.AliasThisIdentifierHash, IsAlias = true, IsAliasThis = true, Type = new IdentifierDeclaration(tupleName) });
    }
    var res = new StructType(tupleStruct, ds.DeclarationOrExpressionBase, ded.Count != 0 ? ded.Values : null);
    resultStore.Add(res, tupleStruct);
    //TODO: Ensure renaming and other AST-based things run properly
    return res;
}
// Exercises a broad range of runtime features against StructType and related helpers:
// instance/static members, delegate combine/remove, operator overloads, exceptions,
// 1-D/2-D arrays, enums, ref parameters, virtual dispatch and locking methods.
public void Test()
{
    // Instance member path: asserts the value is 6 after constructing with 5 and incrementing.
    StructType s2 = new StructType(5);
    s2.Increment();
    AssertEqual(new StackFrame(), 6, s2.Value);

    // Static member path mirrors the instance behavior.
    StructType.StaticValue = 5;
    StructType.StaticIncrement();
    AssertEqual(new StackFrame(), 6, StructType.StaticValue);

    // Delegate creation, += / -= combine/remove, round-trip through a method, and invocation.
    d = new D(DMethod);
    d+=new D(DMethod);
    d-=new D(DMethod);
    d = ReturnDMethod(d);
    s2 = d(d(new StructType[]{s2}))[0];
    AssertEqual(new StackFrame(), 6, StructType.StaticValue);

    // Implicit conversion of 'this' to uint plus the overloaded + operator.
    uint i = this;
    AssertEqual(new StackFrame(), 1u, i);
    i = this + 2u;
    AssertEqual(new StackFrame(), 3u, i);

    // A custom exception type can be thrown and caught.
    try{ throw new MyException();} catch{}

    // 1-D and 2-D array parameters and return values.
    int[] results = DoubleValues(new int[]{1, 2, 3});
    AssertEqual(new StackFrame(), 4, results[1]);
    results = Sums(new int[,]{{1,2}, {3,4}});
    AssertEqual(new StackFrame(), 7, results[1]);
    int[,] otherResults = DoubleValues(new int[,]{{1,2}, {3,4}});
    AssertEqual(new StackFrame(), 4, otherResults[0, 1]);

    // Enum value round-trips through Echo unchanged.
    AssertEqual(new StackFrame(), Values.One, Echo (Values.One));

    // ref parameters: after the swap, 'second' holds the original 'first' (3).
    int first = 3, second = 4;
    Swap(ref first, ref second);
    AssertEqual(new StackFrame(), 3, second);

    // Virtual dispatch through a base-typed reference.
    BaseType bt = new ChildType();
    AssertEqual(new StackFrame(), 4, bt.Compute(1));

    // Locking paths: normal return, and an exception propagating out of the locked region.
    AssertEqual(new StackFrame(), 4, LockingMethod());
    try{ LockingThrowingMethod(); AssertTrue(new StackFrame(), false); } catch{}
}
/// <summary>
/// Sets the input schema (<see cref="StructType"/>) to use when loading data.
/// </summary>
/// <remarks>
/// Supplying an explicit schema lets sources that would otherwise infer one
/// from the data (e.g. JSON) skip the inference step, speeding up loading.
/// </remarks>
/// <param name="schema">The input schema.</param>
/// <returns>This DataFrameReader object.</returns>
public DataFrameReader Schema(StructType schema)
{
    // Forward the schema to the JVM side as its JSON representation.
    Reference.Invoke("schema", DataType.FromJson(Reference.Jvm, schema.Json));
    return this;
}
// Exercises SparkSession.CreateDataFrame: once with an explicit schema over GenericRows,
// then once per primitive IEnumerable overload where the schema is implied by the element type.
public void TestCreateDataFrame()
{
    // Calling CreateDataFrame with schema
    {
        var data = new List <GenericRow>
        {
            new GenericRow(new object[] { "Alice", 20, new Date(2020, 1, 1) }),
            new GenericRow(new object[] { "Bob", 30, new Date(2020, 1, 2) })
        };
        var schema = new StructType(new List <StructField>()
        {
            new StructField("Name", new StringType()),
            new StructField("Age", new IntegerType()),
            new StructField("Date", new DateType())
        });
        DataFrame df = _spark.CreateDataFrame(data, schema);
        ValidateDataFrame(df, data.Select(a => a.Values), schema);
    }
    // Calling CreateDataFrame(IEnumerable<string> _) without schema
    {
        var data = new string[] { "Alice", "Bob", null };
        StructType schema = SchemaWithSingleColumn(new StringType());
        DataFrame df = _spark.CreateDataFrame(data);
        ValidateDataFrame(df, data.Select(a => new object[] { a }), schema);
    }
    // Calling CreateDataFrame(IEnumerable<int> _) without schema
    // NOTE(review): the extra 'false' presumably marks the column non-nullable for
    // non-nullable element types — confirm against SchemaWithSingleColumn.
    {
        var data = new int[] { 1, 2 };
        StructType schema = SchemaWithSingleColumn(new IntegerType(), false);
        DataFrame df = _spark.CreateDataFrame(data);
        ValidateDataFrame(df, data.Select(a => new object[] { a }), schema);
    }
    // Calling CreateDataFrame(IEnumerable<int?> _) without schema
    {
        var data = new int?[] { 1, 2, null };
        StructType schema = SchemaWithSingleColumn(new IntegerType());
        DataFrame df = _spark.CreateDataFrame(data);
        ValidateDataFrame(df, data.Select(a => new object[] { a }), schema);
    }
    // Calling CreateDataFrame(IEnumerable<double> _) without schema
    {
        var data = new double[] { 1.2, 2.3 };
        StructType schema = SchemaWithSingleColumn(new DoubleType(), false);
        DataFrame df = _spark.CreateDataFrame(data);
        ValidateDataFrame(df, data.Select(a => new object[] { a }), schema);
    }
    // Calling CreateDataFrame(IEnumerable<double?> _) without schema
    {
        var data = new double?[] { 1.2, 2.3, null };
        StructType schema = SchemaWithSingleColumn(new DoubleType());
        DataFrame df = _spark.CreateDataFrame(data);
        ValidateDataFrame(df, data.Select(a => new object[] { a }), schema);
    }
    // Calling CreateDataFrame(IEnumerable<bool> _) without schema
    {
        var data = new bool[] { true, false };
        StructType schema = SchemaWithSingleColumn(new BooleanType(), false);
        DataFrame df = _spark.CreateDataFrame(data);
        ValidateDataFrame(df, data.Select(a => new object[] { a }), schema);
    }
    // Calling CreateDataFrame(IEnumerable<bool?> _) without schema
    {
        var data = new bool?[] { true, false, null };
        StructType schema = SchemaWithSingleColumn(new BooleanType());
        DataFrame df = _spark.CreateDataFrame(data);
        ValidateDataFrame(df, data.Select(a => new object[] { a }), schema);
    }
    // Calling CreateDataFrame(IEnumerable<Date> _) without schema
    {
        var data = new Date[] { new Date(2020, 1, 1), new Date(2020, 1, 2), null };
        StructType schema = SchemaWithSingleColumn(new DateType());
        DataFrame df = _spark.CreateDataFrame(data);
        ValidateDataFrame(df, data.Select(a => new object[] { a }), schema);
    }
}
/// <summary>
/// Write instruction operands into bytecode stream.
/// </summary>
/// <param name="writer">Bytecode writer.</param>
public override void WriteOperands(WordWriter writer)
{
    // Emit each operand in sequence; the order (struct type, member, decoration)
    // defines the serialized layout and must not change.
    StructType.Write(writer);
    Member.Write(writer);
    Decoration.Write(writer);
}
/// <summary>
/// Visits a struct type by wrapping it as a symbol value.
/// </summary>
/// <param name="t">The struct type being visited.</param>
/// <returns>A <c>TypeValue</c> carrying <paramref name="t"/>.</returns>
public ISymbolValue VisitStructType(StructType t) => new TypeValue(t);
// Streaming credit-card fraud detector: reads transaction JSON from Kafka, self-joins
// the stream to pair transactions of the same card number within a 5-minute window,
// computes distance and implied travel speed via UDFs, and reports pairs whose speed
// exceeds the given threshold to the console sink.
public void Run(string[] args)
{
    string kafkaBrokers = args[0];
    double maxSpeed = double.Parse(args[1]);

    // Obtain a reference to the Spark execution context.
    SparkSession spark = SparkSession
        .Builder()
        .AppName("Credit Card Fraud")
        .GetOrCreate();

    // Without this setting each stage got 200 tasks, which took about
    // 4 minutes per batch to execute.
    spark.Conf().Set("spark.sql.shuffle.partitions", "1");

    // Create a dataframe that receives data from Kafka.
    DataFrame df = spark
        .ReadStream()
        .Format("kafka")
        .Option("kafka.bootstrap.servers", kafkaBrokers)
        .Option("subscribe", "transactions")
        .Load()
        .SelectExpr("CAST(value AS STRING)");

    /* Schema used to validate the JSON arriving in the Kafka messages.
     * Example JSON:
     * {
     *   "transaction":"431",
     *   "number":"0015-0000-0000-0000",
     *   "lat":-23.1618,
     *   "lng":-46.47201,
     *   "amount":91.01487,
     *   "category":"pets",
     *   "eventTime":"2021-01-05T19:07:19.3888"
     * }
     */
    var schema = new StructType(new[]
    {
        new StructField("transaction", new StringType()),
        new StructField("number", new StringType()),
        new StructField("lat", new DoubleType()),
        new StructField("lng", new DoubleType()),
        new StructField("amount", new DoubleType()),
        new StructField("category", new StringType()),
        new StructField("eventTime", new TimestampType())
    });

    // Parse the JSON into a struct column and expand all of its fields
    // into a new dataframe.
    df = df.WithColumn("json",
            FromJson(
                df.Col("value"),
                schema.SimpleString)
        )
        .Select("json.*");

    // Derive two distinct dataframes so we can join the stream with itself
    // and correlate transactions.
    DataFrame df1 = df
        .WithWatermark("eventTime", "7 minutes");
    DataFrame df2 = df
        .WithColumnRenamed("transaction", "transaction2")
        .WithColumnRenamed("lat", "lat2")
        .WithColumnRenamed("lng", "lng2")
        .WithColumnRenamed("eventTime", "eventTime2")
        .WithWatermark("eventTime2", "7 minutes");

    // Join to correlate credit-card transactions: same card number, different
    // transaction id, second event within 5 minutes of the first.
    DataFrame dfJoin = df1.Join(df2,
        df1.Col("number").EqualTo(df2.Col("number"))
        .And(Col("transaction").NotEqual(Col("transaction2")))
        .And(Col("eventTime2").Between(Col("eventTime"), Col("eventTime") + Expr("interval 5 minutes")))
        );

    // Register custom functions for use on the dataframe.
    spark.Udf().Register <double, double, double, double, double>("CalculateDistance",
        (lat1, lng1, lat2, lng2) => CalculateDistance(lat1, lng1, lat2, lng2));
    spark.Udf().Register <double, Timestamp, Timestamp, double>("CalculateSpeed",
        (dist, eventTime, eventTime2) => CalculateSpeed(dist, eventTime, eventTime2));

    // New columns holding the UDF results.
    dfJoin = dfJoin.WithColumn("dist", CallUDF("CalculateDistance",
        Col("lat"), Col("lng"), Col("lat2"), Col("lng2")));
    dfJoin = dfJoin.WithColumn("speed", CallUDF("CalculateSpeed",
        Col("dist"), Col("eventTime"), Col("eventTime2")));

    // Keep only transaction pairs whose implied speed exceeds "maxSpeed".
    dfJoin = dfJoin.Where(Col("speed").Gt(maxSpeed));

    // Start the streaming query; blocks until terminated.
    StreamingQuery query = dfJoin
        .WriteStream()
        .Format("console")
        .Option("truncate", "false")
        .OutputMode(OutputMode.Append)
        .Start();

    query.AwaitTermination();
}
/// <summary>
/// Determines whether the given check can be applied to data with the given schema.
/// </summary>
/// <param name="check">The check to evaluate.</param>
/// <param name="schema">Schema of the target data.</param>
/// <param name="sparkSession">Session used by the applicability evaluation.</param>
/// <returns>The applicability result for the check.</returns>
private CheckApplicability IsCheckApplicableToData(Check check, StructType schema, SparkSession sparkSession)
{
    var applicability = new Applicability(sparkSession);
    return applicability.IsApplicable(check, schema);
}
/// <summary>
/// Determines whether the given analyzers can be applied to data with the given schema.
/// </summary>
/// <param name="analyzers">The analyzers to evaluate.</param>
/// <param name="schema">Schema of the target data.</param>
/// <param name="sparkSession">Session used by the applicability evaluation.</param>
/// <returns>The applicability result for the analyzers.</returns>
private AnalyzersApplicability AreCheckApplicableToData(IEnumerable <IAnalyzer <IMetric> > analyzers, StructType schema, SparkSession sparkSession)
{
    var applicability = new Applicability(sparkSession);
    return applicability.IsApplicable(analyzers, schema);
}
// Builds the AST for the program sketched below by hand and checks that the
// type-collection pass reports exactly the expected set of distinct DataTypes.
public void TestComplex()
{
    /*
     * def kju() : Unit {
     *   struct P {
     *     x : [Int];
     *   };
     *
     *   struct S {
     *     x : Int;
     *     y : P;
     *   };
     *
     *   var s : [S] = new(S, 10);
     *   var arr : [[Bool]] = new([Bool], 20);
     * }
     */
    // Source positions are irrelevant here; one zero-width range is reused everywhere.
    var dummyRange = new Core.Lexer.Range(new StringLocation(0), new StringLocation(0));

    // struct P { x : [Int]; }
    var pFields = new List <StructField>() { new StructField(dummyRange, "x", new ArrayType(IntType.Instance)) };
    var pDeclaration = new StructDeclaration(dummyRange, "P", pFields);
    var pType = StructType.GetInstance(pDeclaration);

    // struct S { x : Int; y : P; }
    var sFields = new List <StructField>() { new StructField(dummyRange, "x", IntType.Instance), new StructField(dummyRange, "y", pType) };
    var sDeclaration = new StructDeclaration(dummyRange, "S", sFields);
    var sType = StructType.GetInstance(sDeclaration);

    // var s : [S] = new(S, 10);
    var sAlloc = new ArrayAlloc(
        dummyRange, sType, new IntegerLiteral(dummyRange, 10));
    var sVarDeclaration = new VariableDeclaration(
        dummyRange, new ArrayType(sType), "s", sAlloc);

    // var arr : [[Bool]] = new([Bool], 20);
    var arrAlloc = new ArrayAlloc(
        dummyRange, new ArrayType(BoolType.Instance), new IntegerLiteral(dummyRange, 20));
    var arrVarDeclaration = new VariableDeclaration(
        dummyRange, new ArrayType(new ArrayType(BoolType.Instance)), "arr", arrAlloc);

    // def kju() wrapping all of the declarations above.
    // NOTE(review): the declared return type is [Unit] (ArrayType of Unit) while the
    // sketch says Unit — confirm this is intentional.
    var kjuInstructions = new List <Expression> { pDeclaration, sDeclaration, sVarDeclaration, arrVarDeclaration };
    var kjuDeclaration = new FunctionDeclaration(
        dummyRange, "kju", new ArrayType(UnitType.Instance), new List <VariableDeclaration>(), new InstructionBlock(dummyRange, kjuInstructions), false);
    var root = new Program(dummyRange, new List <StructDeclaration>(), new List <FunctionDeclaration> { kjuDeclaration });

    // Every distinct type the program above should give rise to.
    var expectedTypes = new HashSet <DataType>() { pType, sType, new ArrayType(BoolType.Instance), new ArrayType(IntType.Instance), new ArrayType(sType), new ArrayType(new ArrayType(BoolType.Instance)) };
    this.CheckAnswer(root, expectedTypes);
}
/// <summary>
/// Creates a child context one nesting level deeper, scoped to the given struct type.
/// </summary>
/// <param name="type">The struct type entered by the new context.</param>
/// <returns>A new context at <c>Level + 1</c> for <paramref name="type"/>.</returns>
public Context Enter(StructType type) => new Context(Level + 1, type);
/// <summary>
/// Initializes a context at the given nesting level for the given struct type.
/// </summary>
/// <param name="level">Nesting depth of this context.</param>
/// <param name="current">The struct type currently in scope.</param>
public Context(int level, StructType current)
{
    Current = current;
    Level = level;
}
/// <summary>
/// Caches the field names and builds per-field decoders for the given struct schema.
/// </summary>
/// <param name="structType">Schema describing the fields to decode.</param>
public StructDecoder(StructType structType)
{
    var fields = structType.Fields;
    _names = fields.Select(field => field.Name).ToArray();
    _decoders = SchemaToDecoder.FieldsToDecoders(fields);
}
// Creates the DataType (struct/class/interface) for an AST class declaration, registers it
// with its parent scope, and enqueues deferred work: attribute compilation, base-type
// resolution and member population.
public void CreateClass(AstClass astClass, Namescope parent, IEnumerable <AstBlockMember> parentItems)
{
    var sources = new List <string>();
    // Partial declarations are merged into a single flattened declaration first;
    // 'sources' collects the contributing source files.
    if (astClass.Modifiers.HasFlag(Modifiers.Partial))
    {
        astClass = FlattenClass(astClass, sources, parentItems);
    }
    DataType result;
    // Already created (e.g. through an earlier partial part) — nothing more to do.
    if (_cachedClasses.TryGetValue(astClass, out result))
    {
        return;
    }
    var src = astClass.Name.Source;
    var modifiers = GetTypeModifiers(parent, astClass.Modifiers);
    switch (astClass.Type)
    {
        case AstClassType.Struct:
            result = new StructType(src, parent, astClass.DocComment, modifiers, astClass.Name.Symbol);
            break;
        case AstClassType.Class:
            result = new ClassType(src, parent, astClass.DocComment, modifiers, astClass.Name.Symbol);
            break;
        case AstClassType.Interface:
            if (modifiers.HasFlag(Modifiers.Abstract))
            {
                Log.Error(src, ErrorCode.E0000, "'abstract' is not valid for interface");
            }
            // Interfaces are made implicitly abstract.
            result = new InterfaceType(src, parent, astClass.DocComment, modifiers | Modifiers.Abstract, astClass.Name.Symbol);
            break;
        default:
            Log.Error(src, ErrorCode.I3045, "<" + astClass.Type + "> is not a class, struct or interface type");
            return;
    }
    // Attach the new type to its surrounding scope: nested type or namespace member.
    if (parent is DataType)
    {
        (parent as DataType).NestedTypes.Add(result);
    }
    else if (parent is Namespace)
    {
        (parent as Namespace).Types.Add(result);
    }
    else
    {
        Log.Error(result.Source, ErrorCode.I3046, "<" + astClass.Type + "> is not allowed in this context");
    }
    _cachedClasses.Add(astClass, result);
    if (astClass.OptionalGeneric != null)
    {
        CreateGenericSignature(result, astClass.OptionalGeneric, false);
    }
    result.SetBlock(new Block(src, result, null, result.Modifiers & Modifiers.ProtectionModifiers, ".block"));
    foreach (var s in sources)
    {
        result.SourceFiles.Add(s);
    }
    // Recurse into nested block members.
    foreach (var b in astClass.Members)
    {
        if (b is AstBlockBase)
        {
            _compiler.AstProcessor.CreateBlock(b as AstBlockBase, result, astClass.Members);
        }
    }
    if (astClass.Attributes.Count > 0)
    {
        EnqueueAttributes(result, x =>
        {
            result.SetAttributes(_compiler.CompileAttributes(result.Parent, astClass.Attributes));
            // Remove default constructor if TargetSpecificType
            if (result.HasAttribute(_ilf.Essentials.TargetSpecificTypeAttribute) && result.Constructors.Count == 1 && result.Constructors[0].IsGenerated)
            {
                result.Constructors.Clear();
            }
        });
    }
    EnqueueType(result, x => CompileBaseTypes(x, astClass.Bases), x => PopulateClass(astClass, x));
}
/// <summary>
/// Check transform validity and derive the output schema from the input schema.
///
/// This checks for validity of interactions between parameters during Transform and
/// raises an exception if any parameter value is invalid.
///
/// Typical implementation should first conduct verification on schema change and parameter
/// validity, including complex parameter interaction checks.
/// </summary>
/// <param name="value">
/// The <see cref="StructType"/> of the <see cref="DataFrame"/> which will be transformed.
/// </param>
/// <returns>
/// The <see cref="StructType"/> of the output schema that would have been derived from the
/// input schema, if Transform had been called.
/// </returns>
public override StructType TransformSchema(StructType value)
{
    // Ship the schema to the JVM as JSON and wrap the derived result.
    var schemaJson = DataType.FromJson(Reference.Jvm, value.Json);
    var derived = (JvmObjectReference)Reference.Invoke("transformSchema", schemaJson);
    return new StructType(derived);
}
/// <summary>
/// Sets the input schema (<see cref="StructType"/>) to use when loading data.
/// </summary>
/// <remarks>
/// Supplying an explicit schema lets sources that would otherwise infer one
/// from the data (e.g. JSON) skip the inference step, speeding up loading.
/// </remarks>
/// <param name="schema">The input schema.</param>
/// <returns>This DataFrameReader object.</returns>
public DataFrameReader Schema(StructType schema)
{
    // Forward the schema to the JVM side as its JSON representation.
    _jvmObject.Invoke("schema", DataType.FromJson(_jvmObject.Jvm, schema.Json));
    return this;
}
/// <summary>
/// Creates an external table from the given path based on a data source, a schema and
/// a set of options, then returns the corresponding DataFrame.
/// </summary>
/// <param name="tableName">Name of the table.</param>
/// <param name="source">Data source.</param>
/// <param name="schema">Schema of the table.</param>
/// <param name="options">Options to create table.</param>
/// <returns>The DataFrame backed by the newly created external table.</returns>
public DataFrame CreateExternalTable(string tableName, string source, StructType schema, Dictionary<string, string> options) =>
    catalogProxy.CreateExternalTable(tableName, source, schema, options);
// Prepares the property dialog for either a brand-new property (prop == null) or an
// existing one, belonging to either an agent or a custom struct, and wires up which
// controls are editable based on 'canBeEdit' and the property's characteristics.
public void Initialize(bool canBeEdit, AgentType agent, StructType structType, PropertyDef prop, bool canBePar)
{
    // Exactly one owner is expected: an agent or a struct type.
    Debug.Check(agent != null || structType != null);
    _initialized = false;
    _isModified = false;
    _shouldCheckMembersInWorkspace = false;
    _isNew = (prop == null);
    _agent = agent;
    _structType = structType;
    _originalProperty = prop;
    setTypes();
    if (_isNew)
    {
        // New property: create a fresh PropertyDef owned by the agent or the struct.
        this.Text = canBeEdit ? Resources.AddProperty : Resources.ViewProperty;
        if (_structType == null)
        {
            if (agent != null)
            {
                _property = new PropertyDef(agent, null, agent.Name, "", "", "");
            }
        }
        else
        {
            _property = new PropertyDef(null, null, _structType.Name, "", "", "");
        }
        _property.IsPublic = false;
        resetProperty(_property, _property.IsPar);
    }
    else
    {
        // Existing property: populate the dialog from it.
        this.Text = canBeEdit ? Resources.EditProperty : Resources.ViewProperty;
        resetProperty(prop, prop.IsPar);
    }
    //this.customizedCheckBox.Visible = canBeEdit && !_property.IsInherited && agent != null;
    this.customizedCheckBox.Visible = false;
    // "Local" (Par) only applies to agent-owned, non-member properties.
    this.isLocalCheckBox.Checked = _structType == null && _property.IsPar;
    this.isLocalCheckBox.Visible = canBePar && _structType == null && !_property.IsMember;
    this.isLocalCheckBox.Enabled = canBeEdit;
    this.nameTextBox.Enabled = canBeEdit;
    // Type/array editing is additionally allowed for customized structs with changeable types.
    this.arrayCheckBox.Enabled = canBeEdit || (_structType == null || _structType.IsCustomized) && _property.IsChangeableType;
    this.typeComboBox.Enabled = canBeEdit || (_structType == null || _structType.IsCustomized) && _property.IsChangeableType;
    this.isStaticCheckBox.Enabled = canBeEdit;
    this.isPublicCheckBox.Enabled = canBeEdit;
    this.isConstCheckBox.Enabled = canBeEdit;
    this.dispTextBox.Enabled = canBeEdit;
    this.descTextBox.Enabled = canBeEdit;
    // Put the caret at the end of an existing name, otherwise select the field.
    this.nameTextBox.Focus();
    if (this.nameTextBox.TextLength > 0)
    {
        this.nameTextBox.SelectionStart = this.nameTextBox.TextLength;
    }
    else
    {
        this.nameTextBox.Select();
    }
    _initialized = true;
}
/// <summary>
/// Creates a <see cref="DataFrame"/> from an <see cref="IEnumerable"/> containing
/// <see cref="GenericRow"/>s using the given schema.
/// It is important to make sure that the structure of every <see cref="GenericRow"/> of
/// the provided <see cref="IEnumerable"/> matches
/// the provided schema. Otherwise, there will be runtime exception.
/// </summary>
/// <param name="data">List of Row objects</param>
/// <param name="schema">Schema as StructType</param>
/// <returns>DataFrame object</returns>
public DataFrame CreateDataFrame(IEnumerable <GenericRow> data, StructType schema)
{
    // The schema crosses the JVM boundary as JSON.
    var jvmDataFrame = (JvmObjectReference)_jvmObject.Invoke(
        "createDataFrame",
        data,
        DataType.FromJson(_jvmObject.Jvm, schema.Json));
    return new DataFrame(jvmDataFrame);
}
// Smoke-tests the Spark 2.3.x DataFrame API surface: every call must succeed and
// return the asserted type. Behavior/values are not verified (except trivial ones).
public void TestSignaturesV2_3_X()
{
    // Indexer, ToDF and schema access.
    Assert.IsType <Column>(_df["name"]);
    Assert.IsType <Column>(_df["age"]);
    Assert.IsType <DataFrame>(_df.ToDF());
    Assert.IsType <DataFrame>(_df.ToDF("name2", "age2"));
    StructType schema = _df.Schema();
    Assert.NotNull(schema);
    _df.PrintSchema();

    // Explain in all overload forms.
    _df.Explain();
    _df.Explain(true);
    _df.Explain(false);

    // Column/type introspection.
    Assert.Equal(2, _df.Columns().ToArray().Length);
    var expected = new List <Tuple <string, string> >
    {
        new Tuple <string, string>("age", "integer"),
        new Tuple <string, string>("name", "string")
    };
    Assert.Equal(expected, _df.DTypes());
    Assert.IsType <bool>(_df.IsLocal());
    Assert.IsType <bool>(_df.IsStreaming());

    using (var tempDir = new TemporaryDirectory())
    {
        // The following is required for *CheckPoint().
        _spark.SparkContext.SetCheckpointDir(tempDir.Path);
        Assert.IsType <DataFrame>(_df.Checkpoint());
        Assert.IsType <DataFrame>(_df.Checkpoint(false));
        Assert.IsType <DataFrame>(_df.LocalCheckpoint());
        Assert.IsType <DataFrame>(_df.LocalCheckpoint(false));
    }

    Assert.IsType <DataFrame>(_df.WithWatermark("time", "10 minutes"));

    // Show in all overload forms.
    _df.Show();
    _df.Show(10);
    _df.Show(10, 10);
    _df.Show(10, 10, true);

    // Joins.
    Assert.IsType <DataFrame>(_df.Join(_df));
    Assert.IsType <DataFrame>(_df.Join(_df, "name"));
    Assert.IsType <DataFrame>(_df.Join(_df, new[] { "name" }));
    Assert.IsType <DataFrame>(_df.Join(_df, new[] { "name" }, "outer"));
    Assert.IsType <DataFrame>(_df.Join(_df, _df["age"] == _df["age"]));
    Assert.IsType <DataFrame>(_df.Join(_df, _df["age"] == _df["age"], "outer"));
    Assert.IsType <DataFrame>(_df.CrossJoin(_df));

    // Sorting and ordering.
    Assert.IsType <DataFrame>(_df.SortWithinPartitions("age"));
    Assert.IsType <DataFrame>(_df.SortWithinPartitions("age", "name"));
    Assert.IsType <DataFrame>(_df.SortWithinPartitions());
    Assert.IsType <DataFrame>(_df.SortWithinPartitions(_df["age"]));
    Assert.IsType <DataFrame>(_df.SortWithinPartitions(_df["age"], _df["name"]));
    Assert.IsType <DataFrame>(_df.Sort("age"));
    Assert.IsType <DataFrame>(_df.Sort("age", "name"));
    Assert.IsType <DataFrame>(_df.Sort());
    Assert.IsType <DataFrame>(_df.Sort(_df["age"]));
    Assert.IsType <DataFrame>(_df.Sort(_df["age"], _df["name"]));
    Assert.IsType <DataFrame>(_df.OrderBy("age"));
    Assert.IsType <DataFrame>(_df.OrderBy("age", "name"));
    Assert.IsType <DataFrame>(_df.OrderBy());
    Assert.IsType <DataFrame>(_df.OrderBy(_df["age"]));
    Assert.IsType <DataFrame>(_df.OrderBy(_df["age"], _df["name"]));

    // Hints, column selection and aliasing.
    Assert.IsType <DataFrame>(_df.Hint("broadcast"));
    Assert.IsType <DataFrame>(_df.Hint("broadcast", new[] { "hello", "world" }));
    Assert.IsType <Column>(_df.Col("age"));
    Assert.IsType <Column>(_df.ColRegex("age"));
    Assert.IsType <DataFrame>(_df.As("alias"));
    Assert.IsType <DataFrame>(_df.Alias("alias"));
    Assert.IsType <DataFrame>(_df.Select("age"));
    Assert.IsType <DataFrame>(_df.Select("age", "name"));
    Assert.IsType <DataFrame>(_df.Select());
    Assert.IsType <DataFrame>(_df.Select(_df["age"]));
    Assert.IsType <DataFrame>(_df.Select(_df["age"], _df["name"]));
    Assert.IsType <DataFrame>(_df.SelectExpr());
    Assert.IsType <DataFrame>(_df.SelectExpr("age * 2"));
    Assert.IsType <DataFrame>(_df.SelectExpr("age * 2", "abs(age)"));

    // Filtering.
    Assert.IsType <DataFrame>(_df.Filter(_df["age"] > 21));
    Assert.IsType <DataFrame>(_df.Filter("age > 21"));
    Assert.IsType <DataFrame>(_df.Where(_df["age"] > 21));
    Assert.IsType <DataFrame>(_df.Where("age > 21"));

    // Grouping and aggregation.
    Assert.IsType <RelationalGroupedDataset>(_df.GroupBy("age"));
    Assert.IsType <RelationalGroupedDataset>(_df.GroupBy("age", "name"));
    Assert.IsType <RelationalGroupedDataset>(_df.GroupBy());
    Assert.IsType <RelationalGroupedDataset>(_df.GroupBy(_df["age"]));
    Assert.IsType <RelationalGroupedDataset>(_df.GroupBy(_df["age"], _df["name"]));
    {
        RelationalGroupedDataset df = _df.WithColumn("tempAge", _df["age"]).GroupBy("name");
        Assert.IsType <DataFrame>(df.Mean("age"));
        Assert.IsType <DataFrame>(df.Mean("age", "tempAge"));
        Assert.IsType <DataFrame>(df.Max("age"));
        Assert.IsType <DataFrame>(df.Max("age", "tempAge"));
        Assert.IsType <DataFrame>(df.Avg("age"));
        Assert.IsType <DataFrame>(df.Avg("age", "tempAge"));
        Assert.IsType <DataFrame>(df.Min("age"));
        Assert.IsType <DataFrame>(df.Min("age", "tempAge"));
        Assert.IsType <DataFrame>(df.Sum("age"));
        Assert.IsType <DataFrame>(df.Sum("age", "tempAge"));
    }
    Assert.IsType <RelationalGroupedDataset>(_df.Rollup("age"));
    Assert.IsType <RelationalGroupedDataset>(_df.Rollup("age", "name"));
    Assert.IsType <RelationalGroupedDataset>(_df.Rollup());
    Assert.IsType <RelationalGroupedDataset>(_df.Rollup(_df["age"]));
    Assert.IsType <RelationalGroupedDataset>(_df.Rollup(_df["age"], _df["name"]));
    Assert.IsType <RelationalGroupedDataset>(_df.Cube("age"));
    Assert.IsType <RelationalGroupedDataset>(_df.Cube("age", "name"));
    Assert.IsType <RelationalGroupedDataset>(_df.Cube());
    Assert.IsType <RelationalGroupedDataset>(_df.Cube(_df["age"]));
    Assert.IsType <RelationalGroupedDataset>(_df.Cube(_df["age"], _df["name"]));
    Assert.IsType <DataFrame>(_df.Agg(Avg(_df["age"])));
    Assert.IsType <DataFrame>(_df.Agg(Avg(_df["age"]), Avg(_df["name"])));

    // Set-like operations and sampling.
    Assert.IsType <DataFrame>(_df.Limit(10));
    Assert.IsType <DataFrame>(_df.Union(_df));
    Assert.IsType <DataFrame>(_df.UnionByName(_df));
    Assert.IsType <DataFrame>(_df.Intersect(_df));
    Assert.IsType <DataFrame>(_df.Except(_df));
    Assert.IsType <DataFrame>(_df.Sample(0.5));
    Assert.IsType <DataFrame>(_df.Sample(0.5, true));
    Assert.IsType <DataFrame>(_df.Sample(0.5, false, 12345));
    Assert.IsType <DataFrame[]>(_df.RandomSplit(new[] { 0.2, 0.8 }));
    Assert.IsType <DataFrame[]>(_df.RandomSplit(new[] { 0.2, 0.8 }, 12345));

    // Column add/rename/drop and de-duplication.
    Assert.IsType <DataFrame>(_df.WithColumn("age2", _df["age"]));
    Assert.IsType <DataFrame>(_df.WithColumnRenamed("age", "age2"));
    Assert.IsType <DataFrame>(_df.Drop());
    Assert.IsType <DataFrame>(_df.Drop("age"));
    Assert.IsType <DataFrame>(_df.Drop("age", "name"));
    Assert.IsType <DataFrame>(_df.Drop(_df["age"]));
    Assert.IsType <DataFrame>(_df.DropDuplicates());
    Assert.IsType <DataFrame>(_df.DropDuplicates("age"));
    Assert.IsType <DataFrame>(_df.DropDuplicates("age", "name"));

    // Statistics.
    Assert.IsType <DataFrame>(_df.Describe());
    Assert.IsType <DataFrame>(_df.Describe("age"));
    Assert.IsType <DataFrame>(_df.Describe("age", "name"));
    Assert.IsType <DataFrame>(_df.Summary());
    Assert.IsType <DataFrame>(_df.Summary("count"));
    Assert.IsType <DataFrame>(_df.Summary("count", "mean"));

    // Row retrieval.
    Assert.IsType <Row[]>(_df.Head(2).ToArray());
    Assert.IsType <Row>(_df.Head());
    Assert.IsType <Row>(_df.First());
    Assert.IsType <Row[]>(_df.Take(3).ToArray());
    Assert.IsType <Row[]>(_df.Collect().ToArray());
    Assert.IsType <Row[]>(_df.ToLocalIterator().ToArray());
    Assert.IsType <long>(_df.Count());

    // Partitioning.
    Assert.IsType <DataFrame>(_df.Repartition(2));
    Assert.IsType <DataFrame>(_df.Repartition(2, _df["age"]));
    Assert.IsType <DataFrame>(_df.Repartition(_df["age"]));
    Assert.IsType <DataFrame>(_df.Repartition());
    Assert.IsType <DataFrame>(_df.RepartitionByRange(2, _df["age"]));
    Assert.IsType <DataFrame>(_df.RepartitionByRange(_df["age"]));
    Assert.IsType <DataFrame>(_df.Coalesce(1));
    Assert.IsType <DataFrame>(_df.Distinct());

    // Caching and persistence.
    Assert.IsType <DataFrame>(_df.Persist());
    Assert.IsType <DataFrame>(_df.Persist(StorageLevel.DISK_ONLY));
    Assert.IsType <DataFrame>(_df.Cache());
    Assert.IsType <StorageLevel>(_df.StorageLevel());
    Assert.IsType <DataFrame>(_df.Unpersist());

    // View registration.
    _df.CreateTempView("view");
    _df.CreateOrReplaceTempView("view");
    _df.CreateGlobalTempView("global_view");
    _df.CreateOrReplaceGlobalTempView("global_view");
}
/// <summary>
/// Not supported by this proxy implementation; always throws.
/// </summary>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public IDataFrameProxy ReadDataFrame(string path, StructType schema, System.Collections.Generic.Dictionary <string, string> options) =>
    throw new NotImplementedException();
/// <summary>
/// Identity delegate target: returns the array it receives, unchanged.
/// </summary>
/// <param name="s">Array to pass through.</param>
/// <returns>The same array instance.</returns>
public StructType[] DMethod(StructType[] s) => s;
/// <summary>
/// Mock implementation: records the call arguments in a proxy instead of reading a file.
/// </summary>
/// <param name="path">Path argument captured by the mock.</param>
/// <param name="schema">Schema argument captured by the mock.</param>
/// <param name="delimiter">Delimiter argument captured by the mock.</param>
/// <returns>A <c>MockDataFrameProxy</c> holding the captured arguments.</returns>
public IDataFrameProxy TextFile(string path, StructType schema, string delimiter) =>
    new MockDataFrameProxy(new object[] { path, schema, delimiter }, this);
/// <summary>
/// Initializes a language struct model with its ID type, parent model and
/// (optionally) owning compilation unit.
/// </summary>
/// <param name="intIDType">The struct's ID type.</param>
/// <param name="objParent">Parent struct model.</param>
/// <param name="objCompilationUnit">Owning compilation unit, or null.</param>
public LanguageStructModel(StructType intIDType, LanguageStructModel objParent, CompilationUnitModel objCompilationUnit = null)
{
    CompilationUnit = objCompilationUnit;
    Parent = objParent;
    IDType = intIDType;
}
// Smoke-tests the Spark 2.3.x DataFrame API surface: each member is simply invoked
// to confirm it exists and does not throw; return values are mostly ignored.
public void TestSignaturesV2_3_X()
{
    // Indexer, ToDF and schema access.
    Column col = _df["name"];
    col = _df["age"];
    DataFrame df = _df.ToDF();
    df = df.ToDF("name2", "age2");
    StructType schema = _df.Schema();
    Assert.NotNull(schema);
    _df.PrintSchema();

    // Explain in all overload forms.
    _df.Explain();
    _df.Explain(true);
    _df.Explain(false);

    Assert.Equal(2, _df.Columns().ToArray().Length);
    _df.IsLocal();
    _df.IsStreaming();

    using (var tempDir = new TemporaryDirectory())
    {
        // The following is required for *CheckPoint().
        _spark.SparkContext.SetCheckpointDir(tempDir.Path);
        _df.Checkpoint();
        _df.Checkpoint(false);
        _df.LocalCheckpoint();
        _df.LocalCheckpoint(false);
    }

    _df.WithWatermark("time", "10 minutes");

    // Show in all overload forms.
    _df.Show();
    _df.Show(10);
    _df.Show(10, 10);
    _df.Show(10, 10, true);

    // Joins.
    _df.Join(_df);
    _df.Join(_df, "name");
    _df.Join(_df, new[] { "name" });
    _df.Join(_df, new[] { "name" }, "outer");
    _df.Join(_df, _df["age"] == _df["age"]);
    _df.Join(_df, _df["age"] == _df["age"], "outer");
    _df.CrossJoin(_df);

    // Sorting and ordering.
    _df.SortWithinPartitions("age");
    _df.SortWithinPartitions("age", "name");
    _df.SortWithinPartitions();
    _df.SortWithinPartitions(_df["age"]);
    _df.SortWithinPartitions(_df["age"], _df["name"]);
    _df.Sort("age");
    _df.Sort("age", "name");
    _df.Sort();
    _df.Sort(_df["age"]);
    _df.Sort(_df["age"], _df["name"]);
    _df.OrderBy("age");
    _df.OrderBy("age", "name");
    _df.OrderBy();
    _df.OrderBy(_df["age"]);
    _df.OrderBy(_df["age"], _df["name"]);

    // Hints, column selection and aliasing.
    _df.Hint("broadcast");
    _df.Hint("broadcast", new[] { "hello", "world" });
    _df.Col("age");
    _df.ColRegex("age");
    _df.As("alias");
    _df.Alias("alias");
    _df.Select("age");
    _df.Select("age", "name");
    _df.Select();
    _df.Select(_df["age"]);
    _df.Select(_df["age"], _df["name"]);
    _df.SelectExpr();
    _df.SelectExpr("age * 2");
    _df.SelectExpr("age * 2", "abs(age)");

    // Filtering.
    _df.Filter(_df["age"] > 21);
    _df.Filter("age > 21");
    _df.Where(_df["age"] > 21);
    _df.Where("age > 21");

    // Grouping and aggregation.
    _df.GroupBy("age");
    _df.GroupBy("age", "name");
    _df.GroupBy();
    _df.GroupBy(_df["age"]);
    _df.GroupBy(_df["age"], _df["name"]);
    _df.Rollup("age");
    _df.Rollup("age", "name");
    _df.Rollup();
    _df.Rollup(_df["age"]);
    _df.Rollup(_df["age"], _df["name"]);
    _df.Cube("age");
    _df.Cube("age", "name");
    _df.Cube();
    _df.Cube(_df["age"]);
    _df.Cube(_df["age"], _df["name"]);
    _df.Agg(Avg(_df["age"]));
    _df.Agg(Avg(_df["age"]), Avg(_df["name"]));

    // Set-like operations and sampling.
    _df.Limit(10);
    _df.Union(_df);
    _df.UnionByName(_df);
    _df.Intersect(_df);
    _df.Except(_df);
    _df.Sample(0.5);
    _df.Sample(0.5, true);
    _df.Sample(0.5, false, 12345);
    _df.RandomSplit(new[] { 0.2, 0.8 });
    _df.RandomSplit(new[] { 0.2, 0.8 }, 12345);

    // Column add/rename/drop and de-duplication.
    _df.WithColumn("age2", _df["age"]);
    _df.WithColumnRenamed("age", "age2");
    _df.Drop();
    _df.Drop("age");
    _df.Drop("age", "name");
    _df.Drop(_df["age"]);
    _df.DropDuplicates();
    _df.DropDuplicates("age");
    _df.DropDuplicates("age", "name");

    // Statistics.
    _df.Describe();
    _df.Describe("age");
    _df.Describe("age", "name");
    _df.Summary();
    _df.Summary("count");
    _df.Summary("count", "mean");

    // Row retrieval.
    _df.Head(2);
    _df.Head();
    _df.First();
    _df.Take(3).ToArray();
    _df.Collect().ToArray();
    _df.ToLocalIterator().ToArray();
    _df.Count();

    // Partitioning.
    _df.Repartition(2);
    _df.Repartition(2, _df["age"]);
    _df.Repartition(_df["age"]);
    _df.Repartition();
    _df.RepartitionByRange(2, _df["age"]);
    _df.RepartitionByRange(_df["age"]);
    _df.Coalesce(1);
    _df.Distinct();

    // Caching and persistence.
    _df.Persist();
    _df.Cache();
    _df.Unpersist();

    // View registration.
    _df.CreateTempView("view");
    _df.CreateOrReplaceTempView("view");
    _df.CreateGlobalTempView("global_view");
    _df.CreateOrReplaceGlobalTempView("global_view");
}
/// <summary>
/// Resolves an AST node match to its semantic type.
/// The variable's or method's base type will be resolved (if auto type, the intializer's type will be taken).
/// A class' base class will be searched.
/// etc..
/// </summary>
/// <param name="m">The matched node (variable, method, class-like, module, enum, template parameter, or import alias).</param>
/// <param name="ctxt">Resolution context; its scope may be temporarily pushed to the node's parent block.</param>
/// <param name="resultBase">Optional symbol whose deduced template parameters are introduced into the context during resolution.</param>
/// <param name="typeBase">Optional syntax region used as the declaration base of the produced result.</param>
/// <returns>The resolved <see cref="AbstractType"/>, or null if the node kind was not handled.</returns>
public static AbstractType HandleNodeMatch(
    INode m,
    ResolverContextStack ctxt,
    AbstractType resultBase = null,
    object typeBase = null)
{
    stackNum_HandleNodeMatch++;

    // If the matched node lives in a different block than the current scope,
    // push that block so lookups below run in the node's own scope; popped at the end.
    bool popAfterwards = m.Parent != ctxt.ScopedBlock && m.Parent is IBlockNode;
    if (popAfterwards)
        ctxt.PushNewScope((IBlockNode)m.Parent);

    //HACK: Really dirty stack overflow prevention via manually counting call depth
    var canResolveBaseGenerally = stackNum_HandleNodeMatch < 6;

    // Only resolve the base type when allowed by depth and options, and when the
    // type name is not the node's own name (guards against trivial self-reference).
    var DoResolveBaseType = canResolveBaseGenerally &&
        !ctxt.Options.HasFlag(ResolutionOptions.DontResolveBaseClasses) &&
        (m.Type == null || m.Type.ToString(false) != m.Name);

    AbstractType ret = null;

    // To support resolving type parameters to concrete types if the context allows this,
    // introduce all deduced parameters to the current context
    if (canResolveBaseGenerally && resultBase is DSymbol)
        ctxt.CurrentContext.IntroduceTemplateParameterTypes((DSymbol)resultBase);

    // Only import symbol aliases are allowed to search in the parse cache
    if (m is ImportSymbolAlias)
    {
        var isa = (ImportSymbolAlias)m;

        if (isa.IsModuleAlias ? isa.Type != null : isa.Type.InnerDeclaration != null)
        {
            var mods = new List<DModule>();
            // For a module alias the full type names the module; otherwise the
            // inner declaration names the module and the outer part a member of it.
            var td=isa.IsModuleAlias ? isa.Type : isa.Type.InnerDeclaration;

            foreach (var mod in ctxt.ParseCache.LookupModuleName(td.ToString()))
                mods.Add(mod as DModule);

            // Report unresolvable or ambiguous module names, but still continue
            // with the first match (if any) below.
            if(mods.Count == 0)
                ctxt.LogError(new NothingFoundError(isa.Type));
            else if(mods.Count > 1)
            {
                var m__=new List<ISemantic>();
                foreach(var mod in mods)
                    m__.Add(new ModuleSymbol(mod, isa.Type));
                ctxt.LogError(new AmbiguityError(isa.Type,m__));
            }

            var bt=mods.Count != 0 ? (AbstractType)new ModuleSymbol(mods[0], td) : null;

            //TODO: Is this correct behaviour?
            if (!isa.IsModuleAlias){
                // Resolve the imported member inside the found module.
                var furtherId = ResolveFurtherTypeIdentifier(isa.Type.ToString(false), new[]{ bt }, ctxt, isa.Type);

                ctxt.CheckForSingleResult(furtherId, isa.Type);

                if (furtherId != null && furtherId.Length != 0)
                    bt = furtherId[0];
                else
                    bt = null;
            }

            ret = new AliasedType(isa, bt, isa.Type);
        }
    }
    else if (m is DVariable)
    {
        var v = (DVariable)m;
        AbstractType bt = null;

        if (DoResolveBaseType)
        {
            var bts = TypeDeclarationResolver.Resolve(v.Type, ctxt);

            if (bts != null && bts.Length != 0 && ctxt.CheckForSingleResult(bts, v.Type))
                bt = bts[0];

            // For auto variables, use the initializer to get its type
            else if (v.Initializer != null)
                bt = ExpressionSemantics.Evaluation.EvaluateType(v.Initializer, ctxt);

            // Check if inside an foreach statement header
            if (bt == null && ctxt.ScopedStatement != null)
                bt = GetForeachIteratorType(v, ctxt);
        }

        // Note: Also works for aliases! In this case, we simply try to resolve the aliased type,
        // otherwise the variable's base type
        ret=v.IsAlias ?
            (DSymbol)new AliasedType(v, bt, typeBase as ISyntaxRegion) :
            new MemberSymbol(v, bt, typeBase as ISyntaxRegion);
    }
    else if (m is DMethod)
    {
        // A method resolves to a member symbol carrying its (optionally resolved) return type.
        ret = new MemberSymbol((DNode)m,
            DoResolveBaseType ? GetMethodReturnType((DMethod)m, ctxt) : null
            , typeBase as ISyntaxRegion);
    }
    else if (m is DClassLike)
    {
        UserDefinedType udt = null;
        var dc=(DClassLike)m;

        // Map the D aggregate kind to its semantic type representation.
        switch (dc.ClassType)
        {
            case DTokens.Struct:
                udt = new StructType(dc, typeBase as ISyntaxRegion);
                break;
            case DTokens.Union:
                udt = new UnionType(dc, typeBase as ISyntaxRegion);
                break;
            case DTokens.Class:
                udt = new ClassType(dc, typeBase as ISyntaxRegion, null);
                break;
            case DTokens.Template:
                udt = new TemplateType(dc, typeBase as ISyntaxRegion);
                break;
            case DTokens.Interface:
                udt = new InterfaceType(dc, typeBase as ISyntaxRegion);
                break;
            default:
                ctxt.LogError(new ResolutionError(m, "Unknown type ("+DTokens.GetTokenString(dc.ClassType)+")"));
                break;
        }

        // Resolve the inheritance chain unless the options or the depth guard forbid it.
        if (canResolveBaseGenerally && !ctxt.Options.HasFlag(ResolutionOptions.DontResolveBaseClasses))
            ret = DResolver.ResolveBaseClasses(udt, ctxt);
        else
            ret = udt;
    }
    else if (m is IAbstractSyntaxTree)
    {
        var mod = (IAbstractSyntaxTree)m;
        // If the looked-up name differs from the module's name, it addresses a package prefix.
        if (typeBase != null && typeBase.ToString() != mod.ModuleName)
        {
            var pack = ctxt.ParseCache.LookupPackage(typeBase.ToString()).First();
            if (pack != null)
                ret = new PackageSymbol(pack, typeBase as ISyntaxRegion);
        }
        else
            ret = new ModuleSymbol(m as DModule, typeBase as ISyntaxRegion);
    }
    else if (m is DEnum)
        ret = new EnumType((DEnum)m, typeBase as ISyntaxRegion);
    else if (m is TemplateParameterNode)
    {
        var tmp = ((TemplateParameterNode)m).TemplateParameter;
        //ResolveResult[] templateParameterType = null;

        //TODO: Resolve the specialization type
        //var templateParameterType = TemplateInstanceHandler.ResolveTypeSpecialization(tmp, ctxt);
        ret = new MemberSymbol((DNode)m, null, typeBase as ISyntaxRegion);
    }

    // Undo the template-parameter introduction performed above.
    if (canResolveBaseGenerally && resultBase is DSymbol)
        ctxt.CurrentContext.RemoveParamTypesFromPreferredLocals((DSymbol)resultBase);

    // Pop the temporarily pushed scope, if any, and release the depth counter.
    if (popAfterwards)
        ctxt.Pop();

    stackNum_HandleNodeMatch--;

    return ret;
}
/// <summary>
/// Visiting a <see cref="StructType"/> as a symbol value is not supported.
/// </summary>
/// <param name="t">The struct type being visited.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public ISymbolValue VisitStructType(StructType t) =>
    throw new NotImplementedException();
/// <summary>
/// Maps a class-like declaration (struct, union, class, interface, template) to its
/// semantic type representation, resolving base classes for classes and interfaces
/// when <paramref name="canResolveBase"/> permits.
/// </summary>
/// <param name="dc">The class-like declaration to convert.</param>
/// <param name="ctxt">Resolution context used for type-parameter lookup and error logging.</param>
/// <param name="typeBase">Optional syntax region acting as the result's declaration base.</param>
/// <param name="canResolveBase">Whether the inheritance chain may be resolved.</param>
/// <returns>The resolved type, or null for an unknown class kind.</returns>
static AbstractType HandleClassLikeMatch (DClassLike dc, ResolutionContext ctxt, object typeBase, bool canResolveBase)
{
    var declBase = typeBase as ISyntaxRegion;
    // Template parameters that are in scope but not written at the declaration itself.
    var hiddenParams = GetInvisibleTypeParameters (dc, ctxt);

    switch (dc.ClassType) {
        case DTokens.Struct:
            return new StructType (dc, declBase, hiddenParams);

        case DTokens.Union:
            return new UnionType (dc, declBase, hiddenParams);

        case DTokens.Template:
            // mixin templates get their own semantic representation.
            return dc.ContainsAttribute (DTokens.Mixin)
                ? (AbstractType) new MixinTemplateType (dc, declBase, hiddenParams)
                : new TemplateType (dc, declBase, hiddenParams);

        case DTokens.Class: {
            UserDefinedType cls = new ClassType (dc, declBase, null, null, hiddenParams);
            // Only classes and interfaces carry an inheritance chain worth resolving.
            return canResolveBase ? DResolver.ResolveBaseClasses (cls, ctxt) : cls;
        }

        case DTokens.Interface: {
            UserDefinedType iface = new InterfaceType (dc, declBase, null, hiddenParams);
            return canResolveBase ? DResolver.ResolveBaseClasses (iface, ctxt) : iface;
        }

        default:
            ctxt.LogError (new ResolutionError (dc, "Unknown type (" + DTokens.GetTokenString (dc.ClassType) + ")"));
            return null;
    }
}
/// <summary>
/// Check transform validity and derive the output schema from the input schema.
///
/// This checks for validity of interactions between parameters during Transform and
/// raises an exception if any parameter value is invalid.
///
/// Typical implementation should first conduct verification on schema change and parameter
/// validity, including complex parameter interaction checks.
/// </summary>
/// <param name="value">
/// The <see cref="StructType"/> of the <see cref="DataFrame"/> which will be transformed.
/// </param>
/// <returns>
/// The <see cref="StructType"/> of the output schema that would have been derived from the
/// input schema, if Transform had been called.
/// </returns>
public StructType TransformSchema(StructType value)
{
    // Rehydrate the .NET-side schema on the JVM from its JSON form, then ask the
    // JVM-side transformer to derive the output schema.
    var jvmSchema = DataType.FromJson(_jvmObject.Jvm, value.Json);
    var outputSchemaRef = (JvmObjectReference)_jvmObject.Invoke("transformSchema", jvmSchema);
    return new StructType(outputSchemaRef);
}