/// <summary>
/// Gets the split points that were previously configured via <c>SetSplits</c>.
/// </summary>
/// <returns>The split points used to bucket the input column.</returns>
public double[] GetSplits() => (double[])_jvmObject.Invoke("getSplits");
/// <summary>
/// Specifies how data of a streaming DataFrame is written to a streaming sink.
/// </summary>
/// <remarks>
/// The following modes are supported:
/// "append": Only the new rows in the streaming DataFrame/Dataset will be written to
/// the sink.
/// "complete": All the rows in the streaming DataFrame/Dataset will be written to the sink
/// every time there are some updates.
/// "update": Only the rows that were updated in the streaming DataFrame will
/// be written to the sink every time there are some updates. If the query
/// doesn't contain aggregations, it will be equivalent to `append` mode.
/// </remarks>
/// <param name="outputMode">Output mode name</param>
/// <returns>This DataStreamWriter object</returns>
public DataStreamWriter OutputMode(string outputMode)
{
    _jvmObject.Invoke("outputMode", outputMode);
    return this;
}
/// <summary>
/// Selects a column based on the given column name.
/// </summary>
/// <param name="columnName">Column name</param>
/// <returns>Column object</returns>
public Column this[string columnName] =>
    WrapAsColumn(_jvmObject.Invoke("col", columnName));
/// <summary>
/// Returns the JVM toString value rather than the .NET ToString default.
/// </summary>
/// <returns>JVM toString() value</returns>
public override string ToString()
{
    // Delegate to the JVM-side object so the string matches what Spark itself prints.
    return (string)_jvmObject.Invoke("toString");
}
/// <summary>
/// Collects all the index metadata.
/// </summary>
/// <returns>All index metadata as a <see cref="DataFrame"/>.</returns>
public DataFrame Indexes()
{
    var indexes = (JvmObjectReference)_jvmObject.Invoke("indexes");
    return new DataFrame(indexes);
}
/// <summary>
/// Runtime configuration interface for Spark.
/// </summary>
/// <remarks>
/// This is the interface through which the user can get and set all Spark and Hadoop
/// configurations that are relevant to Spark SQL. When getting the value of a config,
/// this defaults to the value set in the underlying SparkContext, if any.
/// </remarks>
/// <returns>The RuntimeConfig object</returns>
public RuntimeConfig Conf()
{
    var conf = (JvmObjectReference)_jvmObject.Invoke("conf");
    return new RuntimeConfig(conf);
}
/// <summary>
/// Controls the log level. This overrides any user-defined log settings.
/// </summary>
/// <remarks>
/// Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN
/// </remarks>
/// <param name="logLevel">The desired log level as a string.</param>
public void SetLogLevel(string logLevel) => _jvmObject.Invoke("setLogLevel", logLevel);
// Converts the underlying JVM object to a DataFrame via the JVM-side toDF().
internal DataFrame ToDF()
{
    var df = (JvmObjectReference)_jvmObject.Invoke("toDF");
    return new DataFrame(df);
}
// Applies this object to the given columns on the JVM side and wraps the result.
// The params array is boxed to a single object so it is passed as one JVM argument.
internal Column Apply(params Column[] columns) =>
    new Column((JvmObjectReference)_jvmObject.Invoke("apply", (object)columns));
/// <summary>
/// Returns a new `DataFrame` that drops rows containing any null or NaN values.
/// </summary>
/// <returns>DataFrame object</returns>
public DataFrame Drop()
{
    return WrapAsDataFrame(_jvmObject.Invoke("drop"));
}
/// <summary>
/// Inserts a new row to the target table based on the rules defined by <c>values</c>.
/// </summary>
/// <param name="values">Rules to insert a row as a map between target column names and
/// corresponding expressions as Column objects.</param>
/// <returns>DeltaMergeBuilder object.</returns>
public DeltaMergeBuilder Insert(Dictionary<string, Column> values)
{
    var builder = (JvmObjectReference)_jvmObject.Invoke("insert", values);
    return new DeltaMergeBuilder(builder);
}
/// <summary>
/// Sets the index name on the underlying JVM builder.
/// </summary>
/// <param name="indexName">Name of the index.</param>
/// <returns>This Builder object for chaining.</returns>
public Builder IndexName(string indexName)
{
    _jvmObject.Invoke("indexName", indexName);
    return this;
}
/// <summary>
/// Caches the specified table in-memory.
///
/// Spark SQL can cache tables using an in-memory columnar format by calling
/// `CacheTable("tableName")` or `DataFrame.Cache()`. Spark SQL will scan only required
/// columns and will automatically tune compression to minimize memory usage and GC
/// pressure. You can call `UncacheTable("tableName")` to remove the table from memory.
/// </summary>
/// <param name="tableName">Is either a qualified or unqualified name that designates a
/// table. If no database identifier is provided, it refers to a table in the current
/// database.</param>
public void CacheTable(string tableName)
{
    _jvmObject.Invoke("cacheTable", tableName);
}
/// <summary>
/// Updates table rows based on the rules defined by <c>set</c>.
/// </summary>
/// <param name="set">Rules to update a row as a map between target column names and
/// corresponding expressions as Column objects.</param>
/// <returns>DeltaMergeBuilder object.</returns>
public DeltaMergeBuilder Update(Dictionary<string, Column> set)
{
    var builder = (JvmObjectReference)_jvmObject.Invoke("update", set);
    return new DeltaMergeBuilder(builder);
}
/// <summary>
/// Computes aggregates by specifying a series of aggregate columns.
/// </summary>
/// <param name="expr">Column to aggregate on</param>
/// <param name="exprs">Additional columns to aggregate on</param>
/// <returns>New DataFrame object with aggregation applied</returns>
public DataFrame Agg(Column expr, params Column[] exprs)
{
    var aggregated = (JvmObjectReference)_jvmObject.Invoke("agg", expr, exprs);
    return new DataFrame(aggregated);
}
/// <summary>
/// Applies an alias to the DeltaTable. This is similar to <c>Dataset.As(alias)</c> or SQL
/// <c>tableName AS alias</c>.
/// </summary>
/// <param name="alias">The table alias.</param>
/// <returns>Aliased DeltaTable.</returns>
public DeltaTable As(string alias)
{
    var aliased = (JvmObjectReference)_jvmObject.Invoke("as", alias);
    return new DeltaTable(aliased);
}
/// <summary>
/// Builds the actions to perform when the merge condition is matched.
/// </summary>
/// <returns>DeltaMergeMatchedActionBuilder object.</returns>
public DeltaMergeMatchedActionBuilder WhenMatched()
{
    var builder = (JvmObjectReference)_jvmObject.Invoke("whenMatched");
    return new DeltaMergeMatchedActionBuilder(builder);
}
/// <summary>
/// Checks equality by delegating to the JVM-side equals().
/// </summary>
/// <param name="that">The object to compare against.</param>
/// <returns>True if the JVM-side objects are equal.</returns>
// NOTE(review): an Equals override should be paired with a GetHashCode override —
// confirm one exists elsewhere in this class.
public override bool Equals(object that)
{
    return (bool)_jvmObject.Invoke("equals", that);
}
/// <summary>
/// Specifies the input data source format.
/// </summary>
/// <param name="source">Name of the data source</param>
/// <returns>This DataFrameReader object</returns>
public DataFrameReader Format(string source)
{
    _jvmObject.Invoke("format", source);
    return this;
}
/// <summary>
/// Gets the element at the given index by delegating to the JVM-side apply().
/// </summary>
/// <param name="index">Index of the element to retrieve.</param>
/// <returns>The element at <paramref name="index"/>.</returns>
public T Apply(int index)
{
    return (T)_jvmObject.Invoke("apply", index);
}
/// <summary>
/// Constructor where the SparkContext object is already created on the JVM side.
/// </summary>
/// <param name="jvmObject">JVM object reference for this SparkContext object</param>
internal SparkContext(JvmObjectReference jvmObject)
{
    _jvmObject = jvmObject;
    // Eagerly fetch the configuration from the JVM so it is available locally.
    _conf = new SparkConf((JvmObjectReference)_jvmObject.Invoke("getConf"));
}
/// <summary>
/// Gets the underlying Expression object of the <see cref="Column"/>.
/// </summary>
internal JvmObjectReference Expr() =>
    (JvmObjectReference)_jvmObject.Invoke("expr");
/// <summary>
/// The master URL to connect to, such as "local" to run locally with one thread,
/// "local[4]" to run locally with 4 cores, or "spark://master:7077" to run on a Spark
/// standalone cluster.
/// </summary>
/// <param name="master">Spark master</param>
public SparkConf SetMaster(string master)
{
    _jvmObject.Invoke("setMaster", master);
    return this;
}
// Creates a broadcast variable for the given value.
// The temp file path is derived from the context's configuration before the
// JVM-side broadcast is created; the broadcast id is then read back from the JVM.
internal Broadcast(SparkContext sc, T value)
{
    _path = CreateTempFilePath(sc.GetConf());
    _jvmObject = CreateBroadcast(sc, value);
    _bid = (long)_jvmObject.Invoke("id");
}
/// <summary>
/// Registers a Java UDF class using reflection.
/// </summary>
/// <typeparam name="TResult">Return type</typeparam>
/// <param name="name">Name of the UDF</param>
/// <param name="className">Class name that defines UDF</param>
public void RegisterJava<TResult>(string name, string className)
{
    _jvmObject.Invoke("registerJava", name, className, GetDataType<TResult>());
}
/// <summary>
/// Asynchronously deletes cached copies of this broadcast on the executors.
/// If the broadcast is used after this is called, it will need to be re-sent to each
/// executor.
/// </summary>
public void Unpersist() => _jvmObject.Invoke("unpersist");
/// <summary>
/// Starts a new session with isolated SQL configurations; temporary tables and registered
/// functions are isolated, but the underlying SparkContext and cached data are shared.
/// </summary>
/// <remarks>
/// Other than the SparkContext, all shared state is initialized lazily.
/// This method will force the initialization of the shared state to ensure that parent
/// and child sessions are set up with the same shared state. If the underlying catalog
/// implementation is Hive, this will initialize the metastore, which may take some time.
/// </remarks>
/// <returns>New SparkSession object</returns>
public SparkSession NewSession()
{
    var session = (JvmObjectReference)_jvmObject.Invoke("newSession");
    return new SparkSession(session);
}
/// <summary>
/// Returns true if the option is None, false otherwise.
/// </summary>
/// <returns>true if the option is None, false otherwise</returns>
internal bool IsEmpty()
{
    return (bool)_jvmObject.Invoke("isEmpty");
}
/// <summary>
/// Calculates the approximate quantiles of a numerical column of a DataFrame.
/// </summary>
/// <remarks>
/// This method implements a variation of the Greenwald-Khanna algorithm
/// (with some speed optimizations).
/// </remarks>
/// <param name="columnName">Column name</param>
/// <param name="probabilities">A list of quantile probabilities</param>
/// <param name="relativeError">
/// The relative target precision to achieve (greater than or equal to 0)
/// </param>
/// <returns>The approximate quantiles at the given probabilities</returns>
public double[] ApproxQuantile(
    string columnName,
    IEnumerable<double> probabilities,
    double relativeError)
{
    return (double[])_jvmObject.Invoke(
        "approxQuantile", columnName, probabilities, relativeError);
}
/// <summary>
/// Defines the partitioning columns in a `WindowSpec`.
/// </summary>
/// <param name="colName">Name of column</param>
/// <param name="colNames">Additional column names</param>
/// <returns>WindowSpec object</returns>
public WindowSpec PartitionBy(string colName, params string[] colNames)
{
    return WrapAsWindowSpec(_jvmObject.Invoke("partitionBy", colName, colNames));
}