예제 #1
0
 /// <summary>
 /// Gets the splits that were set using SetSplits.
 /// </summary>
 /// <returns>double[], the splits to be used to bucket the input column</returns>
 public double[] GetSplits() => (double[])_jvmObject.Invoke("getSplits");
예제 #2
0
 /// <summary>
 /// Specifies how data of a streaming DataFrame is written to a streaming sink.
 /// </summary>
 /// <remarks>
 /// The following modes are supported:
 /// "append": Only the new rows in the streaming DataFrame/Dataset will be written to
 ///           the sink.
 /// "complete": All the rows in the streaming DataFrame/Dataset will be written to the sink
 ///             every time there are some updates.
 /// "update": Only the rows that were updated in the streaming DataFrame will
 ///           be written to the sink every time there are some updates. If the query
 ///           doesn't contain aggregations, it will be equivalent to `append` mode.
 /// </remarks>
 /// <param name="outputMode">Output mode name</param>
 /// <returns>This DataStreamWriter object</returns>
 public DataStreamWriter OutputMode(string outputMode)
 {
     // Forward the mode to the JVM-side writer, then return this for chaining.
     _jvmObject.Invoke("outputMode", outputMode);
     return this;
 }
예제 #3
0
 /// <summary>
 /// Selects a column based on the column name.
 /// </summary>
 /// <param name="columnName">Column name</param>
 /// <returns>Column object</returns>
 public Column this[string columnName] =>
     WrapAsColumn(_jvmObject.Invoke("col", columnName));
예제 #4
0
 /// <summary>
 /// Returns the JVM toString value rather than the .NET ToString default.
 /// </summary>
 /// <returns>JVM toString() value</returns>
 public override string ToString()
 {
     return (string)_jvmObject.Invoke("toString");
 }
예제 #5
0
 /// <summary>
 /// Collect all the index metadata.
 /// </summary>
 /// <returns>All index metadata as a <see cref="DataFrame"/>.</returns>
 public DataFrame Indexes()
 {
     var reference = (JvmObjectReference)_jvmObject.Invoke("indexes");
     return new DataFrame(reference);
 }
예제 #6
0
 /// <summary>
 /// Runtime configuration interface for Spark.
 /// </summary>
 /// <remarks>
 /// This is the interface through which the user can get and set all Spark and Hadoop
 /// configurations that are relevant to Spark SQL. When getting the value of a config,
 /// this defaults to the value set in the underlying SparkContext, if any.
 /// </remarks>
 /// <returns>The RuntimeConfig object</returns>
 public RuntimeConfig Conf()
 {
     return new RuntimeConfig((JvmObjectReference)_jvmObject.Invoke("conf"));
 }
예제 #7
0
 /// <summary>
 /// Control our logLevel. This overrides any user-defined log settings.
 /// </summary>
 /// <remarks>
 /// Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN
 /// </remarks>
 /// <param name="logLevel">The desired log level as a string.</param>
 public void SetLogLevel(string logLevel) => _jvmObject.Invoke("setLogLevel", logLevel);
예제 #8
0
 /// <summary>
 /// Wraps the result of the JVM <c>toDF</c> call in a <see cref="DataFrame"/>.
 /// </summary>
 /// <returns>DataFrame object</returns>
 internal DataFrame ToDF()
 {
     return new DataFrame((JvmObjectReference)_jvmObject.Invoke("toDF"));
 }
예제 #9
0
 /// <summary>
 /// Invokes the JVM <c>apply</c> method with the given columns and wraps the result.
 /// </summary>
 /// <param name="columns">Columns forwarded to the JVM apply call</param>
 /// <returns>Column object wrapping the JVM result</returns>
 internal Column Apply(params Column[] columns) =>
     // The (object) cast passes the array as a single argument rather than expanding it.
     new Column((JvmObjectReference)_jvmObject.Invoke("apply", (object)columns));
예제 #10
0
 /// <summary>
 /// Returns a new `DataFrame` that drops rows containing any null or NaN values.
 /// </summary>
 /// <returns>DataFrame object</returns>
 public DataFrame Drop()
 {
     return WrapAsDataFrame(_jvmObject.Invoke("drop"));
 }
 /// <summary>
 /// Insert a new row to the target table based on the rules defined by <c>values</c>.
 /// </summary>
 /// <param name="values">Rules to insert a row as a map between target column names and
 /// corresponding expressions as Column objects.</param>
 /// <returns>DeltaMergeBuilder object.</returns>
 public DeltaMergeBuilder Insert(Dictionary<string, Column> values)
 {
     return new DeltaMergeBuilder((JvmObjectReference)_jvmObject.Invoke("insert", values));
 }
예제 #12
0
 /// <summary>
 /// Forwards the given index name to the JVM builder's <c>indexName</c> method.
 /// </summary>
 /// <param name="indexName">Name of the index</param>
 /// <returns>This Builder object so calls can be chained</returns>
 public Builder IndexName(string indexName)
 {
     _jvmObject.Invoke("indexName", indexName);
     return this;
 }
예제 #13
0
 /// <summary>
 /// Caches the specified table in-memory.
 ///
 /// Spark SQL can cache tables using an in-memory columnar format by calling
 /// `CacheTable("tableName")` or `DataFrame.Cache()`. Spark SQL will scan only required
 /// columns and will automatically tune compression to minimize memory usage and GC
 /// pressure. You can call `UncacheTable("tableName")` to remove the table from memory.
 /// </summary>
 /// <param name="tableName">Is either a qualified or unqualified name that designates a
 /// table. If no database identifier is provided, it refers to a table in the current
 /// database.</param>
 public void CacheTable(string tableName)
 {
     _jvmObject.Invoke("cacheTable", tableName);
 }
 /// <summary>
 /// Update rows in the target table based on the rules defined by <c>set</c>.
 /// </summary>
 /// <param name="set">Rules to update a row as a map between target column names and
 /// corresponding update expressions as Column objects.</param>
 /// <returns>DeltaMergeBuilder object.</returns>
 public DeltaMergeBuilder Update(Dictionary<string, Column> set)
 {
     return new DeltaMergeBuilder((JvmObjectReference)_jvmObject.Invoke("update", set));
 }
예제 #15
0
 /// <summary>
 /// Compute aggregates by specifying a series of aggregate columns.
 /// </summary>
 /// <param name="expr">Column to aggregate on</param>
 /// <param name="exprs">Additional columns to aggregate on</param>
 /// <returns>New DataFrame object with aggregation applied</returns>
 public DataFrame Agg(Column expr, params Column[] exprs)
 {
     return new DataFrame((JvmObjectReference)_jvmObject.Invoke("agg", expr, exprs));
 }
예제 #16
0
 /// <summary>
 /// Apply an alias to the DeltaTable. This is similar to <c>Dataset.As(alias)</c> or SQL
 /// <c>tableName AS alias</c>.
 /// </summary>
 /// <param name="alias">The table alias.</param>
 /// <returns>Aliased DeltaTable.</returns>
 public DeltaTable As(string alias)
 {
     return new DeltaTable((JvmObjectReference)_jvmObject.Invoke("as", alias));
 }
예제 #17
0
 /// <summary>
 /// Begins building the set of actions to perform when the merge condition is matched,
 /// by wrapping the JVM <c>whenMatched</c> result.
 /// </summary>
 /// <returns>DeltaMergeMatchedActionBuilder object.</returns>
 public DeltaMergeMatchedActionBuilder WhenMatched()
 {
     return new DeltaMergeMatchedActionBuilder(
         (JvmObjectReference)_jvmObject.Invoke("whenMatched"));
 }
예제 #18
0
 /// <summary>
 /// Delegates equality to the underlying JVM object's <c>equals</c> method.
 /// </summary>
 /// <param name="that">Object to compare against</param>
 /// <returns>The boolean result of the JVM equals call</returns>
 // NOTE(review): Equals is overridden without a visible matching GetHashCode override —
 // confirm one exists elsewhere in this class (CS0659 otherwise).
 public override bool Equals(object that)
 {
     return (bool)_jvmObject.Invoke("equals", that);
 }
예제 #19
0
 /// <summary>
 /// Specifies the input data source format.
 /// </summary>
 /// <param name="source">Name of the data source</param>
 /// <returns>This DataFrameReader object</returns>
 public DataFrameReader Format(string source)
 {
     // Forward the format to the JVM-side reader, then return this for chaining.
     _jvmObject.Invoke("format", source);
     return this;
 }
예제 #20
0
 /// <summary>
 /// Invokes the JVM <c>apply</c> method with the given index.
 /// </summary>
 /// <param name="index">Index forwarded to the JVM apply call</param>
 /// <returns>The JVM result cast to <typeparamref name="T"/></returns>
 public T Apply(int index)
 {
     return (T)_jvmObject.Invoke("apply", index);
 }
예제 #21
0
 /// <summary>
 /// Constructor where SparkContext object is already created.
 /// </summary>
 /// <param name="jvmObject">JVM object reference for this SparkContext object</param>
 internal SparkContext(JvmObjectReference jvmObject)
 {
     _jvmObject = jvmObject;
     // Fetch the JVM-side configuration up front and wrap it as a SparkConf.
     _conf = new SparkConf((JvmObjectReference)jvmObject.Invoke("getConf"));
 }
예제 #22
0
 /// <summary>
 /// Gets the underlying Expression object of the <see cref="Column"/>.
 /// </summary>
 internal JvmObjectReference Expr()
 {
     return((JvmObjectReference)_jvmObject.Invoke("expr"));
 }
예제 #23
0
 /// <summary>
 /// The master URL to connect to, such as "local" to run locally with one thread,
 /// "local[4]" to run locally with 4 cores, or "spark://master:7077" to run on a Spark
 /// standalone cluster.
 /// </summary>
 /// <param name="master">Spark master</param>
 public SparkConf SetMaster(string master)
 {
     _jvmObject.Invoke("setMaster", master);
     return(this);
 }
예제 #24
0
File: Broadcast.cs  Project: sbabaei/spark
 /// <summary>
 /// Constructs a Broadcast wrapper around <paramref name="value"/>.
 /// </summary>
 /// <param name="sc">SparkContext whose configuration is used to choose the temp file path</param>
 /// <param name="value">Value to broadcast</param>
 internal Broadcast(SparkContext sc, T value)
 {
     // Temp file path derived from the SparkContext configuration; CreateTempFilePath
     // and CreateBroadcast are defined elsewhere in this class — presumably the value
     // is serialized to _path and registered JVM-side, but that is not visible here.
     _path      = CreateTempFilePath(sc.GetConf());
     _jvmObject = CreateBroadcast(sc, value);
     // Cache the broadcast id reported by the JVM-side broadcast object.
     _bid       = (long)_jvmObject.Invoke("id");
 }
예제 #25
0
 /// <summary>
 /// Register a Java UDF class using reflection.
 /// </summary>
 /// <typeparam name="TResult">Return type</typeparam>
 /// <param name="name">Name of the UDF</param>
 /// <param name="className">Class name that defines UDF</param>
 public void RegisterJava<TResult>(string name, string className) =>
     _jvmObject.Invoke("registerJava", name, className, GetDataType<TResult>());
예제 #26
0
File: Broadcast.cs  Project: sbabaei/spark
 /// <summary>
 /// Asynchronously delete cached copies of this broadcast on the executors.
 /// If the broadcast is used after this is called, it will need to be re-sent to each
 /// executor.
 /// </summary>
 public void Unpersist() => _jvmObject.Invoke("unpersist");
예제 #27
0
 /// <summary>
 /// Start a new session with isolated SQL configurations, temporary tables, registered
 /// functions are isolated, but sharing the underlying SparkContext and cached data.
 /// </summary>
 /// <remarks>
 /// Other than the SparkContext, all shared state is initialized lazily.
 /// This method will force the initialization of the shared state to ensure that parent
 /// and child sessions are set up with the same shared state. If the underlying catalog
 /// implementation is Hive, this will initialize the metastore, which may take some time.
 /// </remarks>
 /// <returns>New SparkSession object</returns>
 public SparkSession NewSession()
 {
     return new SparkSession((JvmObjectReference)_jvmObject.Invoke("newSession"));
 }
예제 #28
0
 /// <summary>
 /// Returns true if the option is None, false otherwise.
 /// </summary>
 /// <returns>true if the option is None, false otherwise</returns>
 internal bool IsEmpty()
 {
     return (bool)_jvmObject.Invoke("isEmpty");
 }
예제 #29
0
 /// <summary>
 /// Calculates the approximate quantiles of a numerical column of a DataFrame.
 /// </summary>
 /// <remarks>
 /// This method implements a variation of the Greenwald-Khanna algorithm
 /// (with some speed optimizations).
 /// </remarks>
 /// <param name="columnName">Column name</param>
 /// <param name="probabilities">A list of quantile probabilities</param>
 /// <param name="relativeError">
 /// The relative target precision to achieve (greater than or equal to 0)
 /// </param>
 /// <returns>The approximate quantiles at the given probabilities</returns>
 public double[] ApproxQuantile(
     string columnName,
     IEnumerable<double> probabilities,
     double relativeError)
 {
     return (double[])_jvmObject.Invoke(
         "approxQuantile", columnName, probabilities, relativeError);
 }
예제 #30
0
 /// <summary>
 /// Defines the partitioning columns in a `WindowSpec`.
 /// </summary>
 /// <param name="colName">Name of column</param>
 /// <param name="colNames">Additional column names</param>
 /// <returns>WindowSpec object</returns>
 public WindowSpec PartitionBy(string colName, params string[] colNames)
 {
     return WrapAsWindowSpec(_jvmObject.Invoke("partitionBy", colName, colNames));
 }