예제 #1
0
        internal void Q8d()
        {
            Func <Column, Column>         getYear   = Udf <string, string>(x => x.Substring(0, 4));
            Func <Column, Column, Column> discPrice = VectorUdf <DoubleDataFrameColumn, DoubleDataFrameColumn, DoubleDataFrameColumn>(
                (price, discount) => VectorDataFrameFunctions.ComputeDiscountPrice(price, discount));

            Func <Column, Column, Column> isBrazil = Udf <string, double, double>((x, y) => x == "BRAZIL" ? y : 0);

            DataFrame fregion = _region.Filter(Col("r_name") == "AMERICA");
            DataFrame forder  = _orders.Filter(Col("o_orderdate") <= "1996-12-31" & Col("o_orderdate") >= "1995-01-01");
            DataFrame fpart   = _part.Filter(Col("p_type") == "ECONOMY ANODIZED STEEL");

            DataFrame nat = _nation.Join(_supplier, Col("n_nationkey") == _supplier["s_nationkey"]);

            DataFrame line = _lineitem.Select(Col("l_partkey"), Col("l_suppkey"), Col("l_orderkey"),
                                              discPrice(Col("l_extendedprice"), Col("l_discount")).As("volume"))
                             .Join(fpart, Col("l_partkey") == fpart["p_partkey"])
                             .Join(nat, Col("l_suppkey") == nat["s_suppkey"]);

            _nation.Join(fregion, Col("n_regionkey") == fregion["r_regionkey"])
            .Select(Col("n_nationkey"))
            .Join(_customer, Col("n_nationkey") == _customer["c_nationkey"])
            .Select(Col("c_custkey"))
            .Join(forder, Col("c_custkey") == forder["o_custkey"])
            .Select(Col("o_orderkey"), Col("o_orderdate"))
            .Join(line, Col("o_orderkey") == line["l_orderkey"])
            .Select(getYear(Col("o_orderdate")).As("o_year"), Col("volume"),
                    isBrazil(Col("n_name"), Col("volume")).As("case_volume"))
            .GroupBy(Col("o_year"))
            .Agg((Sum(Col("case_volume")) / Sum("volume")).As("mkt_share"))
            .Sort(Col("o_year"))
            .Show();
        }
예제 #2
0
        internal void Q1d()
        {
            Func <Column, Column, Column> discPrice = VectorUdf <DoubleDataFrameColumn, DoubleDataFrameColumn, DoubleDataFrameColumn>(
                (price, discount) => VectorDataFrameFunctions.ComputeDiscountPrice(price, discount));

            Func <Column, Column, Column, Column> total = VectorUdf <DoubleDataFrameColumn, DoubleDataFrameColumn, DoubleDataFrameColumn, DoubleDataFrameColumn>(
                (price, discount, tax) => VectorDataFrameFunctions.ComputeTotal(price, discount, tax));

            _lineitem.Filter(Col("l_shipdate") <= "1998-09-02")
            .GroupBy(Col("l_returnflag"), Col("l_linestatus"))
            .Agg(Sum(Col("l_quantity")).As("sum_qty"), Sum(Col("l_extendedprice")).As("sum_base_price"),
                 Sum(discPrice(Col("l_extendedprice"), Col("l_discount"))).As("sum_disc_price"),
                 Sum(total(Col("l_extendedprice"), Col("l_discount"), Col("l_tax"))).As("sum_charge"),
                 Avg(Col("l_quantity")).As("avg_qty"),
                 Avg(Col("l_extendedprice")).As("avg_price"),
                 Avg(Col("l_discount")).As("avg_disc"),
                 Count(Col("l_quantity")).As("count_order")
                 )
            .Sort(Col("l_returnflag"), Col("l_linestatus"))
            .Show();
        }