}     // _Ordered_Comparer_impl_ class

        /// <summary>
        /// Returns the rows of <paramref name="dataSet"/> sorted according to
        /// <paramref name="orderByItems"/>.
        /// </summary>
        /// <param name="dataSet">The data set whose rows should be ordered.</param>
        /// <param name="orderByItems">
        /// The order-by items. When null or empty, no ordering is requested and
        /// <paramref name="dataSet"/> is returned as-is without being read.
        /// </param>
        /// <returns>
        /// The original data set when there is nothing to order by, an
        /// <c>EmptyDataSet</c> when the data set has no rows, otherwise an
        /// <c>InMemoryDataSet</c> holding the sorted rows.
        /// </returns>
        public static DataSet getOrdered(DataSet dataSet, params OrderByItem[] orderByItems)
        {
            // Nothing to order by: hand the data set back untouched instead of
            // replacing it with an empty in-memory copy.
            if (orderByItems == null || orderByItems.Length == 0)
            {
                return(dataSet);
            }

            // NOTE(review): the previous version also filled an int[] with
            // dataSet.indexOf(item.getSelectItem()) per order-by item, but never
            // read it; the comparer below receives the order-by items directly.
            // Confirm the comparer does not rely on those indexes.
            List <Row> values = readDataSetFull(dataSet);
            if (values.IsEmpty())
            {
                return(new EmptyDataSet(dataSet.getSelectItems()));
            }

            IComparer <Row> valueComparator = ObjectComparator.getComparator();

            // create a comparator for doing the actual sorting/ordering
            IComparer <Row> comparator = new _Ordered_Comparer_impl_(valueComparator, orderByItems);

            // Sort with the row comparer built from the order-by items, not with
            // the bare value comparator.
            values.Sort(comparator);

            return(new InMemoryDataSet(values));
        }     // getOrdered()
 /// <summary>
 /// Builds a new data set in which every sample's first two attributes are
 /// replaced by the result of <c>MapTwoFeaturesToDegrees</c> for the given degree.
 /// Each sample's category and count are carried over unchanged.
 /// </summary>
 /// <typeparam name="T">The category type of the samples.</typeparam>
 /// <param name="dataSet">The source data set; attributes 0 and 1 of each sample are read.</param>
 /// <param name="degree">The degree passed through to <c>MapTwoFeaturesToDegrees</c>.</param>
 /// <returns>An in-memory data set containing the mapped samples.</returns>
 public static IDataSet <T, double> MapTwoFeaturesDataSetToDegree <T>(IDataSet <T, double> dataSet, int degree)
     where T : IEquatable <T>
 {
     // Project every sample into its mapped counterpart and materialize the result.
     var mappedSamples = dataSet.GetData()
                         .Select(sample => TrainingSample.Create(
                                     sample.Category,
                                     MapTwoFeaturesToDegrees(sample.Attributes[0], sample.Attributes[1], degree),
                                     sample.Count))
                         .ToArray();

     return(InMemoryDataSet.Create(mappedSamples));
 }
        /// <summary>
        /// Loads a two-feature, boolean-labelled data set from the named resource.
        /// Each line is split on commas into numbers: the first two values become
        /// the sample's attributes and the third is the label (non-zero means true).
        /// </summary>
        /// <param name="resourceName">Name of the resource handed to <c>GetResourceLines</c>.</param>
        /// <returns>An in-memory data set with one sample per parsed line.</returns>
        private static IDataSet <bool, double> GetDataSet(string resourceName)
        {
            var rawLines = GetResourceLines(resourceName);
            var numericRows = ParseDoublesLines(rawLines, new[] { ',' });

            // Column 2 is the label, columns 0 and 1 are the features.
            var samples = numericRows
                          .Select(fields => TrainingSample.Create(fields[2] != 0, new[] { fields[0], fields[1] }))
                          .ToArray();

            return(InMemoryDataSet.Create(samples));
        }
Example #4
0
        /// <summary>
        /// Trains a multinomial naive Bayes classifier on four small labelled
        /// documents ("China" / "Not China") and checks the category predicted
        /// for five query documents.
        /// </summary>
        public void CorrectlyClassifiesTestDataSetFromExample13Dot1()
        {
            // Arrange: three "China" documents and one "Not China" document.
            var labelledDocuments = new[]
            {
                TrainingSample.Create("China", new[] { "Chinese", "Beijing", "Chinese" }),
                TrainingSample.Create("China", new[] { "Chinese", "Chinese", "Shanghai" }),
                TrainingSample.Create("China", new[] { "Chinese", "Makao" }),
                TrainingSample.Create("Not China", new[] { "Tokio", "Japan", "Chinese" }),
            };
            var corpus = InMemoryDataSet.Create(labelledDocuments);

            // Act: train the model.
            var model = MultinomialNaiveBayesClassifier.Create(corpus);

            // Assert: each query is classified and checked in turn.
            var mostlyChinese = model.Classify(new[] { "Chinese", "Chinese", "Chinese", "Tokio", "Japan" });
            Assert.AreEqual("China", mostlyChinese);

            var tokioOnly = model.Classify(new[] { "Tokio" });
            Assert.AreEqual("Not China", tokioOnly);

            var chineseAndTokio = model.Classify(new[] { "Chinese", "Tokio" });
            Assert.AreEqual("China", chineseAndTokio);

            var withUnknownWord = model.Classify(new[] { "Unknown", "Chinese", "Tokio" });
            Assert.AreEqual("China", withUnknownWord);

            var chineseTokioJapan = model.Classify(new[] { "Chinese", "Tokio", "Japan" });
            Assert.AreEqual("Not China", chineseTokioJapan);
        }
        } // getInformationSchema()

        /// <summary>
        /// Materializes one of the information-schema tables ("tables", "columns"
        /// or "relationships") as a data set describing the default schema.
        /// </summary>
        /// <param name="table">
        /// The information-schema table to materialize; its name selects which
        /// metadata is produced and its columns define the row header.
        /// </param>
        /// <param name="selectItems">The select items used to pick the column subset of the result.</param>
        /// <returns>
        /// The result of <c>MetaModelHelper.getSelection</c> applied to the
        /// materialized rows (or to an <c>EmptyDataSet</c> when there are no rows).
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown when the table name is not one of the supported information-schema tables.
        /// </exception>
        public virtual DataSet materializeInformationSchemaTable(Table table, List <SelectItem> selectItems)
        {
            String tableName = table.getName();

            SelectItem[]        columnSelectItems = MetaModelHelper.createSelectItems(table.getColumns());
            SimpleDataSetHeader header            = new SimpleDataSetHeader(columnSelectItems);

            Table[]    tables = getDefaultSchema().getTables(false);
            List <Row> data   = new List <Row>();

            if ("tables".Equals(tableName))
            {
                // "tables" columns: name, type, num_columns, remarks
                foreach (Table t in tables)
                {
                    // Check the table's own type (getType), not the CLR runtime type
                    // (GetType), which is never null. Held as Object so the null
                    // check is valid whatever getType() is declared to return.
                    Object tableType  = t.getType();
                    String typeString = null;
                    if (tableType != null)
                    {
                        typeString = tableType.ToString();
                    }
                    data.Add(new DefaultRow(header,
                                            new Object[] { t.getName(), typeString, t.getColumnCount(), t.getRemarks() }));
                }
            }
            else if ("columns".Equals(tableName))
            {
                // "columns" columns: name, type, native_type, size, nullable,
                // indexed, table, remarks
                foreach (Table t in tables)
                {
                    foreach (Column c in t.getColumns())
                    {
                        // The null check must guard the column's type, since that
                        // is the value being converted to a string.
                        Object columnType = c.getType();
                        String typeString = null;
                        if (columnType != null)
                        {
                            typeString = columnType.ToString();
                        }
                        data.Add(new DefaultRow(header, new Object[] { c.getName(), typeString, c.getNativeType(),
                                                                       c.getColumnSize(), c.isNullable(), c.isIndexed(), t.getName(), c.getRemarks() }));
                    }
                }
            }
            else if ("relationships".Equals(tableName))
            {
                // "relationships" columns: primary_table, primary_column,
                // foreign_table, foreign_column
                foreach (Relationship r in getDefaultSchema().getRelationships())
                {
                    Column[] primaryColumns = r.getPrimaryColumns();
                    Column[] foreignColumns = r.getForeignColumns();
                    Table    pTable         = r.getPrimaryTable();
                    Table    fTable         = r.getForeignTable();

                    // One row per primary/foreign column pair, matched by position.
                    for (int i = 0; i < primaryColumns.Length; i++)
                    {
                        Column pColumn = primaryColumns[i];
                        Column fColumn = foreignColumns[i];
                        data.Add(new DefaultRow(header,
                                                new Object[] { pTable.getName(), pColumn.getName(), fTable.getName(), fColumn.getName() }));
                    }
                }
            }
            else
            {
                throw new ArgumentException("Cannot materialize non information_schema table: " + table);
            }

            DataSet dataSet;

            if (data.IsEmpty())
            {
                dataSet = new EmptyDataSet(selectItems);
            }
            else
            {
                dataSet = new InMemoryDataSet(header, data);
            }

            // Handle column subset
            return(MetaModelHelper.getSelection(selectItems, dataSet));
        } // materializeInformationSchemaTable()
        }                                                           // getGrouped()

        /// <summary>
        /// Groups the rows of <paramref name="dataSet"/> by the given group-by items
        /// and evaluates the aggregate functions among <paramref name="selectItems"/>
        /// once per group.
        /// </summary>
        /// <param name="selectItems">The select items that make up each result row.</param>
        /// <param name="dataSet">
        /// The data set to group. When grouping is performed it is read to the end
        /// and closed.
        /// </param>
        /// <param name="groupByItems">
        /// The group-by items. When null or empty no grouping is done and only
        /// <c>getSelection</c> is applied to <paramref name="dataSet"/>.
        /// </param>
        /// <returns>The result of <c>getSelection</c> applied to the grouped (or original) data set.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when an aggregate select item has no column and is not a COUNT(*) item.
        /// </exception>
        public static DataSet getGrouped(List <SelectItem> selectItems, DataSet dataSet, GroupByItem[] groupByItems)
        {
            DataSet result = dataSet;

            if (groupByItems != null && groupByItems.Length > 0)
            {
                Dictionary <Row, Dictionary <SelectItem, List <Object> > > uniqueRows = new Dictionary <Row, Dictionary <SelectItem, List <Object> > >();

                SelectItem[] groupBySelects = new SelectItem[groupByItems.Length];
                for (int i = 0; i < groupBySelects.Length; i++)
                {
                    groupBySelects[i] = groupByItems[i].getSelectItem();
                }
                DataSetHeader groupByHeader = new CachingDataSetHeader(groupBySelects);

                // Creates a list of SelectItems that have aggregate functions
                List <SelectItem> functionItems = getAggregateFunctionSelectItems(selectItems);

                try
                {
                    // Loop through the dataset and identify groups
                    while (dataSet.next())
                    {
                        Row row = dataSet.getRow();

                        // Subselect a row prototype with only the unique values that
                        // define the group
                        Row uniqueRow = row.getSubSelection(groupByHeader);

                        // function input is the values used for calculating aggregate
                        // functions in the group
                        Dictionary <SelectItem, List <Object> > functionInput;
                        if (!uniqueRows.TryGetValue(uniqueRow, out functionInput))
                        {
                            // This is a new group: create a fresh function input
                            // with an empty value list per aggregate item
                            functionInput = new Dictionary <SelectItem, List <Object> >();
                            foreach (SelectItem item in functionItems)
                            {
                                functionInput.Add(item, new List <Object>());
                            }
                            uniqueRows.Add(uniqueRow, functionInput);
                        }
                        // otherwise the group already exists and TryGetValue has
                        // handed back its existing function input

                        // Loop through aggregate functions to check for validity
                        foreach (SelectItem item in functionItems)
                        {
                            List <Object> objects = functionInput[item];
                            Column        column  = item.getColumn();
                            if (column != null)
                            {
                                Object value = row.getValue(new SelectItem(column));
                                objects.Add(value);
                            }
                            else if (SelectItem.isCountAllItem(item))
                            {
                                // Just use the empty string, since COUNT(*) doesn't
                                // evaluate values (but null values should be prevented)
                                objects.Add("");
                            }
                            else
                            {
                                throw new ArgumentException("Expression function not supported: " + item);
                            }
                        }
                    }
                }
                finally
                {
                    // Release the source data set even if grouping failed part-way.
                    dataSet.close();
                }

                List <Row>    resultData   = new List <Row>();
                DataSetHeader resultHeader = new CachingDataSetHeader(selectItems);

                // Loop through the groups to generate aggregates
                foreach (KeyValuePair <Row, Dictionary <SelectItem, List <Object> > > key_value in uniqueRows)
                {
                    Row row = key_value.Key;
                    Dictionary <SelectItem, List <Object> > functionInput = key_value.Value;
                    Object[] resultRow = new Object[selectItems.Count];
                    // Loop through select items to generate a row
                    int i = 0;
                    foreach (SelectItem item in selectItems)
                    {
                        int uniqueRowIndex = row.indexOf(item);
                        if (uniqueRowIndex != -1)
                        {
                            // If there's already a value for the select item in the
                            // row, keep it (it's one of the grouped by columns)
                            resultRow[i] = row.getValue(uniqueRowIndex);
                        }
                        else
                        {
                            // Use the function input to calculate the aggregate
                            // value. TryGetValue (rather than the indexer) so that a
                            // select item without function input leaves the cell
                            // null instead of throwing KeyNotFoundException.
                            List <Object> objects;
                            if (functionInput.TryGetValue(item, out objects) && objects != null)
                            {
                                Object functionResult = item.getAggregateFunction().evaluate(objects.ToArray());
                                resultRow[i] = functionResult;
                            }
                            else if (item.getAggregateFunction() != null)
                            {
                                logger.error("No function input found for SelectItem: {}", item);
                            }
                        }
                        i++;
                    }
                    resultData.Add(new DefaultRow(resultHeader, resultRow, null));
                }

                if (resultData.IsEmpty())
                {
                    result = new EmptyDataSet(selectItems);
                }
                else
                {
                    result = new InMemoryDataSet(resultHeader, resultData);
                }
            }
            result = getSelection(selectItems, result);
            return(result);
        }     // getGrouped()