コード例 #1
0
ファイル: RowCluster.cs プロジェクト: pwdlugosz/Horse
        public override RecordSet Initialize(DataSet Data, Predicate Where, FNodeSet Fields, int Clusters)
        {

            // Get the min of each field //
            AggregateSet set1 = new AggregateSet();
            for (int i = 0; i < Fields.Count; i++)
            {
                set1.Add(new AggregateMin(Fields[i].CloneOfMe()), Fields.Alias(i));
            }

            // Get the max of each field //
            AggregateSet set2 = new AggregateSet();
            for (int i = 0; i < Fields.Count; i++)
            {
                set2.Add(new AggregateMax(Fields[i].CloneOfMe()), Fields.Alias(i));
            }

            // Render the min and max //
            RecordSet rs1 = AggregatePlan.Render(Data, Where, new FNodeSet(), set1);
            RecordSet rs2 = AggregatePlan.Render(Data, Where, new FNodeSet(), set2);

            // Create the output means table //
            RecordSet rs = new RecordSet(Schema.Join(new Schema("key int, count double"), rs1.Columns));

            // Fill in the gaps //
            for (int i = 0; i < Clusters; i++)
            {

                if (i == 0)
                {
                    RecordBuilder rb = new RecordBuilder();
                    rb.Add(0);
                    rb.Add(0D);
                    rb.Add(rs1[0]);
                    rs.Add(rb.ToRecord());
                }
                else if (i == Clusters - 1)
                {
                    RecordBuilder rb = new RecordBuilder();
                    rb.Add(Clusters - 1);
                    rb.Add(0D);
                    rb.Add(rs2[0]);
                    rs.Add(rb.ToRecord());
                }
                else
                {

                    RecordBuilder rb = new RecordBuilder();
                    rb.Add(i);
                    rb.Add(0D);
                    for (int j = 0; j < rs1.Columns.Count; j++)
                    {
                        double clus = (double)Clusters;
                        double jay = (double)j;
                        rb.Add(rs1[0][j].DOUBLE + (rs2[0][j].DOUBLE - rs1[0][j].DOUBLE) / clus * jay);
                    }
                    rs.Add(rb.ToRecord());

                }

            }

            return rs;

        }
コード例 #2
0
ファイル: RowCluster.cs プロジェクト: pwdlugosz/Horse
        private bool ItterateOnce()
        {

            // Create the cluster mapping FNode; this node does the nearest neighbor test //
            FNodeSet keys = new FNodeSet();
            FNode n = new FNodeResult(null, new RowClusterCellFunction(this._rule, this._means));
            foreach (FNode t in this._fields.Nodes)
            {
                n.AddChildNode(t.CloneOfMe());
            }
            keys.Add("CLUSTER_ID", n);

            // Create the aggregate//
            AggregateSet set = new AggregateSet();

            // Add a counter to the aggregate //
            set.Add(new AggregateSum(FNodeFactory.Value(1D)), "CLUSTER_ELEMENT_COUNT");

            // load the aggregate with the mean aggregates //
            for (int i = 0; i < this._fields.Count; i++)
            {
                set.Add(new AggregateAverage(this._fields[i].CloneOfMe()), this._fields.Alias(i));
            }

            // Load the aggregate with the variance aggregates //
            for (int i = 0; i < this._fields.Count; i++)
            {
                set.Add(new AggregateVarianceP(this._fields[i].CloneOfMe()), "VAR_" + this._fields.Alias(i));
            }

            // Run the aggregate; this is basically a horse aggregate step with the cluster node mapping as the key, and averaging as the value
            RecordSet rs = AggregatePlan.Render(this._data, this._where, keys, set);

            // Need to chop up the recordset we just created //
            Key mean_keeper = Key.Build(this._means.Columns.Count);
            RecordSet means = FastReadPlan.Render(rs, Predicate.TrueForAll, mean_keeper, long.MaxValue);
            Key stat_keeper = new Key(0,1); // keep the id and the count
            for (int i = mean_keeper.Count; i < rs.Columns.Count; i++)
            {
                stat_keeper.Add(i);
            }
            this._stats = FastReadPlan.Render(rs, Predicate.TrueForAll, stat_keeper, long.MaxValue);
            
            // Check for cluster misses; cluster misses occur when no node maps to a cluster correctly //
            if (means.Count != this._means.Count)
            {
                this.HandleNullCluster(means);
            }

            // Compare the changes between itterations
            double change = this.CompareChanges(this._means, means);
            
            // Set the means to the newly calculated means //
            this._means = means;
            
            // Return a boolean indicating if we failed or not
            return change < this._exit_condition;

        }
コード例 #3
0
ファイル: RowCluster.cs プロジェクト: pwdlugosz/Horse
        public override RecordSet Initialize(DataSet Data, Predicate Where, FNodeSet Fields,  int Clusters)
        {

            AggregateSet set = new AggregateSet();
            set.Add(new AggregateSum(FNodeFactory.Value(1D)), "CLUSTER_ELEMENT_COUNT");
            for (int i = 0; i < Fields.Count; i++)
            {
                set.Add(new AggregateAverage(Fields[i].CloneOfMe()), Fields.Alias(i));
            }

            FNode rnd = new FNodeResult(null, new CellRandomInt());
            rnd.AddChildNode(new FNodeValue(rnd, new Cell(this.Seed)));
            rnd.AddChildNode(new FNodeValue(rnd, new Cell(0)));
            rnd.AddChildNode(new FNodeValue(rnd, new Cell(Clusters)));
            FNodeSet keys = new FNodeSet();
            keys.Add(rnd);

            RecordSet rs = AggregatePlan.Render(Data, Where, keys, set);
            return rs;

        }
コード例 #4
0
ファイル: AggregateSet.cs プロジェクト: pwdlugosz/Horse
        public AggregateSet CloneOfMe()
        {

            AggregateSet agg = new AggregateSet();
            for (int i = 0; i < this.Count; i++)
            {
                agg.Add(this._cache[i].CloneOfMe(), this._alias[i]);
            }
            return agg;

        }