public SmallIndexSet(DataSet Data, Key K)
    : base(BuildSchema(K, Data.Columns))
{

    // Open readers/writers //
    RecordWriter rw = this.OpenWriter();
    RecordReader rr = Data.OpenReader();

    // Main loop //
    while (!rr.EndOfData)
    {

        // Need to pull the id and position here because read next will advance the stream
        int pos = rr.Position;
        long id = rr.SetID;
        Record q = rr.ReadNext();
        Record r = Record.Stitch(new Cell(id), new Cell(pos), new Cell(q.GetHashCode(K)));
        r = Record.Join(r, Record.Split(q, K));
        rw.Insert(r);

    }
    rw.Close();

    // Sort table //
    Key sort = new Key(2);
    for (int i = 0; i < K.Count; i++)
        sort.Add(3 + i);
    this.Sort(sort);

}
public BigIndexSet(string SinkDir, RecordReader Stream, Key K)
    : base(SinkDir, TableHeader.TempName(), BuildSchema(K, Stream.SourceSchema))
{

    // Open readers/writers //
    RecordWriter rw = this.OpenWriter();

    // Main loop //
    while (!Stream.EndOfData)
    {

        // Need to pull the id and position here because read next will advance the stream
        int pos = Stream.Position;
        long id = Stream.SetID;
        Record q = Stream.ReadNext();
        Record r = Record.Stitch(new Cell(id), new Cell(pos), new Cell(q.GetHashCode(K)));
        r = Record.Join(r, Record.Split(q, K));
        rw.Insert(r);

    }
    rw.Close();

    // Sort table //
    Key sort = new Key(2);
    for (int i = 0; i < K.Count; i++)
        sort.Add(3 + i);
    this.Sort(sort);

}
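/*
 * Index record layout produced by both constructors above (taken directly from BuildSchema
 * and the Record.Stitch calls):
 *      [0] set_id  - the extent/set identifier of the source record
 *      [1] row_id  - the position of the record within its extent
 *      [2] hash    - the hash of the record over the index key K
 *      [3..]       - the key columns themselves, split out of the source record
 * The final sort orders the index on the hash, then on each key column (ordinals 2, then 3 + i).
 */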
// Assign each evaluated field expression to its keyed column in the record //
private static void Update(Record Data, Key K, FNodeSet Fields)
{
    int idx = 0;
    for (int i = 0; i < K.Count; i++)
    {
        idx = K[i];
        Data[idx] = Fields[i].Evaluate();
    }
}
public FNodeSet(Schema Columns, Key Fields)
    : this()
{
    this.AllowNameDotName = false;
    for (int i = 0; i < Fields.Count; i++)
    {
        this.Add(Columns.ColumnName(Fields[i]),
            new FNodeFieldRef(null, Fields[i], Columns.ColumnAffinity(Fields[i]), Columns.ColumnSize(Fields[i]), null));
    }
}
public UpdatePlan(DataSet Data, Key K, FNodeSet Fields, Predicate BaseDataFilter)
    : base()
{
    this._data = Data;
    this._keys = K;
    this._values = Fields;
    this._where = BaseDataFilter;
    this.Name = "UPDATE";
}
// Text serialization //
public static string ToString(RecordSet Data, Key K, char ColumnDelim, char RowDelim)
{
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < Data.Count; i++)
    {
        sb.Append(Data[i].ToString(K, ColumnDelim));
        if (i != Data.Count - 1)
            sb.Append(RowDelim);
    }
    return sb.ToString();
}
// Constructor //
public IndexSet(RecordSet Data, Key K)
    : base(new Schema(SCHEMA_SQL))
{

    // Main loop //
    for (int i = 0; i < Data.Count; i++)
    {
        Record r = Record.Stitch(new Cell(Data[i].GetHashCode(K)), new Cell(i));
        this.Add(r);
    }

    // Sort table //
    this.Sort(new Key(0));

}
// Constructor //
public RecordSet(Schema NewColumns, Header NewHeader, List<Record> NewCache, Key NewOrderBy)
{
    this._Columns = NewColumns;
    this._Cache = NewCache;
    this._OrderBy = NewOrderBy;
    this._Head = NewHeader;
    if (NewHeader != null)
    {
        this._MaxRecordCount = NewHeader.MaxRecordCount;
        this._GhostName = NewHeader.Name;
    }
    else
    {
        this._MaxRecordCount = EstimateMaxRecords(NewColumns);
        this._GhostName = "CHUNK";
    }
}
internal static MergeAlgorithm Optimize(RecordSet T1, Key J1, RecordSet T2, Key J2)
{

    double n1 = (double)T1.Count;
    double n2 = (double)T2.Count;
    double p = Math.Min(n1, n2) / Math.Max(n1, n2);

    // Test for sort merge //
    if (Key.EqualsStrict(T1.SortBy, J1) && Key.EqualsStrict(T2.SortBy, J2))
        return MergeAlgorithm.SortMerge;

    // Test for nested loop //
    if (p <= NESTED_LOOP_RATIO)
        return MergeAlgorithm.NestedLoop;

    // Otherwise //
    return MergeAlgorithm.HashTable;

}
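/*
 * Worked example (sketch): how the size ratio p = min(n1, n2) / max(n1, n2) picks an algorithm.
 * The value of NESTED_LOOP_RATIO is not shown in this file; the figures below assume it is a
 * small constant such as 0.01, so treat the cutoffs as illustrative only.
 *      - Both inputs already sorted on their join keys             -> SortMerge, regardless of size.
 *      - n1 = 500, n2 = 1,000,000 gives p = 0.0005 <= 0.01         -> NestedLoop (one side is tiny).
 *      - n1 = 400,000, n2 = 1,000,000 gives p = 0.4 > 0.01         -> HashTable (both sides are large).
 */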
public static long Update(DataSet Data, Key K, FNodeSet Fields, Predicate BaseDataFilter)
{

    // Check that the field indices and the maps have the same length //
    if (K.Count != Fields.Count)
        throw new Exception(string.Format("Field collection passed [{0}] does not have the same number of elements as the map collection passed [{1}]", K.Count, Fields.Count));

    // Create the total update count //
    long CountOf = 0;

    // Loop through each extent //
    foreach (RecordSet rs in Data.Extents)
    {

        // Open a stream //
        RecordReader rr = new RecordReader(rs, BaseDataFilter);

        // Create a register //
        Register mem = new StreamRegister(rr);

        // Assign the register to the fields //
        Fields.AssignRegister(mem);

        // Update the data //
        while (!rr.EndOfData)
        {
            Update(rr.Read(), K, Fields);
            CountOf++;
            rr.Advance();
        }

        // Flush the extent if it is attached to disk //
        if (rs.IsAttached)
            BinarySerializer.Flush(rs);

    }

    // No need to flush the data set //
    return CountOf;

}
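/*
 * Usage sketch: a constant-value update over every row. The data set 'sales', the column
 * ordinal 4, and the alias "NEW_VALUE" are assumptions for illustration only; FNodeFactory.Value
 * and Predicate.TrueForAll are used as they appear elsewhere in this file, and the key count
 * must match the field count as checked above.
 */
Key target = new Key(4);                          // column to overwrite
FNodeSet values = new FNodeSet();
values.Add("NEW_VALUE", FNodeFactory.Value(1D));  // constant expression to assign
long touched = Update(sales, target, values, Predicate.TrueForAll);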
private static Schema BuildSchema(Key K, Schema S)
{
    Schema t = new Schema("set_id int, row_id int, hash int");
    return Schema.Join(t, Schema.Split(S, K));
}
/// <summary>
/// Parses a string into a key
/// </summary>
/// <param name="Text">The text list of columns</param>
/// <returns>A key</returns>
public Key KeyParse(string Text)
{

    Key k = new Key();
    if (Text == "*")
        return Key.Build(this.Count);

    string[] t = Text.Split(',');
    foreach (string s in t)
    {

        // Parse out the 'NAME KEY_AFFINITY' logic //
        string[] u = s.Trim().Split(' ');
        string v = u[0]; // column name
        string w = "A";  // affinity (optional)
        if (u.Length > 1)
            w = u[1];

        // Get the index and affinity //
        int j = this.ColumnIndex(v);
        KeyAffinity a = Key.ParseAffinity(w);

        // Add values //
        if (j != -1)
            k.Add(j, a);
        else if (v.ToList().TrueForAll((c) => { return "1234567890".Contains(c); }))
            k.Add(int.Parse(v), a);

    }
    return k;

}
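/*
 * Example (sketch): parsing keys against a schema built from a DDL-style string, as done
 * elsewhere in this file. The specific column names are illustrative; '*' expands to every
 * ordinal, and a bare digit string is treated as a literal column ordinal.
 */
Schema s = new Schema("set_id int, row_id int, hash int");
Key all   = s.KeyParse("*");             // ordinals 0, 1, 2
Key byCol = s.KeyParse("hash, set_id");  // ordinals 2, 0 with the default affinity
Key byOrd = s.KeyParse("1");             // ordinal 1, parsed as a literal index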
/// <summary>
/// Parses an array of column names into a key
/// </summary>
/// <param name="Columns">A variable list of columns</param>
/// <returns>A key</returns>
public Key KeyParse(string[] Columns)
{
    Key k = new Key();
    foreach (string s in Columns)
    {
        k.Add(this.ColumnIndex(s));
    }
    return k;
}
public NN_Layer(NodeReduction Connector, ScalarFunction Activator, Key Fields)
    : this()
{

    // Check if rendered //
    if (this._IsRendered)
        throw new Exception("Layer already rendered");

    // Add the references //
    for (int i = 0; i < Fields.Count; i++)
        this._Nodes.Add(new NeuralNodePrediction("Y" + i.ToString(), Activator, Connector, Fields[i]));

    // Tag as rendered //
    this._IsRendered = true;

}
public NN_Layer(bool Bias, Key Fields)
    : this()
{

    // Check if rendered //
    if (this._IsRendered)
        throw new Exception("Layer already rendered");

    // Add the bias node //
    if (Bias)
        this._Nodes.Add(new NeuralNodeStatic("DATA_BIAS", 1));

    // Add the references //
    for (int i = 0; i < Fields.Count; i++)
        this._Nodes.Add(new NeuralNodeReference("X" + i.ToString(), Fields[i]));

    // Tag as rendered //
    this._IsRendered = true;

}
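/*
 * Sketch: building an input layer over three feature columns. Key.Build is used as elsewhere
 * in this file to enumerate ordinals 0..2; the bias flag adds the static DATA_BIAS node ahead
 * of the field references X0..X2. The field count of three is an assumption for illustration.
 */
NN_Layer input = new NN_Layer(true, Key.Build(3));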
private static Table ReadTable(BinaryReader Reader)
{

    /*
     * Read:
     *      Header
     *      Schema
     *      SortKey
     *      Record Collection
     */

    // Read header //
    TableHeader h = new TableHeader(BinarySerializer.ReadRecord(Reader, 10));

    // Read schema //
    Schema s = new Schema(BinarySerializer.ReadRecords(Reader, h.ColumnCount, 4));

    // Read key //
    Key k = new Key(BinarySerializer.ReadRecord(Reader, (int)h.KeyCount));

    // Read record cache //
    List<Record> l = BinarySerializer.ReadRecords(Reader, h.Size, 2);

    // Return recordset //
    return new Table(h, s, l, k);

}
/// <summary>
/// Returns a string of column affinities
/// </summary>
/// <param name="K">A key to filter on</param>
/// <param name="Delim">A character to delimit the fields</param>
/// <returns>A string</returns>
public string ToAffinityString(Key K, char Delim)
{
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < K.Count; i++)
    {
        sb.Append(this.ColumnAffinity(K[i]).ToString());
        if (i != K.Count - 1)
            sb.Append(Delim);
    }
    return sb.ToString();
}
private static Table ReadTableSafe2(byte[] Mem, int Location)
{

    /*
     * Read:
     *      Header
     *      Schema
     *      SortKey
     *      Record Collection
     */

    // Read header //
    Record rh;
    Location = ReadRecordSafe2(Mem, Location, 11, out rh);
    TableHeader h = new TableHeader(rh);

    // Read schema //
    List<Record> s_cache = new List<Record>();
    Location = BinarySerializer.ReadRecordsSafe2(Mem, Location, h.ColumnCount, 4, s_cache);
    Schema s = new Schema(s_cache);

    // Read key //
    Record rk;
    Location = ReadRecordSafe2(Mem, Location, (int)h.KeyCount, out rk);
    Key k = new Key(rk);

    // Read record cache //
    List<Record> d_cache = new List<Record>();
    Location = BinarySerializer.ReadRecordsSafe2(Mem, Location, (int)h.Size, 2, d_cache);

    // Return recordset //
    return new Table(h, s, d_cache, k);

}
/// <summary>
/// Returns a string representation of the schema: NAME TYPE(.SIZE)? NULLABLE
/// </summary>
/// <param name="K">A key to filter on</param>
/// <param name="Delim">A character to delimit the fields</param>
/// <returns>A string</returns>
public string ToString(Key K, char Delim)
{

    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < K.Count; i++)
    {

        sb.Append(this.ColumnName(K[i]) + " " + this.ColumnAffinity(K[i]).ToString());
        if (this.ColumnNull(K[i]))
        {
            sb.Append(" NULL");
        }
        else
        {
            sb.Append(" NOT NULL");
        }
        if (i != K.Count - 1)
            sb.Append(Delim);

    }
    return sb.ToString();

}
public static Matrix ToMatrix(RecordSet Data, Key K)
{
    Matrix m = new Matrix(Data.Count, K.Count);
    for (int i = 0; i < Data.Count; i++)
    {
        for (int j = 0; j < K.Count; j++)
        {
            int k = K[j];
            m[i, j] = Data[i][k].valueDOUBLE;
        }
    }
    return m;
}
// Methods //
public abstract void Sort(Key K);
/// <summary>
/// Creates a schema from another schema
/// </summary>
/// <param name="S">The starting point schema</param>
/// <param name="K">A key representing the columns to keep</param>
/// <returns>A schema</returns>
public static Schema Split(Schema S, Key K)
{
    Schema s = new Schema();
    for (int i = 0; i < K.Count; i++)
    {
        s.Add(S.ColumnName(K[i]), S.ColumnAffinity(K[i]), S.ColumnNull(K[i]), S.ColumnSize(K[i]));
    }
    return s;
}
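/*
 * Example (sketch): projecting a schema down to a subset of its columns. The schema string
 * mirrors the index schema used elsewhere in this file; the resulting schema keeps only the
 * keyed columns, in key order.
 */
Schema source = new Schema("set_id int, row_id int, hash int");
Key keep = source.KeyParse("hash, set_id");
Schema projected = Schema.Split(source, keep); // columns: hash, set_id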
/// <summary>
/// Returns a string of column affinities
/// </summary>
/// <param name="K">A key to filter on</param>
/// <returns>A string</returns>
public string ToAffinityString(Key K)
{
    return this.ToAffinityString(K, SCHEMA_DELIM);
}
private static RecordSet ReadRecordSet(BinaryReader Reader)
{

    /*
     * Read:
     *      Header
     *      Schema
     *      SortKey
     *      Record Collection
     */

    // Read header //
    Header h = new Header(BinarySerializer.ReadRecord(Reader, 10));

    // Read schema //
    Schema s = new Schema(BinarySerializer.ReadRecords(Reader, h.ColumnCount, 4));

    // Read key //
    Key k = new Key(BinarySerializer.ReadRecord(Reader, (int)h.KeyCount));

    // Read record cache //
    List<Record> l = BinarySerializer.ReadRecords(Reader, h.RecordCount, s.Count);

    // Return recordset //
    return new RecordSet(s, h, l, k);

}
private bool ItterateOnce()
{

    // Create the cluster mapping FNode; this node does the nearest neighbor test //
    FNodeSet keys = new FNodeSet();
    FNode n = new FNodeResult(null, new RowClusterCellFunction(this._rule, this._means));
    foreach (FNode t in this._fields.Nodes)
    {
        n.AddChildNode(t.CloneOfMe());
    }
    keys.Add("CLUSTER_ID", n);

    // Create the aggregate //
    AggregateSet set = new AggregateSet();

    // Add a counter to the aggregate //
    set.Add(new AggregateSum(FNodeFactory.Value(1D)), "CLUSTER_ELEMENT_COUNT");

    // Load the aggregate with the mean aggregates //
    for (int i = 0; i < this._fields.Count; i++)
    {
        set.Add(new AggregateAverage(this._fields[i].CloneOfMe()), this._fields.Alias(i));
    }

    // Load the aggregate with the variance aggregates //
    for (int i = 0; i < this._fields.Count; i++)
    {
        set.Add(new AggregateVarianceP(this._fields[i].CloneOfMe()), "VAR_" + this._fields.Alias(i));
    }

    // Run the aggregate; this is basically a Horse aggregate step with the cluster node mapping as the key and averaging as the value //
    RecordSet rs = AggregatePlan.Render(this._data, this._where, keys, set);

    // Need to chop up the record set we just created //
    Key mean_keeper = Key.Build(this._means.Columns.Count);
    RecordSet means = FastReadPlan.Render(rs, Predicate.TrueForAll, mean_keeper, long.MaxValue);
    Key stat_keeper = new Key(0, 1); // keep the id and the count
    for (int i = mean_keeper.Count; i < rs.Columns.Count; i++)
    {
        stat_keeper.Add(i);
    }
    this._stats = FastReadPlan.Render(rs, Predicate.TrueForAll, stat_keeper, long.MaxValue);

    // Check for cluster misses; these occur when no record maps to a given cluster //
    if (means.Count != this._means.Count)
    {
        this.HandleNullCluster(means);
    }

    // Compare the changes between iterations //
    double change = this.CompareChanges(this._means, means);

    // Set the means to the newly calculated means //
    this._means = means;

    // Return a boolean indicating whether the change fell below the exit condition //
    return change < this._exit_condition;

}
/// <summary>
/// Returns a string of column names
/// </summary>
/// <param name="K">A key to filter on</param>
/// <returns>A string</returns>
public string ToNameString(Key K)
{
    return this.ToNameString(K, SCHEMA_DELIM);
}
public abstract void SortDistinct(Key K);
public static string ToString(RecordSet Data, Key K, char ColumnDelim) { return ToString(Data, K, ColumnDelim, '\n'); }
public static Matrix ToMatrixWithIntercept(RecordSet Data, Key K)
{
    Matrix m = new Matrix(Data.Count, K.Count + 1);
    for (int i = 0; i < Data.Count; i++)
    {
        m[i, 0] = 1;
        for (int j = 0; j < K.Count; j++)
        {
            int k = K[j];
            m[i, j + 1] = Data[i][k].valueDOUBLE;
        }
    }
    return m;
}
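/*
 * Sketch: pulling a numeric design matrix out of a record set for regression-style work.
 * 'rs' is a hypothetical, already-populated RecordSet and the column names are assumptions;
 * KeyParse is used as defined earlier in this file, and the statics are called unqualified
 * as if from their declaring class. ToMatrixWithIntercept prepends a column of ones.
 */
Key features = rs.Columns.KeyParse("X1, X2, X3");
Matrix X  = ToMatrix(rs, features);               // rs.Count x 3
Matrix Xi = ToMatrixWithIntercept(rs, features);  // rs.Count x 4, first column all 1s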
public static string ToString(RecordSet Data, Key K) { return ToString(Data, K, ','); }