public SmallIndexSet(DataSet Data, Key K)
    : base(BuildSchema(K, Data.Columns))
{

    // Open readers/writers //
    RecordWriter rw = this.OpenWriter();
    RecordReader rr = Data.OpenReader();

    // Main loop //
    while (!rr.EndOfData)
    {

        // Need to pull the id and position here because read next will advance the stream //
        int pos = rr.Position;
        long id = rr.SetID;

        Record q = rr.ReadNext();
        Record r = Record.Stitch(new Cell(id), new Cell(pos), new Cell(q.GetHashCode(K)));
        r = Record.Join(r, Record.Split(q, K));

        rw.Insert(r);

    }
    rw.Close();

    // Sort table //
    Key sort = new Key(2);
    for (int i = 0; i < K.Count; i++)
        sort.Add(3 + i);
    this.Sort(sort);

}
public BigIndexSet(string SinkDir, RecordReader Stream, Key K)
    : base(SinkDir, TableHeader.TempName(), BuildSchema(K, Stream.SourceSchema))
{

    // Open readers/writers //
    RecordWriter rw = this.OpenWriter();

    // Main loop //
    while (!Stream.EndOfData)
    {

        // Need to pull the id and position here because read next will advance the stream //
        int pos = Stream.Position;
        long id = Stream.SetID;

        Record q = Stream.ReadNext();
        Record r = Record.Stitch(new Cell(id), new Cell(pos), new Cell(q.GetHashCode(K)));
        r = Record.Join(r, Record.Split(q, K));

        rw.Insert(r);

    }
    rw.Close();

    // Sort table //
    Key sort = new Key(2);
    for (int i = 0; i < K.Count; i++)
        sort.Add(3 + i);
    this.Sort(sort);

}
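/*
 * A minimal read-back sketch for the index sets above, assuming the index set
 * inherits OpenReader() from the same base table type that exposes OpenWriter()
 * and Sort(); that inheritance is an assumption, not shown above. Each index
 * record is laid out as: column 0 = source set id, column 1 = row position,
 * column 2 = hash of the key fields, columns 3.. = the key field values, and
 * the table is sorted on column 2 followed by the key fields.
 */
public static void ScanSmallIndex(DataSet Data, Key K)
{
    SmallIndexSet index = new SmallIndexSet(Data, K);
    RecordReader rr = index.OpenReader();
    while (!rr.EndOfData)
    {
        // Each record addresses one source row by (set id, position) and
        // carries the hash plus the raw key values for lookups.
        Record r = rr.ReadNext();
    }
}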
private bool ItterateOnce()
{

    // Create the cluster mapping FNode; this node does the nearest neighbor test //
    FNodeSet keys = new FNodeSet();
    FNode n = new FNodeResult(null, new RowClusterCellFunction(this._rule, this._means));
    foreach (FNode t in this._fields.Nodes)
    {
        n.AddChildNode(t.CloneOfMe());
    }
    keys.Add("CLUSTER_ID", n);

    // Create the aggregate //
    AggregateSet set = new AggregateSet();

    // Add a counter to the aggregate //
    set.Add(new AggregateSum(FNodeFactory.Value(1D)), "CLUSTER_ELEMENT_COUNT");

    // Load the aggregate with the mean aggregates //
    for (int i = 0; i < this._fields.Count; i++)
    {
        set.Add(new AggregateAverage(this._fields[i].CloneOfMe()), this._fields.Alias(i));
    }

    // Load the aggregate with the variance aggregates //
    for (int i = 0; i < this._fields.Count; i++)
    {
        set.Add(new AggregateVarianceP(this._fields[i].CloneOfMe()), "VAR_" + this._fields.Alias(i));
    }

    // Run the aggregate; this is basically a horse aggregate step with the cluster node mapping as the key, and averaging as the value //
    RecordSet rs = AggregatePlan.Render(this._data, this._where, keys, set);

    // Need to chop up the record set we just created //
    Key mean_keeper = Key.Build(this._means.Columns.Count);
    RecordSet means = FastReadPlan.Render(rs, Predicate.TrueForAll, mean_keeper, long.MaxValue);

    Key stat_keeper = new Key(0, 1); // keep the id and the count
    for (int i = mean_keeper.Count; i < rs.Columns.Count; i++)
    {
        stat_keeper.Add(i);
    }
    this._stats = FastReadPlan.Render(rs, Predicate.TrueForAll, stat_keeper, long.MaxValue);

    // Check for cluster misses; these occur when no record maps to a given cluster //
    if (means.Count != this._means.Count)
    {
        this.HandleNullCluster(means);
    }

    // Compare the changes between iterations //
    double change = this.CompareChanges(this._means, means);

    // Set the means to the newly calculated means //
    this._means = means;

    // Return true if the change between iterations fell below the exit condition (converged) //
    return change < this._exit_condition;

}
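/*
 * A hedged sketch of how ItterateOnce is typically driven; the wrapper name
 * RenderClusters and the MaxIterations parameter are illustrative assumptions,
 * not members shown above. ItterateOnce returns true once the change in the
 * cluster means drops below the exit condition.
 */
public void RenderClusters(int MaxIterations)
{
    for (int i = 0; i < MaxIterations; i++)
    {
        // Stop early once the means have converged
        if (this.ItterateOnce())
            break;
    }
}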
/// <summary>
/// Parses an array of column names into a key
/// </summary>
/// <param name="Columns">A variable list of column names</param>
/// <returns>A key</returns>
public Key KeyParse(string[] Columns)
{
    Key k = new Key();
    foreach (string s in Columns)
    {
        k.Add(this.ColumnIndex(s));
    }
    return k;
}
/// <summary>
/// Parses a string into a key
/// </summary>
/// <param name="Text">The text list of columns</param>
/// <returns>A key</returns>
public Key KeyParse(string Text)
{

    Key k = new Key();
    if (Text == "*")
        return Key.Build(this.Count);

    string[] t = Text.Split(',');
    foreach (string s in t)
    {

        // Parse out the 'NAME KEY_AFFINITY' logic //
        string[] u = s.Trim().Split(' ');
        string v = u[0]; // column name
        string w = "A";  // affinity (optional)
        if (u.Length > 1)
            w = u[1];

        // Get index and affinity //
        int j = this.ColumnIndex(v);
        KeyAffinity a = Key.ParseAffinity(w);

        // Add values //
        if (j != -1)
            k.Add(j, a);
        else if (v.ToList().TrueForAll((c) => { return "1234567890".Contains(c); }))
            k.Add(int.Parse(v), a);

    }

    return k;
}
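/*
 * A small usage sketch for the two KeyParse overloads above, assuming they live
 * on the schema type that exposes ColumnIndex and Count; the column names are
 * hypothetical. "*" builds a key over every column, and elements without an
 * affinity token default to ascending.
 */
public static Key ExampleKeyParse(Schema Columns)
{
    Key all = Columns.KeyParse("*");
    Key byName = Columns.KeyParse(new string[] { "CUSTOMER_ID", "ORDER_DATE" });
    Key byText = Columns.KeyParse("CUSTOMER_ID, ORDER_DATE");
    return byText;
}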
// Update //
public static UpdatePlan RenderUpdatePlan(Workspace Home, HScriptParser.Crudam_updateContext context)
{

    // Get the data source //
    DataSet data = VisitorHelper.GetData(Home, context.full_table_name());

    // Create expression visitor //
    ExpressionVisitor exp_vis = new ExpressionVisitor(null, Home, data.Name, data.Columns, null);

    // Get where //
    Predicate where = VisitorHelper.GetWhere(exp_vis, context.where_clause());

    // Create the key and fnodeset //
    Key keys = new Key();
    FNodeSet expressions = new FNodeSet();
    foreach (HScriptParser.Update_unitContext ctx in context.update_unit())
    {
        keys.Add(data.Columns.ColumnIndex(ctx.IDENTIFIER().GetText()));
        expressions.Add(exp_vis.ToNode(ctx.expression()));
    }

    return new UpdatePlan(data, keys, expressions, where);

}
// Merge //
public static MergePlan RenderMergePlan(Workspace Home, HScriptParser.Crudam_mergeContext context)
{

    // Get the data sources //
    DataSet data1 = VisitorHelper.GetData(Home, context.merge_source()[0].full_table_name());
    DataSet data2 = VisitorHelper.GetData(Home, context.merge_source()[1].full_table_name());

    // Get the aliases //
    string alias1 = (context.merge_source()[0].IDENTIFIER() ?? context.merge_source()[0].full_table_name().table_name().IDENTIFIER()).GetText();
    string alias2 = (context.merge_source()[1].IDENTIFIER() ?? context.merge_source()[1].full_table_name().table_name().IDENTIFIER()).GetText();

    // Build the registers; the join functions only use static registers //
    StaticRegister mem1 = new StaticRegister(null);
    StaticRegister mem2 = new StaticRegister(null);

    // Create our expression builder //
    ExpressionVisitor exp_vis = new ExpressionVisitor(null, Home);
    exp_vis.AddSchema(alias1, data1.Columns, mem1);
    exp_vis.AddSchema(alias2, data2.Columns, mem2);

    // Get the equality keys //
    Key eq1 = new Key();
    Key eq2 = new Key();
    foreach (HScriptParser.Merge_equi_predicateContext ctx in context.merge_equi_predicate())
    {

        string a1 = ctx.table_variable()[0].IDENTIFIER()[0].GetText();
        string a2 = ctx.table_variable()[1].IDENTIFIER()[0].GetText();
        string c1 = ctx.table_variable()[0].IDENTIFIER()[1].GetText();
        string c2 = ctx.table_variable()[1].IDENTIFIER()[1].GetText();

        int idx1 = -1;
        int idx2 = -1;

        if (a1 == alias1 && a2 == alias2)
        {

            // Look up indices //
            idx1 = data1.Columns.ColumnIndex(c1);
            idx2 = data2.Columns.ColumnIndex(c2);

            // Check for invalid keys //
            if (idx1 == -1)
                throw new Exception(string.Format("Column '{0}' does not exist in '{1}'", c1, alias1));
            if (idx2 == -1)
                throw new Exception(string.Format("Column '{0}' does not exist in '{1}'", c2, alias2));

        }
        else if (a1 == alias2 && a2 == alias1)
        {

            // Look up indices //
            idx1 = data1.Columns.ColumnIndex(c2);
            idx2 = data2.Columns.ColumnIndex(c1);

            // Check for invalid keys //
            if (idx1 == -1)
                throw new Exception(string.Format("Column '{0}' does not exist in '{1}'", c2, alias1));
            if (idx2 == -1)
                throw new Exception(string.Format("Column '{0}' does not exist in '{1}'", c1, alias2));

        }
        else
        {
            throw new Exception("Aliases passed are invalid");
        }

        // Add the keys //
        eq1.Add(idx1);
        eq2.Add(idx2);

    }

    // Get the predicate //
    Predicate where = VisitorHelper.GetWhere(exp_vis, context.where_clause());

    // Get the list of expressions //
    FNodeSet nodes = VisitorHelper.GetReturnStatement(exp_vis, context.return_action().expression_or_wildcard_set());

    // Get the output cursor //
    RecordWriter out_data = VisitorHelper.GetWriter(Home, nodes.Columns, context.return_action());

    // Get the join method //
    MergeMethod method = VisitorHelper.GetMergeMethod(context.merge_type());

    // Find the best algorithm //
    MergeAlgorithm alg = MergeAlgorithm.SortMerge;
    if (context.merge_algorithm() != null)
    {
        string suggest_alg = exp_vis.ToNode(context.merge_algorithm().expression()).Evaluate().valueSTRING.ToUpper();
        if (suggest_alg == "NL")
            alg = MergeAlgorithm.NestedLoop;
        else if (suggest_alg == "SM")
            alg = MergeAlgorithm.SortMerge;
        else if (suggest_alg == "HT")
            alg = MergeAlgorithm.HashTable;
    }

    // Without equality keys the join must fall back to a nested loop //
    if (eq1.Count == 0)
        alg = MergeAlgorithm.NestedLoop;

    return new MergePlan(method, alg, out_data, nodes, where, data1, data2, eq1, eq2, mem1, mem2);

}
/// <summary>
/// Creates a key with a certain number of ascending elements
/// </summary>
/// <param name="StartAt">The starting index of the key</param>
/// <param name="Count">The number of elements to add</param>
/// <returns>Key</returns>
public static Key Build(int StartAt, int Count)
{
    Key k = new Key();
    for (int i = StartAt; i < StartAt + Count; i++)
    {
        k.Add(i);
    }
    return k;
}
/// <summary>
/// Parses a key from a string
/// </summary>
/// <param name="Text">Text to parse</param>
/// <returns>Key parsed from the string</returns>
public static Key Parse(string Text)
{

    string[] s = Text.Split(',');
    int i = 0;
    KeyAffinity ka = KeyAffinity.Ascending;
    Key k = new Key();

    foreach (string r in s)
    {

        // Each element is an index optionally followed by an affinity token //
        string[] t = r.Trim().Split(' ');
        if (t.Length == 2)
            ka = ParseAffinity(t[1]);
        else
            ka = KeyAffinity.Ascending;

        i = int.Parse(t[0]);
        k.Add(i, ka);

    }

    return k;

}
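/*
 * A brief sketch contrasting Build and Parse; since the tokens accepted by
 * ParseAffinity are not shown above, the Parse example sticks to bare ordinals,
 * which default to ascending affinity.
 */
public static void KeyExamples()
{
    Key a = Key.Build(3, 4);      // ascending key over indices 3, 4, 5, 6
    Key b = Key.Parse("4, 2, 0"); // explicit ordinals, default ascending affinity
}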
/// <summary>
/// Renders a string into a key
/// </summary>
/// <param name="Columns">The schema that will be used in the key generation</param>
/// <param name="Text">The key string to be parsed</param>
/// <returns>A key with defined sort affinities</returns>
public Key Render(Schema Columns, string Text)
{

    Columns = Columns ?? new Schema();
    string[] tokens = Text.Split(this._FieldDelims.ToArray());
    Key k = new Key();

    foreach (string t in tokens)
    {

        string[] temp = t.Split(this._AscDescDelims.ToArray(), StringSplitOptions.RemoveEmptyEntries);

        // Get the field name //
        string field_or_index = temp[0];
        int idx = Columns.ColumnIndex(field_or_index);
        if (idx == -1)
        {
            if (!int.TryParse(field_or_index, out idx))
                throw new Exception("Element passed is neither a field nor an index: " + field_or_index);
        }

        // Get the affinity //
        string asc_or_desc = (temp.Length < 2) ? "\0" : temp[1];
        KeyAffinity sort_type = KeyAffinity.Ascending;
        if (this._AscTokens.Contains(asc_or_desc, StringComparer.OrdinalIgnoreCase))
            sort_type = KeyAffinity.Ascending;
        else if (this._DescTokens.Contains(asc_or_desc, StringComparer.OrdinalIgnoreCase))
            sort_type = KeyAffinity.Descending;

        k.Add(idx, sort_type);

    }

    return k;

}
// Hash Table - Collection Map //
private static DataSet BuildJoinHelper(DataSet Data1, DataSet Data2, MergeMethod JM)
{

    // Create the predicate fields //
    Key joiner = new Key();
    int T1Count = Data1.Columns.Count;
    for (int i = 2; i < T1Count; i++)
        joiner.Add(i);

    // Memory registers //
    StaticRegister mem1 = new StaticRegister(null);
    StaticRegister mem2 = new StaticRegister(null);

    // Build the output fields //
    FNodeSet keeper = new FNodeSet();
    keeper.Add(new FNodeFieldRef(null, 0, CellAffinity.INT, 8, mem1));
    keeper.Add(new FNodeFieldRef(null, 1, CellAffinity.INT, 8, mem1));
    keeper.Add(new FNodeFieldRef(null, 0, CellAffinity.INT, 8, mem2));
    keeper.Add(new FNodeFieldRef(null, 1, CellAffinity.INT, 8, mem2));

    // Create the hashing variables //
    string dir = (Data1.Directory != null ? Data1.Directory : Data2.Directory);
    string name = Header.TempName();
    Schema s = new Schema("set_id1 int, row_id1 int, set_id2 int, row_id2 int");

    // Write the join result to the data set //
    DataSet hash = DataSet.CreateOfType(Data1, dir, name, s, Data1.MaxRecords);
    RecordWriter brw = hash.OpenWriter();
    MergeFunctions.SortMerge(JM, brw, keeper, Predicate.TrueForAll, Data1, Data2, joiner, joiner, mem1, mem2);
    brw.Close();

    // Return //
    return hash;

}
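/*
 * A hedged sketch of consuming the collection map produced above: each record
 * is an address pair, (set_id1, row_id1) from Data1 and (set_id2, row_id2)
 * from Data2, for one matched pair of rows. Only OpenReader, EndOfData and
 * ReadNext are taken from the snippets above; anything beyond reading the
 * records back would rely on APIs not shown here.
 */
private static void ScanCollectionMap(DataSet Hash)
{
    RecordReader rr = Hash.OpenReader();
    while (!rr.EndOfData)
    {
        // Columns 0..3 hold set_id1, row_id1, set_id2, row_id2
        Record pair = rr.ReadNext();
    }
}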