/// <summary>
/// Attaches a boolean predicate to the graph element it refers to.
/// The predicate is added to the matching edge if one exists, otherwise to the
/// matching node; if neither lookup succeeds it is recorded in FailedToAssign.
/// </summary>
/// <param name="expr">The predicate to attach.</param>
private void Attach(WBooleanExpression expr)
{
    var referencedTables = _booleanTabRefVisitor.Invoke(expr);

    // Only a predicate that references exactly one table can be attached;
    // anything else is looked up under the empty name.
    var targetName = referencedTables.Count == 1 ? referencedTables.First().Key : "";

    MatchEdge matchedEdge;
    if (_graph.TryGetEdge(targetName, out matchedEdge))
    {
        // The predicate list is created lazily on first use.
        if (matchedEdge.Predicates == null)
        {
            matchedEdge.Predicates = new List<WBooleanExpression>();
        }

        matchedEdge.Predicates.Add(expr);
        return;
    }

    MatchNode matchedNode;
    if (_graph.TryGetNode(targetName, out matchedNode))
    {
        if (matchedNode.Predicates == null)
        {
            matchedNode.Predicates = new List<WBooleanExpression>();
        }

        matchedNode.Predicates.Add(expr);
        return;
    }

    // Neither an edge nor a node carries this name.
    FailedToAssign.Add(expr);
}
/// <summary>
/// Attaches a boolean predicate to the edge or node it is bound to.
/// When the target is an edge that has an entry in the graph's reversed-edge
/// dictionary, a copy of the predicate is also attached to the reversed edge.
/// A predicate bound to neither an edge nor a node is ignored.
/// </summary>
/// <param name="expr">The predicate to attach.</param>
private void Attach(WBooleanExpression expr)
{
    var boundTable = _bindTableVisitor.Invoke(expr, _columnTableMapping);

    MatchEdge forwardEdge;
    if (_graph.TryGetEdge(boundTable, out forwardEdge))
    {
        if (forwardEdge.Predicates == null)
        {
            forwardEdge.Predicates = new List<WBooleanExpression>();
        }

        forwardEdge.Predicates.Add(expr);

        MatchEdge reversedEdge;
        if (!_graph.ReversedEdgeDict.TryGetValue(forwardEdge.EdgeAlias, out reversedEdge))
        {
            return;
        }

        if (reversedEdge.Predicates == null)
        {
            reversedEdge.Predicates = new List<WBooleanExpression>();
        }

        // The reversed edge gets its own copy so that the alias rewrite below
        // does not touch the predicate stored on the forward edge.
        // Take care when an edge is using a default alias.
        var reversedExpr = ObjectExtensions.Copy(expr);
        var comparison = reversedExpr as WBooleanComparisonExpression;
        if (comparison != null)
        {
            // Use the first operand if it is a column reference, else the second.
            var columnRef = comparison.FirstExpr as WColumnReferenceExpression;
            if (columnRef == null)
            {
                columnRef = comparison.SecondExpr as WColumnReferenceExpression;
            }

            if (columnRef != null)
            {
                var identifiers = columnRef.MultiPartIdentifier.Identifiers;
                var qualifierIndex = identifiers.Count - 2;

                // Rewrite the identifier just before the last one (when there is
                // one) so the copied predicate refers to the reversed edge alias.
                if (qualifierIndex >= 0)
                {
                    identifiers[qualifierIndex].Value = reversedEdge.EdgeAlias;
                }
            }
        }

        reversedEdge.Predicates.Add(reversedExpr);
        return;
    }

    MatchNode matchedNode;
    if (_graph.TryGetNode(boundTable, out matchedNode))
    {
        if (matchedNode.Predicates == null)
        {
            matchedNode.Predicates = new List<WBooleanExpression>();
        }

        matchedNode.Predicates.Add(expr);
    }
}
/// <summary>
/// Attaches a boolean predicate to the edge it is bound to, or, when no such
/// edge exists, to the node it is bound to. A predicate bound to neither is ignored.
/// </summary>
/// <param name="expr">The predicate to attach.</param>
private void Attach(WBooleanExpression expr)
{
    var boundTable = _bindTableVisitor.Invoke(expr, _columnTableMapping);

    // Edges take precedence over nodes.
    MatchEdge matchedEdge;
    if (_graph.TryGetEdge(boundTable, out matchedEdge))
    {
        // The predicate list is created lazily on first use.
        if (matchedEdge.Predicates == null)
        {
            matchedEdge.Predicates = new List<WBooleanExpression>();
        }

        matchedEdge.Predicates.Add(expr);
        return;
    }

    MatchNode matchedNode;
    if (!_graph.TryGetNode(boundTable, out matchedNode))
    {
        return;
    }

    if (matchedNode.Predicates == null)
    {
        matchedNode.Predicates = new List<WBooleanExpression>();
    }

    matchedNode.Predicates.Add(expr);
}
/// <summary>
/// Estimates the average degree of the edges and retrieves the density value of the node tables.
/// Sends a query that reads the encoded source and sink values from the edge sampling tables
/// (filtered by the edge predicates) and generates the statistics histogram for each edge,
/// then runs DBCC SHOW_STATISTICS to read the GlobalNodeId density of each node table.
/// </summary>
/// <param name="graph">The match graph whose edges and nodes receive the statistics.</param>
/// <exception cref="ArgumentNullException"><paramref name="graph"/> is null.</exception>
/// <exception cref="GraphViewException">
/// A result row carries an alias that does not name an edge of <paramref name="graph"/>.
/// </exception>
private void RetrieveStatistics(MatchGraph graph)
{
    if (graph == null)
        throw new ArgumentNullException("graph");

    // Declare the parameters if any
    var declarations = new StringBuilder();
    if (_variables != null)
    {
        foreach (var parameter in _variables)
        {
            declarations.Append("DECLARE ")
                .Append(parameter.VariableName.Value)
                .Append(" ")
                .Append(TsqlFragmentToString.DataType(parameter.DataType))
                .Append("\r\n");
        }
    }

    // Calculates the average degree: one SELECT per edge (plus one per reversed edge), glued with UNION ALL
    var sb = new StringBuilder();
    bool first = true;
    sb.Append("SELECT [Edge].*, [EdgeDegrees].[SampleRowCount], [EdgeDegrees].[AverageDegree] FROM");
    sb.Append("(\n");
    foreach (var edge in graph.ConnectedSubGraphs.SelectMany(g => g.Edges.Values))
    {
        if (!first)
        {
            sb.Append("\nUNION ALL\n");
        }
        else
        {
            first = false;
        }

        var tableObjectName = edge.SourceNode.NodeTableObjectName;
        string schema = tableObjectName.SchemaIdentifier.Value.ToLower();
        string tableName = tableObjectName.BaseIdentifier.Value.ToLower();
        string edgeName = edge.EdgeColumn.MultiPartIdentifier.Identifiers.Last().Value.ToLower();
        string bindTableName = _context.EdgeNodeBinding[new Tuple<string, string>(tableName, edgeName)];

        sb.Append(
            string.Format(@" SELECT '{0}' as TableSchema, '{1}' as TableName, '{2}' as ColumnName, '{3}' as Alias, [dbo].[GraphViewUDFGlobalNodeIdEncoder](Src) as Src, [dbo].[GraphViewUDFGlobalNodeIdEncoder](Sink) as Sink, 0 as IsReversed, NULL as OriginalEdgeAlias FROM [{0}_{1}_{2}_Sampling] as [{3}]",
                schema, bindTableName, edgeName, edge.EdgeAlias));

        var predicatesExpr = edge.RetrievePredicatesExpression();
        if (predicatesExpr != null)
        {
            sb.AppendFormat("\n WHERE {0}", predicatesExpr);
        }

        MatchEdge revEdge;
        if (graph.ReversedEdgeDict.TryGetValue(edge.EdgeAlias, out revEdge))
        {
            sb.Append("\nUNION ALL\n");

            var isEdgeView = revEdge.IsEdgeView;
            var revTableObjectName = revEdge.SourceNode.NodeTableObjectName;
            var revSchema = revTableObjectName.SchemaIdentifier.Value.ToLower();
            var revEdgeName = revEdge.EdgeColumn.MultiPartIdentifier.Identifiers.Last().Value.ToLower();
            var revBindTableName = revEdge.BindNodeTableObjName.Identifiers.Last().Value.ToLower();
            var revSamplingTableName = revSchema + "_" + revBindTableName + "_" + revEdgeName;
            var revSrcTuple = WNamedTableReference.SchemaNameToTuple(revEdge.BindNodeTableObjName);
            var originalEdgeName = edge.EdgeColumn.MultiPartIdentifier.Identifiers.Last().Value.ToLower();

            // [node/nodeView]-[edgeView]->[node/nodeView]
            if (isEdgeView)
            {
                revEdgeName = revSrcTuple.Item2 + "_" + originalEdgeName + "Reversed";
                revSamplingTableName = revSrcTuple.Item1 + "_" + revSrcTuple.Item2 + "_" + revEdgeName;
            }

            sb.Append(
                string.Format(@" SELECT '{0}' as TableSchema, '{1}' as TableName, '{2}' as ColumnName, '{3}' as Alias, [dbo].[GraphViewUDFGlobalNodeIdEncoder](Src) as Src, [dbo].[GraphViewUDFGlobalNodeIdEncoder](Sink) as Sink, 1 as IsReversed, '{4}' as OriginalEdgeAlias FROM [{5}_Sampling] as [{3}]",
                    revSchema, revBindTableName, revEdgeName, revEdge.EdgeAlias, edge.EdgeAlias,
                    revSamplingTableName));

            var revPredicatesExpr = revEdge.RetrievePredicatesExpression();
            if (revPredicatesExpr != null)
            {
                sb.AppendFormat("\n WHERE {0}", revPredicatesExpr);
            }
        }
    }
    sb.Append("\n) as Edge \n");
    sb.Append(String.Format(@" INNER JOIN [{0}] as [EdgeDegrees] ON [EdgeDegrees].[TableSchema] = [Edge].[TableSchema] AND [EdgeDegrees].[TableName] = [Edge].[TableName] AND [EdgeDegrees].[ColumnName] = [Edge].[ColumnName]",
        GraphViewConnection.MetadataTables[3]));

    using (var command = Tx.Connection.CreateCommand())
    {
        command.Transaction = Tx;
        command.CommandText = declarations.ToString() + sb.ToString();
        using (var reader = command.ExecuteReader())
        {
            var srcNodeStatisticsDict = graph.SourceNodeStatisticsDict;
            while (reader.Read())
            {
                bool isReversed = reader["isReversed"].ToString().Equals("1");

                // A reversed row is resolved through the alias of its original edge,
                // then swapped for the reversed edge registered under that alias.
                string lookupAlias = reader[isReversed ? "OriginalEdgeAlias" : "Alias"].ToString();
                MatchEdge currentEdge;
                if (!graph.TryGetEdge(lookupAlias, out currentEdge))
                    throw new GraphViewException(string.Format("Edge {0} not exists", lookupAlias));
                if (isReversed)
                {
                    currentEdge = graph.ReversedEdgeDict[currentEdge.EdgeAlias];
                }

                // Source-side statistics are computed on a scratch edge so the real
                // edge's own Statistics are left for the sink-side histogram below.
                var statisticsKey = new Tuple<string, bool>(currentEdge.EdgeAlias, isReversed);
                var srcBytes = reader["Src"] as byte[];
                if (srcBytes != null)
                {
                    var tmpEdge = new MatchEdge();
                    Statistics.UpdateEdgeHistogram(tmpEdge, DecodeInt64Values(srcBytes));
                    srcNodeStatisticsDict[statisticsKey] = tmpEdge.Statistics;
                }
                else
                {
                    srcNodeStatisticsDict[statisticsKey] = null;
                }

                var sinkBytes = reader["Sink"] as byte[];
                if (sinkBytes == null)
                {
                    // No sink sample: give the edge empty statistics and move on.
                    currentEdge.Statistics = new Statistics
                    {
                        Density = 0,
                        Histogram = new Dictionary<long, Tuple<double, bool>>(),
                        MaxValue = 0,
                        RowCount = 0,
                    };
                    continue;
                }

                List<long> sinkList = DecodeInt64Values(sinkBytes);
                Statistics.UpdateEdgeHistogram(currentEdge, sinkList);
                currentEdge.AverageDegree = Convert.ToDouble(reader["AverageDegree"]) * sinkList.Count * 1.0 /
                                            Convert.ToInt64(reader["SampleRowCount"]);

                var path = currentEdge as MatchPath;
                if (path != null && path.AverageDegree > 1)
                {
                    if (path.MaxLength != -1)
                    {
                        path.AverageDegree = Math.Pow(path.AverageDegree, path.MaxLength) -
                                             (path.MinLength > 0
                                                 ? Math.Pow(path.AverageDegree, path.MinLength - 1)
                                                 : 0);
                    }
                    else
                    {
                        // NOTE(review): MaxLength == -1 presumably marks an unbounded path - confirm.
                        path.AverageDegree = double.MaxValue;
                    }
                }
            }
        }

        // Retrieves density value for each node table.
        // Node views get the default density; real tables are grouped so each table is queried once.
        Dictionary<Tuple<string, string>, List<MatchNode>> schemaTableToNodeListMapping =
            new Dictionary<Tuple<string, string>, List<MatchNode>>();
        foreach (var subGraph in graph.ConnectedSubGraphs)
        {
            foreach (var node in subGraph.Nodes.Values)
            {
                var tableTuple = WNamedTableReference.SchemaNameToTuple(node.NodeTableObjectName);
                if (_graphMetaData.NodeViewMapping.ContainsKey(tableTuple))
                {
                    node.GlobalNodeIdDensity = Statistics.DefaultDensity;
                }
                else
                {
                    var nodeList = schemaTableToNodeListMapping.GetOrCreate(tableTuple);
                    nodeList.Add(node);
                }
            }
        }

        // Nothing to look up: avoid creating a temp table and issuing an empty EXEC batch.
        if (schemaTableToNodeListMapping.Count == 0)
        {
            return;
        }

        string tempTableName = Path.GetRandomFileName().Replace(".", "").Substring(0, 8);
        var dbccDensityQuery = new StringBuilder();
        dbccDensityQuery.Append(string.Format(@"CREATE TABLE #{0} (Density float, Len int, Col sql_variant); INSERT INTO #{0} EXEC('", tempTableName));
        foreach (var tableTuple in schemaTableToNodeListMapping.Keys)
        {
            dbccDensityQuery.Append(string.Format(
                "DBCC SHOW_STATISTICS (\"{0}.{1}\", [{0}{1}_PK_GlobalNodeId]) with DENSITY_VECTOR;\n",
                tableTuple.Item1, tableTuple.Item2));
        }
        dbccDensityQuery.Append("');\n");
        dbccDensityQuery.Append(string.Format("SELECT Density FROM #{0} WHERE Col = 'GlobalNodeId'", tempTableName));

        command.CommandText = dbccDensityQuery.ToString();
        using (var reader = command.ExecuteReader())
        {
            // Result rows are consumed in the same order the DBCC statements were appended
            // (the dictionary is not modified between the two enumerations).
            foreach (var item in schemaTableToNodeListMapping)
            {
                double density;
                if (!reader.Read())
                {
                    density = Statistics.DefaultDensity;
                }
                else
                {
                    density = Convert.ToDouble(reader["Density"]);
                    // A density of (almost exactly) 1 is replaced by the default.
                    if (Math.Abs(density - 1.0) < 0.0001)
                        density = Statistics.DefaultDensity;
                }

                foreach (var node in item.Value)
                {
                    node.GlobalNodeIdDensity = density;
                }
            }
        }

        // The temp table would otherwise stay alive for as long as the connection does,
        // so every call would leave another one behind.
        command.CommandText = string.Format("DROP TABLE #{0}", tempTableName);
        command.ExecuteNonQuery();
    }
}

/// <summary>
/// Decodes a byte array into the consecutive 64-bit integers it contains, in order.
/// BitConverter throws if the trailing chunk holds fewer than eight bytes.
/// </summary>
private static List<long> DecodeInt64Values(byte[] bytes)
{
    var values = new List<long>(bytes.Length / sizeof(long));
    for (var offset = 0; offset < bytes.Length; offset += sizeof(long))
    {
        values.Add(BitConverter.ToInt64(bytes, offset));
    }
    return values;
}
/// <summary>
/// Estimates the average degree of the edges and retrieves the density value of the node tables.
/// Sends a query that reads the encoded sink values from the edge sampling tables (filtered by
/// the edge predicates) and generates the statistics histogram for each edge, then runs
/// DBCC SHOW_STATISTICS to fill <c>_tableIdDensity</c> with the GlobalNodeId density of each node table.
/// </summary>
/// <param name="graph">The match graph whose edges receive the statistics.</param>
/// <exception cref="ArgumentNullException"><paramref name="graph"/> is null.</exception>
/// <exception cref="GraphViewException">
/// A result row carries an alias that does not name an edge of <paramref name="graph"/>.
/// </exception>
private void EstimateAverageDegree(MatchGraph graph)
{
    if (graph == null)
        throw new ArgumentNullException("graph");

    // Declare the parameters if any
    var declarations = new StringBuilder();
    if (_variables != null)
    {
        foreach (var parameter in _variables)
        {
            declarations.Append("DECLARE ")
                .Append(parameter.VariableName.Value)
                .Append(" ")
                .Append(TsqlFragmentToString.DataType(parameter.DataType))
                .Append("\r\n");
        }
    }

    // Calculate the average degree: one SELECT per edge, glued with UNION ALL
    var sb = new StringBuilder();
    bool first = true;
    sb.Append("SELECT [Edge].*, [sysindexes].[rows], [EdgeDegrees].[AverageDegree] FROM");
    sb.Append("(\n");
    foreach (var edge in graph.ConnectedSubGraphs.SelectMany(g => g.Edges.Values))
    {
        if (!first)
        {
            sb.Append("\nUNION ALL\n");
        }
        else
        {
            first = false;
        }

        var tableObjectName = edge.SourceNode.TableObjectName;
        sb.Append(
            string.Format(@" SELECT '{0}' as TableSchema, '{1}' as TableName, '{2}' as ColumnName, '{3}' as Alias, [dbo].[GraphViewUDFGlobalNodeIdEncoder](Sink) as Sink FROM [{0}_{1}_{2}_Sampling] as [{3}]",
                tableObjectName.SchemaIdentifier.Value,
                tableObjectName.BaseIdentifier.Value,
                edge.EdgeColumn.MultiPartIdentifier.Identifiers.Last().Value,
                edge.EdgeAlias));

        if (edge.Predicates != null)
        {
            // WHERE is emitted together with the first predicate, so an empty
            // predicate list cannot leave a dangling WHERE in the query.
            bool firstPredicate = true;
            foreach (var predicate in edge.Predicates)
            {
                sb.Append(firstPredicate ? "\n WHERE " : " AND ");
                firstPredicate = false;
                sb.Append(predicate);
            }
        }
    }
    sb.Append("\n) as Edge \n");
    sb.Append(String.Format(@"INNER JOIN [sysindexes] ON [id] = OBJECT_ID([TableSchema] + '_' + [TableName] + '_' + [ColumnName] + '_' + 'Sampling') and [indid]<2 INNER JOIN [{0}] as [EdgeDegrees] ON [EdgeDegrees].[TableSchema] = [Edge].[TableSchema] AND [EdgeDegrees].[TableName] = [Edge].[TableName] AND [EdgeDegrees].[ColumnName] = [Edge].[ColumnName]",
        GraphViewConnection.MetadataTables[3]));

    // Retrieve density value for each node table.
    // Every table starts at the default density; the keys are remembered in the order
    // their DBCC statements are appended so result rows can be matched back to them
    // without relying on dictionary enumeration order.
    _tableIdDensity.Clear();
    var densityKeys = new List<string>();
    string tempTableName = Path.GetRandomFileName().Replace(".", "").Substring(0, 8);
    var dbccDensityQuery = new StringBuilder();
    dbccDensityQuery.Append(string.Format(@"CREATE TABLE #{0} (Density float, Len int, Col sql_variant); INSERT INTO #{0} EXEC('", tempTableName));
    foreach (var nodeTable in graph.NodeTypesSet)
    {
        var densityKey = string.Format("[{0}].[{1}]", nodeTable.Item1, nodeTable.Item2);
        _tableIdDensity[densityKey] = ColumnStatistics.DefaultDensity;
        densityKeys.Add(densityKey);
        dbccDensityQuery.Append(string.Format(
            "DBCC SHOW_STATISTICS (\"{0}.{1}\", [{0}{1}_PK_GlobalNodeId]) with DENSITY_VECTOR;\n",
            nodeTable.Item1, nodeTable.Item2));
    }
    dbccDensityQuery.Append("');\n");
    dbccDensityQuery.Append(string.Format("SELECT Density FROM #{0} WHERE Col = 'GlobalNodeId'", tempTableName));

    using (var command = Conn.CreateCommand())
    {
        command.CommandText = declarations.ToString() + sb.ToString();
        using (var reader = command.ExecuteReader())
        {
            while (reader.Read())
            {
                MatchEdge edge;
                if (!graph.TryGetEdge(reader["Alias"].ToString(), out edge))
                    throw new GraphViewException(string.Format("Edge {0} not exists", reader["Alias"].ToString()));

                var sinkBytes = reader["Sink"] as byte[];
                if (sinkBytes == null)
                {
                    // No sink sample: register empty statistics for the edge and move on.
                    _context.AddEdgeStatistics(edge, new ColumnStatistics
                    {
                        Density = 0,
                        Histogram = new Dictionary<long, Tuple<double, bool>>(),
                        MaxValue = 0,
                        RowCount = 0,
                        Selectivity = 1.0
                    });
                    continue;
                }

                // The byte array is read as consecutive 64-bit integers.
                List<long> sinkList = new List<long>();
                var cursor = 0;
                while (cursor < sinkBytes.Length)
                {
                    var sink = BitConverter.ToInt64(sinkBytes, cursor);
                    cursor += 8;
                    sinkList.Add(sink);
                }
                UpdateEdgeHistogram(edge, sinkList);
                edge.AverageDegree = Convert.ToDouble(reader["AverageDegree"]) * sinkList.Count * 1.0 /
                                     Convert.ToInt64(reader["rows"]);
            }
        }

        // With no node tables there is nothing to look up: skip the temp table and the empty EXEC batch.
        if (densityKeys.Count > 0)
        {
            command.CommandText = dbccDensityQuery.ToString();
            using (var reader = command.ExecuteReader())
            {
                foreach (var key in densityKeys)
                {
                    // Tables without a returned row keep the default density.
                    if (!reader.Read())
                        break;
                    _tableIdDensity[key] = Convert.ToDouble(reader["Density"]);
                }
            }

            // The temp table would otherwise stay alive for as long as the connection does,
            // so every call would leave another one behind.
            command.CommandText = string.Format("DROP TABLE #{0}", tempTableName);
            command.ExecuteNonQuery();
        }
    }

    // Rebuild the dictionary with its entries inserted in key order.
    _tableIdDensity = _tableIdDensity.OrderBy(e => e.Key).ToDictionary(e => e.Key, e => e.Value);
}