/// <summary>
/// Recursive isomorphic-extension search, adapted from Grochow and Kellis. Starting from
/// <paramref name="partialMap"/>, it maps one more query-graph node per call until every
/// query-graph node has an image in the input graph.
/// NOTE(review): the original author marked this routine as "failing at the moment" - current status unverified.
/// </summary>
/// <param name="partialMap">f; the partial map. Key is the query-graph node h, Value is the input-graph node g</param>
/// <param name="queryGraph">H</param>
/// <param name="queryGraphEdges">H's edges, supplied by the caller so they are not re-evaluated on every call</param>
/// <param name="inputGraph">G</param>
/// <param name="getInducedMappingsOnly">If true, only induced subgraphs are wanted; the flag is forwarded to the mapping check and to the filtering step</param>
/// <returns>
/// The isomorphisms found, keyed by the array of mapped values (remember, Key is h, Value is g).
/// Returns null when a complete map fails the correctness check, or when no most-constrained neighbour exists.
/// </returns>
internal static Dictionary<int[], List<Mapping>> IsomorphicExtension(Dictionary<int, int> partialMap, QueryGraph queryGraph,
    Edge<int>[] queryGraphEdges, UndirectedGraph<int> inputGraph, bool getInducedMappingsOnly)
{
    if (partialMap.Count == queryGraph.VertexCount)
    {
        // Base case: every query node already has an image, so validate the finished function.
        var completeMap = new SortedList<int, int>(partialMap);
        var verdict = IsMappingCorrect(completeMap, queryGraphEdges, inputGraph, getInducedMappingsOnly);
        if (!verdict.IsCorrectMapping)
        {
            completeMap.Clear();
            return null;
        }

        var single = new Dictionary<int[], List<Mapping>>(1);
        single.Add(completeMap.Values.ToArray(), new List<Mapping>(1) { new Mapping(completeMap, verdict.SubgraphEdgeCount) });
        return single;
    }

    // Remember: f(h) = g, so h is Domain and g is Range.
    // In other words, Key is h and Value is g in the dictionary.

    // m is the most constrained neighbour of the nodes mapped so far
    int m = GetMostConstrainedNeighbour(partialMap.Keys, queryGraph);
    if (m < 0)
    {
        return null;
    }

    var found = new Dictionary<int[], List<Mapping>>(ModaAlgorithms.MappingNodesComparer);
    var candidates = ChooseNeighboursOfRange(partialMap.Values, inputGraph);
    var neighborsOfM = queryGraph.GetNeighbors(m, false);
    var extendedSize = partialMap.Count + 1;

    // For each neighbour n of f(D)
    for (int index = 0; index < candidates.Count; index++)
    {
        var n = candidates[index];
        if (IsNeighbourIncompatible(inputGraph, n, partialMap, queryGraph, neighborsOfM))
        {
            continue;
        }

        // n is compatible; so, let f' = f on D, and f'(m) = n, then find all isomorphic extensions of f'.
        var extended = new Dictionary<int, int>(extendedSize);
        foreach (var pair in partialMap)
        {
            extended.Add(pair.Key, pair.Value);
        }
        extended[m] = n;

        var deeper = IsomorphicExtension(extended, queryGraph, queryGraphEdges, inputGraph, getInducedMappingsOnly);
        extended.Clear();

        if (deeper == null || deeper.Count == 0)
        {
            continue;
        }

        foreach (var entry in deeper)
        {
            if (entry.Value.Count > 1)
            {
                queryGraph.RemoveNonApplicableMappings(entry.Value, inputGraph, getInducedMappingsOnly);
            }

            List<Mapping> existing;
            if (!found.TryGetValue(entry.Key, out existing))
            {
                found[entry.Key] = entry.Value;
            }
            else
            {
                existing.AddRange(entry.Value);
            }
        }
        deeper.Clear();
    }

    // neighborsOfM must NOT be cleared here; only the candidate list is released.
    candidates.Clear();
    return found;
}
/// <summary>
/// Mapping module (aka FindSubgraphInstances in Grochow and Kellis), modified.
/// The modification:
/// Instead of traversing all nodes in the query graph (H) for each node in the input graph (G),
/// we simply use just one node h in H to traverse G. This makes it much easier to parallelize
/// unlike the original algorithm, and eliminates the need for removing visited g from G.
///
/// Testing will show whether this improves, worsens or makes no difference in performance.
/// </summary>
/// <param name="queryGraph">H</param>
/// <param name="inputGraph">G</param>
/// <param name="numberOfSamples">To be decided. If not set (zero or negative), we use the <paramref name="inputGraph"/> size / 3</param>
/// <param name="getInducedMappingsOnly">Forwarded to the isomorphic-extension search and to the filtering of found mappings</param>
/// <returns>The set of mappings built from everything found across the sampled input-graph nodes</returns>
private static ICollection<Mapping> Algorithm2_Modified(QueryGraph queryGraph, UndirectedGraph<int> inputGraph, int numberOfSamples, bool getInducedMappingsOnly)
{
    if (numberOfSamples <= 0)
    {
        numberOfSamples = inputGraph.VertexCount / 3;
    }

    var theMappings = new Dictionary<int[], List<Mapping>>(MappingNodesComparer);
    var inputGraphDegSeq = inputGraph.GetNodesSortedByDegree(numberOfSamples);

    var threadName = Thread.CurrentThread.ManagedThreadId;
    Console.WriteLine("Thread {0}:\tCalling Algo 2-Modified:\n", threadName);

    var queryGraphEdges = queryGraph.Edges.ToArray();
    var h = queryGraph.Vertices.ElementAt(0);
    // A single dictionary is reused as the seed map: only the image of h changes per iteration.
    var f = new Dictionary<int, int>(1);

    for (int i = 0; i < inputGraphDegSeq.Count; i++)
    {
        var g = inputGraphDegSeq[i];
        if (!Utils.CanSupport(queryGraph, h, inputGraph, g))
        {
            continue;
        }

        // Remember: f(h) = g, so h is Domain and g is Range
        f[h] = g;
        var mappings = Utils.IsomorphicExtension(f, queryGraph, queryGraphEdges, inputGraph, getInducedMappingsOnly);

        // IsomorphicExtension returns null (not an empty dictionary) when it finds nothing,
        // so the result must be null-checked before it is dereferenced.
        if (mappings == null || mappings.Count == 0)
        {
            continue;
        }

        foreach (var item in mappings)
        {
            if (item.Value.Count > 1)
            {
                queryGraph.RemoveNonApplicableMappings(item.Value, inputGraph, getInducedMappingsOnly);
            }

            // Recall: f(h) = g
            List<Mapping> maps;
            if (theMappings.TryGetValue(item.Key, out maps))
            {
                maps.AddRange(item.Value);
            }
            else
            {
                theMappings[item.Key] = item.Value;
            }
        }
        mappings.Clear();
    }

    f.Clear();
    Array.Clear(queryGraphEdges, 0, queryGraphEdges.Length);
    inputGraphDegSeq.Clear();

    var toReturn = GetSet(theMappings);
    Console.WriteLine("\nThread {0}:\tAlgorithm 2: All iteration tasks completed. Number of mappings found: {1}.\n", threadName, toReturn.Count);

    return toReturn;
}
/// <summary>
/// Enumeration module. Derives the mappings of <paramref name="queryGraph"/> from the mappings already
/// found for its parent query graph, instead of searching the input graph from scratch.
/// NB: If either of <paramref name="allMappings"/> or <paramref name="fileName"/> is null, the other must not be.
/// </summary>
/// <param name="allMappings">In-memory mappings per query graph; consulted only when <paramref name="fileName"/> is null or blank</param>
/// <param name="inputGraph">G</param>
/// <param name="queryGraph">H</param>
/// <param name="expansionTree">T_k. Not referenced by this method's body; kept in the signature for callers</param>
/// <param name="parentQueryGraph">The parent of <paramref name="queryGraph"/>; its mappings are the candidates examined here</param>
/// <param name="newFileName">
/// Set to the name of the file the remaining parent mappings were re-written to, when the parent mappings
/// came from <paramref name="fileName"/> and some of them were moved to the child. Null otherwise.
/// </param>
/// <param name="fileName">File to read the parent's mappings from. When null or blank, <paramref name="allMappings"/> is used instead</param>
/// <returns>The mappings accepted for <paramref name="queryGraph"/>; an empty array when there is nothing to derive from</returns>
/// <remarks>
/// The parent's mapping collection is modified in place: mappings accepted for the child are removed from it.
/// When that collection comes from <paramref name="allMappings"/> it is still used after this call returns,
/// so be careful how its values are set or cleared.
/// </remarks>
private static IList<Mapping> Algorithm3(Dictionary<QueryGraph, ICollection<Mapping>> allMappings, UndirectedGraph<int> inputGraph, QueryGraph queryGraph,
    AdjacencyGraph<ExpansionTreeNode> expansionTree,
    QueryGraph parentQueryGraph, out string newFileName, string fileName = null)
{
    newFileName = null;
    ICollection<Mapping> parentGraphMappings;
    if (string.IsNullOrWhiteSpace(fileName))
    {
        if (!allMappings.TryGetValue(parentQueryGraph, out parentGraphMappings))
        {
            return new Mapping[0];
        }
    }
    else
    {
        parentGraphMappings = parentQueryGraph.ReadMappingsFromFile(fileName);
    }

    // A null collection is treated like an empty one: there is nothing to derive child mappings from.
    if (parentGraphMappings == null || parentGraphMappings.Count == 0)
    {
        return new Mapping[0];
    }

    var parentQueryGraphEdges = new HashSet<Edge<int>>(parentQueryGraph.Edges);
    var newEdge = GetEdgeDifference(queryGraph, parentQueryGraph, parentQueryGraphEdges);
    parentQueryGraphEdges.Clear();

    // if it's NOT a valid edge
    if (newEdge.Source == Utils.DefaultEdgeNodeVal)
    {
        return new Mapping[0];
    }

    var list = new List<Mapping>();
    int oldCount = parentGraphMappings.Count, id = 0, queryGraphEdgeCount = queryGraph.EdgeCount;
    var queryGraphEdges = queryGraph.Edges.ToArray();
    var groupByGNodes = parentGraphMappings.GroupBy(x => x.Function.Values.ToArray(), MappingNodesComparer);
    foreach (var set in groupByGNodes)
    {
        // function.value (= set of G nodes) are all the same within a group, so build the subgraph once and pass it down
        var subgraph = Utils.GetSubgraph(inputGraph, set.Key);
        foreach (var item in set)
        {
            item.Id = id++;
            // Remember, f(h) = g

            // if (f(u), f(v)) is in G and meets the conditions, add to list
            if (item.SubGraphEdgeCount == queryGraphEdgeCount)
            {
                var isMapping = Utils.IsMappingCorrect2(item.Function, subgraph, queryGraphEdges, true);
                if (isMapping.IsCorrectMapping)
                {
                    list.Add(item);
                }
            }
            else if (item.SubGraphEdgeCount > queryGraphEdgeCount)
            {
                var newEdgeImage = item.GetImage(inputGraph, newEdge);

                // if it's a valid edge...
                if (newEdgeImage.Source != Utils.DefaultEdgeNodeVal
                    && inputGraph.ContainsEdge(newEdgeImage.Source, newEdgeImage.Target))
                {
                    list.Add(item);
                }
            }
        }
    }

    Array.Clear(queryGraphEdges, 0, queryGraphEdges.Length);
    var threadName = System.Threading.Thread.CurrentThread.ManagedThreadId;

    // Remove mappings from the parent qGraph that are found in this qGraph
    // This is because we're only interested in induced subgraphs
    var theRest = parentGraphMappings.Except(list).ToList();
    parentQueryGraph.RemoveNonApplicableMappings(theRest, inputGraph);
    parentGraphMappings.Clear();
    foreach (var item in theRest)
    {
        parentGraphMappings.Add(item);
    }
    theRest.Clear();

    // Now, remove duplicates
    queryGraph.RemoveNonApplicableMappings(list, inputGraph);
    if (!string.IsNullOrWhiteSpace(fileName) && oldCount > parentGraphMappings.Count)
    {
        // This means that some of the mappings from parent fit the current query graph
        newFileName = parentQueryGraph.WriteMappingsToFile(parentGraphMappings);
        try
        {
            System.IO.File.Delete(fileName);
        }
        catch
        {
            // Deliberate best-effort: we can afford to let the deletion of the stale file fail.
        }
    }

    Console.WriteLine("Thread {0}:\tAlgorithm 3: All tasks completed. Number of mappings found: {1}.\n", threadName, list.Count);
    return list;
}
/// <summary>
/// Algo 1: Find subgraph frequency (mappings found are saved to disk to be retrieved later during Algo 3).
/// Each value of the returned dictionary is the name of the file holding that query graph's mappings.
/// When <paramref name="qGraph"/> is supplied, that name has the form $"{mappings.Count}#{qGraph.Identifier}.ser";
/// otherwise it is whatever WriteMappingsToFile returns.
/// </summary>
/// <param name="inputGraph">The graph to search in. It is not handed to the destructive Algorithm2 directly; a clone is used</param>
/// <param name="qGraph">The query graph to be searched for. If not available, we use expansion trees (MODA). Otherwise, we use Grochow's (Algo 2)</param>
/// <param name="subgraphSize">Number of nodes of the subgraphs sought; a query graph with (subgraphSize - 1) edges is treated as a tree</param>
/// <param name="thresholdValue">Frequency value, above which we can consider the subgraph a "frequent subgraph"</param>
/// <returns>A dictionary from each processed query graph to the name of the file its mappings were written to</returns>
public static Dictionary<QueryGraph, string> Algorithm1_C(UndirectedGraph<int> inputGraph, QueryGraph qGraph, int subgraphSize, int thresholdValue)
{
    // The enumeration module (Algo 3) needs the mappings generated from the previous run(s)
    Dictionary<QueryGraph, string> allMappings;

    // -1 lets Algo 2 pick its own sample size; graphs with fewer than 121 vertices are sampled in full.
    int numIterations = -1;
    if (inputGraph.VertexCount < 121)
    {
        numIterations = inputGraph.VertexCount;
    }

    if (qGraph == null) // Use MODA's expansion tree
    {
        allMappings = new Dictionary<QueryGraph, string>(_builder.NumberOfQueryGraphs);
        do
        {
            qGraph = GetNextNode()?.QueryGraph;
            if (qGraph == null)
            {
                break;
            }

            ICollection<Mapping> mappings;
            if (qGraph.EdgeCount == (subgraphSize - 1)) // i.e. if qGraph is a tree
            {
                if (UseModifiedGrochow)
                {
                    // Modified Mapping module - MODA and Grochow & Kellis
                    mappings = Algorithm2_Modified(qGraph, inputGraph, numIterations, false);
                }
                else
                {
                    // Algorithm2 removes vertices from the graph it works on, hence the clone.
                    var inputGraphClone = inputGraph.Clone();
                    mappings = Algorithm2(qGraph, inputGraphClone, numIterations, false);
                    inputGraphClone.Clear();
                }

                // Because we're saving to file, we're better off doing this now
                qGraph.RemoveNonApplicableMappings(mappings, inputGraph, false);
            }
            else
            {
                // Enumeration module - MODA
                // This is part of Algo 3; but performance tweaks make it more useful to get it here
                var parentQueryGraph = GetParent(qGraph, _builder.ExpansionTree);

                string _filename;
                if (allMappings.TryGetValue(parentQueryGraph, out _filename))
                {
                    string newFileName; // for parentQueryGraph
                    mappings = Algorithm3(null, inputGraph, qGraph, _builder.ExpansionTree, parentQueryGraph, out newFileName, _filename);
                    if (!string.IsNullOrWhiteSpace(newFileName))
                    {
                        // We change the _filename value in the dictionary since this means some of the mappings from parent fit the child
                        allMappings[parentQueryGraph] = newFileName;
                    }
                }
                else
                {
                    mappings = new Mapping[0];
                }
            }

            if (mappings.Count > thresholdValue)
            {
                qGraph.IsFrequentSubgraph = true;
            }

            // Save mappings.
            var fileName = qGraph.WriteMappingsToFile(mappings);
            if (mappings.Count > 0)
            {
                mappings.Clear();
            }
            allMappings.Add(qGraph, fileName);

            // Check for complete-ness; if complete, break
            if (qGraph.IsComplete(subgraphSize))
            {
                qGraph = null;
                break;
            }
            qGraph = null;
        }
        while (true);
    }
    else
    {
        ICollection<Mapping> mappings;
        if (UseModifiedGrochow)
        {
            // Modified Mapping module - MODA and Grochow & Kellis
            mappings = Algorithm2_Modified(qGraph, inputGraph, numIterations, true);
        }
        else
        {
            // Algorithm2 removes vertices from the graph it is given. Work on a clone so the caller's
            // graph - which is still needed by RemoveNonApplicableMappings below - stays intact.
            var inputGraphClone = inputGraph.Clone();
            mappings = Algorithm2(qGraph, inputGraphClone, numIterations, true);
            inputGraphClone.Clear();
        }

        qGraph.RemoveNonApplicableMappings(mappings, inputGraph);
        var fileName = $"{mappings.Count}#{qGraph.Identifier}.ser";
        System.IO.File.WriteAllText(fileName, Extensions.CompressString(Newtonsoft.Json.JsonConvert.SerializeObject(mappings)));
        if (mappings.Count > 0)
        {
            mappings.Clear();
        }
        allMappings = new Dictionary<QueryGraph, string>(1) { { qGraph, fileName } };
    }

    return allMappings;
}
/// <summary>
/// Algo 1: Find subgraph frequency (mappings held in memory)
/// </summary>
/// <param name="inputGraph">The graph to search in. It is not handed to the destructive Algorithm2 directly; a clone is used</param>
/// <param name="qGraph">The query graph to be searched for. If not available, we use expansion trees (MODA). Otherwise, we use Grochow's (Algo 2)</param>
/// <param name="subgraphSize">Number of nodes of the subgraphs sought; passed to the IsTree and IsComplete checks on each query graph</param>
/// <param name="thresholdValue">Frequency value, above which we can consider the subgraph a "frequent subgraph"</param>
/// <returns>A dictionary from each processed query graph to the mappings found for it</returns>
public static Dictionary<QueryGraph, ICollection<Mapping>> Algorithm1(UndirectedGraph<int> inputGraph, QueryGraph qGraph, int subgraphSize = -1, int thresholdValue = 0)
{
    // The enumeration module (Algo 3) needs the mappings generated from the previous run(s)
    Dictionary<QueryGraph, ICollection<Mapping>> allMappings;

    // -1 lets Algo 2 pick its own sample size; graphs with fewer than 121 vertices are sampled in full.
    int numIterations = -1;
    if (inputGraph.VertexCount < 121)
    {
        numIterations = inputGraph.VertexCount;
    }

    if (qGraph == null) // Use MODA's expansion tree
    {
        // Tree query graphs that served as a parent in Algo 3; those are skipped in the clean-up pass below.
        var treatedNodes = new HashSet<QueryGraph>();
        allMappings = new Dictionary<QueryGraph, ICollection<Mapping>>(_builder.NumberOfQueryGraphs);
        do
        {
            qGraph = GetNextNode()?.QueryGraph;
            if (qGraph == null)
            {
                break;
            }

            ICollection<Mapping> mappings;
            if (qGraph.IsTree(subgraphSize))
            {
                if (UseModifiedGrochow)
                {
                    // Modified Mapping module - MODA and Grochow & Kellis
                    mappings = Algorithm2_Modified(qGraph, inputGraph, numIterations, false);
                }
                else
                {
                    // Mapping module - MODA and Grochow & Kellis.
                    // Algorithm2 removes vertices from the graph it works on, hence the clone.
                    var inputGraphClone = inputGraph.Clone();
                    mappings = Algorithm2(qGraph, inputGraphClone, numIterations, false);
                    inputGraphClone.Clear();
                }
            }
            else
            {
                // Enumeration module - MODA
                // This is part of Algo 3; but performance tweaks make it more useful to get it here
                var parentQueryGraph = GetParent(qGraph, _builder.ExpansionTree);
                if (parentQueryGraph.IsTree(subgraphSize))
                {
                    treatedNodes.Add(parentQueryGraph);
                }

                string file; // only meaningful for the file-backed variant; ignored here
                mappings = Algorithm3(allMappings, inputGraph, qGraph, _builder.ExpansionTree, parentQueryGraph, out file);
            }

            if (mappings != null && mappings.Count > thresholdValue)
            {
                qGraph.IsFrequentSubgraph = true;
            }

            // Save mappings. Do we need to save to disk? Maybe not!
            // Do not call mappings.Clear(): the dictionary keeps using this collection.
            allMappings.Add(qGraph, mappings);

            // Check for complete-ness; if complete, break
            if (qGraph.IsComplete(subgraphSize))
            {
                qGraph = null;
                break;
            }
            qGraph = null;
        }
        while (true);

        if (treatedNodes.Count > 0)
        {
            foreach (var mapping in allMappings)
            {
                if (mapping.Key.IsTree(subgraphSize) && !treatedNodes.Contains(mapping.Key))
                {
                    mapping.Key.RemoveNonApplicableMappings(mapping.Value, inputGraph);
                }
            }
            treatedNodes.Clear();
        }
    }
    else
    {
        ICollection<Mapping> mappings;
        if (UseModifiedGrochow)
        {
            // Modified Mapping module - MODA and Grochow & Kellis
            mappings = Algorithm2_Modified(qGraph, inputGraph, numIterations, true);
        }
        else
        {
            // Algorithm2 removes vertices from the graph it is given. Work on a clone so the caller's
            // graph - which is still needed by RemoveNonApplicableMappings below - stays intact.
            var inputGraphClone = inputGraph.Clone();
            mappings = Algorithm2(qGraph, inputGraphClone, numIterations, true);
            inputGraphClone.Clear();
        }

        qGraph.RemoveNonApplicableMappings(mappings, inputGraph);

        // Do not call mappings.Clear(): the returned dictionary keeps using this collection.
        allMappings = new Dictionary<QueryGraph, ICollection<Mapping>>(1) { { qGraph, mappings } };
    }

    return allMappings;
}
/// <summary>
/// Mapping module; aka FindSubgraphInstances in Grochow and Kellis.
/// For each sampled input-graph node g, every query-graph node h that g can support is used as a seed
/// f(h) = g for the isomorphic-extension search; g is then removed from the graph.
/// </summary>
/// <param name="queryGraph">H</param>
/// <param name="inputGraphClone">G. This graph is modified: each visited node is removed from it, so pass a clone of any graph that is needed afterwards</param>
/// <param name="numberOfSamples">To be decided. If not set (zero or negative), we use the <paramref name="inputGraphClone"/> size / 3</param>
/// <param name="getInducedMappingsOnly">Forwarded to the isomorphic-extension search and to the filtering of found mappings</param>
/// <returns>The set of mappings built from everything found across the sampled input-graph nodes</returns>
internal static ICollection<Mapping> Algorithm2(QueryGraph queryGraph, UndirectedGraph<int> inputGraphClone, int numberOfSamples, bool getInducedMappingsOnly)
{
    if (numberOfSamples <= 0)
    {
        numberOfSamples = inputGraphClone.VertexCount / 3;
    }

    // Do we need this clone? Can't we just remove the node directly from the graph?
    // We do need it.
    var theMappings = new Dictionary<int[], List<Mapping>>(MappingNodesComparer);
    var inputGraphDegSeq = inputGraphClone.GetNodesSortedByDegree(numberOfSamples);
    var queryGraphVertices = queryGraph.Vertices.ToArray();
    var queryGraphEdges = queryGraph.Edges.ToArray();
    var subgraphSize = queryGraphVertices.Length;
    var threadName = System.Threading.Thread.CurrentThread.ManagedThreadId;
    Console.WriteLine("Thread {0}:\tCallingu Algo 2:\n", threadName);

    for (int i = 0; i < inputGraphDegSeq.Count; i++)
    {
        var g = inputGraphDegSeq[i];
        for (int j = 0; j < subgraphSize; j++)
        {
            var h = queryGraphVertices[j];
            if (!Utils.CanSupport(queryGraph, h, inputGraphClone, g))
            {
                continue;
            }

            // Remember: f(h) = g, so h is Domain and g is Range
            var f = new Dictionary<int, int>(1);
            f[h] = g;
            var mappings = Utils.IsomorphicExtension(f, queryGraph, queryGraphEdges, inputGraphClone, getInducedMappingsOnly);
            f.Clear();

            // IsomorphicExtension returns null (not an empty dictionary) when it finds nothing,
            // so the result must be null-checked before it is dereferenced.
            if (mappings == null || mappings.Count == 0)
            {
                continue;
            }

            foreach (var item in mappings)
            {
                if (item.Value.Count > 1)
                {
                    queryGraph.RemoveNonApplicableMappings(item.Value, inputGraphClone, getInducedMappingsOnly);
                }

                // Recall: f(h) = g
                List<Mapping> maps;
                if (theMappings.TryGetValue(item.Key, out maps))
                {
                    maps.AddRange(item.Value);
                }
                else
                {
                    theMappings[item.Key] = item.Value;
                }
            }
            mappings.Clear();
        }

        // Remove g so later iterations do not revisit it; stop once no edges are left.
        inputGraphClone.RemoveVertex(g);
        if (inputGraphClone.EdgeCount == 0)
        {
            break;
        }
    }

    Array.Clear(queryGraphEdges, 0, queryGraphEdges.Length);
    Array.Clear(queryGraphVertices, 0, subgraphSize);
    inputGraphDegSeq.Clear();

    var toReturn = GetSet(theMappings);
    Console.WriteLine("Thread {0}:\tAlgorithm 2: All tasks completed. Number of mappings found: {1}.", threadName, toReturn.Count);

    return toReturn;
}