Beispiel #1
0
        /// <summary>
        /// Recursive step of the subgraph-isomorphism search (after Grochow and Kellis): takes a partial
        /// node map and tries to grow it, one query-graph node at a time, into complete maps.
        /// The original author flagged this routine as "failing at the moment".
        /// </summary>
        /// <param name="partialMap">f, the map built so far. Key is h (a query-graph node) and Value is g (the input-graph node it is mapped to). This method does not write to it directly; every extension is made on a copy.</param>
        /// <param name="queryGraph">The query graph; its nodes are the keys of the map.</param>
        /// <param name="queryGraphEdges">The query graph's edges, passed in so that they are not re-evaluated on every call.</param>
        /// <param name="inputGraph">The input graph; its nodes are the values of the map.</param>
        /// <param name="getInducedMappingsOnly">Forwarded to the mapping check and to the pruning of mappings; if true, only induced subgraphs are to be returned.</param>
        /// <returns>
        /// The complete mappings found, grouped by the array of input-graph nodes they use (in each mapping, Key is h and Value is g).
        /// Returns null when a complete map fails verification, or when no next query-graph node can be selected.
        /// </returns>
        internal static Dictionary <int[], List <Mapping> > IsomorphicExtension(Dictionary <int, int> partialMap, QueryGraph queryGraph
                                                                                , Edge <int>[] queryGraphEdges, UndirectedGraph <int> inputGraph, bool getInducedMappingsOnly)
        {
            // Base case: every query-graph node already has an image, so just verify the map.
            if (partialMap.Count == queryGraph.VertexCount)
            {
                var completeMap = new SortedList <int, int>(partialMap);
                var verdict     = IsMappingCorrect(completeMap, queryGraphEdges, inputGraph, getInducedMappingsOnly);
                if (!verdict.IsCorrectMapping)
                {
                    completeMap.Clear();
                    return(null);
                }

                var mappedNodes   = completeMap.Values.ToArray();
                var singleMapping = new List <Mapping>(1);
                singleMapping.Add(new Mapping(completeMap, verdict.SubgraphEdgeCount));

                var baseResult = new Dictionary <int[], List <Mapping> >(1);
                baseResult.Add(mappedNodes, singleMapping);
                return(baseResult);
            }

            // Reminder: f(h) = g. The domain (dictionary keys) holds query-graph nodes h,
            // the range (dictionary values) holds input-graph nodes g.

            // Pick the next query-graph node to map: the most constrained neighbour of the current domain.
            int nextQueryNode = GetMostConstrainedNeighbour(partialMap.Keys, queryGraph);
            if (nextQueryNode < 0)
            {
                return(null);
            }

            var collected      = new Dictionary <int[], List <Mapping> >(ModaAlgorithms.MappingNodesComparer);
            var candidates     = ChooseNeighboursOfRange(partialMap.Values, inputGraph);
            var queryNeighbors = queryGraph.GetNeighbors(nextQueryNode, false);
            var extendedSize   = partialMap.Count + 1;

            // Try every candidate input-graph node n as the image of the chosen query-graph node.
            for (int idx = 0; idx < candidates.Count; idx++)
            {
                var candidate = candidates[idx];
                if (IsNeighbourIncompatible(inputGraph, candidate, partialMap, queryGraph, queryNeighbors))
                {
                    continue;
                }

                // Compatible: build f' = f on the current domain, plus f'(m) = n.
                var extendedMap = new Dictionary <int, int>(extendedSize);
                foreach (var pair in partialMap)
                {
                    extendedMap.Add(pair.Key, pair.Value);
                }
                extendedMap[nextQueryNode] = candidate;

                // Recurse to find every isomorphic extension of f'.
                var extensions = IsomorphicExtension(extendedMap, queryGraph, queryGraphEdges, inputGraph, getInducedMappingsOnly);
                extendedMap.Clear();

                if (extensions == null || extensions.Count == 0)
                {
                    continue;
                }

                foreach (var group in extensions)
                {
                    if (group.Value.Count > 1)
                    {
                        queryGraph.RemoveNonApplicableMappings(group.Value, inputGraph, getInducedMappingsOnly);
                    }

                    // Merge into the running result, keyed by the set of input-graph nodes.
                    List <Mapping> existing;
                    if (!collected.TryGetValue(group.Key, out existing))
                    {
                        collected[group.Key] = group.Value;
                    }
                    else
                    {
                        existing.AddRange(group.Value);
                    }
                }
                extensions.Clear();
            }

            // The neighbour list obtained from the query graph is deliberately NOT cleared here.
            candidates.Clear();
            return(collected);
        }
        /// <summary>
        /// Mapping module (aka FindSubgraphInstances in Grochow and Kellis), modified.
        /// The modification:
        ///     Instead of traversing all nodes in the query graph (H) for each node in the input graph (G),
        ///     we simply use just one node h in H to traverse G. This makes it much easier to parallelize
        ///     unlike the original algorithm, and eliminates the need for removing visited g from G.
        ///
        ///     Testing will show whether this improves, worsens or makes no difference in performance.
        /// </summary>
        /// <param name="queryGraph">H</param>
        /// <param name="inputGraph">G</param>
        /// <param name="numberOfSamples">Number of input-graph nodes requested from the degree-sorted sequence. If not positive, the <paramref name="inputGraph"/> vertex count divided by 3 is used.</param>
        /// <param name="getInducedMappingsOnly">Forwarded to the isomorphic-extension search and to the pruning of mappings; if true, only induced subgraphs are to be returned.</param>
        /// <returns>The mappings found, as produced by GetSet from the per-node-set groups.</returns>
        private static ICollection <Mapping> Algorithm2_Modified(QueryGraph queryGraph, UndirectedGraph <int> inputGraph, int numberOfSamples, bool getInducedMappingsOnly)
        {
            if (numberOfSamples <= 0)
            {
                numberOfSamples = inputGraph.VertexCount / 3;
            }

            var theMappings      = new Dictionary <int[], List <Mapping> >(MappingNodesComparer);
            var inputGraphDegSeq = inputGraph.GetNodesSortedByDegree(numberOfSamples);

            var threadName = Thread.CurrentThread.ManagedThreadId;

            Console.WriteLine("Thread {0}:\tCalling Algo 2-Modified:\n", threadName);

            var queryGraphEdges = queryGraph.Edges.ToArray();
            // The single query-graph node h that is tried against every sampled input-graph node g.
            var h = queryGraph.Vertices.ElementAt(0);
            // Seed map, reused across iterations: it only ever holds the single entry f(h) = g.
            var f = new Dictionary <int, int>(1);

            for (int i = 0; i < inputGraphDegSeq.Count; i++)
            {
                var g = inputGraphDegSeq[i];
                if (!Utils.CanSupport(queryGraph, h, inputGraph, g))
                {
                    continue;
                }

                //Remember: f(h) = g, so h is Domain and g is Range
                f[h] = g;
                var mappings = Utils.IsomorphicExtension(f, queryGraph, queryGraphEdges, inputGraph, getInducedMappingsOnly);

                // IsomorphicExtension returns null when it cannot extend or verify the map,
                // so the result must be null-checked before it is dereferenced.
                if (mappings == null || mappings.Count == 0)
                {
                    continue;
                }

                foreach (var item in mappings)
                {
                    if (item.Value.Count > 1)
                    {
                        queryGraph.RemoveNonApplicableMappings(item.Value, inputGraph, getInducedMappingsOnly);
                    }

                    // Merge into the overall result, grouped by the set of input-graph nodes.
                    List <Mapping> maps;
                    if (theMappings.TryGetValue(item.Key, out maps))
                    {
                        maps.AddRange(item.Value);
                    }
                    else
                    {
                        theMappings[item.Key] = item.Value;
                    }
                }
                mappings.Clear();
            }

            f.Clear();
            Array.Clear(queryGraphEdges, 0, queryGraphEdges.Length);
            inputGraphDegSeq.Clear();

            var toReturn = GetSet(theMappings);

            Console.WriteLine("\nThread {0}:\tAlgorithm 2: All iteration tasks completed. Number of mappings found: {1}.\n", threadName, toReturn.Count);
            return(toReturn);
        }
Beispiel #3
0
        /// <summary>
        /// Enumeration module. Derives the mappings of <paramref name="queryGraph"/> from the mappings already
        /// known for <paramref name="parentQueryGraph"/>. The parent's mappings are taken from
        /// <paramref name="allMappings"/> when <paramref name="fileName"/> is null or blank, and read from that file otherwise.
        /// NB: If either of <paramref name="allMappings"/> or <paramref name="fileName"/> is null, the other is expected not to be.
        /// </summary>
        /// <param name="allMappings">In-memory mappings per query graph. Only consulted when <paramref name="fileName"/> is null or blank.</param>
        /// <param name="inputGraph">G</param>
        /// <param name="queryGraph">H</param>
        /// <param name="expansionTree">T_k. Not referenced by the body of this method.</param>
        /// <param name="parentQueryGraph">The query graph whose mappings are used as the starting set.</param>
        /// <param name="newFileName">Receives the name of the file the parent's remaining mappings were re-written to, or null if nothing was re-written.</param>
        /// <param name="fileName">File holding the parent's mappings, or null/blank to use <paramref name="allMappings"/>.</param>
        /// <returns>
        /// The parent mappings accepted for <paramref name="queryGraph"/>. An empty array is returned when the parent has no
        /// mappings available or when no valid edge difference between the two query graphs is found.
        /// </returns>
        /// <remarks>
        /// The parent's mapping collection is modified in place (accepted mappings are removed from it). When it came from
        /// <paramref name="allMappings"/> it is the same instance the caller still holds, so be careful how its values are set or cleared.
        /// </remarks>
        private static IList <Mapping> Algorithm3(Dictionary <QueryGraph, ICollection <Mapping> > allMappings, UndirectedGraph <int> inputGraph, QueryGraph queryGraph,
                                                  AdjacencyGraph <ExpansionTreeNode> expansionTree,
                                                  QueryGraph parentQueryGraph, out string newFileName, string fileName = null)
        {
            newFileName = null;
            ICollection <Mapping> parentGraphMappings;

            if (string.IsNullOrWhiteSpace(fileName))
            {
                // Neither source is available: report "no mappings" rather than throwing a NullReferenceException.
                if (allMappings == null || !allMappings.TryGetValue(parentQueryGraph, out parentGraphMappings))
                {
                    return(new Mapping[0]);
                }
            }
            else
            {
                parentGraphMappings = parentQueryGraph.ReadMappingsFromFile(fileName);
            }
            if (parentGraphMappings == null || parentGraphMappings.Count == 0)
            {
                return(new Mapping[0]);
            }

            // Find the edge by which the query graph differs from its parent.
            var parentQueryGraphEdges = new HashSet <Edge <int> >(parentQueryGraph.Edges);
            var newEdge = GetEdgeDifference(queryGraph, parentQueryGraph, parentQueryGraphEdges);
            parentQueryGraphEdges.Clear();

            // if it's NOT a valid edge
            if (newEdge.Source == Utils.DefaultEdgeNodeVal)
            {
                return(new Mapping[0]);
            }

            var list = new List <Mapping>();
            int oldCount = parentGraphMappings.Count, id = 0, queryGraphEdgeCount = queryGraph.EdgeCount;
            var queryGraphEdges = queryGraph.Edges.ToArray();

            // Group the parent's mappings by the set of input-graph nodes they use.
            var groupByGNodes = parentGraphMappings.GroupBy(x => x.Function.Values.ToArray(), MappingNodesComparer);

            foreach (var set in groupByGNodes)
            {
                // function.value (= set of G nodes) are all the same within a group, so build the subgraph once and pass it down
                var subgraph = Utils.GetSubgraph(inputGraph, set.Key);
                foreach (var item in set)
                {
                    item.Id = id++;
                    // Remember, f(h) = g

                    // if (f(u), f(v)) ϵ G and meets the conditions, add to list
                    if (item.SubGraphEdgeCount == queryGraphEdgeCount)
                    {
                        var isMapping = Utils.IsMappingCorrect2(item.Function, subgraph, queryGraphEdges, true);
                        if (isMapping.IsCorrectMapping)
                        {
                            list.Add(item);
                        }
                    }
                    else if (item.SubGraphEdgeCount > queryGraphEdgeCount)
                    {
                        var newEdgeImage = item.GetImage(inputGraph, newEdge);

                        // if it's a valid edge...
                        if (newEdgeImage.Source != Utils.DefaultEdgeNodeVal &&
                            inputGraph.ContainsEdge(newEdgeImage.Source, newEdgeImage.Target))
                        {
                            list.Add(item);
                        }
                    }
                }
            }
            Array.Clear(queryGraphEdges, 0, queryGraphEdges.Length);
            var threadName = System.Threading.Thread.CurrentThread.ManagedThreadId;

            // Remove mappings from the parent qGraph that are found in this qGraph
            // This is because we're only interested in induced subgraphs
            var theRest = parentGraphMappings.Except(list).ToList();

            parentQueryGraph.RemoveNonApplicableMappings(theRest, inputGraph);

            // Replace the contents of the parent's collection in place; the caller may hold the same instance.
            parentGraphMappings.Clear();
            foreach (var item in theRest)
            {
                parentGraphMappings.Add(item);
            }
            theRest.Clear();

            // Now, remove duplicates
            queryGraph.RemoveNonApplicableMappings(list, inputGraph);
            if (!string.IsNullOrWhiteSpace(fileName) && oldCount > parentGraphMappings.Count)
            {
                // This means that some of the mappings from parent fit the current query graph
                newFileName = parentQueryGraph.WriteMappingsToFile(parentGraphMappings);
                try
                {
                    System.IO.File.Delete(fileName);
                }
                catch
                {
                    // Deliberately best-effort: a stale file left on disk is acceptable.
                }
            }

            Console.WriteLine("Thread {0}:\tAlgorithm 3: All tasks completed. Number of mappings found: {1}.\n", threadName, list.Count);
            return(list);
        }
Beispiel #4
0
        /// <summary>
        /// Algo 1: Find subgraph frequency (mappings found are saved to disk to be retrieved later during Algo 3).
        /// The value of the dictionary returned is the name of the file the mappings were written to. When
        /// <paramref name="qGraph"/> is supplied the name has the form: $"{mappings.Count}#{qGraph.Identifier}.ser";
        /// otherwise it is whatever WriteMappingsToFile returns for each query graph.
        /// </summary>
        /// <param name="inputGraph">The graph to search in. It is not handed to the vertex-removing Algo 2 directly; a clone is used instead.</param>
        /// <param name="qGraph">The query graph to be searched for. If not available, we use expansion trees (MODA). Otherwise, we use Grochow's (Algo 2)</param>
        /// <param name="subgraphSize">Size of the subgraphs being searched for; used to recognise tree and complete query graphs.</param>
        /// <param name="thresholdValue">Frequency value, above which we can consider the subgraph a "frequent subgraph"</param>
        /// <returns>For every query graph processed, the name of the file holding its mappings.</returns>
        public static Dictionary <QueryGraph, string> Algorithm1_C(UndirectedGraph <int> inputGraph, QueryGraph qGraph, int subgraphSize, int thresholdValue)
        {
            // The enumeration module (Algo 3) needs the mappings generated from the previous run(s)
            Dictionary <QueryGraph, string> allMappings;
            int numIterations = -1;

            if (inputGraph.VertexCount < 121)
            {
                numIterations = inputGraph.VertexCount;
            }

            if (qGraph == null) // Use MODA's expansion tree
            {
                #region Use MODA's expansion tree
                allMappings = new Dictionary <QueryGraph, string>(_builder.NumberOfQueryGraphs);
                while (true)
                {
                    var currentGraph = GetNextNode()?.QueryGraph;
                    if (currentGraph == null)
                    {
                        break;
                    }
                    ICollection <Mapping> mappings;
                    if (currentGraph.EdgeCount == (subgraphSize - 1)) // i.e. if the query graph is a tree
                    {
                        if (UseModifiedGrochow)
                        {
                            // Modified Mapping module - MODA and Grochow & Kellis
                            mappings = Algorithm2_Modified(currentGraph, inputGraph, numIterations, false);
                        }
                        else
                        {
                            // Algorithm2 removes vertices from the graph it is given, so it works on a clone.
                            var inputGraphClone = inputGraph.Clone();
                            mappings = Algorithm2(currentGraph, inputGraphClone, numIterations, false);
                            inputGraphClone.Clear();
                        }

                        // Because we're saving to file, we're better off doing this now
                        currentGraph.RemoveNonApplicableMappings(mappings, inputGraph, false);
                    }
                    else
                    {
                        // Enumeration module - MODA
                        // This is part of Algo 3; but performance tweaks make it more useful to get it here
                        var parentQueryGraph = GetParent(currentGraph, _builder.ExpansionTree);
                        string _filename;
                        if (allMappings.TryGetValue(parentQueryGraph, out _filename))
                        {
                            string newFileName; // for parentQueryGraph
                            mappings = Algorithm3(null, inputGraph, currentGraph, _builder.ExpansionTree, parentQueryGraph, out newFileName, _filename);
                            if (!string.IsNullOrWhiteSpace(newFileName))
                            {
                                // We change the _filename value in the dictionary since this means some of the mappings from parent fit the child
                                allMappings[parentQueryGraph] = newFileName;
                            }
                        }
                        else
                        {
                            mappings = new Mapping[0];
                        }
                    }

                    if (mappings.Count > thresholdValue)
                    {
                        currentGraph.IsFrequentSubgraph = true;
                    }

                    // Save mappings.
                    var fileName = currentGraph.WriteMappingsToFile(mappings);
                    // Only clear non-empty collections: the empty result may be an array.
                    if (mappings.Count > 0)
                    {
                        mappings.Clear();
                    }
                    allMappings.Add(currentGraph, fileName);

                    // Check for complete-ness; if complete, break
                    if (currentGraph.IsComplete(subgraphSize))
                    {
                        break;
                    }
                }
                #endregion
            }
            else
            {
                ICollection <Mapping> mappings;
                if (UseModifiedGrochow)
                {
                    // Modified Mapping module - MODA and Grochow & Kellis
                    mappings = Algorithm2_Modified(qGraph, inputGraph, numIterations, true);
                }
                else
                {
                    // Algorithm2 removes vertices from the graph it is given. Run it on a clone so that the
                    // caller's graph stays intact and the pruning below still sees the full input graph.
                    var inputGraphClone = inputGraph.Clone();
                    mappings = Algorithm2(qGraph, inputGraphClone, numIterations, true);
                    inputGraphClone.Clear();
                }
                qGraph.RemoveNonApplicableMappings(mappings, inputGraph);
                var fileName = $"{mappings.Count}#{qGraph.Identifier}.ser";
                System.IO.File.WriteAllText(fileName, Extensions.CompressString(Newtonsoft.Json.JsonConvert.SerializeObject(mappings)));
                if (mappings.Count > 0)
                {
                    mappings.Clear();
                }
                allMappings = new Dictionary <QueryGraph, string>(1)
                {
                    { qGraph, fileName }
                };
            }

            return(allMappings);
        }
Beispiel #5
0
        /// <summary>
        /// Algo 1: Find subgraph frequency (mappings held in memory)
        /// </summary>
        /// <param name="inputGraph">The graph to search in. It is not handed to the vertex-removing Algo 2 directly; a clone is used instead.</param>
        /// <param name="qGraph">The query graph to be searched for. If not available, we use expansion trees (MODA). Otherwise, we use Grochow's (Algo 2)</param>
        /// <param name="subgraphSize">Size of the subgraphs being searched for; used to recognise tree and complete query graphs.</param>
        /// <param name="thresholdValue">Frequency value, above which we can consider the subgraph a "frequent subgraph"</param>
        /// <returns>For every query graph processed, the collection of mappings found for it.</returns>
        public static Dictionary <QueryGraph, ICollection <Mapping> > Algorithm1(UndirectedGraph <int> inputGraph, QueryGraph qGraph, int subgraphSize = -1, int thresholdValue = 0)
        {
            // The enumeration module (Algo 3) needs the mappings generated from the previous run(s)
            Dictionary <QueryGraph, ICollection <Mapping> > allMappings;
            int numIterations = -1;

            if (inputGraph.VertexCount < 121)
            {
                numIterations = inputGraph.VertexCount;
            }

            if (qGraph == null) // Use MODA's expansion tree
            {
                #region Use MODA's expansion tree
                // Tree query graphs that served as a parent in Algo 3; these are skipped by the pruning pass below.
                var treatedNodes = new HashSet <QueryGraph>();
                allMappings = new Dictionary <QueryGraph, ICollection <Mapping> >(_builder.NumberOfQueryGraphs);
                while (true)
                {
                    var currentGraph = GetNextNode()?.QueryGraph;
                    if (currentGraph == null)
                    {
                        break;
                    }
                    ICollection <Mapping> mappings;
                    if (currentGraph.IsTree(subgraphSize))
                    {
                        if (UseModifiedGrochow)
                        {
                            // Modified Mapping module - MODA and Grochow & Kellis
                            mappings = Algorithm2_Modified(currentGraph, inputGraph, numIterations, false);
                        }
                        else
                        {
                            // Mapping module - MODA and Grochow & Kellis.
                            // Algorithm2 removes vertices from the graph it is given, so it works on a clone.
                            var inputGraphClone = inputGraph.Clone();
                            mappings = Algorithm2(currentGraph, inputGraphClone, numIterations, false);
                            inputGraphClone.Clear();
                        }
                    }
                    else
                    {
                        // Enumeration module - MODA
                        // This is part of Algo 3; but performance tweaks make it more useful to get it here
                        var parentQueryGraph = GetParent(currentGraph, _builder.ExpansionTree);
                        if (parentQueryGraph.IsTree(subgraphSize))
                        {
                            treatedNodes.Add(parentQueryGraph);
                        }
                        string file; // required by the signature; unused for in-memory mappings
                        mappings = Algorithm3(allMappings, inputGraph, currentGraph, _builder.ExpansionTree, parentQueryGraph, out file);
                    }
                    if (mappings != null && mappings.Count > thresholdValue)
                    {
                        currentGraph.IsFrequentSubgraph = true;
                    }

                    // Keep the mappings in memory. Do NOT clear the collection: the dictionary now owns it.
                    allMappings.Add(currentGraph, mappings);

                    // Check for complete-ness; if complete, break
                    if (currentGraph.IsComplete(subgraphSize))
                    {
                        break;
                    }
                }

                if (treatedNodes.Count > 0)
                {
                    foreach (var mapping in allMappings)
                    {
                        if (mapping.Key.IsTree(subgraphSize) && !treatedNodes.Contains(mapping.Key))
                        {
                            mapping.Key.RemoveNonApplicableMappings(mapping.Value, inputGraph);
                        }
                    }
                    treatedNodes.Clear();
                }
                #endregion
            }
            else
            {
                ICollection <Mapping> mappings;
                if (UseModifiedGrochow)
                {
                    // Modified Mapping module - MODA and Grochow & Kellis
                    mappings = Algorithm2_Modified(qGraph, inputGraph, numIterations, true);
                }
                else
                {
                    // Algorithm2 removes vertices from the graph it is given. Run it on a clone so that the
                    // caller's graph stays intact and the pruning below still sees the full input graph.
                    var inputGraphClone = inputGraph.Clone();
                    mappings = Algorithm2(qGraph, inputGraphClone, numIterations, true);
                    inputGraphClone.Clear();
                }

                qGraph.RemoveNonApplicableMappings(mappings, inputGraph);

                // Do NOT clear the collection: it is handed to the caller inside the dictionary.
                allMappings = new Dictionary <QueryGraph, ICollection <Mapping> >(1)
                {
                    { qGraph, mappings }
                };
            }

            return(allMappings);
        }
Beispiel #6
0
        /// <summary>
        /// Mapping module; aka FindSubgraphInstances in Grochow and Kellis.
        /// Tries every query-graph node h against each sampled input-graph node g, then removes g from the graph.
        /// </summary>
        /// <param name="queryGraph">H</param>
        /// <param name="inputGraphClone">G. Vertices are removed from this graph as the search proceeds, so pass a clone if the original must be preserved.</param>
        /// <param name="numberOfSamples">Number of input-graph nodes requested from the degree-sorted sequence. If not positive, the <paramref name="inputGraphClone"/> vertex count divided by 3 is used.</param>
        /// <param name="getInducedMappingsOnly">Forwarded to the isomorphic-extension search and to the pruning of mappings; if true, only induced subgraphs are to be returned.</param>
        /// <returns>The mappings found, as produced by GetSet from the per-node-set groups.</returns>
        internal static ICollection <Mapping> Algorithm2(QueryGraph queryGraph, UndirectedGraph <int> inputGraphClone, int numberOfSamples, bool getInducedMappingsOnly)
        {
            if (numberOfSamples <= 0)
            {
                numberOfSamples = inputGraphClone.VertexCount / 3;
            }

            // Do we need this clone? Can't we just remove the node directly from the graph?
            // We do need it.
            var theMappings        = new Dictionary <int[], List <Mapping> >(MappingNodesComparer);
            var inputGraphDegSeq   = inputGraphClone.GetNodesSortedByDegree(numberOfSamples);
            var queryGraphVertices = queryGraph.Vertices.ToArray();
            var queryGraphEdges    = queryGraph.Edges.ToArray();
            var subgraphSize       = queryGraphVertices.Length;
            var threadName         = System.Threading.Thread.CurrentThread.ManagedThreadId;

            Console.WriteLine("Thread {0}:\tCallingu Algo 2:\n", threadName);
            for (int i = 0; i < inputGraphDegSeq.Count; i++)
            {
                var g = inputGraphDegSeq[i];
                for (int j = 0; j < subgraphSize; j++)
                {
                    var h = queryGraphVertices[j];
                    if (!Utils.CanSupport(queryGraph, h, inputGraphClone, g))
                    {
                        continue;
                    }

                    //Remember: f(h) = g, so h is Domain and g is Range
                    var f = new Dictionary <int, int>(1);
                    f[h] = g;
                    var mappings = Utils.IsomorphicExtension(f, queryGraph, queryGraphEdges, inputGraphClone, getInducedMappingsOnly);
                    f.Clear();

                    // IsomorphicExtension returns null when it cannot extend or verify the map,
                    // so the result must be null-checked before it is dereferenced.
                    if (mappings == null || mappings.Count == 0)
                    {
                        continue;
                    }

                    foreach (var item in mappings)
                    {
                        if (item.Value.Count > 1)
                        {
                            queryGraph.RemoveNonApplicableMappings(item.Value, inputGraphClone, getInducedMappingsOnly);
                        }

                        // Merge into the overall result, grouped by the set of input-graph nodes.
                        List <Mapping> maps;
                        if (theMappings.TryGetValue(item.Key, out maps))
                        {
                            maps.AddRange(item.Value);
                        }
                        else
                        {
                            theMappings[item.Key] = item.Value;
                        }
                    }
                    mappings.Clear();
                }

                //Remove g; once no edges are left there is nothing more to search.
                inputGraphClone.RemoveVertex(g);
                if (inputGraphClone.EdgeCount == 0)
                {
                    break;
                }
            }
            Array.Clear(queryGraphEdges, 0, queryGraphEdges.Length);
            Array.Clear(queryGraphVertices, 0, subgraphSize);
            inputGraphDegSeq.Clear();

            var toReturn = GetSet(theMappings);

            Console.WriteLine("Thread {0}:\tAlgorithm 2: All tasks completed. Number of mappings found: {1}.", threadName, toReturn.Count);
            return(toReturn);
        }