Example #1
0
        /// <summary>
        /// Actual Implementation of the DynamicLogProcessor.
        /// Generate Dynamic Graph from Log File. Optionally generate Statistics and DataModel.
        /// </summary>
        /// <remarks>
        /// The Log is split additively by <c>Settings.timeStepMS</c>. Starting with the first partial Log that
        /// contains a monitored process, one graph is generated per partial Log. The graph of the last partial
        /// Log is written as <c>dynamic.gexf</c> and <c>final.graphml</c>.
        /// Nothing is generated if pre-processing fails or fewer than <c>Settings.MIN_SAMPLES</c> partial Logs exist.
        /// Model Data is only produced when <paramref name="computeStats"/> is true, because the statistics
        /// collection handed to the ModelBuilder is only filled in that case.
        /// </remarks>
        /// <param name="logPath">Full Path to the Log File.</param>
        /// <param name="diOutput">DirectoryInfo of the output Directory. Only its Name is used to build output sub paths.</param>
        /// <param name="computeStats">true if Statistics and Model should be generated; otherwise, false.</param>
        private void processLogDynamic(String logPath, DirectoryInfo diOutput, bool computeStats)
        {
            // Reset the external EventProcessor (LibDFT)
            EventProcessor ep = new EventProcessor();
            ep.freeResources();

            logger.Info("Processing Log for Dynamics: " + logPath);

            // Init EventLog, Statistics & Metrics
            EventLog           eventLog = new EventLog(logPath);
            QDFGStatCollection stats    = new QDFGStatCollection();
            CombinedMetrics    metrics  = new CombinedMetrics();

            // Subpath for the Output Directory of this particular dynamic Log
            String outPath = diOutput.Name + "\\dynamicLog";

            // Pre-process log file (Create Output Directories, Verify Log contains Nodes of interest)
            if (!preprocessLog(eventLog, logPath, outPath))
            {
                return;
            }

            checkMemoryCount(logPath + ", Stage: init");

            // Create Transformer for Event Log; Used to perform operations on EventLog
            EventLogTransformer transformer = new EventLogTransformer(eventLog);

            // Fix inconsistent time stamps & transform the absolute time representation into a relative one
            logger.Info("Fixing Time Info in Log: " + logPath);
            transformer.fixDates();

            checkMemoryCount(logPath + ", Stage: Fix Dates");

            // Split Log into multiple Logs, each covering an increasing Interval (i.e. last Log is equivalent to original Log)
            logger.Info("Splitting Log additively: " + logPath);
            List<EventLog> splitLogs  = transformer.splitLogByTimeAdditive(Settings.timeStepMS);
            int            numSamples = splitLogs.Count; // Number of Logs = Number of Intervals = Number of Samples for Features

            logger.Info("Split Done. Split into " + numSamples + " Logs.");

            checkMemoryCount(logPath + ", Stage: Split Logs");

            // Only start advanced Processing of Interval Logs, if we have a sufficient Number of Samples
            if (numSamples < Settings.MIN_SAMPLES)
            {
                logger.Fatal(logPath + ": Not Enough Active Samples (" + numSamples + ", MIN: " + Settings.MIN_SAMPLES + ")");
                return;
            }

            // Maps the name of each Node to the index of the partial Log in which it was first seen
            Dictionary<string, string> processedIDs = new Dictionary<string, string>();
            DFTGraph lastGraph      = new DFTGraph();
            long     start          = -1; // Index of the first partial Log containing a monitored process
            // NOTE(review): the "+ 10" padding is not explained anywhere in this file - confirm against GEXFWriter
            long     end            = splitLogs.Count + 10;
            bool     monitoredFound = false;

            // For each Interval (partial) Log
            for (int i = 0; i < splitLogs.Count; i++)
            {
                checkMemoryCount(logPath + ", Stage: Partial Log " + i);

                // Skip all partial Logs before the first one that contains a monitored process;
                // once it was found, every following partial Log is processed without checking again.
                if (!monitoredFound && !splitLogs[i].containsMonitoredProcess())
                {
                    continue;
                }

                logger.Debug("Contains Monitored " + i);
                if (start == -1)
                {
                    start          = i;
                    monitoredFound = true;
                }

                logger.Info("Generating SubGraph " + i + "/" + splitLogs.Count);

                // TODO Investigate possible "Bug" in EventProcessor
                // At this point, somehow Graphs or Nodes/Edges that were generated in
                // previous calls of this method can leak state into this call.
                // This can be verified by calling generateGraphFromString
                // multiple times for different Logs and directly outputting the graph given by
                // the EventProcessor.generateGraphFromString(<someLog>.ToString()) Method.
                //
                // One can sometimes observe nodes in the graph that are not present in any event of "<someLog>".
                // Further investigation shows that these nodes were however present in Logs previously
                // processed by the EventProcessor. Therefore it seems like the EventProcessor
                // is not working with a completely fresh or cleared state when calling generateGraphFromString
                // multiple times. Even though one would not expect that from a static Method.
                // The problem seems to be solvable by creating a new instance of EventProcessor and (possibly?)
                // calling ep.freeResources(). However this quickly leads to the next problem...
                //
                // The resources (memory) used by the EventProcessor are seemingly not garbage collected,
                // when the reference is nulled. This might be due to how Large Object Heap Collection works.
                // Processing multiple logs in a single EventProcessor instance
                // is not possible due to the "shared state" problem mentioned above. However, creating a new instance of
                // EventProcessor every time we need to process a log, is not a good solution either, because
                // memory from expired instances of EventProcessor is apparently not released.
                //
                // Therefore one is forced to restart the program for each Log (frees memory) and also
                // choose a time step that leads to a number of partial Logs that fit into memory. Smaller time step
                // will result in more partial Logs and higher memory requirements (since we need to create
                // a new EventProcessor instance for each partial log. Calling ep.freeResources() for
                // each partial Log does not seem to do anything, neither does nulling the EventProcessor object.)

                /* Open question: When exactly are we supposed to call this ? */
                ep = new EventProcessor();
                ep.freeResources();

                // generate QDFG for this time instant (step / snapshot)
                lastGraph = EventProcessor.generateGraphFromString(splitLogs[i].ToString());
                DFTGraph workingGraph = new DFTGraph(lastGraph);

                // if stat collection is enabled, add stats for this time instant to the collection
                if (computeStats)
                {
                    metrics.decorate(workingGraph);
                    stats.addStats(workingGraph, i);

                    // Debug: list the names of the features attached to every Node whose name contains "malware"
                    foreach (DFTNode n in workingGraph.Vertices)
                    {
                        if (n.nameFTR.Contains("malware"))
                        {
                            logger.Debug("Node Features For " + n.nameFTR);
                            foreach (DFTNodeFeature f in n.nodeFeatures)
                            {
                                logger.Debug(f.name);
                            }
                            logger.Debug("End of Node Features.");
                        }
                    }
                }

                // Remember for every Node the index of the partial Log it first appeared in.
                // Attaching this as a "start" attribute to the Node itself is not an option, because
                // such information somehow persists through separate method calls (see problem
                // description above).
                // Workaround: Pass a dictionary containing the relevant time information to the GEXF Engine
                foreach (DFTNode n in lastGraph.Vertices)
                {
                    if (!processedIDs.ContainsKey(n.nameFTR))
                    {
                        processedIDs.Add(n.nameFTR, i.ToString());
                        logger.Debug("ADDED NEW NODE: " + n.nameFTR + " -> " + i.ToString());
                    }
                }

                logger.Debug("IDs in Dict: " + processedIDs.Count);
            }

            GEXFWriter.writeGraph(lastGraph, outPath + "\\dynamic.gexf", true, start, end, processedIDs);
            File.WriteAllText(Settings.outputDirectory + "\\" + outPath + "\\final" + ".graphml", lastGraph.serializeGraphML());
            ep = new EventProcessor();
            ep.freeResources();
            logger.Warn("Wrote Dynamic Graph for: " + logPath);

            if (computeStats)
            {
                String statsFile = Settings.outputDirectory + "\\" + diOutput.Name + "\\" + "stats.txt";
                stats.writeStatsToFile(statsFile);
                logger.Warn("Wrote Stats for: " + logPath + " to file: " + statsFile);

                // The Model is built from the collected statistics, so it can only be generated
                // when statistics were actually computed; otherwise "stats" is still empty.
                if (Settings.generateModel)
                {
                    ModelBuilder.addModelData(stats, diOutput.Name);
                }
            }
        }
Example #2
0
        /// <summary>
        /// Construct Model Data from Node and Statistics.
        /// </summary>
        /// <param name="n">Node whose <c>node_id</c> selects the Vertex Statistics and whose <c>nameFTR</c> names the Model Data.</param>
        /// <param name="stats">Statistics Collection the Vertex Statistics of the Node are read from.</param>
        /// <returns>
        /// Model Data holding, per feature name, the list of feature values collected over all recorded
        /// time instances; null if no Vertex Statistics Collection exists for the Node or if any feature
        /// has fewer than <c>Settings.MIN_SAMPLES</c> values.
        /// </returns>
        public static ModelData computeModelDataForNode(DFTNode n, QDFGStatCollection stats)
        {
            // Obtain the Collection of Vertex Statistics for this Node
            VertexStatCollection nodeStats = stats.getVertexStatCollectionByID(n.node_id);

            if (nodeStats == null)
            {
                logger.Fatal("Unable to obtain Vertex Statistics Collection for Node " + n.nameFTR + "; Therefore, no Model Data was produced.");
                return null;
            }

            // Lists holding the sampled values of each feature, keyed by feature name
            Dictionary<String, List<Double>> timedFeatureList = new Dictionary<String, List<Double>>();

            // Obtain Vertex Stats (of this Node), for all recorded Time Instances
            Dictionary<int, VertexStats> vertexStatsByTime = nodeStats.getAllStats();

            foreach (VertexStats vs in vertexStatsByTime.Values)
            {
                foreach (Feature feature in vs.FeatureSet.Values)
                {
                    // Single lookup: fetch the list of this feature, creating it on first use
                    List<Double> samples;
                    if (!timedFeatureList.TryGetValue(feature.Name, out samples))
                    {
                        samples = new List<Double>();
                        timedFeatureList.Add(feature.Name, samples);
                    }

                    // Add Feature Value for this time Instance to the List
                    samples.Add(feature.Value);
                }
            }

            // Create the Model Data Object using the time sampled Data
            ModelData modelData = new ModelData(n.nameFTR, timedFeatureList);

            // Evaluate Model Data Constraints: every feature needs at least MIN_SAMPLES values
            foreach (KeyValuePair<String, List<Double>> kvp in modelData.TimedFeatureCollection)
            {
                if (kvp.Value.Count < Settings.MIN_SAMPLES)
                {
                    logger.Fatal(modelData.Name + ": Model Data for does not contain enough samples for Statistic <" + kvp.Key + ">. Samples: " + kvp.Value.Count + ", Minimum: " + Settings.MIN_SAMPLES);
                    return null;
                }
            }

            return modelData;
        }
Example #3
0
        /// <summary>
        /// Computes the Model Data of all relevant process Nodes found in <paramref name="stats"/> and appends
        /// it to the Model Data file and the Time Data file located in the sub directory
        /// <paramref name="name"/> of <c>Settings.outputDirectory</c>.
        /// </summary>
        /// <remarks>
        /// Nodes matching the keyword "malware" are used if there are any; only otherwise Nodes matching
        /// "goodware" are used. The keyword that matched is passed on as classifier.
        /// Failures are logged and not thrown: this includes <see cref="ArgumentException"/> and
        /// <see cref="IOException"/> raised while building the file paths or writing the files.
        /// </remarks>
        /// <param name="stats">Statistics Collection providing the Nodes and their Vertex Statistics.</param>
        /// <param name="name">Name of the output sub directory; also used as prefix in log messages.</param>
        public static void addModelData(QDFGStatCollection stats, String name)
        {
            Console.WriteLine("Model Data Name: " + name);

            // Classifier String for Node
            String classifier = "malware";

            // List of relevant Nodes (goodware/malware)
            List<DFTNode> relevantNodes = stats.getNodesByKeyword("malware");

            if (relevantNodes.Count == 0)
            {
                relevantNodes = stats.getNodesByKeyword("goodware");
                classifier    = "goodware";
            }

            if (relevantNodes.Count == 0)
            {
                logger.Fatal(name + ": FAIL - Unable to add model data, Graph contains no malware or goodware Nodes.");
                return;
            }

            // List holding all Model Data, one for each relevant Node (i.e. goodware/malware processes)
            List<ModelData> modelData = new List<ModelData>();

            foreach (DFTNode n in relevantNodes)
            {
                // Node must be a process
                if (n.typeEnum != DFTNodeType.PROCESS)
                {
                    continue;
                }

                // Compute the Model Data for this Node; null means no or insufficient data
                ModelData m = computeModelDataForNode(n, stats);

                if (m != null)
                {
                    modelData.Add(m);
                    logger.Debug(name + ": OK. Model Data added.");
                }
            }

            if (modelData.Count == 0)
            {
                logger.Fatal(name + ": FAIL - no malware or goodware processes or insufficient samples.");
                return;
            }

            try
            {
                // Build the paths inside the try block: Path.Combine may throw an ArgumentException
                // for an invalid name, which has to end up in the handler below as well.
                String modelDataPath = Path.Combine(Settings.outputDirectory, name, Settings.ModelDataFile);
                String timeDataPath  = Path.Combine(Settings.outputDirectory, name, Settings.TimeDataFile);

                StringBuilder mdString = new StringBuilder();
                StringBuilder tdString = new StringBuilder();

                // If Model Data File does not exist yet, start with the Header Line. Header and
                // data rows are written in one go, so that a failure while building the rows
                // does not leave a file behind that only consists of the header.
                if (!File.Exists(modelDataPath))
                {
                    mdString.Append(modelData[0].getHeaderString(Settings.MODEL_DATA_TYPE) + "\r\n");
                }

                foreach (ModelData m in modelData)
                {
                    // Construct Model Data String
                    mdString.AppendLine(m.getModelDataString(Settings.MODEL_DATA_TYPE, classifier));

                    // Construct Time Data String
                    tdString.AppendLine(TimeData.getStringTimeData(Settings.TIME_DATA_TYPE, m));
                }

                // Append the constructed Strings to the Model Data and Time Data Files
                File.AppendAllText(modelDataPath, mdString.ToString());
                File.AppendAllText(timeDataPath, tdString.ToString());
            }
            catch (ArgumentException e)
            {
                logger.Fatal(e.Message);
            }
            catch (IOException e)
            {
                logger.Fatal(e.Message);
            }
        }
        /// <summary>
        /// Actual Implementation of the StaticLogProcessor.
        /// Generate Graph from Log File. Optionally generate Statistics and DataModel.
        /// </summary>
        /// <remarks>
        /// Overwrites <c>Settings.MIN_SAMPLES</c> (1) and <c>Settings.timeStepMS</c> (-1); the previous
        /// values are not restored afterwards.
        /// Model Data is only produced when <paramref name="computeStats"/> is true, because the statistics
        /// collection handed to the ModelBuilder is only filled in that case.
        /// </remarks>
        /// <param name="logPath">Full Path to the Log File.</param>
        /// <param name="diOutput">DirectoryInfo of the output Directory. Only its Name is used to build output sub paths.</param>
        /// <param name="computeStats">true if Statistics and Model should be generated; otherwise, false.</param>
        private void processLogStatic(String logPath, DirectoryInfo diOutput, bool computeStats)
        {
            Settings.MIN_SAMPLES = 1;  // Static Logs only consist of 1 sample (the final state of the graph)
            Settings.timeStepMS  = -1; // Time is not considered for static graphs.

            // Reset the external EventProcessor (LibDFT)
            EventProcessor ep = new EventProcessor();
            ep.freeResources();

            logger.Info("Processing Log (Static): " + logPath);

            // Init EventLog, Statistics & Metrics
            EventLog           eventLog = new EventLog(logPath);
            QDFGStatCollection stats    = new QDFGStatCollection();
            CombinedMetrics    metrics  = new CombinedMetrics();

            // Subpath for the Output Directory of this particular static Log
            String outPath = diOutput.Name + "\\staticLog";

            // Pre-process log file (Create Output Directories, Verify Log contains Nodes of interest)
            if (!preprocessLog(eventLog, logPath, outPath))
            {
                return;
            }

            checkMemoryCount(logPath + ", Stage: init");

            // Create Transformer for Event Log; Used to perform operations on EventLog
            EventLogTransformer transformer = new EventLogTransformer(eventLog);

            // Fix inconsistent time stamps & transform the absolute time representation into a relative one
            logger.Info("Fixing Time Info in Log: " + logPath);
            transformer.fixDates();

            checkMemoryCount(logPath + ", Stage: Fix Dates");

            // Generate the graph for the complete Log and write its GraphML representation
            DFTGraph g = EventProcessor.generateGraphFromString(eventLog.ToString());

            Utility.Graph.writeGraphToFile(g.serializeGraphML(), diOutput.Name + "\\StaticGraphG_" + Utility.IO.getFileNameFromPath(logPath) + ".graphml");
            logger.Warn("Wrote Static Graph for: " + logPath);
            DFTGraph workingGraph = new DFTGraph(g);

            // if stat collection is enabled, add stats for the single time instant (0) to the collection
            if (computeStats)
            {
                logger.Info("Decorating graph: " + logPath);
                metrics.decorate(workingGraph);
                stats.addStats(workingGraph, 0);
            }

            // Reset the external EventProcessor again, now that the graph has been generated
            ep = new EventProcessor();
            ep.freeResources();

            if (computeStats)
            {
                String statsFile = Settings.outputDirectory + "\\" + diOutput.Name + "\\" + "stats.txt";
                stats.writeStatsToFile(statsFile);
                logger.Warn("Wrote Stats for: " + logPath + " to file: " + statsFile);

                // The Model is built from the collected statistics, so it can only be generated
                // when statistics were actually computed; otherwise "stats" is still empty.
                if (Settings.generateModel)
                {
                    ModelBuilder.addModelData(stats, diOutput.Name);
                }
            }
        }