/// <summary>
/// Implementation of the dynamic log processor: builds one graph per additive time
/// interval of the log and writes the result as a dynamic GEXF graph plus a final GraphML file.
/// Optionally collects statistics per interval and hands them to the model builder.
/// </summary>
/// <param name="logPath">Full path to the log file.</param>
/// <param name="diOutput">DirectoryInfo of the output directory; only its Name is used to build sub-paths.</param>
/// <param name="computeStats">true to decorate each interval graph with metrics, record statistics and write them to stats.txt; otherwise false.</param>
private void processLogDynamic(String logPath, DirectoryInfo diOutput, bool computeStats)
{
    // Start from a reset external EventProcessor (LibDFT).
    EventProcessor ep = new EventProcessor();
    ep.freeResources();

    logger.Info("Processing Log for Dynamics: " + logPath);

    // Event log plus the statistics/metrics containers used while iterating the intervals.
    EventLog eventLog = new EventLog(logPath);
    QDFGStatCollection stats = new QDFGStatCollection();
    CombinedMetrics metrics = new CombinedMetrics();

    // Output sub-path (relative) used for everything produced from this dynamic log.
    String outPath = diOutput.Name + "\\dynamicLog";

    // Pre-processing creates the output directories and verifies that the log contains
    // nodes of interest; nothing else is done when it reports failure.
    if (!preprocessLog(eventLog, logPath, outPath))
    {
        return;
    }

    checkMemoryCount(logPath + ", Stage: init");

    // The transformer performs all in-place operations on the event log.
    EventLogTransformer transformer = new EventLogTransformer(eventLog);

    // Repair inconsistent time stamps and switch from absolute to relative time.
    logger.Info("Fixing Time Info in Log: " + logPath);
    transformer.fixDates();

    checkMemoryCount(logPath + ", Stage: Fix Dates");

    // Additive split: each partial log covers a longer interval than the previous one,
    // so the last partial log is equivalent to the complete log.
    logger.Info("Splitting Log additively: " + logPath);
    List<EventLog> splitLogs = transformer.splitLogByTimeAdditive(Settings.timeStepMS);

    // Number of partial logs = number of intervals = number of samples per feature.
    int numSamples = splitLogs.Count;
    logger.Info("Split Done. Split into " + numSamples + " Logs.");

    checkMemoryCount(logPath + ", Stage: Split Logs");

    // Advanced processing only makes sense with a sufficient number of samples.
    if (numSamples < Settings.MIN_SAMPLES)
    {
        logger.Fatal(logPath + ": Not Enough Active Samples (" + numSamples + ", MIN: " + Settings.MIN_SAMPLES + ")");
        return;
    }

    // Maps a node name to the index of the interval in which the node was first seen.
    Dictionary<string, string> processedIDs = new Dictionary<string, string>();
    DFTGraph lastGraph = new DFTGraph();
    long start = -1;               // index of the first interval containing the monitored process
    long end = numSamples + 10;
    bool monitoredFound = false;

    for (int i = 0; i < numSamples; i++)
    {
        checkMemoryCount(logPath + ", Stage: Partial Log " + i);

        // Intervals are skipped until the monitored process shows up for the first time;
        // from then on every interval is processed without re-checking.
        if (!monitoredFound && !splitLogs[i].containsMonitoredProcess())
        {
            continue;
        }

        logger.Debug("Contains Monitored " + i);
        if (start == -1)
        {
            start = i;
            monitoredFound = true;
        }

        logger.Info("Generating SubGraph " + i + "/" + numSamples);

        // TODO: Investigate suspected state leakage in EventProcessor (LibDFT).
        // Observation recorded by the original author: graphs returned by
        // EventProcessor.generateGraphFromString(...) sometimes contain nodes that do not
        // occur in any event of the log passed in, but did occur in logs processed earlier.
        // The processor therefore does not seem to start from a completely cleared state,
        // even though the method is static.
        // Creating a new EventProcessor instance (and possibly calling freeResources())
        // appeared to avoid the leak. However, the memory used by discarded instances was
        // apparently not reclaimed when the reference was dropped (possibly related to
        // Large Object Heap collection), and calling freeResources() per partial log or
        // nulling the instance did not seem to help.
        // Practical consequence noted by the author: the program has to be restarted for
        // each log, and the time step must be chosen so that the resulting number of
        // partial logs fits into memory (smaller time step -> more partial logs -> more memory).
        // It is still an open question when exactly the reset below has to be called.
        ep = new EventProcessor();
        ep.freeResources();

        // Graph (QDFG) for this time instant (step / snapshot).
        lastGraph = EventProcessor.generateGraphFromString(splitLogs[i].ToString());
        DFTGraph workingGraph = new DFTGraph(lastGraph);

        if (computeStats)
        {
            // Record the statistics of this time instant.
            metrics.decorate(workingGraph);
            stats.addStats(workingGraph, i);

            // Debug listing of the feature names of every node whose name contains "malware".
            foreach (DFTNode vertex in workingGraph.Vertices)
            {
                if (!vertex.nameFTR.Contains("malware"))
                {
                    continue;
                }

                logger.Debug("Node Features For " + vertex.nameFTR);
                foreach (DFTNodeFeature nodeFeature in vertex.nodeFeatures)
                {
                    logger.Debug(nodeFeature.name);
                }
                logger.Debug("End of Node Features.");
            }
        }

        // Workaround: attaching a "start" attribute directly to the node was abandoned because
        // that information persisted across separate calls (see the TODO above). The first-seen
        // interval is instead passed to the GEXF writer through a dictionary.
        foreach (DFTNode vertex in lastGraph.Vertices)
        {
            if (processedIDs.ContainsKey(vertex.nameFTR))
            {
                continue;
            }

            processedIDs.Add(vertex.nameFTR, i.ToString());
            logger.Debug("ADDED NEW NODE: " + vertex.nameFTR + " -> " + i.ToString());
        }

        logger.Debug("IDs in Dict: " + processedIDs.Count);
    }

    // Persist the graph of the last processed interval, both as dynamic GEXF and as GraphML.
    GEXFWriter.writeGraph(lastGraph, outPath + "\\dynamic.gexf", true, start, end, processedIDs);
    File.WriteAllText(Settings.outputDirectory + "\\" + outPath + "\\final.graphml", lastGraph.serializeGraphML());

    ep = new EventProcessor();
    ep.freeResources();

    logger.Warn("Wrote Dynamic Graph for: " + logPath);

    if (computeStats)
    {
        String statsFile = Settings.outputDirectory + "\\" + diOutput.Name + "\\stats.txt";
        stats.writeStatsToFile(statsFile);
        logger.Warn("Wrote Stats for: " + logPath + " to file: " + statsFile);
    }

    if (Settings.generateModel)
    {
        ModelBuilder.addModelData(stats, diOutput.Name);
    }
}
/// <summary>
/// Constructs the model data for a single node from the vertex statistics recorded for it.
/// For every feature name, the values found in the node's recorded vertex statistics are
/// collected into one list (one entry per recorded statistics object).
/// </summary>
/// <param name="n">Node to build the model data for; its node_id selects the statistics and its nameFTR names the result.</param>
/// <param name="stats">Statistics collection from which the node's vertex statistics are looked up.</param>
/// <returns>
/// The model data for the node, or null when no vertex statistics collection exists for the
/// node or when any feature has fewer than Settings.MIN_SAMPLES samples (both cases are logged).
/// </returns>
public static ModelData computeModelDataForNode(DFTNode n, QDFGStatCollection stats)
{
    // Collection of vertex statistics recorded for this node.
    VertexStatCollection nodeStats = stats.getVertexStatCollectionByID(n.node_id);
    if (nodeStats == null)
    {
        logger.Fatal("Unable to obtain Vertex Statistics Collection for Node " + n.nameFTR + "; Therefore, no Model Data was produced.");
        return null;
    }

    // Feature name -> sampled values of that feature.
    Dictionary<String, List<Double>> timedFeatureList = new Dictionary<String, List<Double>>();

    // Vertex statistics of this node for all recorded time instances.
    // NOTE(review): samples are appended in the dictionary's enumeration order, which
    // Dictionary<TKey,TValue> does not guarantee to be key order - confirm that
    // downstream consumers do not rely on chronological ordering.
    Dictionary<int, VertexStats> vertexStatsByTime = nodeStats.getAllStats();

    foreach (VertexStats vs in vertexStatsByTime.Values)
    {
        foreach (Feature feature in vs.FeatureSet.Values)
        {
            // Single lookup: fetch the feature's sample list, creating it on first use.
            List<Double> samples;
            if (!timedFeatureList.TryGetValue(feature.Name, out samples))
            {
                samples = new List<Double>();
                timedFeatureList.Add(feature.Name, samples);
            }

            samples.Add(feature.Value);
        }
    }

    // Create the model data object from the sampled data.
    ModelData modelData = new ModelData(n.nameFTR, timedFeatureList);

    // Constraint: every feature must provide at least Settings.MIN_SAMPLES samples.
    foreach (KeyValuePair<String, List<Double>> kvp in modelData.TimedFeatureCollection)
    {
        if (kvp.Value.Count < Settings.MIN_SAMPLES)
        {
            logger.Fatal(modelData.Name + ": Model Data does not contain enough samples for Statistic <" + kvp.Key + ">. Samples: " + kvp.Value.Count + ", Minimum: " + Settings.MIN_SAMPLES);
            return null;
        }
    }

    return modelData;
}
/// <summary>
/// Computes the model data for all relevant process nodes in the given statistics and
/// appends it to the model data file and the time data file below
/// Settings.outputDirectory/<paramref name="name"/>.
/// Nodes matching the keyword "malware" are preferred; "goodware" nodes are only used
/// when no "malware" node exists. The keyword that matched is passed on as classifier.
/// </summary>
/// <param name="stats">Statistics collection providing the nodes and their vertex statistics.</param>
/// <param name="name">Name used in log messages and as the sub-directory of the output directory.</param>
public static void addModelData(QDFGStatCollection stats, String name)
{
    Console.WriteLine("Model Data Name: " + name);

    // Try "malware" first and fall back to "goodware"; the classifier follows the keyword that matched.
    String classifier = "malware";
    List<DFTNode> relevantNodes = stats.getNodesByKeyword(classifier);
    if (relevantNodes.Count == 0)
    {
        classifier = "goodware";
        relevantNodes = stats.getNodesByKeyword(classifier);
    }

    if (relevantNodes.Count == 0)
    {
        logger.Fatal(name + ": FAIL - Unable to add model data, Graph contains no malware or goodware Nodes.");
        return;
    }

    // One model data entry per relevant process node for which model data could be computed.
    List<ModelData> modelData = new List<ModelData>();
    foreach (DFTNode node in relevantNodes)
    {
        // Only process nodes are considered.
        if (node.typeEnum != DFTNodeType.PROCESS)
        {
            continue;
        }

        ModelData entry = computeModelDataForNode(node, stats);
        if (entry == null)
        {
            continue;
        }

        modelData.Add(entry);
        logger.Debug(name + ": OK. Model Data added.");
    }

    if (modelData.Count == 0)
    {
        logger.Fatal(name + ": FAIL - no malware or goodware processes or insufficient samples.");
        return;
    }

    // Target files inside the output directory of this log.
    String modelDataPath = Path.Combine(Settings.outputDirectory, name, Settings.ModelDataFile);
    String timeDataPath = Path.Combine(Settings.outputDirectory, name, Settings.TimeDataFile);

    try
    {
        StringBuilder modelDataText = new StringBuilder();
        StringBuilder timeDataText = new StringBuilder();

        // A new model data file starts with a header line.
        if (!File.Exists(modelDataPath))
        {
            File.AppendAllText(modelDataPath, modelData[0].getHeaderString(Settings.MODEL_DATA_TYPE) + "\r\n");
        }

        // One model data line and one time data line per entry.
        foreach (ModelData entry in modelData)
        {
            modelDataText.AppendLine(entry.getModelDataString(Settings.MODEL_DATA_TYPE, classifier));
            timeDataText.AppendLine(TimeData.getStringTimeData(Settings.TIME_DATA_TYPE, entry));
        }

        // Append the collected lines to their files.
        File.AppendAllText(modelDataPath, modelDataText.ToString());
        File.AppendAllText(timeDataPath, timeDataText.ToString());
    }
    catch (ArgumentException e)
    {
        logger.Fatal(e.Message);
    }
    catch (IOException e)
    {
        logger.Fatal(e.Message);
    }
}
/// <summary>
/// Implementation of the static log processor: builds a single graph from the complete
/// log and writes it as GraphML. Optionally collects statistics for that graph and hands
/// them to the model builder.
/// Note that this method overwrites Settings.MIN_SAMPLES and Settings.timeStepMS.
/// </summary>
/// <param name="logPath">Full path to the log file.</param>
/// <param name="diOutput">DirectoryInfo of the output directory; only its Name is used to build sub-paths.</param>
/// <param name="computeStats">true to decorate the graph with metrics, record statistics and write them to stats.txt; otherwise false.</param>
private void processLogStatic(String logPath, DirectoryInfo diOutput, bool computeStats)
{
    // A static log yields exactly one sample (the final state of the graph),
    // and time is not considered for static graphs.
    Settings.MIN_SAMPLES = 1;
    Settings.timeStepMS = -1;

    // Start from a reset external EventProcessor (LibDFT).
    EventProcessor ep = new EventProcessor();
    ep.freeResources();

    logger.Info("Processing Log (Static): " + logPath);

    // Event log plus the statistics/metrics containers.
    EventLog eventLog = new EventLog(logPath);
    QDFGStatCollection stats = new QDFGStatCollection();
    CombinedMetrics metrics = new CombinedMetrics();

    // Output sub-path (relative) handed to the pre-processing step for this static log.
    String outPath = diOutput.Name + "\\staticLog";

    // Pre-processing creates the output directories and verifies that the log contains
    // nodes of interest; nothing else is done when it reports failure.
    if (!preprocessLog(eventLog, logPath, outPath))
    {
        return;
    }

    checkMemoryCount(logPath + ", Stage: init");

    // The transformer performs all in-place operations on the event log.
    EventLogTransformer transformer = new EventLogTransformer(eventLog);

    // Repair inconsistent time stamps and switch from absolute to relative time.
    logger.Info("Fixing Time Info in Log: " + logPath);
    transformer.fixDates();

    checkMemoryCount(logPath + ", Stage: Fix Dates");

    // Build the graph from the whole log and write it as GraphML.
    DFTGraph staticGraph = EventProcessor.generateGraphFromString(eventLog.ToString());
    String graphML = staticGraph.serializeGraphML();
    String graphFile = diOutput.Name + "\\StaticGraphG_" + Utility.IO.getFileNameFromPath(logPath) + ".graphml";
    Utility.Graph.writeGraphToFile(graphML, graphFile);
    logger.Warn("Wrote Static Graph for: " + logPath);

    // Metrics are applied to a copy of the graph.
    DFTGraph workingGraph = new DFTGraph(staticGraph);

    if (computeStats)
    {
        // The single sample of a static log is recorded as time instant 0.
        logger.Info("Decorating graph: " + logPath);
        metrics.decorate(workingGraph);
        stats.addStats(workingGraph, 0);
    }

    ep = new EventProcessor();
    ep.freeResources();

    if (computeStats)
    {
        String statsFile = Settings.outputDirectory + "\\" + diOutput.Name + "\\stats.txt";
        stats.writeStatsToFile(statsFile);
        logger.Warn("Wrote Stats for: " + logPath + " to file: " + statsFile);
    }

    if (Settings.generateModel)
    {
        ModelBuilder.addModelData(stats, diOutput.Name);
    }
}