/// <summary>
/// Renders all nodes as delimited text: one header row followed by one row per node.
/// </summary>
/// <remarks>
/// Each row holds the node name, one cell per entry of <c>_featureNames</c> and a trailing
/// class label ("goodware", "malware" or "unknown"). A cell whose value is NaN, infinite
/// or equal to <c>_missingValue</c> is written as <c>_missingValueIdentifier</c>; every
/// other value is formatted with <c>_numberFormat</c>. Cells are joined with <c>_separator</c>.
/// </remarks>
/// <returns>The header line plus one line for every node in <c>_nodes</c>.</returns>
public override string ToString()
{
    StringBuilder output = new StringBuilder();

    // Header row: Name, <feature names...>, Class
    output.Append("Name");
    foreach (var column in _featureNames)
    {
        output.Append(_separator + column);
    }
    output.AppendLine(_separator + "Class");

    foreach (GenericNode current in _nodes)
    {
        output.Append(current.Name);

        foreach (var column in _featureNames)
        {
            double cell = current.getFeatureValue(column);
            bool isMissing = Double.IsNaN(cell) || Double.IsInfinity(cell) || cell == _missingValue;

            if (isMissing)
            {
                // DEBUG: output.Append(_separator + column + ":" + _missingValueIdentifier);
                output.Append(_separator + _missingValueIdentifier);
            }
            else
            {
                // DEBUG: output.Append(_separator + column + ":" + cell.ToString(_numberFormat));
                output.Append(_separator + cell.ToString(_numberFormat));
            }
        }

        // Last column: the class label derived from the analyzer's node type.
        Analyzer.NodeType kind = _analyzer.getNodeType(current.Name);
        string label;
        if (kind == Analyzer.NodeType.GOODWARE)
        {
            label = "goodware";
        }
        else if (kind == Analyzer.NodeType.MALWARE)
        {
            label = "malware";
        }
        else
        {
            label = "unknown";
        }
        output.AppendLine(_separator + label);
    }

    return output.ToString();
}
/*
 * private void saveModelToFile(String filePath, MODEL model, String selectedNode, int samples)
 * {
 *     // The basic set of features
 *     List<String> baseFeatureSet = getBaseFeatures();
 *
 *     // Start with the base feature set
 *     List<String> featureSet = baseFeatureSet;
 *
 *     // Obtain the list of node names
 *     List<String> nodeNames = this.getNodeNames();
 *
 *     // Use StringBuilder to build output
 *     StringBuilder sb = new StringBuilder();
 *
 *     List<String> headerValues;
 *     if (model == Model.MODEL.DYNAMIC)
 *     {
 *         headerValues = getBaseFeatures(samples);
 *     }
 *     else if (model == Model.MODEL.STATISTICAL || model == Model.MODEL.STATISTICAL_FILL)
 *     {
 *         headerValues = getStatisticalFeatures(false);
 *     }
 *     else if (model == Model.MODEL.STATISTICAL_COUNT || model == Model.MODEL.STATISTICAL_FILL_COUNT)
 *     {
 *         headerValues = getStatisticalFeatures(true);
 *     }
 *     else
 *     {
 *         headerValues = featureSet;
 *     }
 *
 *     int numFeatureValues = headerValues.Count;
 *     // Append the header line (columns)
 *     sb.Append("Name"); // first column is the name
 *     headerValues.ForEach(feature => sb.Append(";" + feature)); // following columns are dynamic features
 *     sb.AppendLine(";Class"); // last column is the classifier
 *
 *
 *
 *     // Holds stat names and their list of values
 *     Dictionary<string, List<Double>> nodeStats = new Dictionary<string, List<double>>();
 *
 *
 *     // For each node (line)
 *     foreach (String nodeName in nodeNames)
 *     {
 *         if (selectedNode == "" || nodeName == selectedNode)
 *         {
 *
 *             List<String> processedFeatures = new List<String>();
 *
 *             // First column is the node name
 *             sb.Append(nodeName);
 *
 *             // Obtain node stats, store them in previously defined dictionary
 *             nodeStats = this.getAllStatsForNode(nodeName);
 *
 *
 *             // For each feature specified in our feature set
 *             foreach (String feature in featureSet)
 *             {
 *                 String featureBaseName = feature;
 *
 *                 // If we have valid statistics of this feature for the current node
 *                 if (nodeStats.ContainsKey(featureBaseName))
 *                 {
 *                     if (!processedFeatures.Contains(featureBaseName)) // Feature was not processed previously
 *                     {
 *                         if (isBaseFeature(featureBaseName)) // Process base feature
 *                         {
 *                             if (model == Model.MODEL.STATIC)
 *                             {
 *                                 double featureValue = nodeStats[featureBaseName].Last();
 *                                 if (featureValue >= 0)
 *                                     sb.Append(";" + featureValue.ToString(Settings.DOUBLE_FORMAT));
 *                                 else
 *                                     sb.Append(";" + Settings.MISSING_VALUE_IDENTIFIER);
 *                             }
 *                             else if (model == Model.MODEL.DYNAMIC)
 *                             {
 *                                 int count = 0;
 *                                 foreach (double featureValue in nodeStats[featureBaseName])
 *                                 {
 *                                     if (count < samples)
 *                                     {
 *                                         if (featureValue >= 0)
 *                                             sb.Append(";" + featureValue.ToString(Settings.DOUBLE_FORMAT));
 *                                         else
 *                                             sb.Append(";" + Settings.MISSING_VALUE_IDENTIFIER);
 *
 *                                         count++;
 *                                     }
 *                                     else
 *                                     {
 *                                         break;
 *                                     }
 *                                 }
 *
 *                                 while (count < samples)
 *                                 {
 *                                     sb.Append(";" + Settings.MISSING_VALUE_IDENTIFIER);
 *                                     count++;
 *                                 }
 *                             }
 *                             else if (model == Model.MODEL.STATISTICAL || model == Model.MODEL.STATISTICAL_FILL ||
 *                                      model == Model.MODEL.STATISTICAL_COUNT || model == Model.MODEL.STATISTICAL_FILL_COUNT)
 *                             {
 *                                 List<Double> featureValuesForStatistics = new List<Double>();
 *                                 int count = 0;
 *                                 foreach (double featureValue in nodeStats[featureBaseName])
 *                                 {
 *                                     if (count < samples)
 *                                     {
 *                                         if (featureValue >= 0)
 *                                         {
 *                                             featureValuesForStatistics.Add(featureValue);
 *                                             count++;
 *                                         }
 *                                     }
 *                                     else
 *                                     {
 *                                         break;
 *                                     }
 *                                 }
 *
 *                                 if (model == Model.MODEL.STATISTICAL_FILL || model == Model.MODEL.STATISTICAL_FILL_COUNT)
 *                                 {
 *                                     while (count < samples)
 *                                     {
 *                                         featureValuesForStatistics.Add(0);
 *                                         count++;
 *                                     }
 *                                 }
 *
 *                                 // Build Statistic from model
 *                                 // Count, Min, Max, Mean, StdDev, Variance, Skewness, Kurtosis
 *                                 DescriptiveStatistics featureStatistics = new DescriptiveStatistics(featureValuesForStatistics);
 *                                 List<Double> statValues = new List<Double>();
 *
 *                                 statValues.Add(featureStatistics.Minimum);
 *                                 statValues.Add(featureStatistics.Maximum);
 *                                 statValues.Add(featureStatistics.Mean);
 *                                 statValues.Add(featureStatistics.StandardDeviation);
 *                                 statValues.Add(featureStatistics.Variance);
 *                                 statValues.Add(featureStatistics.Skewness);
 *                                 statValues.Add(featureStatistics.Kurtosis);
 *
 *                                 if (model == Model.MODEL.STATISTICAL_COUNT || model == Model.MODEL.STATISTICAL_FILL_COUNT)
 *                                 {
 *                                     sb.Append(";" + featureStatistics.Count);
 *                                 }
 *
 *                                 foreach (Double val in statValues)
 *                                 {
 *                                     if (Double.IsInfinity(val))
 *                                         sb.Append(";" + Settings.MISSING_VALUE_IDENTIFIER);
 *                                     else if (Double.IsNaN(val))
 *                                         sb.Append(";" + Settings.MISSING_VALUE_IDENTIFIER);
 *                                     else
 *                                         sb.Append(";" + val.ToString(Settings.DOUBLE_FORMAT));
 *                                 }
 *                             }
 *                             else
 *                             {
 *                                 throw new NotImplementedException("The chosen Model is not implemented.");
 *                             }
 *                         }
 *
 *                         processedFeatures.Add(featureBaseName);
 *                     }
 *                 }
 *                 else // append missing value
 *                 {
 *                     // Console.WriteLine("Node Stats for " + nodeName + ", does not contain stat: " + featureBaseName);
 *                     for (int i = 0; i < samples; i++)
 *                         sb.Append(";" + Settings.MISSING_VALUE_IDENTIFIER);
 *                 }
 *             }
 *
 *             // Add the classifier
 *             Analyzer.NodeType nodeType = getNodeType(nodeName);
 *             if (nodeType == NodeType.GOODWARE)
 *                 sb.Append(";goodware");
 *             else if (nodeType == NodeType.MALWARE)
 *                 sb.Append(";malware");
 *             else
 *                 sb.Append(";unknown");
 *
 *             sb.Append("\n");
 *         }
 *     }
 *     File.WriteAllText(filePath, sb.ToString());
 * }*/

/// <summary>
/// Writes a semicolon-separated table of base-feature values and fit coefficients,
/// one row per node, to <paramref name="filePath"/>.
/// </summary>
/// <remarks>
/// The header row is "Name", then one column per base feature and per fit-coefficient
/// feature, then "Class". For each exported node the row holds the node name, the values
/// found in the node's statistics and a class label ("goodware", "malware" or "unknown").
/// A feature with no statistics for the node, or a base feature with an empty sample list
/// under LAST_VALUE, is written as <c>Settings.MISSING_VALUE_IDENTIFIER</c>.
/// All lines, header and rows alike, end with the platform line terminator.
/// </remarks>
/// <param name="filePath">Destination handed to <c>File.WriteAllText</c>; an existing file is overwritten.</param>
/// <param name="saveModel">Fit model identifier forwarded to <c>getFitFeatures</c> and <c>getFitFeatureCoeffs</c>.</param>
/// <param name="timeModel">
/// With <c>LAST_VALUE</c> only the final sample of each base feature is written;
/// with any other value every sample of the feature is written.
/// </param>
/// <param name="selectedNode">Name of the single node to export; null or empty exports every node.</param>
public void saveCoefficientsToFile(String filePath, String saveModel, Settings.TIME_MODEL timeModel, String selectedNode)
{
    // Take a copy of the base features: appending the coefficient names directly to the
    // list returned by getBaseFeatures() would modify that list for every other holder of it.
    List<String> featureSet = new List<String>(getBaseFeatures());

    // NOTE(review): the result of getFitFeatures was never used by this method. The call is
    // kept (in its original position) only in case it has side effects -- confirm and remove.
    getFitFeatures(saveModel);

    // One entry per coefficient of each fit feature.
    featureSet.AddRange(getFitFeatureCoeffs(saveModel));

    List<String> nodeNames = this.getNodeNames();
    bool exportAllNodes = String.IsNullOrEmpty(selectedNode);

    StringBuilder sb = new StringBuilder();

    // Header line: Name;<features...>;Class
    sb.Append("Name");
    foreach (String feature in featureSet)
    {
        sb.Append(";" + feature);
    }
    sb.AppendLine(";Class");

    foreach (String nodeName in nodeNames)
    {
        if (!exportAllNodes && nodeName != selectedNode)
        {
            continue;
        }

        // Base names already written for this node. Several coefficient columns map to the
        // same fit feature, whose coefficients are all emitted on first encounter.
        HashSet<String> processedFeatures = new HashSet<String>();

        // First column is the node name.
        sb.Append(nodeName);

        // Stat name -> list of values for this node.
        Dictionary<string, List<Double>> nodeStats = this.getAllStatsForNode(nodeName);

        foreach (String feature in featureSet)
        {
            String featureBaseName = feature;
            if (!isBaseFeature(feature))
            {
                featureBaseName = getFitFeatureName(feature);
            }

            List<Double> values;
            if (!nodeStats.TryGetValue(featureBaseName, out values))
            {
                // No statistics for this feature: one missing-value cell per column.
                sb.Append(";" + Settings.MISSING_VALUE_IDENTIFIER);
                continue;
            }

            if (!processedFeatures.Add(featureBaseName))
            {
                // Already written for this node.
                continue;
            }

            if (isBaseFeature(featureBaseName) && timeModel == Settings.TIME_MODEL.LAST_VALUE)
            {
                // Only the most recent sample is exported. An empty sample list has no
                // "last" element, so it is reported as missing instead of throwing.
                if (values.Count > 0)
                {
                    sb.Append(";" + values[values.Count - 1].ToString(Settings.DOUBLE_FORMAT));
                }
                else
                {
                    sb.Append(";" + Settings.MISSING_VALUE_IDENTIFIER);
                }
            }
            else
            {
                // Base feature under another time model (every sample), or a fit
                // feature (every coefficient).
                foreach (double value in values)
                {
                    sb.Append(";" + value.ToString(Settings.DOUBLE_FORMAT));
                }
            }
        }

        // Last column is the class label.
        Analyzer.NodeType nodeType = getNodeType(nodeName);
        if (nodeType == NodeType.GOODWARE)
        {
            sb.Append(";goodware");
        }
        else if (nodeType == NodeType.MALWARE)
        {
            sb.Append(";malware");
        }
        else
        {
            sb.Append(";unknown");
        }

        // Same terminator as the header line, so the file does not mix line endings.
        sb.AppendLine();
    }

    File.WriteAllText(filePath, sb.ToString());
}