/// <summary>
/// Builds the classification feature set from the raw file bytes.
/// </summary>
/// <param name="response">Response item carrying the file bytes and group label.</param>
/// <returns>The extracted feature data plus a tab-separated training row.</returns>
public override (ClassificationData Data, string Output) FeatureExtraction(ClassifierResponseItem response)
{
    // The first 64 KB of printable strings feed the n-gram featurizer.
    var data = new ClassificationData
    {
        NGramText = GetStrings(response.Data, 0, 65536),
        FileGroupType = (int)response.FileGroup
    };

    var trainingRow = $"\"{data.NGramText}\"\t{(int)response.FileGroup}";

    return (data, trainingRow);
}
/// <summary>
/// Builds the clustering feature set from the head and tail strings of the file.
/// </summary>
/// <param name="response">Response item carrying the file bytes and group label.</param>
/// <returns>The extracted feature data plus a comma-separated training row.</returns>
public override (ClusterData Data, string Output) FeatureExtraction(ClassifierResponseItem response)
{
    // Clamp the tail offset so files shorter than STRING_BYTE_MINIMUM do not
    // produce a negative start index for GetStrings.
    var endOffset = Math.Max(0, response.Data.Length - STRING_BYTE_MINIMUM);

    // Initialize the fields directly (previously they were set to string.Empty
    // and immediately overwritten — dead stores).
    var clusterData = new ClusterData
    {
        StartStringData = GetStrings(response.Data, 0, STRING_BYTE_MINIMUM),
        EndStringData = GetStrings(response.Data, endOffset, STRING_BYTE_MINIMUM)
    };

    return (clusterData, $"{(int)response.FileGroup},{clusterData.StartStringData},{clusterData.EndStringData}");
}
/// <summary>
/// Loads the embedded ML.NET model, runs a prediction on the extracted
/// features, and writes the result back onto the response.
/// </summary>
/// <param name="response">Response item carrying the file bytes to classify.</param>
/// <param name="options">Command-line options forwarded to the response update.</param>
/// <returns>The same response item, updated with the prediction result.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="response"/> is null.</exception>
public ClassifierResponseItem Predict(ClassifierResponseItem response, ClassifierCommandLineOptions options)
{
    if (response == null)
    {
        throw new ArgumentNullException(nameof(response));
    }

    var assembly = typeof(BasePredictionData).GetTypeInfo().Assembly;

    // Dispose the embedded-resource stream once the model has been loaded
    // (previously it was leaked). The schema out-parameter is unused.
    using (var resource = assembly.GetManifestResourceStream($"FileClassifier.lib.Models.{MODEL_NAME}"))
    {
        var model = MlContext.Model.Load(resource, out _);
        var predictor = MlContext.Model.CreatePredictionEngine<T, TK>(model);

        var (data, _) = FeatureExtraction(response);
        var result = predictor.Predict(data);

        return UpdateResponse(result, response, options);
    }
}
/// <summary>
/// Runs feature extraction over every file in the training folder, writes the
/// extracted rows to a timestamped text file, and logs a per-group breakdown.
/// </summary>
/// <param name="options">Options carrying the folder of training data.</param>
/// <returns>Path of the file containing the extracted feature rows.</returns>
protected string FeatureExtractFolder(TrainerCommandLineOptions options)
{
    var fileName = Path.Combine(AppContext.BaseDirectory, $"{DateTime.Now.Ticks}.txt");
    var files = Directory.GetFiles(options.FolderOfData);

    Logger<TrainerCommandLineOptions>.Debug($"{files.Length} Files found for training...", options);

    var stopWatch = DateTime.Now;

    var extractions = new ConcurrentQueue<string>();
    var classifications = new ConcurrentQueue<FileGroupType>();

    // Extraction is CPU/IO bound per file; the concurrent queues make the
    // parallel enqueues safe.
    Parallel.ForEach(files, file =>
    {
        var response = new ClassifierResponseItem(File.ReadAllBytes(file), file, true);

        var (data, output) = FeatureExtraction(response);

        classifications.Enqueue(response.FileGroup);
        extractions.Enqueue(output);
    });

    File.WriteAllText(fileName, string.Join(System.Environment.NewLine, extractions));

    // Use each group's own count instead of re-scanning the whole queue per
    // group (previously an accidental O(n * groups) pass); the (double) cast
    // in the output string was a no-op and is dropped.
    var featureBreakdown = (from grouping in classifications.GroupBy(a => a)
                            let count = grouping.Count()
                            let percentage = Math.Round((double)count / files.Length * 100.0, 0)
                            select $"{grouping.Key}: {count} ({percentage}%)").ToList();

    Logger<TrainerCommandLineOptions>.Debug(string.Join("|", featureBreakdown), options);
    Logger<TrainerCommandLineOptions>.Debug($"Feature Extraction took {DateTime.Now.Subtract(stopWatch).TotalSeconds} seconds", options);

    return fileName;
}
/// <summary>
/// Applies the cluster prediction to the response, logs the per-cluster
/// distances, and marks the response successful.
/// </summary>
/// <param name="prediction">Cluster prediction produced by the model.</param>
/// <param name="response">Response item to update in place.</param>
/// <param name="options">Options forwarded to the logger.</param>
/// <returns>The updated response item.</returns>
protected override ClassifierResponseItem UpdateResponse(ClusterDataPrediction prediction, ClassifierResponseItem response, ClassifierCommandLineOptions options)
{
    response.FileGroup = (FileGroupType)prediction.PredictedClusterId;

    // Distances are zero-indexed while cluster ids are one-based, hence the +1
    // (enum + int arithmetic yields the enum member with value index + 1).
    var distances = prediction.Distances
        .Select((distance, index) => $"{(FileGroupType)index+1}:{distance}")
        .ToList();

    Logger<ClassifierCommandLineOptions>.Debug($"Distances: {string.Join("|", distances)}", options);

    response.UpdateStatus(ClassifierStatus.SUCCESS);

    return response;
}
/// <summary>
/// Copies the binary-classification result (verdict and confidence score)
/// onto the response.
/// </summary>
/// <param name="prediction">Classification prediction produced by the model.</param>
/// <param name="response">Response item to update in place.</param>
/// <param name="options">Command-line options (unused by this overload).</param>
/// <returns>The updated response item.</returns>
protected override ClassifierResponseItem UpdateResponse(ClassificationDataPrediction prediction, ClassifierResponseItem response, ClassifierCommandLineOptions options)
{
    // NOTE(review): unlike the cluster overload, this path never calls
    // response.UpdateStatus(ClassifierStatus.SUCCESS) — confirm the caller
    // sets the status, or whether that call is missing here.
    response.IsMalicious = prediction.Prediction;
    response.Confidence = prediction.Score;

    return response;
}
/// <summary>
/// Extracts the model-specific feature data from the raw response bytes.
/// </summary>
/// <param name="response">Response item carrying the file bytes to featurize.</param>
/// <returns>The typed feature data and its serialized training-row representation.</returns>
public abstract (T Data, string Output) FeatureExtraction(ClassifierResponseItem response);
/// <summary>
/// Writes the model's prediction result back onto the response item.
/// </summary>
/// <param name="prediction">Prediction produced by the model.</param>
/// <param name="response">Response item to update in place.</param>
/// <param name="options">Command-line options available to the implementation.</param>
/// <returns>The updated response item.</returns>
protected abstract ClassifierResponseItem UpdateResponse(TK prediction, ClassifierResponseItem response, ClassifierCommandLineOptions options);
// Constructs a response from an empty byte array.
// NOTE(review): this test contains no assertions — it only verifies the
// constructor does not throw on empty input. Consider asserting the resulting
// response state (the test framework attribute is not visible in this chunk).
public void EmptyBytes() { var response = new ClassifierResponseItem(new byte[0], "fakeness"); }
// Constructs a response from null data and a null filename.
// NOTE(review): no assertion and no expected-exception marker visible — if the
// constructor is supposed to throw on null input, this test would not catch a
// regression. Confirm intent and add an assertion or exception check.
public void NullTest() { var response = new ClassifierResponseItem(null, null); }