Esempio n. 1
0
        /// <summary>
        /// Builds the classification feature object for a single response item.
        /// </summary>
        /// <param name="response">Item supplying the raw bytes (<c>Data</c>) and the file group (<c>FileGroup</c>).</param>
        /// <returns>
        /// The populated <see cref="ClassificationData"/> and a text line made of the quoted
        /// n-gram text, a tab character, and the file group as an integer.
        /// </returns>
        public override (ClassificationData Data, string Output) FeatureExtraction(ClassifierResponseItem response)
        {
            // GetStrings is invoked from offset 0 with 65536 as its third argument.
            var extractedText = GetStrings(response.Data, 0, 65536);
            var groupId = (int)response.FileGroup;

            var features = new ClassificationData();
            features.NGramText = extractedText;
            features.FileGroupType = groupId;

            var outputLine = $"\"{features.NGramText}\"\t{groupId}";

            return (features, outputLine);
        }
Esempio n. 2
0
        /// <summary>
        /// Builds the clustering feature object for a single response item from the strings
        /// found at the start and at the end of its data.
        /// </summary>
        /// <param name="response">Item supplying the raw bytes (<c>Data</c>) and the file group (<c>FileGroup</c>).</param>
        /// <returns>
        /// The populated <see cref="ClusterData"/> and a comma-separated line containing the
        /// file group as an integer, the start strings, and the end strings.
        /// </returns>
        public override (ClusterData Data, string Output) FeatureExtraction(ClassifierResponseItem response)
        {
            // Inputs shorter than STRING_BYTE_MINIMUM (including empty data) would otherwise
            // produce a negative start offset for the tail window; clamp it to 0 instead.
            // A null Data is treated as length 0 here so the offset computation itself cannot
            // throw. NOTE(review): whether GetStrings accepts null data is not visible here.
            var dataLength = response.Data?.Length ?? 0;
            var tailStart  = dataLength > STRING_BYTE_MINIMUM ? dataLength - STRING_BYTE_MINIMUM : 0;

            var clusterData = new ClusterData
            {
                StartStringData = GetStrings(response.Data, 0, STRING_BYTE_MINIMUM),
                EndStringData   = GetStrings(response.Data, tailStart, STRING_BYTE_MINIMUM)
            };

            return (clusterData, $"{(int)response.FileGroup},{clusterData.StartStringData},{clusterData.EndStringData}");
        }
Esempio n. 3
0
        /// <summary>
        /// Loads the embedded model, extracts features from <paramref name="response"/>,
        /// runs a prediction and returns the response updated with the result.
        /// </summary>
        /// <param name="response">The item to classify.</param>
        /// <param name="options">Command line options forwarded to <c>UpdateResponse</c>.</param>
        /// <returns>The value returned by <c>UpdateResponse</c> for the prediction.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="response"/> is null.</exception>
        /// <exception cref="InvalidOperationException">The embedded model resource could not be found.</exception>
        public ClassifierResponseItem Predict(ClassifierResponseItem response, ClassifierCommandLineOptions options)
        {
            if (response == null)
            {
                throw new ArgumentNullException(nameof(response));
            }

            var assembly = typeof(BasePredictionData).GetTypeInfo().Assembly;

            var resourceName = $"FileClassifier.lib.Models.{MODEL_NAME}";

            // The manifest stream is disposable; release it once prediction is finished.
            using (var resource = assembly.GetManifestResourceStream(resourceName))
            {
                // GetManifestResourceStream returns null when the resource name is not found;
                // fail with a descriptive message instead of passing null to the model loader.
                if (resource == null)
                {
                    throw new InvalidOperationException($"Embedded model resource '{resourceName}' was not found.");
                }

                // The loaded schema is not used, so it is discarded.
                var model = MlContext.Model.Load(resource, out _);

                var predictor = MlContext.Model.CreatePredictionEngine<T, TK>(model);

                var (data, _) = FeatureExtraction(response);

                var result = predictor.Predict(data);

                return UpdateResponse(result, response, options);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Runs feature extraction over every file in <c>options.FolderOfData</c> and writes
        /// one output line per file to a new text file under the application base directory.
        /// </summary>
        /// <param name="options">Trainer options; <c>FolderOfData</c> is the folder that is scanned.</param>
        /// <returns>Full path of the text file that was written.</returns>
        /// <remarks>
        /// Files are processed in parallel, so the order of lines in the output file is not
        /// guaranteed to match the order of the files in the folder.
        /// </remarks>
        protected string FeatureExtractFolder(TrainerCommandLineOptions options)
        {
            var fileName = Path.Combine(AppContext.BaseDirectory, $"{DateTime.Now.Ticks}.txt");

            var files = Directory.GetFiles(options.FolderOfData);

            Logger<TrainerCommandLineOptions>.Debug($"{files.Length} Files found for training...", options);

            // Stopwatch is monotonic; DateTime.Now differences can be skewed by clock adjustments.
            var stopWatch = System.Diagnostics.Stopwatch.StartNew();

            var extractions = new ConcurrentQueue<string>();

            var classifications = new ConcurrentQueue<FileGroupType>();

            Parallel.ForEach(files, file =>
            {
                var response = new ClassifierResponseItem(File.ReadAllBytes(file), file, true);

                // Only the textual output is needed here; the feature object is discarded.
                var (_, output) = FeatureExtraction(response);

                classifications.Enqueue(response.FileGroup);

                extractions.Enqueue(output);
            });

            File.WriteAllText(fileName, string.Join(System.Environment.NewLine, extractions));

            // Count each group once from its grouping instead of rescanning the whole queue per group.
            var featureBreakdown = classifications
                .GroupBy(classification => classification)
                .Select(group =>
                {
                    var count = group.Count();
                    var percentage = Math.Round((double)count / files.Length * 100.0, 0);

                    return $"{group.Key}: {count} ({percentage}%)";
                })
                .ToList();

            Logger<TrainerCommandLineOptions>.Debug(string.Join("|", featureBreakdown), options);

            stopWatch.Stop();

            Logger<TrainerCommandLineOptions>.Debug($"Feature Extraction took {stopWatch.Elapsed.TotalSeconds} seconds", options);

            return fileName;
        }
Esempio n. 5
0
        /// <summary>
        /// Copies a clustering prediction onto the response: sets the file group from the
        /// predicted cluster id, logs the per-cluster distances and marks the response successful.
        /// </summary>
        /// <param name="prediction">Prediction providing <c>PredictedClusterId</c> and <c>Distances</c>.</param>
        /// <param name="response">The response item to update.</param>
        /// <param name="options">Options handed to the logger.</param>
        /// <returns>The same <paramref name="response"/> instance after it has been updated.</returns>
        protected override ClassifierResponseItem UpdateResponse(ClusterDataPrediction prediction, ClassifierResponseItem response, ClassifierCommandLineOptions options)
        {
            response.FileGroup = (FileGroupType)prediction.PredictedClusterId;

            // Each distance is labelled with the file group at (position + 1).
            // NOTE(review): presumably because cluster ids are 1-based - confirm.
            var distanceSummary = string.Join(
                "|",
                prediction.Distances.Select((distance, index) => $"{((FileGroupType)index) + 1}:{distance}"));

            Logger<ClassifierCommandLineOptions>.Debug($"Distances: {distanceSummary}", options);

            response.UpdateStatus(ClassifierStatus.SUCCESS);

            return response;
        }
Esempio n. 6
0
        /// <summary>
        /// Copies a classification prediction onto the response.
        /// </summary>
        /// <param name="prediction">Prediction providing <c>Score</c> and <c>Prediction</c>.</param>
        /// <param name="response">The response item to update.</param>
        /// <param name="options">Not used by this override.</param>
        /// <returns>The same <paramref name="response"/> instance after it has been updated.</returns>
        protected override ClassifierResponseItem UpdateResponse(ClassificationDataPrediction prediction, ClassifierResponseItem response, ClassifierCommandLineOptions options)
        {
            // NOTE(review): no status update is performed here - confirm that is intentional.
            var score       = prediction.Score;
            var isMalicious = prediction.Prediction;

            response.Confidence  = score;
            response.IsMalicious = isMalicious;

            return response;
        }
Esempio n. 7
0
 /// <summary>
 /// Extracts features from a response item.
 /// </summary>
 /// <param name="response">The item to extract features from.</param>
 /// <returns>
 /// A tuple of the extracted feature object (<c>Data</c>) and a string form of the
 /// extraction (<c>Output</c>).
 /// </returns>
 public abstract (T Data, string Output) FeatureExtraction(ClassifierResponseItem response);
Esempio n. 8
0
 /// <summary>
 /// Applies a prediction to a response item.
 /// </summary>
 /// <param name="prediction">The prediction to apply.</param>
 /// <param name="response">The response item the prediction relates to.</param>
 /// <param name="options">Classifier command line options available to the implementation.</param>
 /// <returns>The response item reflecting the prediction.</returns>
 protected abstract ClassifierResponseItem UpdateResponse(TK prediction, ClassifierResponseItem response, ClassifierCommandLineOptions options);
 /// <summary>
 /// Constructs a <see cref="ClassifierResponseItem"/> from a zero-length byte array.
 /// The created instance is not inspected further.
 /// </summary>
 public void EmptyBytes()
 {
     var emptyItem = new ClassifierResponseItem(new byte[] { }, "fakeness");
 }
 /// <summary>
 /// Constructs a <see cref="ClassifierResponseItem"/> with null for both arguments.
 /// The created instance is not inspected further.
 /// </summary>
 public void NullTest()
 {
     var nullItem = new ClassifierResponseItem(null, null);
 }