Ejemplo n.º 1
0
        /// <summary>
        /// Runs EPI classification over the bundled sample captures with a default
        /// <see cref="FeatureSelector"/>, then prints and saves the resulting statistics.
        /// </summary>
        public void EPI_FullFeatureSet_fast()
        {
            var testContext = new AppIdentTestContext(nameof(this.EPI_FullFeatureSet_fast))
            {
                MinFlows = 10,
                FeatureSelectionTreshold     = 0.5,
                TrainingToVerificationRation = 0.7
            };
            this.AppIdentTestContext = testContext;

            // Initial pass over a single capture; the conversation list is cleared
            // right afterwards, before the full set of captures is processed.
            this.ProcessPcapFile(SnoopersPcaps.Default.app_identification_streamSkypeHttpTls_cap);

            var selector = new FeatureSelector();

            this.L7Conversations.Clear();

            // Captures that feed the data source built below.
            this.ProcessPcapFile(SnoopersPcaps.Default.app_identification_streamSkypeHttpTls_cap);
            this.ProcessPcapFile(SnoopersPcaps.Default.app_identification_dnsHttpTls_cap);
            this.ProcessPcapFile(SnoopersPcaps.Default.app_identification_learn1_cap);
            this.ProcessPcapFile(SnoopersPcaps.Default.app_identification_refSkype_cap);
            this.ProcessPcapFile(SnoopersPcaps.Default.app_identification_testM1_cap);
            this.ProcessPcapFile(SnoopersPcaps.Default.app_identification_testM2_cap);

            var dataSource = this.AppIdentService.CreateAppIdentDataSource(
                this.L7Conversations,
                this.AppIdentTestContext.MinFlows,
                this.AppIdentTestContext.TrainingToVerificationRation);

            var statistics = this.AppIdentService.EpiClasify(
                dataSource,
                selector,
                this.AppIdentTestContext);

            statistics.PrintResults();
            this.AppIdentTestContext.Save(statistics);
            this.AppIdentTestContext.Save();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs EPI classification over every capture registered as testing data in a
        /// local directory, then prints and saves the resulting statistics.
        /// </summary>
        public void EPI_FullFeatureSet_ICDF()
        {
            var testContext = new AppIdentTestContext(nameof(this.EPI_FullFeatureSet_ICDF))
            {
                MinFlows = 10,
                FeatureSelectionTreshold     = 0.5,
                TrainingToVerificationRation = 0.7
            };
            this.AppIdentTestContext = testContext;

            var captures = new AppIdentPcapSource();
            var selector = new FeatureSelector();

            // NOTE(review): the capture directory is a hard-coded local path.
            captures.AddTesting(@"D:\pcaps\AppIdent-TestingData\captured\", "*.cap|*.pcap", true);
            this.AppIdentTestContext.Save(captures);

            foreach (var capture in captures.TestingPcaps)
            {
                this.ProcessPcapFile(capture);
            }

            var dataSource = this.AppIdentService.CreateAppIdentDataSource(
                this.L7Conversations,
                this.AppIdentTestContext.MinFlows,
                this.AppIdentTestContext.TrainingToVerificationRation);

            var statistics = this.AppIdentService.EpiClasify(
                dataSource,
                selector,
                this.AppIdentTestContext);

            statistics.PrintResults();
            this.AppIdentTestContext.Save(statistics);
            this.AppIdentTestContext.Save();
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Lazily yields <c>Size</c> subsets, each obtained by running the feature selector
 /// over the batch's training instances.
 /// </summary>
 /// <param name="batch">Batch whose <c>TrainInstances</c> are passed to the feature selector.</param>
 /// <param name="rand">Random source forwarded to the feature selector.</param>
 public override IEnumerable<Subset> GetSubsets(Batch batch, IRandom rand)
 {
     int produced = 0;
     while (produced < Size)
     {
         yield return FeatureSelector.SelectFeatures(batch.TrainInstances, rand);
         produced++;
     }
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Convenience overload that forwards to the overload exposing the evaluator and
 /// drops the evaluator it hands back.
 /// </summary>
 /// <param name="appIdentDataSource">Data source forwarded unchanged.</param>
 /// <param name="featureSelector">Feature selector forwarded unchanged.</param>
 /// <param name="appIdentTestContext">Optional test context forwarded unchanged.</param>
 /// <returns>The statistics meter returned by the forwarded call.</returns>
 public ApplicationProtocolClassificationStatisticsMeter EpiClasify(
     AppIdentDataSource appIdentDataSource,
     FeatureSelector featureSelector,
     AppIdentTestContext appIdentTestContext = null)
 {
     // The evaluator is of no interest to callers of this overload.
     var statistics = this.EpiClasify(
         appIdentDataSource,
         featureSelector,
         out EPIEvaluator _,
         appIdentTestContext);

     return statistics;
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Entry point. Builds a data set from the two file arguments, runs forward feature
        /// selection with a k-nearest-neighbour classifier on the first random split and
        /// reports the score obtained on the second split together with the elapsed time.
        /// </summary>
        /// <param name="args">Exactly two arguments, passed on to <c>DSBuilder</c>.</param>
        private static void Main(string[] args)
        {
            if (args.Length != 2)
            {
                Console.WriteLine("KNN.exe *.names *.data");
                return;
            }

            Console.WriteLine("Start Time: {0}", DateTime.Now);

            // Measure elapsed time with a stopwatch rather than by subtracting wall-clock
            // readings, which can jump when the system clock is adjusted.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            var     builder = new DSBuilder(args);
            DataSet data    = builder.BuildDataSet();

            var sets = data.RandomInstance(800);
            var knn  = new KNearest(sets[0]);
            var fs   = new FeatureSelector(knn);

            // Enumerable.Range takes (start, count), not (start, end): use the full feature
            // count so the last feature index is a candidate too. The output column is
            // excluded explicitly by the filter.
            var candidateFeatures = Enumerable.Range(0, data.Features.Count)
                                              .Where(x => x != data.OutputIndex)
                                              .ToList();
            var optimal = fs.ForwardFeatureSelect(candidateFeatures);

            knn.K        = optimal.Key;
            knn.Features = optimal.Value;
            Console.WriteLine("Final Result: {0:0.##}% with K:{1} using Features:{2}",
                              fs.Test(sets[1].DataEntries) * 100.0,
                              knn.K,
                              string.Join(", ", knn.Features.Select(i => data.Features[i].Name).ToArray()));
            Console.WriteLine("Run-Time: {0}", stopwatch.Elapsed);
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Lazily yields <c>Size</c> subsets; each one is produced by the feature selector
 /// from a fresh bootstrap-sample view over the underlying data.
 /// </summary>
 /// <param name="batch">Not referenced by this implementation.</param>
 /// <param name="rand">Random source forwarded to the feature selector.</param>
 public override IEnumerable<Subset> GetSubsets(Batch batch, IRandom rand)
 {
     int produced = 0;
     while (produced < Size)
     {
         // REVIEW: Consider ways to reintroduce "balanced" samples.
         var bootstrapView = new BootstrapSampleTransform(Host, new BootstrapSampleTransform.Arguments(), Data.Data);
         var roleNames     = Data.Schema.GetColumnRoleNames();
         var bootstrapData = RoleMappedData.Create(bootstrapView, roleNames);

         yield return FeatureSelector.SelectFeatures(bootstrapData, rand);
         produced++;
     }
 }
Ejemplo n.º 7
0
 /// <summary>
 /// Freezes the selector for the given feature and context, then lets the frozen
 /// result choose between the two supplied delegates.
 /// </summary>
 /// <typeparam name="T">Type of the value produced by either delegate.</typeparam>
 /// <param name="selector">Selector to freeze.</param>
 /// <param name="feature">Feature name passed to <c>Freeze</c>.</param>
 /// <param name="context">Feature context passed to <c>Freeze</c>.</param>
 /// <param name="ifEnabled">Delegate passed as the first argument of the frozen <c>Execute</c>.</param>
 /// <param name="ifDisabled">Delegate passed as the second argument of the frozen <c>Execute</c>.</param>
 /// <returns>Whatever the frozen selector's <c>Execute</c> returns.</returns>
 public static T Execute<T>(
     this FeatureSelector selector,
     string feature,
     FeatureContext context,
     Func<T> ifEnabled,
     Func<T> ifDisabled)
 {
     var frozen = selector.Freeze(feature, context);
     return frozen.Execute(ifEnabled, ifDisabled);
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Runs a random-forest grid search over the training set and returns the best
        /// parameters found, saving them to the test context when one is supplied.
        /// </summary>
        /// <param name="appIdentDataSource">Data source whose <c>TrainingSet</c> is searched.</param>
        /// <param name="featureSelector">Feature selector used to build the Accord source.</param>
        /// <param name="appIdentTestContext">Optional context; when non-null it receives the best parameters.</param>
        /// <returns>The best parameters reported by the grid search.</returns>
        public GridSearchParameterCollection RandomForestGetBestParameters(
            AppIdentDataSource appIdentDataSource,
            FeatureSelector featureSelector,
            AppIdentTestContext appIdentTestContext = null)
        {
            var gridSearch     = new AccordAppIdent();
            var trainingSource = this.GetAppIdentAcordSource(appIdentDataSource.TrainingSet, featureSelector);

            // The minimum error reported by the search is not used here.
            gridSearch.GetBestRandomForestsWithGridSearch(trainingSource, out var best, out _);

            appIdentTestContext?.Save(best);
            return best;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Asks the selector for a feature and, if it provides one, adds it to the unit.
        /// </summary>
        /// <param name="unit">Unit whose <c>Features</c> collection receives the feature.</param>
        /// <param name="selectFeature">Selector invoked with the budget by reference.</param>
        /// <param name="valueBudget">Budget handed to the selector, which may modify it.</param>
        /// <returns>
        /// The budget as left by the selector when a feature was added; <c>0</c> when the
        /// selector returned <c>null</c>.
        /// </returns>
        private double AddFeature(UnitBuilder unit, FeatureSelector selectFeature, double valueBudget)
        {
            var picked = selectFeature(ref valueBudget);

            if (picked != null)
            {
                unit.Features.Add(picked);
                return valueBudget;
            }

            return 0;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds an EPI evaluator, creates its application protocol models from the training
        /// set and computes statistics on the verification set.
        /// </summary>
        /// <param name="appIdentDataSource">Source of the training and verification sets.</param>
        /// <param name="featureSelector">Feature selector handed to the evaluator.</param>
        /// <param name="epiEvaluator">Receives the evaluator created by this call.</param>
        /// <param name="appIdentTestContext">Optional context; when non-null it receives the statistics.</param>
        /// <returns>The statistics computed on the verification set.</returns>
        public ApplicationProtocolClassificationStatisticsMeter EpiClasify(
            AppIdentDataSource appIdentDataSource,
            FeatureSelector featureSelector,
            out EPIEvaluator epiEvaluator,
            AppIdentTestContext appIdentTestContext = null)
        {
            // Assign the out parameter first so it is set before any model work begins.
            epiEvaluator = new EPIEvaluator(featureSelector);

            var trainingSet = appIdentDataSource.TrainingSet;
            epiEvaluator.CreateApplicationProtocolModels(trainingSet);

            var verificationSet = appIdentDataSource.VerificationSet;
            var statistics      = epiEvaluator.ComputeStatistics(verificationSet);

            appIdentTestContext?.Save(statistics);
            return statistics;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Classifies the verification set with the given model and accumulates, per sample,
        /// the predicted label against the label stored in the Accord source.
        /// </summary>
        /// <param name="appIdentDataSource">Data source whose <c>VerificationSet</c> is classified.</param>
        /// <param name="model">Classifier whose <c>Decide</c> produces the predictions.</param>
        /// <param name="featureSelector">Feature selector used to build the Accord source.</param>
        /// <param name="appIdentTestContext">Context that receives the resulting statistics.</param>
        /// <returns>The populated statistics meter.</returns>
        public ApplicationProtocolClassificationStatisticsMeter AccordClassify(
            AppIdentDataSource appIdentDataSource,
            MulticlassClassifierBase model,
            FeatureSelector featureSelector,
            AppIdentTestContext appIdentTestContext)
        {
            var statistics         = new ApplicationProtocolClassificationStatisticsMeter();
            var verificationSource = this.GetAppIdentAcordSource(appIdentDataSource.VerificationSet, featureSelector);
            var predictions        = model.Decide(verificationSource.Samples);

            for (var sampleIndex = 0; sampleIndex < predictions.Length; sampleIndex++)
            {
                // Map the predicted integer class back to its label before comparing.
                var predictedLabel = verificationSource.LabelsFromInteges[predictions[sampleIndex]];
                var sourceLabel    = verificationSource.Labels[sampleIndex];

                statistics.UpdateStatistics(predictedLabel, sourceLabel);
            }

            appIdentTestContext.Save(statistics);
            return statistics;
        }
Ejemplo n.º 12
0
        /// <summary>
        /// With the evaluator reporting the feature as off, <c>CreateAsync</c> must log an
        /// information message mentioning "OldGetSomething", "Test Feature" and "off".
        /// </summary>
        public async Task CreateAsync_logs_off_with_the_correct_type_when_feature_is_off()
        {
            // Arrange
            var featureEvaluator = Substitute.For<IFeatureEvaluator>();
            var testFeature      = new SimpleTestFeature(featureEvaluator);
            var logger           = Logger.For<FeatureSelector<SimpleTestFeature, NewGetSomething, OldGetSomething, IGetSomething>>();
            var selector         = new FeatureSelector<SimpleTestFeature, NewGetSomething, OldGetSomething, IGetSomething>(
                testFeature,
                () => new NewGetSomething(),
                () => new OldGetSomething(),
                logger);

            featureEvaluator.IsOnAsync(Arg.Any<string>()).Returns(false);

            // Act: only the logging side effect matters, the created instance is not inspected.
            await selector.CreateAsync();

            // Assert
            logger.Received().LogInformation(s => s.Contains("OldGetSomething") && s.Contains("Test Feature") && s.Contains("off"));
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Creates the recognition modules, subscribes the gesture and segmentation modules
        /// to the data processor's events and shows the console window.
        /// </summary>
        public RecognitionController()
        {
            // Module initialization. The gesture and segmentation modules are given this
            // controller instance while it is still being constructed.
            _dataProcessor      = new DataProcessor();
            _gestureModule      = new GestureModule(this);
            _segmentationModule = new SegmentationModule(this);
            _featureSelector    = new FeatureSelector();
            _NLPModule          = new NLPModule();
            _classifier         = new Classifier();
            _postprocessor      = new Postprocessor();

            // NOTE(review): the subscriptions below go through m_-prefixed members while the
            // assignments above target _-prefixed ones; confirm both names refer to the same
            // objects, otherwise these handlers are attached to something not set up here.

            // Data transfer event: the data warehouse notifies the gesture module.
            m_dataProcessor.m_dataWarehouse.m_dataTransferEvent += m_gestureModule.OnDataTransfer;
            // Data ready event: segmentation is subscribed before the gesture module.
            m_dataProcessor.m_dataTransferEvent += m_segmentationModule.OnNewFrameDataReady;
            m_dataProcessor.m_dataTransferEvent += m_gestureModule.OnNewFrameDataReady;
            // Console manager
            ConsoleManager.Show();
        }
        /// <summary>
        /// Creates the recognition modules, subscribes the gesture and segmentation modules
        /// to the data processor's events and shows the console window.
        /// </summary>
        public RecognitionController()
        {
            // Module initialization. The gesture and segmentation modules are given this
            // controller instance while it is still being constructed.
            _dataProcessor = new DataProcessor();
            _gestureModule = new GestureModule(this);
            _segmentationModule = new SegmentationModule(this);
            _featureSelector = new FeatureSelector();
            _NLPModule = new NLPModule();
            _classifier = new Classifier();
            _postprocessor = new Postprocessor();

            // NOTE(review): the subscriptions below go through m_-prefixed members while the
            // assignments above target _-prefixed ones; confirm both names refer to the same
            // objects, otherwise these handlers are attached to something not set up here.

            // Data transfer event: the data warehouse notifies the gesture module.
            m_dataProcessor.m_dataWarehouse.m_dataTransferEvent += m_gestureModule.OnDataTransfer;
            // Data ready event: segmentation is subscribed before the gesture module.
            m_dataProcessor.m_dataTransferEvent += m_segmentationModule.OnNewFrameDataReady;
            m_dataProcessor.m_dataTransferEvent += m_gestureModule.OnNewFrameDataReady;
            // Console manager
            ConsoleManager.Show();
        }
Ejemplo n.º 15
0
        /// <summary>
        /// With the evaluator reporting the feature as off, <c>CreateAsync</c> must return an
        /// <c>OldGetSomething</c> whose <c>GetSomething</c> yields "Old".
        /// </summary>
        public async Task CreateAsync_creates_the_correct_type_when_feature_is_off()
        {
            // Arrange (no logger is supplied to the selector in this test)
            var featureEvaluator = Substitute.For<IFeatureEvaluator>();
            var testFeature      = new SimpleTestFeature(featureEvaluator);
            var selector         = new FeatureSelector<SimpleTestFeature, NewGetSomething, OldGetSomething, IGetSomething>(
                testFeature,
                () => new NewGetSomething(),
                () => new OldGetSomething(),
                null);

            featureEvaluator.IsOnAsync(Arg.Any<string>()).Returns(false);

            // Act
            var created = await selector.CreateAsync();
            var answer  = created.GetSomething();

            // Assert
            created.Should().BeOfType<OldGetSomething>();
            answer.Should().Be("Old");
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Cross-validates a random forest model on the training set, prints the training and
        /// validation error statistics, saves the model with the lowest validation error and
        /// returns classification statistics computed on that model's validation data.
        /// </summary>
        /// <param name="appIdentDataSource">Data source whose <c>TrainingSet</c> is cross-validated.</param>
        /// <param name="featureSelector">Feature selector used to build the Accord source.</param>
        /// <param name="bestParameters">Parameters passed to the cross-validation run.</param>
        /// <param name="folds">Number of folds passed to the cross-validation run.</param>
        /// <param name="appIdentTestContext">Context that receives the saved model and the statistics.</param>
        /// <returns>Statistics of the selected model on its validation inputs.</returns>
        /// <exception cref="InvalidOperationException">
        /// The selected model's <c>Tag</c> is not an <c>AccordAppIdent.ValidationDataSource</c>.
        /// </exception>
        public ApplicationProtocolClassificationStatisticsMeter RandomForestCrossValidation(
            AppIdentDataSource appIdentDataSource,
            FeatureSelector featureSelector,
            GridSearchParameterCollection bestParameters,
            int folds,
            AppIdentTestContext appIdentTestContext)
        {
            var precMeasure = new ApplicationProtocolClassificationStatisticsMeter();

            var accordAppIdent      = new AccordAppIdent();
            var appIdentAcordSource = this.GetAppIdentAcordSource(appIdentDataSource.TrainingSet, featureSelector);

            var cvResults = accordAppIdent.GetCrossValidationResultsOfRandomForestModel(appIdentAcordSource, bestParameters, folds);

            Console.WriteLine("### CV Results ###");
            Console.WriteLine("\n### Training stats ###");
            Console.WriteLine(">> model error mean: {0}\n>> model std:  {1}", Math.Round(cvResults.Training.Mean, 6), Math.Round(cvResults.Training.StandardDeviation, 6));
            Console.WriteLine("\n### Validation stats ###");
            Console.WriteLine(">> model error mean: {0}\n>> model std:  {1}", Math.Round(cvResults.Validation.Mean, 6), Math.Round(cvResults.Validation.StandardDeviation, 6));

            // Pick the model whose validation error is the smallest.
            var minError   = cvResults.Validation.Values.Min();
            var bestIndex  = cvResults.Validation.Values.IndexOf(minError);
            var classifier = cvResults.Models[bestIndex];

            var model  = classifier.Model;
            var labels = appIdentAcordSource.Labels.Distinct();
            appIdentTestContext.Save(model, labels);

            // Fail with a descriptive error instead of a NullReferenceException when the
            // tag does not hold the expected validation data.
            var validationDataSource = classifier.Tag as AccordAppIdent.ValidationDataSource;
            if (validationDataSource == null)
            {
                throw new InvalidOperationException(
                    "The selected cross-validation model does not carry an AccordAppIdent.ValidationDataSource in its Tag.");
            }

            var predictedValues = model.Decide(validationDataSource.ValidationInputs);

            for (var j = 0; j < predictedValues.Length; j++)
            {
                // Both the prediction and the validation output are integer classes; map each to its label.
                precMeasure.UpdateStatistics(appIdentAcordSource.LabelsFromInteges[predictedValues[j]],
                                             appIdentAcordSource.LabelsFromInteges[validationDataSource.ValidationOutputs[j]]);
            }
            appIdentTestContext.SaveCrossValidation(precMeasure);

            return precMeasure;
        }
        /// <summary>
        /// Lazily yields <c>Size</c> subsets. A generated-number column is added to the data
        /// and each subset is selected from the rows whose generated value falls in the
        /// i-th of <c>Size</c> equal-width ranges between 0 and 1.
        /// </summary>
        /// <param name="batch">Not referenced by this implementation.</param>
        /// <param name="rand">Supplies the seed for the generated column and is forwarded to the feature selector.</param>
        public override IEnumerable<Subset> GetSubsets(Batch batch, Random rand)
        {
            string tempColumn = Data.Data.Schema.GetTempColumnName();

            var numberOptions = new GenerateNumberTransform.Options
            {
                Columns = new[] { new GenerateNumberTransform.Column { Name = tempColumn } },
                Seed    = (uint)rand.Next()
            };
            IDataTransform view = new GenerateNumberTransform(Host, numberOptions, Data.Data);

            // REVIEW: This won't be very efficient when Size is large.
            for (int partition = 0; partition < Size; partition++)
            {
                var rangeOptions = new RangeFilter.Options
                {
                    Column = tempColumn,
                    Min    = (Double)partition / Size,
                    Max    = (Double)(partition + 1) / Size
                };
                var partitionView = new RangeFilter(Host, rangeOptions, view);
                var partitionData = new RoleMappedData(partitionView, Data.Schema.GetColumnRoleNames());

                yield return FeatureSelector.SelectFeatures(partitionData, rand);
            }
        }
        /// <summary>
        /// Runs feature selection over the compressed training instances and returns the ids
        /// of the features listed in the feature-selection results.
        /// </summary>
        /// <param name="prediction">Not referenced by this implementation.</param>
        /// <returns>
        /// A lazily evaluated sequence of feature ids, one per group name reported by
        /// <c>FeatureSelector.GetResults</c>.
        /// </returns>
        /// <exception cref="Exception">
        /// The label map has no null class or more than one, the training instances and their
        /// locations cannot be paired up, or the class weighting scheme is not recognized.
        /// </exception>
        public override IEnumerable<string> SelectFeatures(Prediction prediction)
        {
            _libLinear.LoadClassificationModelFiles();

            string logPath = Path.Combine(Model.ModelDirectory, "feature_selection_log.txt");

            // Remove any existing log at this path before the run writes a new one.
            System.IO.File.Delete(logPath);

            // Locate the single mapped class number that corresponds to the null label.
            int nullClass = -1;

            foreach (string unmappedLabel in _libLinear.Labels)
            {
                if (unmappedLabel == PointPrediction.NullLabel)
                {
                    if (nullClass == -1)
                    {
                        nullClass = int.Parse(_libLinear.GetMappedLabel(unmappedLabel));
                    }
                    else
                    {
                        throw new Exception("Multiple null classes in label map");
                    }
                }
            }

            if (nullClass == -1)
            {
                throw new Exception("Failed to find null class");
            }

            string featureSelectionTrainingPath = Path.GetTempFileName();
            string groupNamePath = null;

            // The temporary files are removed in the finally block so they do not
            // accumulate when any step below throws.
            try
            {
                // Write each training instance followed by " # " and its location on one line.
                using (FileStream compressedTrainingInstancesFile = new FileStream(CompressedTrainingInstancesPath, FileMode.Open, FileAccess.Read))
                using (GZipStream compressedTrainingInstancesGzip = new GZipStream(compressedTrainingInstancesFile, CompressionMode.Decompress))
                using (StreamReader trainingInstancesFile = new StreamReader(compressedTrainingInstancesGzip))
                using (FileStream compressedTrainingInstanceLocationsFile = new FileStream(CompressedTrainingInstanceLocationsPath, FileMode.Open, FileAccess.Read))
                using (GZipStream compressedTrainingInstanceLocationsGzip = new GZipStream(compressedTrainingInstanceLocationsFile, CompressionMode.Decompress))
                using (StreamReader trainingInstanceLocationsFile = new StreamReader(compressedTrainingInstanceLocationsGzip))
                using (StreamWriter featureSelectionTrainingFile = new StreamWriter(featureSelectionTrainingPath))
                {
                    try
                    {
                        string instance;
                        while ((instance = trainingInstancesFile.ReadLine()) != null)
                        {
                            string location = trainingInstanceLocationsFile.ReadLine();
                            if (location == null)
                            {
                                throw new Exception("Missing location for training instance");
                            }

                            featureSelectionTrainingFile.WriteLine(instance + " # " + location);
                        }

                        // Any non-blank remainder means there are more locations than instances.
                        string extraLocations = trainingInstanceLocationsFile.ReadToEnd().Trim();
                        if (extraLocations != "")
                        {
                            throw new Exception("Extra training instance locations:  " + extraLocations);
                        }

                        trainingInstancesFile.Close();
                        trainingInstanceLocationsFile.Close();
                        featureSelectionTrainingFile.Close();
                    }
                    catch (Exception ex)
                    {
                        // Keep the original exception as the inner exception so its stack trace is not lost.
                        throw new Exception("Failed to read training instances:  " + ex.Message, ex);
                    }
                }

                groupNamePath = Path.GetTempFileName();
                Dictionary<string, string> groupNameFeatureId = new Dictionary<string, string>();

                // Write "<feature number> <group name>" for every model feature known to the
                // classifier, remembering which feature id each group name stands for.
                using (StreamWriter groupNameFile = new StreamWriter(groupNamePath))
                {
                    foreach (PTL.ATT.Models.Feature feature in Model.Features)
                    {
                        int featureNumber;
                        if (_libLinear.TryGetFeatureNumber(feature.Id, out featureNumber))
                        {
                            string groupName = feature.ToString().ReplacePunctuation(" ").RemoveRepeatedWhitespace().Replace(' ', '_').Trim('_');
                            groupNameFile.WriteLine(featureNumber + " " + groupName);
                            groupNameFeatureId.Add(groupName, feature.Id);
                        }
                    }
                    groupNameFile.Close();
                }

                // NOTE(review): the numeric option values below are formatted with the current
                // culture; confirm the consumer of these options parses them the same way.
                Options featureSelectionOptions = new Options();

                featureSelectionOptions.Add(FeatureSelector.Option.ExitOnErrorAction, FeatureSelector.ExitOnErrorAction.ThrowException.ToString());
                featureSelectionOptions.Add(FeatureSelector.Option.FeatureFilters, typeof(ZeroVectorFeatureFilter).FullName + "," + typeof(CosineSimilarityFeatureFilter).FullName);
                featureSelectionOptions.Add(FeatureSelector.Option.FloatingSelection, false.ToString());
                featureSelectionOptions.Add(FeatureSelector.Option.GroupNamePath, groupNamePath);
                featureSelectionOptions.Add(FeatureSelector.Option.LogPath, logPath);
                featureSelectionOptions.Add(FeatureSelector.Option.MaxThreads, Configuration.ProcessorCount.ToString());
                featureSelectionOptions.Add(FeatureSelector.Option.PerformanceIncreaseRequirement, float.Epsilon.ToString());
                featureSelectionOptions.Add(FeatureSelector.Option.Scorer, typeof(SurveillancePlotScorer).FullName);
                featureSelectionOptions.Add(FeatureSelector.Option.TrainingInstancesInMemory, true.ToString());
                featureSelectionOptions.Add(FeatureSelector.Option.TrainingInstancesPath, featureSelectionTrainingPath);
                featureSelectionOptions.Add(FeatureSelector.Option.Verbosity, FeatureSelector.Verbosity.Debug.ToString());
                featureSelectionOptions.Add(SurveillancePlotScorer.Option.IgnoredSurveillanceClasses, nullClass.ToString());
                featureSelectionOptions.Add(CommonWrapper.Option.ClassifyExePath, Configuration.ClassifierTypeOptions[GetType()]["predict"]);
                featureSelectionOptions.Add(CommonWrapper.Option.TrainExePath, Configuration.ClassifierTypeOptions[GetType()]["train"]);
                featureSelectionOptions.Add(LibLinearWrapper.Option.IgnoredProbabilisticClasses, nullClass.ToString());
                featureSelectionOptions.Add(LibLinearWrapper.Option.SumInstanceProbabilities, true.ToString());
                featureSelectionOptions.Add(CrossFoldValidator.Option.RandomizeInstanceBlocks, true.ToString());
                featureSelectionOptions.Add(CrossFoldValidator.Option.InstanceBlockRandomizationSeed, (498734983).ToString());
                featureSelectionOptions.Add(CrossFoldValidator.Option.NumFolds, (2).ToString());
                featureSelectionOptions.Add(CosineSimilarityFeatureFilter.Option.Threshold, (0.98).ToString());

                if (_positiveClassWeighting == PositiveClassWeighting.NegativePositiveRatio)
                {
                    using (FileStream compressedTrainingInstancesFile = new FileStream(CompressedTrainingInstancesPath, FileMode.Open, FileAccess.Read))
                    using (GZipStream compressedTrainingInstancesGzip = new GZipStream(compressedTrainingInstancesFile, CompressionMode.Decompress))
                    using (StreamReader compressedTrainingInstancesReader = new StreamReader(compressedTrainingInstancesGzip))
                    {
                        // One "W<class number>" option per class weight.
                        Dictionary<int, float> classWeight = GetPerClassWeights(compressedTrainingInstancesReader);
                        foreach (KeyValuePair<int, float> weight in classWeight)
                        {
                            featureSelectionOptions.Add((LibLinearWrapper.Option)Enum.Parse(typeof(LibLinearWrapper.Option), "W" + weight.Key), weight.Value.ToString());
                        }
                    }
                }
                else if (_positiveClassWeighting != PositiveClassWeighting.None)
                {
                    throw new Exception("Unrecognized positive class weighting scheme:  " + _positiveClassWeighting);
                }

                FeatureSelector.Run(_libLinear, featureSelectionOptions);

                float score;
                Dictionary<string, Tuple<int, float>> featureRankContribution;

                FeatureSelector.GetResults(logPath, out score, out featureRankContribution);

                // Translate the reported group names back into feature ids.
                return featureRankContribution.Keys.Select(groupName => groupNameFeatureId[groupName]);
            }
            finally
            {
                System.IO.File.Delete(featureSelectionTrainingPath);

                if (groupNamePath != null)
                {
                    System.IO.File.Delete(groupNamePath);
                }
            }
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Creates an Accord source for the given feature selector and initializes it with
        /// the supplied feature vectors.
        /// </summary>
        /// <param name="featureVectors">Feature vectors passed to <c>Init</c>.</param>
        /// <param name="featureSelector">Feature selector passed to the source's constructor.</param>
        /// <returns>The initialized source.</returns>
        private AppIdentAcordSource GetAppIdentAcordSource(FeatureVector[] featureVectors, FeatureSelector featureSelector)
        {
            var source = new AppIdentAcordSource(featureSelector);
            source.Init(featureVectors);

            return source;
        }
Ejemplo n.º 20
0
 /// <summary>
 /// Creates an evaluator that keeps the given feature selector.
 /// </summary>
 /// <param name="featureSelector">Stored in <c>FeatureSelector</c>.</param>
 public EPIEvaluator(FeatureSelector featureSelector)
     => this.FeatureSelector = featureSelector;
Ejemplo n.º 21
0
 /// <summary>
 /// Creates a protocol model with its first training feature vector and records which
 /// <c>FeatureVector</c> properties have a type listed in the selector's selected features.
 /// </summary>
 /// <param name="applicationProtocolName">Stored in <c>ApplicationProtocolName</c>.</param>
 /// <param name="trainingFeatureVector">Passed to <c>AddTrainingFeatureVector</c>.</param>
 /// <param name="featureSelector">Stored in <c>FeatureSelector</c> and used to filter the properties.</param>
 public EPIProtocolModel(string applicationProtocolName, FeatureVector trainingFeatureVector, FeatureSelector featureSelector)
 {
     this.ApplicationProtocolName = applicationProtocolName;
     this.FeatureSelector         = featureSelector;

     // The training vector is added before the property list below is populated.
     this.AddTrainingFeatureVector(trainingFeatureVector);

     // Keep only the properties whose type is among the selected features.
     var selectedProperties =
         from property in typeof(FeatureVector).GetProperties()
         where this.FeatureSelector.SelectedFeatures.Contains(property.PropertyType)
         select property;

     this.FeatureVectorProperties = selectedProperties.ToList();
 }
 /// <summary>
 /// Wraps a feature selector, remembering the allowed number of failed attempts and
 /// starting with an empty per-key failure count table.
 /// </summary>
 /// <param name="inner">Selector stored as the wrapped instance.</param>
 /// <param name="maxFailedAttempts">Stored as the failure limit.</param>
 public CircuitBreakingFeatureSelector(FeatureSelector inner, int maxFailedAttempts)
 {
     // No failures have been recorded yet.
     this.failures = new Dictionary<string, int>();

     this.maxFailedAttempts = maxFailedAttempts;
     this.inner = inner;
 }