/// <summary>
/// Runs the EPI classification with a default <see cref="FeatureSelector"/> over a fixed list of
/// bundled captures, prints the resulting statistics and saves them through the test context.
/// </summary>
public void EPI_FullFeatureSet_fast()
{
    var context = new AppIdentTestContext(nameof(this.EPI_FullFeatureSet_fast))
    {
        MinFlows = 10,
        FeatureSelectionTreshold = 0.5,
        TrainingToVerificationRation = 0.7
    };
    this.AppIdentTestContext = context;

    // The first capture is processed once, then the collected conversations are discarded
    // before the full capture list (which contains the same capture again) is processed.
    this.ProcessPcapFile(SnoopersPcaps.Default.app_identification_streamSkypeHttpTls_cap);
    var featureSelector = new FeatureSelector();
    this.L7Conversations.Clear();

    var captures = new[]
    {
        SnoopersPcaps.Default.app_identification_streamSkypeHttpTls_cap,
        SnoopersPcaps.Default.app_identification_dnsHttpTls_cap,
        SnoopersPcaps.Default.app_identification_learn1_cap,
        SnoopersPcaps.Default.app_identification_refSkype_cap,
        SnoopersPcaps.Default.app_identification_testM1_cap,
        SnoopersPcaps.Default.app_identification_testM2_cap
    };
    foreach (var capture in captures)
    {
        this.ProcessPcapFile(capture);
    }

    var dataSource = this.AppIdentService.CreateAppIdentDataSource(
        this.L7Conversations,
        this.AppIdentTestContext.MinFlows,
        this.AppIdentTestContext.TrainingToVerificationRation);
    var statistics = this.AppIdentService.EpiClasify(dataSource, featureSelector, this.AppIdentTestContext);

    statistics.PrintResults();
    this.AppIdentTestContext.Save(statistics);
    this.AppIdentTestContext.Save();
}
/// <summary>
/// Runs the EPI classification with a default <see cref="FeatureSelector"/> over every capture
/// registered as testing data from a fixed local directory, then prints and saves the statistics.
/// </summary>
public void EPI_FullFeatureSet_ICDF()
{
    var context = new AppIdentTestContext(nameof(this.EPI_FullFeatureSet_ICDF))
    {
        MinFlows = 10,
        FeatureSelectionTreshold = 0.5,
        TrainingToVerificationRation = 0.7
    };
    this.AppIdentTestContext = context;

    var captures = new AppIdentPcapSource();
    var selector = new FeatureSelector();

    // Register the capture directory and record the chosen source in the test context.
    captures.AddTesting(@"D:\pcaps\AppIdent-TestingData\captured\", "*.cap|*.pcap", true);
    this.AppIdentTestContext.Save(captures);

    foreach (var capture in captures.TestingPcaps)
    {
        this.ProcessPcapFile(capture);
    }

    var dataSource = this.AppIdentService.CreateAppIdentDataSource(
        this.L7Conversations,
        this.AppIdentTestContext.MinFlows,
        this.AppIdentTestContext.TrainingToVerificationRation);
    var statistics = this.AppIdentService.EpiClasify(dataSource, selector, this.AppIdentTestContext);

    statistics.PrintResults();
    this.AppIdentTestContext.Save(statistics);
    this.AppIdentTestContext.Save();
}
/// <summary>
/// Lazily yields <c>Size</c> subsets, each produced by running the feature selector over the
/// batch's training instances.
/// </summary>
/// <param name="batch">Batch whose <c>TrainInstances</c> are passed to the feature selector.</param>
/// <param name="rand">Random source forwarded to the feature selector.</param>
/// <returns>A deferred sequence of subsets.</returns>
public override IEnumerable<Subset> GetSubsets(Batch batch, IRandom rand)
{
    int produced = 0;
    while (produced < Size)
    {
        yield return FeatureSelector.SelectFeatures(batch.TrainInstances, rand);
        produced++;
    }
}
/// <summary>
/// Convenience overload that delegates to the overload exposing the evaluator and discards it.
/// </summary>
/// <param name="appIdentDataSource">Data source passed through to the full overload.</param>
/// <param name="featureSelector">Feature selector passed through to the full overload.</param>
/// <param name="appIdentTestContext">Optional test context passed through to the full overload.</param>
/// <returns>The statistics meter returned by the full overload.</returns>
public ApplicationProtocolClassificationStatisticsMeter EpiClasify(
    AppIdentDataSource appIdentDataSource,
    FeatureSelector featureSelector,
    AppIdentTestContext appIdentTestContext = null)
    => this.EpiClasify(appIdentDataSource, featureSelector, out _, appIdentTestContext);
/// <summary>
/// Command-line entry point: builds a data set from the two file arguments, runs forward feature
/// selection with a k-nearest classifier and prints the final result and the elapsed run time.
/// </summary>
/// <param name="args">Exactly two arguments, passed unchanged to <c>DSBuilder</c>.</param>
private static void Main(string[] args)
{
    if (args.Length != 2)
    {
        Console.WriteLine("KNN.exe *.names *.data");
        return;
    }

    // The wall-clock start is only printed; elapsed time is measured with a Stopwatch so that
    // system clock adjustments during the run cannot distort the reported duration.
    DateTime start = DateTime.Now;
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    Console.WriteLine("Start Time: {0}", start);

    var builder = new DSBuilder(args);
    DataSet data = builder.BuildDataSet();
    var sets = data.RandomInstance(800);

    var knn = new KNearest(sets[0]);
    var fs = new FeatureSelector(knn);

    // NOTE(review): Enumerable.Range(0, Count - 1) yields indices 0..Count-2, so the last feature
    // is never a candidate even when it is not the output column - confirm this is intended.
    var candidates = Enumerable.Range(0, data.Features.Count - 1)
        .Where(x => x != data.OutputIndex)
        .ToList();
    var optimal = fs.ForwardFeatureSelect(candidates);
    knn.K = optimal.Key;
    knn.Features = optimal.Value;

    Console.WriteLine(
        "Final Result: {0:0.##}% with K:{1} using Features:{2}",
        fs.Test(sets[1].DataEntries) * 100.0,
        knn.K,
        string.Join(", ", knn.Features.Select(i => data.Features[i].Name).ToArray()));
    Console.WriteLine("Run-Time: {0}", stopwatch.Elapsed);
}
/// <summary>
/// Lazily yields <c>Size</c> subsets; each one is produced by wrapping the data in a fresh
/// <c>BootstrapSampleTransform</c> and running the feature selector over the result.
/// </summary>
/// <param name="batch">Not read by this implementation.</param>
/// <param name="rand">Random source forwarded to the feature selector.</param>
/// <returns>A deferred sequence of subsets.</returns>
public override IEnumerable<Subset> GetSubsets(Batch batch, IRandom rand)
{
    int round = 0;
    while (round < Size)
    {
        // REVIEW: Consider ways to reintroduce "balanced" samples.
        var bootstrapView = new BootstrapSampleTransform(Host, new BootstrapSampleTransform.Arguments(), Data.Data);
        var bootstrapData = RoleMappedData.Create(bootstrapView, Data.Schema.GetColumnRoleNames());
        yield return FeatureSelector.SelectFeatures(bootstrapData, rand);
        round++;
    }
}
/// <summary>
/// Freezes the given feature for the supplied context and executes one of the two callbacks
/// through the frozen result.
/// </summary>
/// <typeparam name="T">Result type of both callbacks.</typeparam>
/// <param name="selector">Selector on which <c>Freeze</c> is called.</param>
/// <param name="feature">Feature name passed to <c>Freeze</c>.</param>
/// <param name="context">Feature context passed to <c>Freeze</c>.</param>
/// <param name="ifEnabled">First callback handed to the frozen result's <c>Execute</c>.</param>
/// <param name="ifDisabled">Second callback handed to the frozen result's <c>Execute</c>.</param>
/// <returns>The value returned by the frozen result's <c>Execute</c>.</returns>
public static T Execute<T>(
    this FeatureSelector selector,
    string feature,
    FeatureContext context,
    Func<T> ifEnabled,
    Func<T> ifDisabled)
{
    var frozen = selector.Freeze(feature, context);
    return frozen.Execute(ifEnabled, ifDisabled);
}
/// <summary>
/// Runs a grid search for random-forest parameters over the training set and returns the best
/// parameter collection, saving it through the test context when one is supplied.
/// </summary>
/// <param name="appIdentDataSource">Data source whose <c>TrainingSet</c> is searched.</param>
/// <param name="featureSelector">Feature selector used to build the Accord source.</param>
/// <param name="appIdentTestContext">Optional context; when non-null the best parameters are saved to it.</param>
/// <returns>The best parameters reported by the grid search.</returns>
public GridSearchParameterCollection RandomForestGetBestParameters(
    AppIdentDataSource appIdentDataSource,
    FeatureSelector featureSelector,
    AppIdentTestContext appIdentTestContext = null)
{
    var accord = new AccordAppIdent();
    var trainingSource = this.GetAppIdentAcordSource(appIdentDataSource.TrainingSet, featureSelector);

    // The second out value reported by the search is not used here.
    accord.GetBestRandomForestsWithGridSearch(trainingSource, out var best, out _);

    if (appIdentTestContext != null)
    {
        appIdentTestContext.Save(best);
    }

    return best;
}
/// <summary>
/// Asks the selector for a feature within the given budget and, if one is returned, adds it to
/// the unit.
/// </summary>
/// <param name="unit">Unit whose <c>Features</c> collection receives the selected feature.</param>
/// <param name="selectFeature">Selector invoked with the budget passed by reference.</param>
/// <param name="valueBudget">Budget handed to the selector; the selector may modify it.</param>
/// <returns>
/// The budget value as left by the selector when a feature was added; <c>0</c> when the selector
/// returned <c>null</c>.
/// </returns>
private double AddFeature(UnitBuilder unit, FeatureSelector selectFeature, double valueBudget)
{
    var picked = selectFeature(ref valueBudget);
    if (picked != null)
    {
        unit.Features.Add(picked);
        return valueBudget;
    }

    return 0;
}
/// <summary>
/// Builds an <see cref="EPIEvaluator"/> from the training set, computes statistics over the
/// verification set and saves them through the test context when one is supplied.
/// </summary>
/// <param name="appIdentDataSource">Source of the training and verification sets.</param>
/// <param name="featureSelector">Feature selector handed to the evaluator.</param>
/// <param name="epiEvaluator">Receives the evaluator created by this call.</param>
/// <param name="appIdentTestContext">Optional context; when non-null the statistics are saved to it.</param>
/// <returns>The statistics computed over the verification set.</returns>
public ApplicationProtocolClassificationStatisticsMeter EpiClasify(
    AppIdentDataSource appIdentDataSource,
    FeatureSelector featureSelector,
    out EPIEvaluator epiEvaluator,
    AppIdentTestContext appIdentTestContext = null)
{
    // An earlier variant split the data here instead of using the pre-split data source:
    // this.CreateDatasetAndTestset(appIdentDataSource, trainingToVerificationRatio, out var trainingSet, out var verificationSet);
    var evaluator = new EPIEvaluator(featureSelector);
    epiEvaluator = evaluator;

    evaluator.CreateApplicationProtocolModels(appIdentDataSource.TrainingSet);
    var statistics = evaluator.ComputeStatistics(appIdentDataSource.VerificationSet);

    if (appIdentTestContext != null)
    {
        appIdentTestContext.Save(statistics);
    }

    return statistics;
}
/// <summary>
/// Classifies the verification set with the given model, records every predicted/actual label
/// pair in a statistics meter and saves the meter through the test context.
/// </summary>
/// <param name="appIdentDataSource">Data source whose <c>VerificationSet</c> is classified.</param>
/// <param name="model">Classifier whose <c>Decide</c> method produces the predictions.</param>
/// <param name="featureSelector">Feature selector used to build the Accord source.</param>
/// <param name="appIdentTestContext">Context used to save the statistics; dereferenced unconditionally.</param>
/// <returns>The populated statistics meter.</returns>
public ApplicationProtocolClassificationStatisticsMeter AccordClassify(
    AppIdentDataSource appIdentDataSource,
    MulticlassClassifierBase model,
    FeatureSelector featureSelector,
    AppIdentTestContext appIdentTestContext)
{
    var statistics = new ApplicationProtocolClassificationStatisticsMeter();
    var verificationSource = this.GetAppIdentAcordSource(appIdentDataSource.VerificationSet, featureSelector);
    var decisions = model.Decide(verificationSource.Samples);

    // Pair each predicted class index (mapped back to its label) with the label at the same position.
    var position = 0;
    while (position < decisions.Length)
    {
        var predictedLabel = verificationSource.LabelsFromInteges[decisions[position]];
        statistics.UpdateStatistics(predictedLabel, verificationSource.Labels[position]);
        position++;
    }

    appIdentTestContext.Save(statistics);
    return statistics;
}
/// <summary>
/// Verifies that, when the evaluator reports the feature as off, <c>CreateAsync</c> logs an
/// information message containing "OldGetSomething", "Test Feature" and "off".
/// </summary>
public async Task CreateAsync_logs_off_with_the_correct_type_when_feature_is_off()
{
    // Arrange
    var featureEvaluator = Substitute.For<IFeatureEvaluator>();
    var testFeature = new SimpleTestFeature(featureEvaluator);
    var log = Logger.For<FeatureSelector<SimpleTestFeature, NewGetSomething, OldGetSomething, IGetSomething>>();
    var sut = new FeatureSelector<SimpleTestFeature, NewGetSomething, OldGetSomething, IGetSomething>(
        testFeature,
        () => new NewGetSomething(),
        () => new OldGetSomething(),
        log);
    featureEvaluator.IsOnAsync(Arg.Any<string>()).Returns(false);

    // Act: the created instance itself is not inspected by this test.
    await sut.CreateAsync();

    // Assert
    log.Received().LogInformation(s =>
        s.Contains("OldGetSomething") && s.Contains("Test Feature") && s.Contains("off"));
}
/// <summary>
/// Creates the processing modules, wires the data-transfer and frame-ready events between them
/// and shows the console.
/// </summary>
public RecognitionController()
{
    // Module initialization. The gesture and segmentation modules receive this controller.
    this._dataProcessor = new DataProcessor();
    this._gestureModule = new GestureModule(this);
    this._segmentationModule = new SegmentationModule(this);
    this._featureSelector = new FeatureSelector();
    this._NLPModule = new NLPModule();
    this._classifier = new Classifier();
    this._postprocessor = new Postprocessor();

    // NOTE(review): the assignments above use `_`-prefixed members while the subscriptions below
    // go through `m_`-prefixed members - confirm both refer to the same objects.

    // Data transfer event.
    m_dataProcessor.m_dataWarehouse.m_dataTransferEvent += m_gestureModule.OnDataTransfer;

    // Data ready event.
    m_dataProcessor.m_dataTransferEvent += m_segmentationModule.OnNewFrameDataReady;
    m_dataProcessor.m_dataTransferEvent += m_gestureModule.OnNewFrameDataReady;

    // Console manager.
    ConsoleManager.Show();
}
/// <summary>
/// Verifies that, when the evaluator reports the feature as off, <c>CreateAsync</c> returns an
/// <c>OldGetSomething</c> whose <c>GetSomething</c> yields "Old". No logger is supplied.
/// </summary>
public async Task CreateAsync_creates_the_correct_type_when_feature_is_off()
{
    // Arrange
    var featureEvaluator = Substitute.For<IFeatureEvaluator>();
    var testFeature = new SimpleTestFeature(featureEvaluator);
    var sut = new FeatureSelector<SimpleTestFeature, NewGetSomething, OldGetSomething, IGetSomething>(
        testFeature,
        () => new NewGetSomething(),
        () => new OldGetSomething(),
        null);
    featureEvaluator.IsOnAsync(Arg.Any<string>()).Returns(false);

    // Act
    var created = await sut.CreateAsync();
    var value = created.GetSomething();

    // Assert
    created.Should().BeOfType<OldGetSomething>();
    value.Should().Be("Old");
}
/// <summary>
/// Cross-validates a random-forest model on the training set, prints the training and validation
/// error statistics, saves the model with the lowest validation error, and records that model's
/// predictions on its validation fold in a statistics meter.
/// </summary>
/// <param name="appIdentDataSource">Data source whose <c>TrainingSet</c> is cross-validated.</param>
/// <param name="featureSelector">Feature selector used to build the Accord source.</param>
/// <param name="bestParameters">Parameters handed to the cross-validation routine.</param>
/// <param name="folds">Number of folds handed to the cross-validation routine.</param>
/// <param name="appIdentTestContext">Context used to save the model and the statistics; dereferenced unconditionally.</param>
/// <returns>The statistics meter populated from the best model's validation fold.</returns>
/// <exception cref="InvalidOperationException">
/// The best model's <c>Tag</c> is not an <c>AccordAppIdent.ValidationDataSource</c>.
/// </exception>
public ApplicationProtocolClassificationStatisticsMeter RandomForestCrossValidation(
    AppIdentDataSource appIdentDataSource,
    FeatureSelector featureSelector,
    GridSearchParameterCollection bestParameters,
    int folds,
    AppIdentTestContext appIdentTestContext)
{
    var precMeasure = new ApplicationProtocolClassificationStatisticsMeter();
    var accordAppIdent = new AccordAppIdent();
    var appIdentAcordSource = this.GetAppIdentAcordSource(appIdentDataSource.TrainingSet, featureSelector);
    var cvResults = accordAppIdent.GetCrossValidationResultsOfRandomForestModel(appIdentAcordSource, bestParameters, folds);

    Console.WriteLine("### CV Results ###");
    Console.WriteLine("\n### Training stats ###");
    Console.WriteLine(">> model error mean: {0}\n>> model std: {1}", Math.Round(cvResults.Training.Mean, 6), Math.Round(cvResults.Training.StandardDeviation, 6));
    Console.WriteLine("\n### Validation stats ###");
    Console.WriteLine(">> model error mean: {0}\n>> model std: {1}", Math.Round(cvResults.Validation.Mean, 6), Math.Round(cvResults.Validation.StandardDeviation, 6));

    // Pick the fold model with the smallest validation error.
    var minErorr = cvResults.Validation.Values.Min();
    var bestIndex = cvResults.Validation.Values.IndexOf(minErorr);
    var classifier = cvResults.Models[bestIndex];
    var model = classifier.Model;

    // Persist the winning model together with the distinct labels; the returned value is not needed here.
    var labels = appIdentAcordSource.Labels.Distinct();
    appIdentTestContext.Save(model, labels);

    // Fail with a descriptive error instead of a NullReferenceException when the tag has an unexpected type.
    if (!(classifier.Tag is AccordAppIdent.ValidationDataSource validationDataSource))
    {
        throw new InvalidOperationException("Cross-validation model tag does not carry a ValidationDataSource.");
    }

    var predictedValues = classifier.Model.Decide(validationDataSource.ValidationInputs);
    for (var j = 0; j < predictedValues.Length; j++)
    {
        precMeasure.UpdateStatistics(
            appIdentAcordSource.LabelsFromInteges[predictedValues[j]],
            appIdentAcordSource.LabelsFromInteges[validationDataSource.ValidationOutputs[j]]);
    }

    appIdentTestContext.SaveCrossValidation(precMeasure);
    return precMeasure;
}
/// <summary>
/// Lazily yields <c>Size</c> subsets. A generated-number column is added to the data once, and
/// each subset is built from the rows whose generated value falls in the i-th of <c>Size</c>
/// equal-width ranges between 0 and 1.
/// </summary>
/// <param name="batch">Not read by this implementation.</param>
/// <param name="rand">Supplies the seed for the generated column and is forwarded to the feature selector.</param>
/// <returns>A deferred sequence of subsets.</returns>
public override IEnumerable<Subset> GetSubsets(Batch batch, Random rand)
{
    string tempColumn = Data.Data.Schema.GetTempColumnName();
    var generatorOptions = new GenerateNumberTransform.Options();
    generatorOptions.Columns = new[] { new GenerateNumberTransform.Column() { Name = tempColumn } };
    generatorOptions.Seed = (uint)rand.Next();
    IDataTransform numbered = new GenerateNumberTransform(Host, generatorOptions, Data.Data);

    // REVIEW: This won't be very efficient when Size is large.
    int slice = 0;
    while (slice < Size)
    {
        var sliceOptions = new RangeFilter.Options()
        {
            Column = tempColumn,
            Min = (Double)slice / Size,
            Max = (Double)(slice + 1) / Size
        };
        var sliceView = new RangeFilter(Host, sliceOptions, numbered);
        var sliceData = new RoleMappedData(sliceView, Data.Schema.GetColumnRoleNames());
        yield return FeatureSelector.SelectFeatures(sliceData, rand);
        slice++;
    }
}
/// <summary>
/// Runs feature selection over the stored training instances and returns the ids of the features
/// that appear in the selection results.
/// </summary>
/// <remarks>
/// Two temporary files are created (merged training instances and the feature group-name map).
/// Both are deleted in a <c>finally</c> block so they do not leak when any step throws.
/// </remarks>
/// <param name="prediction">Not read by this implementation.</param>
/// <returns>
/// A deferred sequence of feature ids, one per group name reported by
/// <c>FeatureSelector.GetResults</c>.
/// </returns>
/// <exception cref="Exception">
/// The label map has no null class or more than one; the instance and location files disagree in
/// length (wrapped as "Failed to read training instances", with the cause as inner exception);
/// or the positive class weighting scheme is not recognized.
/// </exception>
public override IEnumerable<string> SelectFeatures(Prediction prediction)
{
    _libLinear.LoadClassificationModelFiles();

    string logPath = Path.Combine(Model.ModelDirectory, "feature_selection_log.txt");
    System.IO.File.Delete(logPath);

    // Locate the single mapped class number that corresponds to the null label.
    int nullClass = -1;
    foreach (string unmappedLabel in _libLinear.Labels)
    {
        if (unmappedLabel == PointPrediction.NullLabel)
        {
            if (nullClass == -1)
            {
                nullClass = int.Parse(_libLinear.GetMappedLabel(unmappedLabel));
            }
            else
            {
                throw new Exception("Multiple null classes in label map");
            }
        }
    }

    if (nullClass == -1)
    {
        throw new Exception("Failed to find null class");
    }

    string featureSelectionTrainingPath = null;
    string groupNamePath = null;
    try
    {
        // Merge each training instance with its location into one "instance # location" line.
        featureSelectionTrainingPath = Path.GetTempFileName();
        using (FileStream compressedTrainingInstancesFile = new FileStream(CompressedTrainingInstancesPath, FileMode.Open, FileAccess.Read))
        using (GZipStream compressedTrainingInstancesGzip = new GZipStream(compressedTrainingInstancesFile, CompressionMode.Decompress))
        using (StreamReader trainingInstancesFile = new StreamReader(compressedTrainingInstancesGzip))
        using (FileStream compressedTrainingInstanceLocationsFile = new FileStream(CompressedTrainingInstanceLocationsPath, FileMode.Open, FileAccess.Read))
        using (GZipStream compressedTrainingInstanceLocationsGzip = new GZipStream(compressedTrainingInstanceLocationsFile, CompressionMode.Decompress))
        using (StreamReader trainingInstanceLocationsFile = new StreamReader(compressedTrainingInstanceLocationsGzip))
        using (StreamWriter featureSelectionTrainingFile = new StreamWriter(featureSelectionTrainingPath))
        {
            try
            {
                string instance;
                while ((instance = trainingInstancesFile.ReadLine()) != null)
                {
                    string location = trainingInstanceLocationsFile.ReadLine();
                    if (location == null)
                    {
                        throw new Exception("Missing location for training instance");
                    }

                    featureSelectionTrainingFile.WriteLine(instance + " # " + location);
                }

                // Anything other than whitespace left in the locations file means the two files are out of sync.
                string extraLocations = trainingInstanceLocationsFile.ReadToEnd().Trim();
                if (extraLocations != "")
                {
                    throw new Exception("Extra training instance locations: " + extraLocations);
                }

                // Closed explicitly inside the try so that close/flush failures are wrapped below.
                trainingInstancesFile.Close();
                trainingInstanceLocationsFile.Close();
                featureSelectionTrainingFile.Close();
            }
            catch (Exception ex)
            {
                // Keep the original exception as InnerException so its stack trace is not lost.
                throw new Exception("Failed to read training instances: " + ex.Message, ex);
            }
        }

        // Write the "featureNumber groupName" map and remember which feature id each group name stands for.
        groupNamePath = Path.GetTempFileName();
        Dictionary<string, string> groupNameFeatureId = new Dictionary<string, string>();
        using (StreamWriter groupNameFile = new StreamWriter(groupNamePath))
        {
            foreach (PTL.ATT.Models.Feature feature in Model.Features)
            {
                int featureNumber;
                if (_libLinear.TryGetFeatureNumber(feature.Id, out featureNumber))
                {
                    string groupName = feature.ToString().ReplacePunctuation(" ").RemoveRepeatedWhitespace().Replace(' ', '_').Trim('_');
                    groupNameFile.WriteLine(featureNumber + " " + groupName);
                    groupNameFeatureId.Add(groupName, feature.Id);
                }
            }

            groupNameFile.Close();
        }

        // NOTE(review): the numeric option values below are formatted with the current culture
        // (parameterless ToString) - confirm the consumer parses them with the same culture.
        Options featureSelectionOptions = new Options();
        featureSelectionOptions.Add(FeatureSelector.Option.ExitOnErrorAction, FeatureSelector.ExitOnErrorAction.ThrowException.ToString());
        featureSelectionOptions.Add(FeatureSelector.Option.FeatureFilters, typeof(ZeroVectorFeatureFilter).FullName + "," + typeof(CosineSimilarityFeatureFilter).FullName);
        featureSelectionOptions.Add(FeatureSelector.Option.FloatingSelection, false.ToString());
        featureSelectionOptions.Add(FeatureSelector.Option.GroupNamePath, groupNamePath);
        featureSelectionOptions.Add(FeatureSelector.Option.LogPath, logPath);
        featureSelectionOptions.Add(FeatureSelector.Option.MaxThreads, Configuration.ProcessorCount.ToString());
        featureSelectionOptions.Add(FeatureSelector.Option.PerformanceIncreaseRequirement, float.Epsilon.ToString());
        featureSelectionOptions.Add(FeatureSelector.Option.Scorer, typeof(SurveillancePlotScorer).FullName);
        featureSelectionOptions.Add(FeatureSelector.Option.TrainingInstancesInMemory, true.ToString());
        featureSelectionOptions.Add(FeatureSelector.Option.TrainingInstancesPath, featureSelectionTrainingPath);
        featureSelectionOptions.Add(FeatureSelector.Option.Verbosity, FeatureSelector.Verbosity.Debug.ToString());
        featureSelectionOptions.Add(SurveillancePlotScorer.Option.IgnoredSurveillanceClasses, nullClass.ToString());
        featureSelectionOptions.Add(CommonWrapper.Option.ClassifyExePath, Configuration.ClassifierTypeOptions[GetType()]["predict"]);
        featureSelectionOptions.Add(CommonWrapper.Option.TrainExePath, Configuration.ClassifierTypeOptions[GetType()]["train"]);
        featureSelectionOptions.Add(LibLinearWrapper.Option.IgnoredProbabilisticClasses, nullClass.ToString());
        featureSelectionOptions.Add(LibLinearWrapper.Option.SumInstanceProbabilities, true.ToString());
        featureSelectionOptions.Add(CrossFoldValidator.Option.RandomizeInstanceBlocks, true.ToString());
        featureSelectionOptions.Add(CrossFoldValidator.Option.InstanceBlockRandomizationSeed, (498734983).ToString());
        featureSelectionOptions.Add(CrossFoldValidator.Option.NumFolds, (2).ToString());
        featureSelectionOptions.Add(CosineSimilarityFeatureFilter.Option.Threshold, (0.98).ToString());

        if (_positiveClassWeighting == PositiveClassWeighting.NegativePositiveRatio)
        {
            // Add one "W<class>" option per class, weighted from the training instances.
            using (FileStream compressedTrainingInstancesFile = new FileStream(CompressedTrainingInstancesPath, FileMode.Open, FileAccess.Read))
            using (GZipStream compressedTrainingInstancesGzip = new GZipStream(compressedTrainingInstancesFile, CompressionMode.Decompress))
            using (StreamReader compressedTrainingInstancesReader = new StreamReader(compressedTrainingInstancesGzip))
            {
                Dictionary<int, float> classWeight = GetPerClassWeights(compressedTrainingInstancesReader);
                foreach (int classNum in classWeight.Keys)
                {
                    featureSelectionOptions.Add((LibLinearWrapper.Option)Enum.Parse(typeof(LibLinearWrapper.Option), "W" + classNum), classWeight[classNum].ToString());
                }
            }
        }
        else if (_positiveClassWeighting != PositiveClassWeighting.None)
        {
            throw new Exception("Unrecognized positive class weighting scheme: " + _positiveClassWeighting);
        }

        FeatureSelector.Run(_libLinear, featureSelectionOptions);

        float score;
        Dictionary<string, Tuple<int, float>> featureRankContribution;
        FeatureSelector.GetResults(logPath, out score, out featureRankContribution);

        return featureRankContribution.Keys.Select(groupName => groupNameFeatureId[groupName]);
    }
    finally
    {
        // Remove the temporary files on success and on failure alike.
        if (featureSelectionTrainingPath != null)
        {
            System.IO.File.Delete(featureSelectionTrainingPath);
        }

        if (groupNamePath != null)
        {
            System.IO.File.Delete(groupNamePath);
        }
    }
}
/// <summary>
/// Creates an <see cref="AppIdentAcordSource"/> for the given feature selector and initializes it
/// with the supplied feature vectors.
/// </summary>
/// <param name="featureVectors">Feature vectors passed to <c>Init</c>.</param>
/// <param name="featureSelector">Feature selector passed to the source's constructor.</param>
/// <returns>The initialized source.</returns>
private AppIdentAcordSource GetAppIdentAcordSource(FeatureVector[] featureVectors, FeatureSelector featureSelector)
{
    var source = new AppIdentAcordSource(featureSelector);
    source.Init(featureVectors);
    return source;
}
/// <summary>
/// Creates an evaluator that stores the given feature selector.
/// </summary>
/// <param name="featureSelector">Feature selector stored in <c>FeatureSelector</c>.</param>
public EPIEvaluator(FeatureSelector featureSelector) => this.FeatureSelector = featureSelector;
/// <summary>
/// Creates a protocol model with its first training feature vector and records which
/// <see cref="FeatureVector"/> properties the feature selector has selected.
/// </summary>
/// <param name="applicationProtocolName">Name stored in <c>ApplicationProtocolName</c>.</param>
/// <param name="trainingFeatureVector">First vector, passed to <c>AddTrainingFeatureVector</c>.</param>
/// <param name="featureSelector">Selector whose <c>SelectedFeatures</c> filters the properties.</param>
public EPIProtocolModel(string applicationProtocolName, FeatureVector trainingFeatureVector, FeatureSelector featureSelector)
{
    this.ApplicationProtocolName = applicationProtocolName;
    this.FeatureSelector = featureSelector;
    this.AddTrainingFeatureVector(trainingFeatureVector);

    // An earlier variant selected properties by attribute instead:
    // this.FeatureVectorProperties = typeof(FeatureVector).GetProperties().Where(prop => Attribute.IsDefined(prop, typeof(FeatureStatisticalAttribute))).ToList();

    // Keep only the properties whose property type is contained in the selector's selected features.
    var selectedProperties =
        from prop in typeof(FeatureVector).GetProperties()
        where this.FeatureSelector.SelectedFeatures.Contains(prop.PropertyType)
        select prop;
    this.FeatureVectorProperties = selectedProperties.ToList();
}
/// <summary>
/// Creates a selector that wraps another selector, stores the failed-attempt limit and starts
/// with an empty per-key failure count table.
/// </summary>
/// <param name="inner">Wrapped selector, stored as-is.</param>
/// <param name="maxFailedAttempts">Failed-attempt limit, stored as-is.</param>
public CircuitBreakingFeatureSelector(FeatureSelector inner, int maxFailedAttempts)
{
    failures = new Dictionary<string, int>();
    this.maxFailedAttempts = maxFailedAttempts;
    this.inner = inner;
}