/// <summary>
/// Creates an encoder from the selected node names, the feature extractor and the feature set.
/// </summary>
/// <param name="selectedNodeNames">Node names stored as-is in the encoder.</param>
/// <param name="extractor">Feature extractor stored as-is in the encoder.</param>
/// <param name="featureSet">
/// Feature set passed to <c>CreateFeatureString2Bit</c> to build the feature-string-to-bit
/// table; the reverse table is then derived from it via <c>CreateBit2FeatureString</c>.
/// </param>
public FeatureEncoder(
    ISet<string> selectedNodeNames, FeatureExtractor extractor, FeatuerSet featureSet)
{
    _selectedNodeNames = selectedNodeNames;
    _extractor = extractor;

    // The reverse lookup must be built from the forward table, so the forward table comes first.
    var forwardTable = CreateFeatureString2Bit(featureSet);
    _featureString2Bit = forwardTable;
    _bit2FeatureString = CreateBit2FeatureString(_featureString2Bit);
}
/// <summary>
/// Initializes the encoder: stores the node names and the extractor, then builds both
/// lookup tables (feature string to bit, and bit to feature string) from the feature set.
/// </summary>
/// <param name="selectedNodeNames">Node names kept by the encoder without copying.</param>
/// <param name="extractor">Feature extractor kept by the encoder.</param>
/// <param name="featureSet">Source of the feature strings handed to <c>CreateFeatureString2Bit</c>.</param>
public FeatureEncoder(
    ISet<string> selectedNodeNames,
    FeatureExtractor extractor,
    FeatuerSet featureSet)
{
    this._selectedNodeNames = selectedNodeNames;
    this._extractor = extractor;

    // Forward table first; the reverse table is computed from it.
    this._featureString2Bit = CreateFeatureString2Bit(featureSet);
    this._bit2FeatureString = CreateBit2FeatureString(this._featureString2Bit);
}
/// <summary>
/// Creates a classifier for the given group paths, deriving feature counts and bit masks
/// from <paramref name="featureSet"/>, and then calls <c>Initialize()</c>.
/// </summary>
/// <param name="groupPaths">Group paths; copied into a list.</param>
/// <param name="featureSet">
/// Supplies <c>AcceptingFeatureCount</c>, <c>RejectingFeatureCount</c> and <c>FeatureCount</c>.
/// </param>
/// <remarks>
/// <c>AllFeatureCount</c> is the sum of the accepting and rejecting counts, while
/// <c>AllFeatureBitMask</c> is sized by <c>featureSet.FeatureCount</c>.
/// </remarks>
public Classifier(IEnumerable<string> groupPaths, FeatuerSet featureSet)
{
    var one = BigInteger.One;

    AcceptingFeatureCount = featureSet.AcceptingFeatureCount;
    RejectingFeatureCount = featureSet.RejectingFeatureCount;
    AllFeatureCount = AcceptingFeatureCount + RejectingFeatureCount;

    // (1 << k) - 1 yields a mask with the k lowest bits set.
    AllFeatureBitMask = (one << featureSet.FeatureCount) - one;
    AcceptingFeatureBitMask = (one << featureSet.AcceptingFeatureCount) - one;

    // Bits that are in the full mask but not in the accepting mask.
    RejectingFeatureBitMask = AllFeatureBitMask ^ AcceptingFeatureBitMask;

    GroupPaths = groupPaths.ToList();

    Initialize();
}
/// <summary>
/// Builds a classifier: copies the feature counts from <paramref name="featureSet"/>,
/// computes the all/accepting/rejecting bit masks, materializes the group paths and
/// finally invokes <c>Initialize()</c>.
/// </summary>
/// <param name="groupPaths">Sequence of group paths, snapshotted with <c>ToList()</c>.</param>
/// <param name="featureSet">Feature set providing the accepting, rejecting and total feature counts.</param>
public Classifier(IEnumerable<string> groupPaths, FeatuerSet featureSet)
{
    // Feature counts.
    AcceptingFeatureCount = featureSet.AcceptingFeatureCount;
    RejectingFeatureCount = featureSet.RejectingFeatureCount;
    AllFeatureCount = AcceptingFeatureCount + RejectingFeatureCount;

    // Masks: the lowest FeatureCount bits, the lowest AcceptingFeatureCount bits,
    // and the XOR of those two for the rejecting features.
    var lowestAllBits = BigInteger.One << featureSet.FeatureCount;
    AllFeatureBitMask = lowestAllBits - BigInteger.One;
    var lowestAcceptingBits = BigInteger.One << featureSet.AcceptingFeatureCount;
    AcceptingFeatureBitMask = lowestAcceptingBits - BigInteger.One;
    RejectingFeatureBitMask = AllFeatureBitMask ^ AcceptingFeatureBitMask;

    GroupPaths = groupPaths.ToList();
    Initialize();
}
/// <summary>
/// Assigns every feature string a distinct single-bit <see cref="BigInteger"/> value.
/// Accepting features receive bits 0, 1, 2, ... in enumeration order; rejecting features
/// continue with the next bits after the last accepting feature.
/// </summary>
/// <param name="featuerSet">Provides <c>AcceptingFeatures</c> and <c>RejectingFeatures</c>.</param>
/// <returns>An immutable dictionary from feature string to its bit.</returns>
/// <exception cref="ArgumentException">
/// Raised by <c>Dictionary.Add</c> when the same feature string occurs more than once.
/// </exception>
private static IDictionary<string, BigInteger> CreateFeatureString2Bit(FeatuerSet featuerSet)
{
    var bitByFeature = new Dictionary<string, BigInteger>();
    var bitIndex = 0;

    foreach (var acceptingFeature in featuerSet.AcceptingFeatures)
    {
        bitByFeature.Add(acceptingFeature, BigInteger.One << bitIndex);
        bitIndex++;
    }

    // Rejecting features carry on from where the accepting features stopped.
    foreach (var rejectingFeature in featuerSet.RejectingFeatures)
    {
        bitByFeature.Add(rejectingFeature, BigInteger.One << bitIndex);
        bitIndex++;
    }

    return bitByFeature.ToImmutableDictionary();
}
/// <summary>
/// Runs the whole learning procedure: builds CSTs for the seed and target code, derives
/// accepting/rejecting fragments from the first seed CST, constructs the feature set,
/// classifier and encoder, then repeatedly classifies and updates the classifier until
/// <c>Classify</c> reports no suspicious nodes. Progress and timings are written to the console.
/// </summary>
/// <param name="seedPaths">Paths passed to <c>GenerateValidCsts</c> to obtain the seed CSTs.</param>
/// <param name="codePaths">Paths passed to <c>GenerateValidCsts</c> and to the encoder.</param>
/// <param name="searchPattern">Not read by this method.</param>
/// <param name="writer">
/// Optional writer; when not null, <c>encodingResult.WriteResult</c> is called with it
/// after the learning loop.
/// </param>
/// <returns>
/// The last classification result together with the classifier, the encoding result and
/// the feature encoder.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>First()</c> when no seed CST is available.
/// </exception>
/// <exception cref="Exception">
/// Thrown when some vector is a key of both the ideal accepted and the ideal rejected map;
/// the feature strings of the first such vector are printed beforehand.
/// </exception>
public LearningResult Learn(
    ICollection<string> seedPaths, ICollection<string> codePaths, string searchPattern,
    StreamWriter writer = null)
{
    // --- Build CSTs and collect oracle-accepted seed nodes. ---
    var allCsts = GenerateValidCsts(codePaths);
    var seedCsts = GenerateValidCsts(seedPaths).ToList();
    var seedNodes = seedCsts
        .SelectMany(tree => LearningExperimentUtil.GetUppermostNodesByNames(tree, OracleNames))
        .Where(ProtectedIsAcceptedUsingOracle)
        .ToList();

    // --- Fragments are built from the first seed CST only. ---
    var firstSeedCst = seedCsts.First();
    var structuredCode = new StructuredCode(firstSeedCst.Code);
    var acceptingFragments =
        ConstructAcceptingFragments(structuredCode, firstSeedCst, seedNodes);
    var rejectingFragments = ConstructRejectingFragments(structuredCode, firstSeedCst);
    SeedNodeSet.Create(acceptingFragments, this);

    // --- Preparation phase (timed from here). ---
    var preparationStart = Environment.TickCount;
    var extractor = CreateExtractor();
    var seedNodeSet = new SeedNodeSet(
        acceptingFragments.Select(fragment => fragment.Node), seedCsts, this);

    var acceptedSummary = "#Accepted seed nodes: " + seedNodeSet.AcceptedNodes.Count + " ("
                          + acceptingFragments.Count + ")";
    Console.WriteLine(acceptedSummary);
    var rejectedSummary = "#Rejected seed nodes: " + seedNodeSet.RejectedNodes.Count + " ("
                          + rejectingFragments.Count + ")";
    Console.WriteLine(rejectedSummary);

    var featureSet = new FeatuerSet(
        seedNodeSet, extractor, acceptingFragments, rejectingFragments);
    var groupPaths = seedNodeSet.SelectedNodeNames.Select(name => ">" + name + ">");
    var classifier = new Classifier(groupPaths, featureSet);
    Console.WriteLine(
        "#Features: " + featureSet.AcceptingFeatureCount + ", "
        + featureSet.RejectingFeatureCount);
    Console.WriteLine("Inner: " + extractor.IsInner);

    var featureEncoder =
        new FeatureEncoder(seedNodeSet.SelectedNodeNames, extractor, featureSet);
    var encodingResult = featureEncoder.Encode(codePaths, allCsts, this, seedNodeSet);
    Console.WriteLine("#Unique Elements: " + encodingResult.VectorCount);

    // --- A vector that is both ideally accepted and ideally rejected cannot be classified. ---
    var idealAcceptedKeys = encodingResult.IdealAcceptedVector2GroupPath.Keys.ToHashSet();
    var idealRejectedKeys = encodingResult.IdealRejectedVector2GroupPath.Keys.ToHashSet();
    if (idealAcceptedKeys.Overlaps(idealRejectedKeys))
    {
        var rejectedMap = encodingResult.IdealRejectedVector2GroupPath;
        var conflictingVector =
            encodingResult.IdealAcceptedVector2GroupPath.Keys.First(rejectedMap.ContainsKey);
        foreach (var feature in featureEncoder.GetFeatureStringsByVector(conflictingVector))
        {
            Console.WriteLine(Experiment.Beautify(feature));
        }
        throw new Exception("Master predicates can't classify elements!");
    }

    var groupCache = new GroupCache(encodingResult, classifier);
    var trainingSet = encodingResult.CreateTrainingVectorSet();
    classifier.Create(trainingSet, groupCache);
    Experiment.WriteFeatureStrings(Console.Out, classifier, featureEncoder);
    Console.WriteLine("Preparing time: " + (Environment.TickCount - preparationStart));

    // --- Learning loop: stops once Classify yields no suspicious nodes. ---
    var roundsWithoutUpdate = 0;
    var loopStart = Environment.TickCount;
    ClassificationResult classificationResult;
    for (;;)
    {
        var roundStart = Environment.TickCount;
        classificationResult = Classify(
            roundsWithoutUpdate, classifier, groupCache, encodingResult, trainingSet);
        if (classificationResult.SuspiciousNodes == null)
        {
            break;
        }

        var revealedAccepted = RevealSuspiciousElements(
            encodingResult.IdealAcceptedVector2GroupPath.Keys,
            classificationResult.SuspiciousNodes, encodingResult, trainingSet);

        // The counter restarts whenever Update reports true, otherwise it grows by one.
        var updated = classifier.Update(revealedAccepted, trainingSet, groupCache);
        roundsWithoutUpdate = updated ? 0 : roundsWithoutUpdate + 1;

        Console.WriteLine("Time: " + (Environment.TickCount - roundStart));
    }
    classifier.MakeImmutable();

    // --- Reporting. ---
    Console.WriteLine();
    Console.WriteLine("Sum time: " + (Environment.TickCount - loopStart));
    var requiredVectors = trainingSet.Count;
    var idealVectors = encodingResult.IdealVectorSet.Count;
    Console.WriteLine("#Required vectors: " + requiredVectors + " / " + idealVectors);
    if (writer != null)
    {
        encodingResult.WriteResult(writer, trainingSet);
    }
    foreach (var path in classifier.GroupPaths)
    {
        Console.WriteLine(path);
    }

    classifier.Optimize(encodingResult.IdealRejectedVector2GroupPath.Keys, groupCache);

    var learningResult = new LearningResult
    {
        ClassificationResult = classificationResult,
        Classifier = classifier,
        EncodingResult = encodingResult,
        FeatureEncoder = featureEncoder,
    };
    return learningResult;
}
/// <summary>
/// Builds the table that maps each feature string to its own bit. The n-th feature added
/// (accepting features first, then rejecting features) is mapped to <c>1 &lt;&lt; n</c>.
/// </summary>
/// <param name="featuerSet">Feature set whose accepting and rejecting features are enumerated.</param>
/// <returns>The mapping as an immutable dictionary.</returns>
/// <exception cref="ArgumentException">
/// Raised by <c>Dictionary.Add</c> if a feature string is encountered twice.
/// </exception>
private static IDictionary<string, BigInteger> CreateFeatureString2Bit(FeatuerSet featuerSet)
{
    var table = new Dictionary<string, BigInteger>();

    // Every successful Add grows Count by one, so Count is always the next free bit position.
    foreach (var feature in featuerSet.AcceptingFeatures)
    {
        var bit = BigInteger.One << table.Count;
        table.Add(feature, bit);
    }

    foreach (var feature in featuerSet.RejectingFeatures)
    {
        var bit = BigInteger.One << table.Count;
        table.Add(feature, bit);
    }

    return table.ToImmutableDictionary();
}
/// <summary>
/// Learns a classifier from seed code and target code. The method generates CSTs, builds
/// accepting and rejecting fragments from the first seed CST, creates the feature set,
/// classifier and feature encoder, encodes the target code, and then alternates between
/// <c>Classify</c> and <c>classifier.Update</c> until no suspicious nodes remain.
/// Counts and elapsed tick counts are printed to the console along the way.
/// </summary>
/// <param name="seedPaths">Paths from which the seed CSTs are generated.</param>
/// <param name="codePaths">Paths from which all CSTs are generated; also given to the encoder.</param>
/// <param name="searchPattern">Not read by this method.</param>
/// <param name="writer">
/// Optional output; if not null, the encoding result is written to it via <c>WriteResult</c>.
/// </param>
/// <returns>
/// A <c>LearningResult</c> holding the final classification result, the classifier,
/// the encoding result and the feature encoder.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>First()</c> when the seed CST list is empty.
/// </exception>
/// <exception cref="Exception">
/// Thrown when the ideal accepted and ideal rejected vector maps share a key.
/// </exception>
public LearningResult Learn(
    ICollection<string> seedPaths, ICollection<string> codePaths, string searchPattern,
    StreamWriter writer = null)
{
    var allCsts = GenerateValidCsts(codePaths);
    var seedCsts = GenerateValidCsts(seedPaths).ToList();

    // Uppermost nodes with oracle names, filtered by the oracle itself.
    var seedNodes = seedCsts
        .SelectMany(seed => LearningExperimentUtil.GetUppermostNodesByNames(seed, OracleNames))
        .Where(ProtectedIsAcceptedUsingOracle)
        .ToList();

    // Only the first seed CST contributes the structured code and the fragments.
    var primarySeed = seedCsts.First();
    var structuredCode = new StructuredCode(primarySeed.Code);
    var acceptingFragments =
        ConstructAcceptingFragments(structuredCode, primarySeed, seedNodes);
    var rejectingFragments = ConstructRejectingFragments(structuredCode, primarySeed);
    SeedNodeSet.Create(acceptingFragments, this);

    var prepareBegin = Environment.TickCount;
    var extractor = CreateExtractor();
    var acceptedSeedNodes = acceptingFragments.Select(item => item.Node);
    var seedNodeSet = new SeedNodeSet(acceptedSeedNodes, seedCsts, this);
    Console.WriteLine(
        "#Accepted seed nodes: " + seedNodeSet.AcceptedNodes.Count + " ("
        + acceptingFragments.Count + ")");
    Console.WriteLine(
        "#Rejected seed nodes: " + seedNodeSet.RejectedNodes.Count + " ("
        + rejectingFragments.Count + ")");

    var featureSet = new FeatuerSet(
        seedNodeSet, extractor, acceptingFragments, rejectingFragments);
    var groupPaths = seedNodeSet.SelectedNodeNames.Select(nodeName => ">" + nodeName + ">");
    var classifier = new Classifier(groupPaths, featureSet);
    var featureSummary = "#Features: " + featureSet.AcceptingFeatureCount + ", "
                         + featureSet.RejectingFeatureCount;
    Console.WriteLine(featureSummary);
    Console.WriteLine("Inner: " + extractor.IsInner);

    var featureEncoder =
        new FeatureEncoder(seedNodeSet.SelectedNodeNames, extractor, featureSet);
    var encodingResult = featureEncoder.Encode(codePaths, allCsts, this, seedNodeSet);
    Console.WriteLine("#Unique Elements: " + encodingResult.VectorCount);

    // Abort when a single vector is expected to be both accepted and rejected.
    var acceptedVectorSet = encodingResult.IdealAcceptedVector2GroupPath.Keys.ToHashSet();
    var rejectedVectorSet = encodingResult.IdealRejectedVector2GroupPath.Keys.ToHashSet();
    var hasConflict = acceptedVectorSet.Overlaps(rejectedVectorSet);
    if (hasConflict)
    {
        var rejected = encodingResult.IdealRejectedVector2GroupPath;
        var offender =
            encodingResult.IdealAcceptedVector2GroupPath.Keys.First(rejected.ContainsKey);
        foreach (var text in featureEncoder.GetFeatureStringsByVector(offender))
        {
            Console.WriteLine(Experiment.Beautify(text));
        }
        throw new Exception("Master predicates can't classify elements!");
    }

    var groupCache = new GroupCache(encodingResult, classifier);
    var trainingSet = encodingResult.CreateTrainingVectorSet();
    classifier.Create(trainingSet, groupCache);
    Experiment.WriteFeatureStrings(Console.Out, classifier, featureEncoder);
    Console.WriteLine("Preparing time: " + (Environment.TickCount - prepareBegin));

    var stagnantRounds = 0;
    var totalBegin = Environment.TickCount;
    ClassificationResult classificationResult;
    for (;;)
    {
        var iterationBegin = Environment.TickCount;
        classificationResult = Classify(
            stagnantRounds, classifier, groupCache, encodingResult, trainingSet);

        // No suspicious nodes left: learning is finished.
        if (classificationResult.SuspiciousNodes == null)
        {
            break;
        }

        var newlyAccepted = RevealSuspiciousElements(
            encodingResult.IdealAcceptedVector2GroupPath.Keys,
            classificationResult.SuspiciousNodes, encodingResult, trainingSet);

        // Update returning true resets the counter; false advances it.
        if (classifier.Update(newlyAccepted, trainingSet, groupCache))
        {
            stagnantRounds = 0;
        }
        else
        {
            stagnantRounds++;
        }

        Console.WriteLine("Time: " + (Environment.TickCount - iterationBegin));
    }
    classifier.MakeImmutable();

    Console.WriteLine();
    Console.WriteLine("Sum time: " + (Environment.TickCount - totalBegin));
    var usedVectorCount = trainingSet.Count;
    var availableVectorCount = encodingResult.IdealVectorSet.Count;
    Console.WriteLine("#Required vectors: " + usedVectorCount + " / " + availableVectorCount);

    if (writer != null)
    {
        encodingResult.WriteResult(writer, trainingSet);
    }

    foreach (var groupPathText in classifier.GroupPaths)
    {
        Console.WriteLine(groupPathText);
    }

    classifier.Optimize(encodingResult.IdealRejectedVector2GroupPath.Keys, groupCache);

    return new LearningResult
    {
        ClassificationResult = classificationResult,
        Classifier = classifier,
        EncodingResult = encodingResult,
        FeatureEncoder = featureEncoder,
    };
}