/// <summary>
/// Builds the accepting and rejecting feature lists of this set from the
/// seed nodes and the fragments selected for them.
/// </summary>
/// <param name="seedNodeSet">Supplies <c>AcceptedNodes</c> and <c>RejectedNodes</c>.</param>
/// <param name="extractor">Forwarded unchanged to both feature-creation helpers.</param>
/// <param name="acceptingFragments">Forwarded to <c>CreateAcceptingFeatures</c>.</param>
/// <param name="rejectingFragments">Forwarded to <c>CreateRejectingFeatures</c>.</param>
public FeatuerSet(
        SeedNodeSet seedNodeSet, FeatureExtractor extractor,
        ICollection<SelectedFragment> acceptingFragments,
        ICollection<SelectedFragment> rejectingFragments) {
    // Accepting side first, then rejecting side; each result is frozen into
    // an immutable list before being stored.
    var accepting = CreateAcceptingFeatures(
            seedNodeSet.AcceptedNodes, extractor, acceptingFragments);
    AcceptingFeatures = accepting.ToImmutableList();

    var rejecting = CreateRejectingFeatures(
            seedNodeSet.RejectedNodes, extractor, rejectingFragments);
    RejectingFeatures = rejecting.ToImmutableList();
}
/// <summary>
/// Encodes the seed nodes (when a seed node set is given) and the uppermost
/// target nodes of <paramref name="allCsts"/> into an <see cref="EncodingResult"/>.
/// When <paramref name="codePaths"/> is non-empty, a cache file named after the
/// paths is consulted first and (re)written after a fresh encoding.
/// </summary>
/// <param name="codePaths">Paths used only to derive the cache file name.</param>
/// <param name="allCsts">CSTs whose uppermost nodes (by the selected node names) are encoded.</param>
/// <param name="oracle">Forwarded to the seed- and target-node encoding helpers.</param>
/// <param name="seedNodeSet">Optional seed nodes; skipped entirely when <c>null</c>.</param>
/// <returns>The immutable form of the cached or freshly computed result.</returns>
public EncodingResult Encode(
        ICollection<string> codePaths, IEnumerable<CstNode> allCsts,
        LearningExperiment oracle, SeedNodeSet seedNodeSet = null) {
    // Cache file name: "<hash of all paths>_<hash of first+last path>_<count>.encoded".
    // It is a bare file name, so it resolves against the current directory.
    // NOTE(review): string.GetHashCode() is not documented as stable across
    // runtimes/processes, so cache hits may not survive such changes -- confirm
    // this is acceptable.
    // NOTE(review): the name depends on codePaths only, not on oracle or
    // seedNodeSet -- presumably why a warning is printed on a cache hit.
    string cachePath = null;
    if (codePaths.Count > 0) {
        var allPathsHash = string.Join(",", codePaths).GetHashCode();
        var edgePathsHash =
                (codePaths.First() + "," + codePaths.Last() + ",").GetHashCode();
        cachePath = allPathsHash + "_" + edgePathsHash + "_" + codePaths.Count
                    + ".encoded";
    }

    // NOTE(review): BinaryFormatter deserialization is unsafe on untrusted
    // files; flagged here, deliberately not replaced.
    var serializer = new BinaryFormatter();

    var hasCache = cachePath != null && File.Exists(cachePath);
    if (hasCache) {
        using (var input = new FileStream(cachePath, FileMode.Open, FileAccess.Read)) {
            try {
                var cached = (EncodingResult)serializer.Deserialize(input);
                var immutableCached = cached.MakeImmutable();
                Console.WriteLine("############### Warning ###############");
                Console.WriteLine("Cache file of encoded result is used.");
                Console.WriteLine("#######################################");
                return immutableCached;
            } catch (Exception e) {
                // Best effort: an unreadable cache is reported and then
                // ignored; encoding continues from scratch below.
                Console.Error.WriteLine(e);
            }
        }
    }

    var uppermostNodes = allCsts.SelectMany(
            root => LearningExperimentUtil.GetUppermostNodesByNames(
                    root, _selectedNodeNames));
    var encoded = new EncodingResult();

    if (seedNodeSet != null) {
        encoded.SeedAcceptedNodeCount = seedNodeSet.AcceptedNodes.Count;
        encoded.SeedNodeCount =
                encoded.SeedAcceptedNodeCount + seedNodeSet.RejectedNodes.Count;
        // Accepted seeds first, then rejected seeds, each into its own pair
        // of ideal/seed vector maps.
        EncodeSeedNodes(
                seedNodeSet.AcceptedNodes, encoded,
                encoded.IdealAcceptedVector2GroupPath,
                encoded.SeedAcceptedVector2GroupPath, oracle);
        EncodeSeedNodes(
                seedNodeSet.RejectedNodes, encoded,
                encoded.IdealRejectedVector2GroupPath,
                encoded.SeedRejectedVector2GroupPath, oracle);
    }
    EncodeTargetNodes(uppermostNodes, encoded, oracle);

    // The mutable result is serialized before it is made immutable.
    if (cachePath != null) {
        using (var output = new FileStream(cachePath, FileMode.Create, FileAccess.Write)) {
            serializer.Serialize(output, encoded);
        }
    }
    return encoded.MakeImmutable();
}
/// <summary>
/// Runs one learning session: builds seed fragments from the first seed CST,
/// encodes the target code, then alternates classification and classifier
/// updates until <c>Classify</c> reports no suspicious nodes. Progress and
/// timings are written to the console along the way.
/// </summary>
/// <param name="seedPaths">Paths passed to <c>GenerateValidCsts</c> to obtain the seed CSTs;
/// the first resulting CST supplies the seed code.</param>
/// <param name="codePaths">Paths passed to <c>GenerateValidCsts</c> for the target CSTs and
/// to the feature encoder.</param>
/// <param name="searchPattern">Not referenced by this method body.</param>
/// <param name="writer">Optional; when non-null, the encoding result is written to it.</param>
/// <returns>The final classification result together with the classifier, the encoding
/// result and the feature encoder.</returns>
/// <exception cref="Exception">An ideal accepted vector is also an ideal rejected vector.</exception>
public LearningResult Learn(
        ICollection<string> seedPaths, ICollection<string> codePaths, string searchPattern,
        StreamWriter writer = null) {
    var targetCsts = GenerateValidCsts(codePaths);
    var seedTrees = GenerateValidCsts(seedPaths).ToList();
    var oracleAcceptedSeeds = seedTrees
            .SelectMany(
                    root => LearningExperimentUtil.GetUppermostNodesByNames(root, OracleNames))
            .Where(ProtectedIsAcceptedUsingOracle)
            .ToList();

    // Only the first seed CST is used to build the fragments.
    var firstSeedCst = seedTrees.First();
    var structured = new StructuredCode(firstSeedCst.Code);
    var acceptingFragments =
            ConstructAcceptingFragments(structured, firstSeedCst, oracleAcceptedSeeds);
    var rejectingFragments = ConstructRejectingFragments(structured, firstSeedCst);
    // Return value intentionally unused here, as in the original code.
    SeedNodeSet.Create(acceptingFragments, this);

    // ---- Preparation phase (timed via Environment.TickCount) ----
    var preparationStart = Environment.TickCount;
    var extractor = CreateExtractor();
    var seeds = new SeedNodeSet(
            acceptingFragments.Select(fragment => fragment.Node), seedTrees, this);

    var acceptedSummary = "#Accepted seed nodes: " + seeds.AcceptedNodes.Count + " ("
                          + acceptingFragments.Count + ")";
    Console.WriteLine(acceptedSummary);
    var rejectedSummary = "#Rejected seed nodes: " + seeds.RejectedNodes.Count + " ("
                          + rejectingFragments.Count + ")";
    Console.WriteLine(rejectedSummary);

    var features = new FeatuerSet(seeds, extractor, acceptingFragments, rejectingFragments);
    var initialGroupPaths = seeds.SelectedNodeNames.Select(name => ">" + name + ">");
    var classifier = new Classifier(initialGroupPaths, features);
    Console.WriteLine(
            "#Features: " + features.AcceptingFeatureCount + ", "
            + features.RejectingFeatureCount);
    Console.WriteLine("Inner: " + extractor.IsInner);

    var encoder = new FeatureEncoder(seeds.SelectedNodeNames, extractor, features);
    var encoding = encoder.Encode(codePaths, targetCsts, this, seeds);
    Console.WriteLine("#Unique Elements: " + encoding.VectorCount);

    // A vector that is both ideally accepted and ideally rejected cannot be
    // separated; print the features of one such vector and abort.
    var idealAcceptedKeys = encoding.IdealAcceptedVector2GroupPath.Keys.ToHashSet();
    var idealRejectedKeys = encoding.IdealRejectedVector2GroupPath.Keys.ToHashSet();
    if (idealAcceptedKeys.Overlaps(idealRejectedKeys)) {
        var rejectedLookup = encoding.IdealRejectedVector2GroupPath;
        var conflicting =
                encoding.IdealAcceptedVector2GroupPath.Keys.First(rejectedLookup.ContainsKey);
        foreach (var description in encoder.GetFeatureStringsByVector(conflicting)) {
            Console.WriteLine(Experiment.Beautify(description));
        }
        throw new Exception("Master predicates can't classify elements!");
    }

    var groups = new GroupCache(encoding, classifier);
    var training = encoding.CreateTrainingVectorSet();
    classifier.Create(training, groups);
    Experiment.WriteFeatureStrings(Console.Out, classifier, encoder);
    Console.WriteLine("Preparing time: " + (Environment.TickCount - preparationStart));

    // ---- Classify / update loop ----
    // The counter passed to Classify counts consecutive rounds in which
    // classifier.Update returned false; it resets to 0 when Update returns true.
    var consecutiveFalseUpdates = 0;
    var loopStart = Environment.TickCount;
    ClassificationResult lastClassification;
    while (true) {
        var roundStart = Environment.TickCount;
        lastClassification = Classify(
                consecutiveFalseUpdates, classifier, groups, encoding, training);
        if (lastClassification.SuspiciousNodes == null) {
            // No suspicious nodes reported: the loop is finished.
            break;
        }
        var revealedAccepted = RevealSuspiciousElements(
                encoding.IdealAcceptedVector2GroupPath.Keys,
                lastClassification.SuspiciousNodes, encoding, training);
        var updated = classifier.Update(revealedAccepted, training, groups);
        consecutiveFalseUpdates = updated ? 0 : consecutiveFalseUpdates + 1;
        Console.WriteLine("Time: " + (Environment.TickCount - roundStart));
    }
    classifier.MakeImmutable();

    // ---- Reporting ----
    Console.WriteLine();
    Console.WriteLine("Sum time: " + (Environment.TickCount - loopStart));
    var requiredVectors = training.Count;
    var idealVectors = encoding.IdealVectorSet.Count;
    Console.WriteLine("#Required vectors: " + requiredVectors + " / " + idealVectors);
    if (writer != null) {
        encoding.WriteResult(writer, training);
    }
    foreach (var path in classifier.GroupPaths) {
        Console.WriteLine(path);
    }

    // Optimization runs after reporting, against the ideal rejected vectors.
    classifier.Optimize(encoding.IdealRejectedVector2GroupPath.Keys, groups);
    return new LearningResult {
        ClassificationResult = lastClassification,
        Classifier = classifier,
        EncodingResult = encoding,
        FeatureEncoder = encoder,
    };
}