/// <summary>
/// Lazily yields, group by group, the candidates that change the accumulated vector.
/// </summary>
/// <remarks>
/// Within each group the candidates are ordered (ascending) by the number of bits in
/// <c>Units[i].Accepting &amp; ((Vector &amp; _rejectingFeatureBitMask) ^ _rejectingFeatureBitMask)</c>.
/// A candidate is yielded only when OR-ing <c>Vector ^ xor</c> into the accumulated vector
/// and masking the result with <paramref name="mask"/> changes the accumulated vector.
/// </remarks>
/// <param name="candidateNodesLists">Candidate nodes; the i-th list is paired with <c>classifier.Units[i]</c>.</param>
/// <param name="xor">Value XOR-ed with each candidate vector before accumulation.</param>
/// <param name="mask">Mask applied to the accumulated vector after each merge.</param>
/// <param name="classifier">Classifier whose units supply the <c>Accepting</c> vectors used for ordering.</param>
/// <returns>The selected candidates, in group order and then in sorted order.</returns>
private IEnumerable<SuspiciousNode> SelectSuspiciousElementsWithMaskForFastRejectionLearning(
        List<List<SuspiciousNode>> candidateNodesLists, BigInteger xor, BigInteger mask,
        Classifier classifier) {
    for (var groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
        var accumulated = BigInteger.Zero;
        var unit = classifier.Units[groupIndex];
        var ordered = candidateNodesLists[groupIndex]
                .OrderBy(node => LearningExperimentUtil.CountBits(
                        unit.Accepting
                        & ((node.Vector & _rejectingFeatureBitMask) ^ _rejectingFeatureBitMask)))
                .ToList();
        foreach (var node in ordered) {
            var merged = (accumulated | (node.Vector ^ xor)) & mask;
            if (merged != accumulated) {
                accumulated = merged;
                yield return node;
            }
        }
    }
}
/// <summary>
/// Lazily yields unused candidates of each group, starting with those that have the fewest
/// bits in <c>Vector &amp; _acceptingFeatureBitMask</c>.
/// </summary>
/// <remarks>
/// Each list is sorted in place by the freshly computed <c>BitsCount</c>. A yielded node is
/// marked <c>Used</c> once the consumer resumes the iterator. A group stops after
/// <c>DetermineCount(classifier, i)</c> nodes have been yielded; a non-positive count never
/// triggers that stop.
/// </remarks>
/// <param name="candidateNodesLists">Candidate nodes, one list per group index.</param>
/// <param name="classifier">Classifier forwarded to <c>DetermineCount</c>.</param>
/// <returns>The selected candidates in group order.</returns>
public IEnumerable<SuspiciousNode> SelectSuspiciousAcceptedNodes(
        IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, Classifier classifier) {
    for (var groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
        var nodes = candidateNodesLists[groupIndex];
        foreach (var node in nodes) {
            var acceptingBits = node.Vector & _acceptingFeatureBitMask;
            node.BitsCount = LearningExperimentUtil.CountBits(acceptingBits);
        }
        nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

        var remaining = DetermineCount(classifier, groupIndex);
        foreach (var node in nodes) {
            if (!node.Used) {
                yield return node;
                node.Used = true;
                remaining--;
                if (remaining == 0) {
                    break;
                }
            }
        }
    }
}
/// <summary>
/// Lazily yields, for each group, the candidates that grow the accumulated vector by the
/// smallest non-zero number of bits, one candidate per round, until no candidate adds a bit.
/// </summary>
/// <remarks>
/// In every round each candidate is compared against the same accumulated vector; the
/// accumulated vector advances only after the whole round has been scanned, and only by the
/// bits of the node that is actually yielded. On ties the first candidate in list order wins.
/// </remarks>
/// <param name="candidateNodesLists">Candidate nodes, one list per group.</param>
/// <param name="xor">Value XOR-ed with each candidate vector before accumulation.</param>
/// <param name="mask">Mask applied to the accumulated vector after each merge.</param>
/// <returns>The selected candidates in group order and then in selection order.</returns>
private IEnumerable<SuspiciousNode> SelectSuspiciousElementsWithMaskWithSmallGrowing(
        IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, BigInteger xor, BigInteger mask) {
    for (int i = 0; i < candidateNodesLists.Count; i++) {
        var vector = BigInteger.Zero;
        var candidates = candidateNodesLists[i];
        while (true) {
            var minDiffCount = int.MaxValue;
            var bestVector = vector;
            SuspiciousNode newNode = null;
            foreach (var candidate in candidates) {
                var newVector = (vector | (candidate.Vector ^ xor)) & mask;
                var diffCount = LearningExperimentUtil.CountBits(newVector ^ vector);
                if (diffCount > 0 && minDiffCount > diffCount) {
                    minDiffCount = diffCount;
                    // Remember the winner only; the baseline must stay fixed for the
                    // rest of this scan so that all candidates are compared fairly.
                    bestVector = newVector;
                    newNode = candidate;
                }
            }
            if (newNode == null) {
                break;
            }
            vector = bestVector;
            yield return newNode;
        }
    }
}
/// <summary>
/// Lazily yields unused candidates of each group, starting with those that have the most
/// bits in <c>(Vector &amp; _rejectingFeatureBitMask) | Units[i].Rejecting</c>.
/// </summary>
/// <remarks>
/// Each list is sorted in place (ascending by the freshly computed <c>BitsCount</c>) and then
/// walked from its end. A yielded node is marked <c>Used</c> once the consumer resumes the
/// iterator. A group stops after <c>DetermineCount(classifier, i)</c> nodes have been yielded;
/// a non-positive count never triggers that stop.
/// </remarks>
/// <param name="candidateNodesLists">Candidate nodes; the i-th list is paired with <c>classifier.Units[i]</c>.</param>
/// <param name="classifier">Classifier supplying the <c>Rejecting</c> vectors and forwarded to <c>DetermineCount</c>.</param>
/// <returns>The selected candidates in group order.</returns>
public IEnumerable<SuspiciousNode> SelectNodesForFastRejectionLearning(
        IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, Classifier classifier) {
    for (var groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
        var nodes = candidateNodesLists[groupIndex];
        var rejecting = classifier.Units[groupIndex].Rejecting;
        foreach (var node in nodes) {
            var rejectingBits = (node.Vector & _rejectingFeatureBitMask) | rejecting;
            node.BitsCount = LearningExperimentUtil.CountBits(rejectingBits);
        }
        nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

        var remaining = DetermineCount(classifier, groupIndex);
        var position = nodes.Count;
        while (--position >= 0) {
            var node = nodes[position];
            if (!node.Used) {
                yield return node;
                node.Used = true;
                remaining--;
                if (remaining == 0) {
                    break;
                }
            }
        }
    }
}
/// <summary>
/// Lazily yields unused candidates of each group, starting with those that share the most
/// bits with <c>Units[i].Accepting</c>.
/// </summary>
/// <remarks>
/// Each list is sorted in place (ascending by the freshly computed <c>BitsCount</c>) and then
/// walked from its end. A yielded node is marked <c>Used</c> once the consumer resumes the
/// iterator. A group stops after <c>DetermineCount(classifier, i)</c> nodes have been yielded;
/// a non-positive count never triggers that stop.
/// </remarks>
/// <param name="candidateNodesLists">Candidate nodes; the i-th list is paired with <c>classifier.Units[i]</c>.</param>
/// <param name="classifier">Classifier supplying the <c>Accepting</c> vectors and forwarded to <c>DetermineCount</c>.</param>
/// <returns>The selected candidates in group order.</returns>
public IEnumerable<SuspiciousNode> SelectNodesForSlowAcceptanceLearning(
        IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, Classifier classifier) {
    for (var groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
        var nodes = candidateNodesLists[groupIndex];
        var accepting = classifier.Units[groupIndex].Accepting;
        foreach (var node in nodes) {
            node.BitsCount = LearningExperimentUtil.CountBits(node.Vector & accepting);
        }
        nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

        var remaining = DetermineCount(classifier, groupIndex);
        for (var position = nodes.Count; position-- > 0;) {
            var node = nodes[position];
            if (node.Used) {
                continue;
            }
            yield return node;
            node.Used = true;
            remaining -= 1;
            if (remaining == 0) {
                break;
            }
        }
    }
}
/// <summary>
/// Creates the seed nodes for the given fragments, re-targets each seed node to the first
/// node of its single-child chain whose name is one of the selected node names, and stores
/// on each seed node a path string built from the names and rule ids of its ancestors.
/// </summary>
/// <param name="structuredCode">The structured code, forwarded to <c>CreateSeedNodes</c>.</param>
/// <param name="cst">
/// The concrete syntax tree, forwarded to <c>CreateSeedNodes</c> and used to locate the
/// innermost node of each seed node's surrounding range.
/// </param>
/// <param name="fragments">The selected fragments, forwarded to <c>CreateSeedNodes</c>.</param>
/// <returns>
/// The list returned by <c>CreateSeedNodes</c>, with <c>Node</c> and <c>Path</c> updated
/// on every element.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// The ancestor walk reached the top of the tree without meeting the (non-null) innermost
/// node of the surrounding range.
/// </exception>
public static List<SeedNode> ConstructAcceptingFragments(
        StructuredCode structuredCode, CstNode cst, IList<SelectedFragment> fragments) {
    var seedNodes = CreateSeedNodes(structuredCode, cst, fragments);
    var uppermostSeedAcceptedNodes = seedNodes
            .Select(node => node.Node.AncestorWithSingleChild())
            .ToImmutableHashSet();
    // We can select multiple nodes corresponding to a fragment selected by a user,
    // which means there are multiple choices of node names for filtering nodes.
    // This code tries to select good node names so that nodes wanted by a user are not filtered out.
    var selectedNodeNames = LearningExperimentUtil.FindGoodNodeNames(uppermostSeedAcceptedNodes)
            .ToImmutableHashSet();
    foreach (var seedNode in seedNodes) {
        // Update the node according to the selected node names, keeping the code range of the node.
        seedNode.Node = seedNode.Node.DescendantsOfSingleAndSelf()
                .First(e => selectedNodeNames.Contains(e.Name));
        var rootNode = seedNode.SurroundingRange.FindInnermostNode(cst);
        var node = seedNode.Node;
        var path = node.Name;
        // Walk up from the seed node until the surrounding-range node is reached,
        // appending "<" + name + rule id for every ancestor strictly below it.
        while ((node = node.Parent) != rootNode) {
            if (node == null) {
                // Fail with a descriptive error instead of a NullReferenceException.
                throw new System.InvalidOperationException(
                        "The innermost node of the surrounding range is not an ancestor of the seed node.");
            }
            path = path + "<" + node.Name + node.RuleId;
        }
        seedNode.Path = path;
    }
    return seedNodes;
}
/// <summary>
/// Encodes the seed nodes (when given) and the uppermost target nodes of all CSTs into an
/// <c>EncodingResult</c>, using an on-disk cache file in the current directory when one exists.
/// </summary>
/// <remarks>
/// The cache file name is derived from <c>string.GetHashCode()</c> of the joined paths, of the
/// first and last path, and from the path count. When the file exists it is deserialized and
/// returned immediately; if deserialization throws, the exception is written to standard
/// error and the result is recomputed and the file rewritten.
/// NOTE(review): <c>string.GetHashCode()</c> is not guaranteed to be stable across processes or
/// runtime versions, so cache hits may depend on the runtime in use.
/// NOTE(review): <c>BinaryFormatter</c> is unsafe on untrusted input; the cache file must be
/// trusted.
/// </remarks>
/// <param name="codePaths">Paths identifying the encoded code; an empty collection disables caching.</param>
/// <param name="allCsts">CSTs whose uppermost nodes (by the selected node names) are encoded.</param>
/// <param name="oracle">Forwarded to <c>EncodeSeedNodes</c> and <c>EncodeTargetNodes</c>.</param>
/// <param name="seedNodeSet">Optional seed nodes; when null no seed information is encoded.</param>
/// <returns>The immutable encoding result, either loaded from the cache or freshly computed.</returns>
public EncodingResult Encode(
        ICollection<string> codePaths, IEnumerable<CstNode> allCsts, LearningExperiment oracle,
        SeedNodeSet seedNodeSet = null) {
    string cacheFileName = null;
    if (codePaths.Count > 0) {
        var allPathsHash = string.Join(",", codePaths).GetHashCode();
        var endPathsHash = (codePaths.First() + "," + codePaths.Last() + ",").GetHashCode();
        cacheFileName = allPathsHash + "_" + endPathsHash + "_" + codePaths.Count + ".encoded";
    }

    var formatter = new BinaryFormatter();
    var cacheExists = cacheFileName != null && File.Exists(cacheFileName);
    if (cacheExists) {
        using (var input = new FileStream(cacheFileName, FileMode.Open, FileAccess.Read)) {
            try {
                var cached = ((EncodingResult)formatter.Deserialize(input)).MakeImmutable();
                Console.WriteLine("############### Warning ###############");
                Console.WriteLine("Cache file of encoded result is used.");
                Console.WriteLine("#######################################");
                return cached;
            } catch (Exception e) {
                // Best effort: report the broken cache and fall through to re-encoding.
                Console.Error.WriteLine(e);
            }
        }
    }

    var uppermostNodes = allCsts.SelectMany(
            tree => LearningExperimentUtil.GetUppermostNodesByNames(tree, _selectedNodeNames));
    var encoded = new EncodingResult();
    if (seedNodeSet != null) {
        encoded.SeedAcceptedNodeCount = seedNodeSet.AcceptedNodes.Count;
        encoded.SeedNodeCount = encoded.SeedAcceptedNodeCount + seedNodeSet.RejectedNodes.Count;
        EncodeSeedNodes(
                seedNodeSet.AcceptedNodes, encoded, encoded.IdealAcceptedVector2GroupPath,
                encoded.SeedAcceptedVector2GroupPath, oracle);
        EncodeSeedNodes(
                seedNodeSet.RejectedNodes, encoded, encoded.IdealRejectedVector2GroupPath,
                encoded.SeedRejectedVector2GroupPath, oracle);
    }
    EncodeTargetNodes(uppermostNodes, encoded, oracle);

    if (cacheFileName != null) {
        using (var output = new FileStream(cacheFileName, FileMode.Create, FileAccess.Write)) {
            formatter.Serialize(output, encoded);
        }
    }
    return encoded.MakeImmutable();
}
/// <summary>
/// Records the group path of <paramref name="node"/> for <paramref name="vector"/>.
/// </summary>
/// <remarks>
/// When no (non-null) path is stored for the vector yet, the node's group path is added.
/// Otherwise the stored path is replaced by the result of
/// <c>LearningExperimentUtil.GetCommonSuffix</c> applied to the stored path and the new one.
/// </remarks>
/// <param name="vector2GroupPath">The map from vectors to group paths that is updated in place.</param>
/// <param name="vector">The key to update.</param>
/// <param name="node">The node whose group path is obtained through <c>GetGroupPathFromNode</c>.</param>
private void UpdateVector2GroupPath(
        IDictionary<BigInteger, string> vector2GroupPath, BigInteger vector, CstNode node) {
    var nodePath = GetGroupPathFromNode(node);
    var storedPath = vector2GroupPath.GetValueOrDefault(vector);
    if (storedPath != null) {
        vector2GroupPath[vector] = LearningExperimentUtil.GetCommonSuffix(storedPath, nodePath);
        return;
    }
    vector2GroupPath.Add(vector, nodePath);
}
/// <summary>
/// Lazily yields, for each group, unused candidates whose vectors clear at least one more bit
/// of the running intersection that starts at <c>_rejectingFeatureBitMask</c>.
/// </summary>
/// <remarks>
/// Each list is sorted in place, ascending by the number of bits in
/// <c>(Vector &amp; _rejectingFeatureBitMask) | Units[i].Rejecting</c>. Candidates that are
/// already marked <c>Used</c> are skipped. A yielded node is marked <c>Used</c> once the
/// consumer resumes the iterator. A group stops after <c>DetermineStrongCount(i, classifier)</c>
/// nodes have been yielded; a non-positive count never triggers that stop.
/// </remarks>
/// <param name="candidateNodesLists">Candidate nodes; the i-th list is paired with <c>classifier.Units[i]</c>.</param>
/// <param name="classifier">Classifier supplying the <c>Rejecting</c> vectors and forwarded to <c>DetermineStrongCount</c>.</param>
/// <returns>The selected candidates in group order.</returns>
public IEnumerable<SuspiciousNode> SelectNodesForSlowRejectionLearningStrongly(
        IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, Classifier classifier) {
    for (int i = 0; i < candidateNodesLists.Count; i++) {
        var candidates = candidateNodesLists[i];
        var rejectingUnit = classifier.Units[i].Rejecting;
        foreach (var target in candidates) {
            target.BitsCount = LearningExperimentUtil.CountBits(
                    (target.Vector & _rejectingFeatureBitMask) | rejectingUnit);
        }
        candidates.Sort((t1, t2) => t1.BitsCount.CompareTo(t2.BitsCount));
        var vector = _rejectingFeatureBitMask;
        var count = DetermineStrongCount(i, classifier);
        foreach (var candidate in candidates) {
            // Skip nodes that were already handed out; only fresh nodes may be selected.
            if (candidate.Used) {
                continue;
            }
            var newVector = vector & candidate.Vector;
            if (newVector == vector) {
                // This candidate does not narrow the intersection any further.
                continue;
            }
            vector = newVector;
            yield return candidate;
            candidate.Used = true;
            if (--count == 0) {
                break;
            }
        }
    }
}
/// <summary>
/// Picks the candidate whose masked vector is farthest from its nearest existing vector.
/// </summary>
/// <remarks>
/// The distance between two vectors is the number of bits counted in the XOR of their masked
/// values. For each candidate the minimum distance to any existing vector is computed, and the
/// candidate with the largest such minimum is returned (the first one on ties). Candidates
/// whose minimum distance is zero are never selected.
/// <paramref name="existings"/> is enumerated and masked exactly once.
/// </remarks>
/// <param name="existings">Vectors already selected.</param>
/// <param name="candidates">Nodes to choose from.</param>
/// <param name="mask">Mask applied to every vector before comparison.</param>
/// <returns>
/// The first candidate (or null if there is none) when <paramref name="existings"/> is empty;
/// otherwise the most different candidate, or null when every candidate has distance zero.
/// </returns>
private SuspiciousNode SelectMostDifferentElement(
        IEnumerable<BigInteger> existings, IEnumerable<SuspiciousNode> candidates,
        BigInteger mask) {
    // Materialize once: avoids re-enumerating a possibly lazy sequence and
    // re-masking every existing vector for every candidate.
    var maskedExistings = existings.Select(f => f & mask).ToList();
    if (maskedExistings.Count == 0) {
        return candidates.FirstOrDefault();
    }
    var maxDiff = 0;
    SuspiciousNode ret = null;
    foreach (var candidate in candidates) {
        var vector = candidate.Vector & mask;
        var diff = maskedExistings.Min(f => LearningExperimentUtil.CountBits(f ^ vector));
        if (maxDiff < diff) {
            maxDiff = diff;
            ret = candidate;
        }
    }
    return ret;
}
/// <summary>
/// Lazily yields, for each group, unused candidates whose vectors add at least one more bit
/// (within <c>_acceptingFeatureBitMask</c>) to the running union that starts at zero.
/// </summary>
/// <remarks>
/// Each list is sorted in place, ascending by the number of bits in
/// <c>Vector &amp; Units[i].Accepting</c>. A yielded node is marked <c>Used</c> once the
/// consumer resumes the iterator. A group stops after <c>DetermineStrongCount(i, classifier)</c>
/// nodes have been yielded; a non-positive count never triggers that stop.
/// </remarks>
/// <param name="candidateNodesLists">Candidate nodes; the i-th list is paired with <c>classifier.Units[i]</c>.</param>
/// <param name="classifier">Classifier supplying the <c>Accepting</c> vectors and forwarded to <c>DetermineStrongCount</c>.</param>
/// <returns>The selected candidates in group order.</returns>
public IEnumerable<SuspiciousNode> SelectNodesForFastAcceptanceLearningStrongly(
        IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, Classifier classifier) {
    for (var groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
        var nodes = candidateNodesLists[groupIndex];
        var accepting = classifier.Units[groupIndex].Accepting;
        foreach (var node in nodes) {
            node.BitsCount = LearningExperimentUtil.CountBits(node.Vector & accepting);
        }
        nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

        var covered = BigInteger.Zero;
        var remaining = DetermineStrongCount(groupIndex, classifier);
        foreach (var node in nodes) {
            if (!node.Used) {
                var extended = (covered | node.Vector) & _acceptingFeatureBitMask;
                if (extended != covered) {
                    covered = extended;
                    yield return node;
                    node.Used = true;
                    remaining--;
                    if (remaining == 0) {
                        break;
                    }
                }
            }
        }
    }
}
/// <summary>
/// Runs the learning pipeline: generates CSTs for the seed and target paths, builds the seed
/// node set, feature set, classifier and encoding result, then repeatedly classifies and
/// updates the classifier until <c>Classify</c> returns a result whose
/// <c>SuspiciousNodes</c> is null.
/// </summary>
/// <remarks>
/// Steps visible in the body, in order:
/// 1. Seed nodes are the uppermost nodes (by <c>OracleNames</c>) of all seed CSTs that pass
///    <c>ProtectedIsAcceptedUsingOracle</c>. Only the first seed CST supplies the code for
///    <c>StructuredCode</c> and is passed to the accepting/rejecting fragment construction.
/// 2. The encoding result is produced by <c>FeatureEncoder.Encode</c>; if the ideal accepted
///    and ideal rejected vector key sets overlap, the feature strings of the first shared
///    vector are printed and an exception is thrown.
/// 3. In the loop, <c>count</c> is incremented whenever <c>classifier.Update</c> returns false
///    and reset to 0 otherwise; it is passed to <c>Classify</c> on the next iteration.
/// 4. After the loop the classifier is made immutable, statistics are printed, the result is
///    optionally written to <paramref name="writer"/>, and <c>classifier.Optimize</c> is called.
/// Progress and timings (based on <c>Environment.TickCount</c>) are written to the console.
/// NOTE(review): the return value of <c>SeedNodeSet.Create(acceptingFragments, this)</c> is
/// discarded; whether the call is needed for a side effect cannot be told from here.
/// </remarks>
/// <param name="seedPaths">Paths passed to <c>GenerateValidCsts</c> to obtain the seed CSTs; at least one CST is required because <c>First()</c> is called on the result.</param>
/// <param name="codePaths">Paths passed to <c>GenerateValidCsts</c> to obtain the target CSTs and to <c>FeatureEncoder.Encode</c>.</param>
/// <param name="searchPattern">Not referenced in this method body.</param>
/// <param name="writer">Optional writer; when non-null, <c>encodingResult.WriteResult</c> is called with it.</param>
/// <returns>A <c>LearningResult</c> holding the last classification result, the classifier, the encoding result and the feature encoder.</returns>
/// <exception cref="Exception">The ideal accepted and ideal rejected vector sets share at least one vector.</exception>
public LearningResult Learn( ICollection <string> seedPaths, ICollection <string> codePaths, string searchPattern, StreamWriter writer = null) { var allCsts = GenerateValidCsts(codePaths); var seedCsts = GenerateValidCsts(seedPaths).ToList(); var seedNodes = seedCsts .SelectMany( cst => LearningExperimentUtil.GetUppermostNodesByNames(cst, OracleNames)) .Where(ProtectedIsAcceptedUsingOracle) .ToList(); var seedCst = seedCsts.First(); var seedCode = seedCst.Code; var structuredCode = new StructuredCode(seedCode); var acceptingFragments = ConstructAcceptingFragments(structuredCode, seedCst, seedNodes); var rejectingFragments = ConstructRejectingFragments(structuredCode, seedCst); SeedNodeSet.Create(acceptingFragments, this); var preparingTime = Environment.TickCount; var extractor = CreateExtractor(); var seedNodeSet = new SeedNodeSet(acceptingFragments.Select(f => f.Node), seedCsts, this); Console.WriteLine("#Accepted seed nodes: " + seedNodeSet.AcceptedNodes.Count + " (" + acceptingFragments.Count + ")"); Console.WriteLine("#Rejected seed nodes: " + seedNodeSet.RejectedNodes.Count + " (" + rejectingFragments.Count + ")"); var featureSet = new FeatuerSet(seedNodeSet, extractor, acceptingFragments, rejectingFragments); var groupPaths = seedNodeSet.SelectedNodeNames.Select(n => ">" + n + ">"); var classifier = new Classifier(groupPaths, featureSet); Console.WriteLine( "#Features: " + featureSet.AcceptingFeatureCount + ", " + featureSet.RejectingFeatureCount); Console.WriteLine("Inner: " + extractor.IsInner); var featureEncoder = new FeatureEncoder(seedNodeSet.SelectedNodeNames, extractor, featureSet); var encodingResult = featureEncoder.Encode(codePaths, allCsts, this, seedNodeSet); Console.WriteLine("#Unique Elements: " + encodingResult.VectorCount); if (encodingResult.IdealAcceptedVector2GroupPath.Keys.ToHashSet() .Overlaps(encodingResult.IdealRejectedVector2GroupPath.Keys.ToHashSet())) { var others = encodingResult.IdealRejectedVector2GroupPath; var vector = 
encodingResult.IdealAcceptedVector2GroupPath.Keys.First(others.ContainsKey); foreach (var featureString in featureEncoder.GetFeatureStringsByVector(vector)) { Console.WriteLine(Experiment.Beautify(featureString)); } throw new Exception("Master predicates can't classify elements!"); } var groupCache = new GroupCache(encodingResult, classifier); var trainingSet = encodingResult.CreateTrainingVectorSet(); classifier.Create(trainingSet, groupCache); Experiment.WriteFeatureStrings(Console.Out, classifier, featureEncoder); Console.WriteLine("Preparing time: " + (Environment.TickCount - preparingTime)); var count = 0; var sumTime = Environment.TickCount; ClassificationResult classificationResult; while (true) { var time = Environment.TickCount; classificationResult = Classify(count, classifier, groupCache, encodingResult, trainingSet); if (classificationResult.SuspiciousNodes == null) { break; } var additionalAcceptedSet = RevealSuspiciousElements( encodingResult.IdealAcceptedVector2GroupPath.Keys, classificationResult.SuspiciousNodes, encodingResult, trainingSet); if (!classifier.Update(additionalAcceptedSet, trainingSet, groupCache)) { count++; } else { count = 0; } Console.WriteLine("Time: " + (Environment.TickCount - time)); } classifier.MakeImmutable(); Console.WriteLine(); Console.WriteLine("Sum time: " + (Environment.TickCount - sumTime)); var trainingVectorCount = trainingSet.Count; var idealVectorCount = encodingResult.IdealVectorSet.Count; Console.WriteLine("#Required vectors: " + trainingVectorCount + " / " + idealVectorCount); if (writer != null) { encodingResult.WriteResult(writer, trainingSet); } foreach (var groupPath in classifier.GroupPaths) { Console.WriteLine(groupPath); } classifier.Optimize(encodingResult.IdealRejectedVector2GroupPath.Keys, groupCache); return(new LearningResult { ClassificationResult = classificationResult, Classifier = classifier, EncodingResult = encodingResult, FeatureEncoder = featureEncoder, }); }
/// <summary>
/// Counts the uppermost nodes of <paramref name="cst"/> (selected by <c>OracleNames</c>)
/// for which <c>IsAcceptedUsingOracle</c> returns true.
/// </summary>
/// <param name="cst">The concrete syntax tree to inspect.</param>
/// <returns>The number of accepted uppermost nodes.</returns>
public int CountUsingOracle(CstNode cst) {
    var uppermostNodes = LearningExperimentUtil.GetUppermostNodesByNames(cst, OracleNames);
    return uppermostNodes.Count(IsAcceptedUsingOracle);
}
/// <summary>
/// For every unit, counts the bits of its <c>Rejecting</c> vector after shifting it right by
/// <c>AcceptingFeatureCount</c> positions.
/// </summary>
/// <returns>A lazily evaluated sequence with one bit count per unit, in unit order.</returns>
public IEnumerable<int> CountRejectingFeatures() {
    return from unit in Units
           select LearningExperimentUtil.CountBits(unit.Rejecting >> AcceptingFeatureCount);
}
/// <summary>
/// For every unit, counts the bits of its <c>Accepting</c> vector that fall inside
/// <c>AcceptingFeatureBitMask</c>.
/// </summary>
/// <returns>A lazily evaluated sequence with one bit count per unit, in unit order.</returns>
public IEnumerable<int> CountAcceptingFeatures() {
    return from unit in Units
           select LearningExperimentUtil.CountBits(unit.Accepting & AcceptingFeatureBitMask);
}