/// <summary>
/// Rebuilds the vector-to-group-index lookup from scratch using the given classifier.
/// </summary>
/// <param name="classifier">Classifier used to translate each group path into a group index.</param>
/// <remarks>
/// Ideal accepted vectors are registered first, then ideal rejected vectors. Entries are
/// inserted with <c>Add</c>, so a vector that occurs in both sets is not silently overwritten.
/// </remarks>
public void UpdateGroupCache(Classifier classifier) {
    // The field is replaced before it is filled, exactly as a fresh cache.
    var lookup = new Dictionary<BigInteger, int>();
    _vector2GroupIndex = lookup;

    foreach (var acceptedEntry in _encodingResult.IdealAcceptedVector2GroupPath) {
        lookup.Add(acceptedEntry.Key, classifier.GetGroupIndex(acceptedEntry.Value));
    }

    foreach (var rejectedEntry in _encodingResult.IdealRejectedVector2GroupPath) {
        lookup.Add(rejectedEntry.Key, classifier.GetGroupIndex(rejectedEntry.Value));
    }
}
/// <summary>
/// Collects the nodes to be examined next, using the selection strategy chosen by
/// <paramref name="count"/>.
/// </summary>
/// <param name="count">
/// 0 runs the regular selectors, 1 runs the "Strongly" slow-learning selectors; any other
/// value selects nothing and makes the method return <c>null</c>.
/// </param>
/// <param name="classifier">Classifier forwarded to every selector.</param>
/// <param name="acceptAccept">Per-group candidate lists, passed to the selectors as shown below.</param>
/// <param name="acceptReject">Per-group candidate lists, passed to the selectors as shown below.</param>
/// <param name="rejectAccept">Per-group candidate lists, passed to the selectors as shown below.</param>
/// <param name="rejectReject">Per-group candidate lists, passed to the selectors as shown below.</param>
/// <returns>The set of selected nodes, or <c>null</c> when no strategy exists for <paramref name="count"/>.</returns>
public HashSet<SuspiciousNode> SelectSuspiciousNodes(
        int count, Classifier classifier,
        List<List<SuspiciousNode>> acceptAccept, List<List<SuspiciousNode>> acceptReject,
        List<List<SuspiciousNode>> rejectAccept, List<List<SuspiciousNode>> rejectReject) {
    if (count != 0 && count != 1) {
        return null;
    }

    var selected = new HashSet<SuspiciousNode>();
    var startTick = Environment.TickCount;

    // The selectors are lazy iterators and several of them re-sort the lists and flag nodes
    // as used, so the order of the calls below must not be changed.
    if (count == 0) {
        selected.AddRange(SelectSuspiciousAcceptedNodes(acceptAccept, classifier));
        selected.AddRange(SelectSuspiciousAcceptedNodes(acceptReject, classifier));
        selected.AddRange(SelectSuspiciousRejectedNodes(acceptAccept, classifier));
        selected.AddRange(SelectSuspiciousRejectedNodes(rejectAccept, classifier));
        // Only the "fast" learning selectors are active in this mode; the "slow"
        // counterparts are intentionally not called.
        selected.AddRange(SelectNodesForFastAcceptanceLearning(rejectAccept, classifier));
        selected.AddRange(SelectNodesForFastAcceptanceLearning(rejectReject, classifier));
        selected.AddRange(SelectNodesForFastRejectionLearning(acceptReject, classifier));
        selected.AddRange(SelectNodesForFastRejectionLearning(rejectReject, classifier));
        Console.WriteLine("SelectSuspiciousAcceptedNodes: " + (Environment.TickCount - startTick));
    } else {
        // Only the "slow ... Strongly" selectors are active in this mode; the suspicious-node
        // and "fast ... Strongly" selectors are intentionally not called.
        selected.AddRange(SelectNodesForSlowAcceptanceLearningStrongly(rejectAccept, classifier));
        selected.AddRange(SelectNodesForSlowAcceptanceLearningStrongly(rejectReject, classifier));
        selected.AddRange(SelectNodesForSlowRejectionLearningStrongly(acceptReject, classifier));
        selected.AddRange(SelectNodesForSlowRejectionLearningStrongly(rejectReject, classifier));
        Console.WriteLine(
                "SelectSuspiciousAcceptedNodesStrongly: " + (Environment.TickCount - startTick));
    }

    Console.WriteLine("Suspicious nodes: " + selected.Count);
    return selected;
}
/// <summary>
/// Encodes the given code files and classifies them with an existing classifier.
/// </summary>
/// <param name="codePaths">Paths of the files to parse and encode.</param>
/// <param name="searchPattern">Not used by this method.</param>
/// <param name="featureEncoder">Encoder that turns the parsed files into an encoding result.</param>
/// <param name="classifier">Classifier to apply.</param>
/// <param name="writer">Optional writer that receives the encoding result.</param>
/// <returns>The result returned by <c>Classify</c>.</returns>
public ClassificationResult Apply(
        ICollection<string> codePaths, string searchPattern, FeatureEncoder featureEncoder,
        Classifier classifier, StreamWriter writer = null) {
    var encodingResult = featureEncoder.Encode(codePaths, GenerateValidCsts(codePaths), this);

    // Only cache construction and classification are timed.
    var startTick = Environment.TickCount;
    var classificationResult = Classify(
            int.MaxValue, classifier, new GroupCache(encodingResult, classifier), encodingResult);
    Console.WriteLine("Time: " + (Environment.TickCount - startTick));

    if (writer == null) {
        return classificationResult;
    }
    encodingResult.WriteResult(writer);
    return classificationResult;
}
/// <summary>
/// Parses and encodes the given code files, then runs an existing classifier over them.
/// </summary>
/// <param name="codePaths">Paths of the files to parse and encode.</param>
/// <param name="searchPattern">Not used by this method.</param>
/// <param name="featureEncoder">Encoder that produces the encoding result.</param>
/// <param name="classifier">Classifier to apply.</param>
/// <param name="writer">Optional writer; when supplied, the encoding result is written to it.</param>
/// <returns>The result returned by <c>Classify</c>.</returns>
public ClassificationResult Apply(
        ICollection<string> codePaths, string searchPattern, FeatureEncoder featureEncoder,
        Classifier classifier, StreamWriter writer = null) {
    var parsedCsts = GenerateValidCsts(codePaths);
    var encoded = featureEncoder.Encode(codePaths, parsedCsts, this);

    // Timing covers cache construction plus classification.
    var startedAt = Environment.TickCount;
    var cache = new GroupCache(encoded, classifier);
    var outcome = Classify(int.MaxValue, classifier, cache, encoded);
    Console.WriteLine("Time: " + (Environment.TickCount - startedAt));

    var shouldWrite = writer != null;
    if (shouldWrite) {
        encoded.WriteResult(writer);
    }
    return outcome;
}
/// <summary>
/// For every group, yields the candidates that contribute at least one new masked bit.
/// </summary>
/// <param name="candidateNodesLists">Candidate nodes, one list per group index.</param>
/// <param name="xor">Value XOR-ed onto each candidate vector before it is merged.</param>
/// <param name="mask">Bits that are kept after merging.</param>
/// <param name="classifier">Supplies the per-group unit whose accepting vector drives the ordering.</param>
/// <returns>A lazy sequence of the chosen candidates.</returns>
private IEnumerable<SuspiciousNode> SelectSuspiciousElementsWithMaskForFastAcceptanceLearning(
        IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, BigInteger xor, BigInteger mask,
        Classifier classifier) {
    for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
        var covered = BigInteger.Zero;
        var unit = classifier.Units[groupIndex];

        // Candidates sharing the fewest bits with the accepting vector are visited first.
        var ordered = candidateNodesLists[groupIndex]
                .OrderBy(node => LearningExperimentUtil.CountBits(unit.Accepting & node.Vector))
                .ToList();

        foreach (var node in ordered) {
            var merged = (covered | (node.Vector ^ xor)) & mask;
            if (merged == covered) {
                // Adds nothing new under the mask.
                continue;
            }
            covered = merged;
            yield return node;
        }
    }
}
/// <summary>
/// For every group, walks the candidates from the highest bit count downwards and yields the
/// unused ones whose vector clears at least one bit of the running rejecting mask.
/// </summary>
/// <param name="candidateNodesLists">Candidate nodes, one list per group index; each list is re-sorted in place.</param>
/// <param name="classifier">Supplies the per-group rejecting vector and the per-group quota.</param>
/// <returns>A lazy sequence of the chosen candidates; each one is flagged as used after it has been consumed.</returns>
public IEnumerable<SuspiciousNode> SelectNodesForFastRejectionLearningStrongly(
        IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, Classifier classifier) {
    for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
        var nodes = candidateNodesLists[groupIndex];
        var rejecting = classifier.Units[groupIndex].Rejecting;

        foreach (var node in nodes) {
            var maskedVector = node.Vector & _rejectingFeatureBitMask;
            node.BitsCount = LearningExperimentUtil.CountBits(maskedVector | rejecting);
        }
        nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

        var remaining = _rejectingFeatureBitMask;
        var quota = DetermineStrongCount(groupIndex, classifier);

        // Ascending sort + backwards walk = largest bit counts first.
        var position = nodes.Count;
        while (--position >= 0) {
            var node = nodes[position];
            if (node.Used) {
                continue;
            }
            var narrowed = remaining & node.Vector;
            if (narrowed == remaining) {
                continue;
            }
            remaining = narrowed;
            yield return node;
            node.Used = true;
            quota--;
            if (quota == 0) {
                break;
            }
        }
    }
}
/// <summary>
/// For every group, walks the candidates from the highest bit count downwards and yields the
/// unused ones that add at least one new bit under the accepting feature mask.
/// </summary>
/// <param name="candidateNodesLists">Candidate nodes, one list per group index; each list is re-sorted in place.</param>
/// <param name="classifier">Supplies the per-group accepting vector and the per-group quota.</param>
/// <returns>A lazy sequence of the chosen candidates; each one is flagged as used after it has been consumed.</returns>
public IEnumerable<SuspiciousNode> SelectNodesForSlowAcceptanceLearningStrongly(
        IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, Classifier classifier) {
    for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
        var nodes = candidateNodesLists[groupIndex];
        var accepting = classifier.Units[groupIndex].Accepting;

        // Rank each node by the number of bits it shares with the accepting vector.
        foreach (var node in nodes) {
            var shared = node.Vector & accepting;
            node.BitsCount = LearningExperimentUtil.CountBits(shared);
        }
        nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

        var covered = BigInteger.Zero;
        var quota = DetermineStrongCount(groupIndex, classifier);

        // Ascending sort + backwards walk = largest bit counts first.
        var position = nodes.Count;
        while (--position >= 0) {
            var node = nodes[position];
            if (node.Used) {
                continue;
            }
            var widened = (covered | node.Vector) & _acceptingFeatureBitMask;
            if (widened == covered) {
                continue;
            }
            covered = widened;
            yield return node;
            node.Used = true;
            quota--;
            if (quota == 0) {
                break;
            }
        }
    }
}
/// <summary>
/// For every group, walks the candidates from the lowest masked bit count upwards and yields
/// the unused ones that add at least one new bit under the accepting feature mask.
/// </summary>
/// <param name="candidateNodesLists">Candidate nodes, one list per group index; each list is re-sorted in place.</param>
/// <param name="classifier">Used to determine the per-group quota.</param>
/// <returns>A lazy sequence of the chosen candidates; each one is flagged as used after it has been consumed.</returns>
public IEnumerable<SuspiciousNode> SelectSuspiciousAcceptedNodesStrongly(
        IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, Classifier classifier) {
    for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
        var nodes = candidateNodesLists[groupIndex];

        foreach (var node in nodes) {
            var masked = node.Vector & _acceptingFeatureBitMask;
            node.BitsCount = LearningExperimentUtil.CountBits(masked);
        }
        nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

        var covered = BigInteger.Zero;
        var quota = DetermineStrongCount(groupIndex, classifier);

        foreach (var node in nodes) {
            if (node.Used) {
                continue;
            }
            var widened = (covered | node.Vector) & _acceptingFeatureBitMask;
            if (widened == covered) {
                // No new accepting bit is contributed by this node.
                continue;
            }
            covered = widened;
            yield return node;
            node.Used = true;
            quota--;
            if (quota == 0) {
                break;
            }
        }
    }
}
/// <summary>
/// For every group, yields unused candidates in ascending order of their masked bit count
/// until the per-group quota is exhausted.
/// </summary>
/// <param name="candidateNodesLists">Candidate nodes, one list per group index; each list is re-sorted in place.</param>
/// <param name="classifier">Used to determine the per-group quota.</param>
/// <returns>A lazy sequence of the chosen candidates; each one is flagged as used after it has been consumed.</returns>
public IEnumerable<SuspiciousNode> SelectSuspiciousAcceptedNodes(
        IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, Classifier classifier) {
    for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
        var nodes = candidateNodesLists[groupIndex];

        foreach (var node in nodes) {
            var masked = node.Vector & _acceptingFeatureBitMask;
            node.BitsCount = LearningExperimentUtil.CountBits(masked);
        }
        nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

        var quota = DetermineCount(classifier, groupIndex);
        foreach (var node in nodes) {
            if (node.Used) {
                continue;
            }
            yield return node;
            node.Used = true;
            quota--;
            if (quota == 0) {
                break;
            }
        }
    }
}
/// <summary>
/// For every group, yields the candidates that contribute at least one new masked bit.
/// </summary>
/// <param name="candidateNodesLists">Candidate nodes, one list per group index.</param>
/// <param name="xor">Value XOR-ed onto each candidate vector before it is merged.</param>
/// <param name="mask">Bits that are kept after merging.</param>
/// <param name="classifier">Supplies the per-group unit used for ordering.</param>
/// <returns>A lazy sequence of the chosen candidates.</returns>
private IEnumerable<SuspiciousNode> SelectSuspiciousElementsWithMaskForFastRejectionLearning(
        List<List<SuspiciousNode>> candidateNodesLists, BigInteger xor, BigInteger mask,
        Classifier classifier) {
    for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
        var covered = BigInteger.Zero;
        var unit = classifier.Units[groupIndex];

        // Sort key: bits of the unit's Accepting vector that overlap the rejecting-mask bits
        // missing from the candidate.
        // NOTE(review): this rejection-learning selector orders by Accepting, not Rejecting -
        // confirm that this is intended.
        var ordered = candidateNodesLists[groupIndex]
                .OrderBy(node => {
                    var missingRejectingBits =
                            (node.Vector & _rejectingFeatureBitMask) ^ _rejectingFeatureBitMask;
                    return LearningExperimentUtil.CountBits(unit.Accepting & missingRejectingBits);
                })
                .ToList();

        foreach (var node in ordered) {
            var merged = (covered | (node.Vector ^ xor)) & mask;
            if (merged != covered) {
                covered = merged;
                yield return node;
            }
        }
    }
}
/// <summary>
/// Returns the selection quota used by the "Strongly" selectors for one group.
/// </summary>
/// <param name="groupIndex">Position of the group in the classifier's accepting-feature counts.</param>
/// <param name="classifier">Classifier providing the accepting-feature counts.</param>
/// <returns>
/// <c>StronglyTargetCount</c> when the group's accepting-feature count exceeds
/// <c>ThresholdvectorCount</c>; otherwise half of it.
/// </returns>
private static int DetermineStrongCount(int groupIndex, Classifier classifier) {
    var acceptingFeatureCount = classifier.CountAcceptingFeatures().ElementAt(groupIndex);
    if (acceptingFeatureCount > ThresholdvectorCount) {
        return StronglyTargetCount;
    }
    return StronglyTargetCount / 2;
}
/// <summary>
/// Returns the selection quota used by the regular selectors for one group.
/// </summary>
/// <param name="classifier">Classifier providing the accepting-feature counts.</param>
/// <param name="groupIndex">Position of the group in the classifier's accepting-feature counts.</param>
/// <returns>
/// <c>TargetCount</c> when the group's accepting-feature count exceeds
/// <c>ThresholdvectorCount</c>; otherwise half of it.
/// </returns>
private static int DetermineCount(Classifier classifier, int groupIndex) {
    var acceptingFeatureCount = classifier.CountAcceptingFeatures().ElementAt(groupIndex);
    if (acceptingFeatureCount > ThresholdvectorCount) {
        return TargetCount;
    }
    return TargetCount / 2;
}
/// <summary>
/// Builds a classifier from the seed files and refines it round by round against the code files.
/// </summary>
/// <param name="seedPaths">Paths of the seed files; fragments are constructed from the first valid seed CST.</param>
/// <param name="codePaths">Paths of the files whose nodes are encoded and classified.</param>
/// <param name="searchPattern">Not used by this method.</param>
/// <param name="writer">Optional writer that receives the encoding result together with the training set.</param>
/// <returns>The last classification result plus the classifier, encoding result and feature encoder.</returns>
/// <exception cref="Exception">A vector is both ideally accepted and ideally rejected.</exception>
public LearningResult Learn(
        ICollection<string> seedPaths, ICollection<string> codePaths, string searchPattern,
        StreamWriter writer = null) {
    var targetCsts = GenerateValidCsts(codePaths);
    var seedCsts = GenerateValidCsts(seedPaths).ToList();
    var oracleAcceptedSeeds = seedCsts
            .SelectMany(cst => LearningExperimentUtil.GetUppermostNodesByNames(cst, OracleNames))
            .Where(ProtectedIsAcceptedUsingOracle)
            .ToList();

    // Fragments are derived from the first seed CST only.
    var firstSeedCst = seedCsts.First();
    var structuredCode = new StructuredCode(firstSeedCst.Code);
    var acceptingFragments =
            ConstructAcceptingFragments(structuredCode, firstSeedCst, oracleAcceptedSeeds);
    var rejectingFragments = ConstructRejectingFragments(structuredCode, firstSeedCst);
    // Whatever this call returns is discarded.
    SeedNodeSet.Create(acceptingFragments, this);

    var preparationStart = Environment.TickCount;
    var extractor = CreateExtractor();
    var seedNodeSet = new SeedNodeSet(
            acceptingFragments.Select(fragment => fragment.Node), seedCsts, this);
    Console.WriteLine(
            "#Accepted seed nodes: " + seedNodeSet.AcceptedNodes.Count + " ("
            + acceptingFragments.Count + ")");
    Console.WriteLine(
            "#Rejected seed nodes: " + seedNodeSet.RejectedNodes.Count + " ("
            + rejectingFragments.Count + ")");

    var featureSet = new FeatuerSet(
            seedNodeSet, extractor, acceptingFragments, rejectingFragments);
    var groupPaths = seedNodeSet.SelectedNodeNames.Select(name => ">" + name + ">");
    var classifier = new Classifier(groupPaths, featureSet);
    Console.WriteLine(
            "#Features: " + featureSet.AcceptingFeatureCount + ", "
            + featureSet.RejectingFeatureCount);
    Console.WriteLine("Inner: " + extractor.IsInner);

    var featureEncoder = new FeatureEncoder(
            seedNodeSet.SelectedNodeNames, extractor, featureSet);
    var encodingResult = featureEncoder.Encode(codePaths, targetCsts, this, seedNodeSet);
    Console.WriteLine("#Unique Elements: " + encodingResult.VectorCount);

    // A vector that is ideally accepted and ideally rejected at once cannot be separated:
    // print the features of the first such vector and give up.
    var idealAcceptedKeys = encodingResult.IdealAcceptedVector2GroupPath.Keys.ToHashSet();
    var idealRejectedKeys = encodingResult.IdealRejectedVector2GroupPath.Keys.ToHashSet();
    if (idealAcceptedKeys.Overlaps(idealRejectedKeys)) {
        var rejectedMap = encodingResult.IdealRejectedVector2GroupPath;
        var conflicting =
                encodingResult.IdealAcceptedVector2GroupPath.Keys.First(rejectedMap.ContainsKey);
        foreach (var featureText in featureEncoder.GetFeatureStringsByVector(conflicting)) {
            Console.WriteLine(Experiment.Beautify(featureText));
        }
        throw new Exception("Master predicates can't classify elements!");
    }

    var groupCache = new GroupCache(encodingResult, classifier);
    var trainingSet = encodingResult.CreateTrainingVectorSet();
    classifier.Create(trainingSet, groupCache);
    Experiment.WriteFeatureStrings(Console.Out, classifier, featureEncoder);
    Console.WriteLine("Preparing time: " + (Environment.TickCount - preparationStart));

    // Number of consecutive rounds in which classifier.Update returned false; it is passed
    // to Classify as the selection mode.
    var roundsWithoutUpdate = 0;
    var learningStart = Environment.TickCount;
    ClassificationResult classificationResult;
    for (;;) {
        var roundStart = Environment.TickCount;
        classificationResult = Classify(
                roundsWithoutUpdate, classifier, groupCache, encodingResult, trainingSet);
        if (classificationResult.SuspiciousNodes == null) {
            break;
        }
        var additionalAcceptedSet = RevealSuspiciousElements(
                encodingResult.IdealAcceptedVector2GroupPath.Keys,
                classificationResult.SuspiciousNodes, encodingResult, trainingSet);
        var updated = classifier.Update(additionalAcceptedSet, trainingSet, groupCache);
        roundsWithoutUpdate = updated ? 0 : roundsWithoutUpdate + 1;
        Console.WriteLine("Time: " + (Environment.TickCount - roundStart));
    }
    classifier.MakeImmutable();

    Console.WriteLine();
    Console.WriteLine("Sum time: " + (Environment.TickCount - learningStart));
    Console.WriteLine(
            "#Required vectors: " + trainingSet.Count + " / "
            + encodingResult.IdealVectorSet.Count);
    if (writer != null) {
        encodingResult.WriteResult(writer, trainingSet);
    }
    foreach (var groupPath in classifier.GroupPaths) {
        Console.WriteLine(groupPath);
    }
    classifier.Optimize(encodingResult.IdealRejectedVector2GroupPath.Keys, groupCache);

    var learningResult = new LearningResult {
        ClassificationResult = classificationResult,
        Classifier = classifier,
        EncodingResult = encodingResult,
        FeatureEncoder = featureEncoder,
    };
    return learningResult;
}
/// <summary>
/// For every group, walks the candidates from the lowest bit count upwards and yields the
/// unused ones whose vector clears at least one bit of the running rejecting mask.
/// </summary>
/// <param name="candidateNodesLists">Candidate nodes, one list per group index; each list is re-sorted in place.</param>
/// <param name="classifier">Supplies the per-group rejecting vector and the per-group quota.</param>
/// <returns>A lazy sequence of the chosen candidates; each one is flagged as used after it has been consumed.</returns>
public IEnumerable<SuspiciousNode> SelectNodesForSlowRejectionLearningStrongly(
        IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, Classifier classifier) {
    for (int i = 0; i < candidateNodesLists.Count; i++) {
        var candidates = candidateNodesLists[i];
        var rejectingVector = classifier.Units[i].Rejecting;

        foreach (var target in candidates) {
            target.BitsCount = LearningExperimentUtil.CountBits(
                    (target.Vector & _rejectingFeatureBitMask) | rejectingVector);
        }
        candidates.Sort((t1, t2) => t1.BitsCount.CompareTo(t2.BitsCount));

        var vector = _rejectingFeatureBitMask;
        var count = DetermineStrongCount(i, classifier);
        foreach (var candidate in candidates) {
            // Fix: the check used to be inverted (`!candidate.Used`), which skipped every
            // node that had not been selected yet, so nothing was returned for fresh nodes.
            // Like the sibling selectors, skip nodes that are already taken.
            if (candidate.Used) {
                continue;
            }
            var newVector = vector & candidate.Vector;
            if (newVector == vector) {
                // Clears no further bit of the running mask.
                continue;
            }
            vector = newVector;
            yield return candidate;
            candidate.Used = true;
            if (--count == 0) {
                break;
            }
        }
    }
}
/// <summary>
/// For every group, visits the candidates from the highest bit count downwards and yields the
/// unused ones that add at least one new bit under the accepting feature mask.
/// </summary>
/// <param name="candidateNodesLists">Candidate nodes, one list per group index; each list is re-sorted in place.</param>
/// <param name="classifier">Supplies the per-group accepting vector and the per-group quota.</param>
/// <returns>A lazy sequence of the chosen candidates; each one is flagged as used after it has been consumed.</returns>
public IEnumerable<SuspiciousNode> SelectNodesForSlowAcceptanceLearningStrongly(
        IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, Classifier classifier) {
    for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
        var nodes = candidateNodesLists[groupIndex];
        var accepting = classifier.Units[groupIndex].Accepting;

        // Rank each node by the number of bits it shares with the accepting vector.
        foreach (var node in nodes) {
            var shared = node.Vector & accepting;
            node.BitsCount = LearningExperimentUtil.CountBits(shared);
        }
        nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

        var covered = BigInteger.Zero;
        var quota = DetermineStrongCount(groupIndex, classifier);

        // The list is sorted ascending, so iterate from the back.
        for (int position = nodes.Count - 1; position >= 0; position--) {
            var node = nodes[position];
            if (!node.Used) {
                var widened = (covered | node.Vector) & _acceptingFeatureBitMask;
                if (widened != covered) {
                    covered = widened;
                    yield return node;
                    node.Used = true;
                    quota--;
                    if (quota == 0) {
                        break;
                    }
                }
            }
        }
    }
}
/// <summary>
/// Builds a classifier from the seed files and refines it round by round against the code files.
/// </summary>
/// <param name="seedPaths">Paths of the seed files; fragments are constructed from the first valid seed CST.</param>
/// <param name="codePaths">Paths of the files whose nodes are encoded and classified.</param>
/// <param name="searchPattern">Not used by this method.</param>
/// <param name="writer">Optional writer that receives the encoding result together with the training set.</param>
/// <returns>The last classification result plus the classifier, encoding result and feature encoder.</returns>
/// <exception cref="Exception">A vector is both ideally accepted and ideally rejected.</exception>
public LearningResult Learn(
        ICollection<string> seedPaths, ICollection<string> codePaths, string searchPattern,
        StreamWriter writer = null) {
    var targetCsts = GenerateValidCsts(codePaths);
    var seedCsts = GenerateValidCsts(seedPaths).ToList();
    var oracleAcceptedSeeds = seedCsts
            .SelectMany(cst => LearningExperimentUtil.GetUppermostNodesByNames(cst, OracleNames))
            .Where(ProtectedIsAcceptedUsingOracle)
            .ToList();

    // Fragments are derived from the first seed CST only.
    var firstSeedCst = seedCsts.First();
    var structuredCode = new StructuredCode(firstSeedCst.Code);
    var acceptingFragments =
            ConstructAcceptingFragments(structuredCode, firstSeedCst, oracleAcceptedSeeds);
    var rejectingFragments = ConstructRejectingFragments(structuredCode, firstSeedCst);
    // Whatever this call returns is discarded.
    SeedNodeSet.Create(acceptingFragments, this);

    var preparationStart = Environment.TickCount;
    var extractor = CreateExtractor();
    var seedNodeSet = new SeedNodeSet(
            acceptingFragments.Select(fragment => fragment.Node), seedCsts, this);
    Console.WriteLine(
            "#Accepted seed nodes: " + seedNodeSet.AcceptedNodes.Count + " ("
            + acceptingFragments.Count + ")");
    Console.WriteLine(
            "#Rejected seed nodes: " + seedNodeSet.RejectedNodes.Count + " ("
            + rejectingFragments.Count + ")");

    var featureSet = new FeatuerSet(
            seedNodeSet, extractor, acceptingFragments, rejectingFragments);
    var groupPaths = seedNodeSet.SelectedNodeNames.Select(name => ">" + name + ">");
    var classifier = new Classifier(groupPaths, featureSet);
    Console.WriteLine(
            "#Features: " + featureSet.AcceptingFeatureCount + ", "
            + featureSet.RejectingFeatureCount);
    Console.WriteLine("Inner: " + extractor.IsInner);

    var featureEncoder = new FeatureEncoder(
            seedNodeSet.SelectedNodeNames, extractor, featureSet);
    var encodingResult = featureEncoder.Encode(codePaths, targetCsts, this, seedNodeSet);
    Console.WriteLine("#Unique Elements: " + encodingResult.VectorCount);

    // A vector that is ideally accepted and ideally rejected at once cannot be separated:
    // print the features of the first such vector and give up.
    var idealAcceptedKeys = encodingResult.IdealAcceptedVector2GroupPath.Keys.ToHashSet();
    var idealRejectedKeys = encodingResult.IdealRejectedVector2GroupPath.Keys.ToHashSet();
    if (idealAcceptedKeys.Overlaps(idealRejectedKeys)) {
        var rejectedMap = encodingResult.IdealRejectedVector2GroupPath;
        var conflicting =
                encodingResult.IdealAcceptedVector2GroupPath.Keys.First(rejectedMap.ContainsKey);
        foreach (var featureText in featureEncoder.GetFeatureStringsByVector(conflicting)) {
            Console.WriteLine(Experiment.Beautify(featureText));
        }
        throw new Exception("Master predicates can't classify elements!");
    }

    var groupCache = new GroupCache(encodingResult, classifier);
    var trainingSet = encodingResult.CreateTrainingVectorSet();
    classifier.Create(trainingSet, groupCache);
    Experiment.WriteFeatureStrings(Console.Out, classifier, featureEncoder);
    Console.WriteLine("Preparing time: " + (Environment.TickCount - preparationStart));

    // Number of consecutive rounds in which classifier.Update returned false; it is passed
    // to Classify as the selection mode.
    var roundsWithoutUpdate = 0;
    var learningStart = Environment.TickCount;
    ClassificationResult classificationResult;
    for (;;) {
        var roundStart = Environment.TickCount;
        classificationResult = Classify(
                roundsWithoutUpdate, classifier, groupCache, encodingResult, trainingSet);
        if (classificationResult.SuspiciousNodes == null) {
            break;
        }
        var additionalAcceptedSet = RevealSuspiciousElements(
                encodingResult.IdealAcceptedVector2GroupPath.Keys,
                classificationResult.SuspiciousNodes, encodingResult, trainingSet);
        var updated = classifier.Update(additionalAcceptedSet, trainingSet, groupCache);
        roundsWithoutUpdate = updated ? 0 : roundsWithoutUpdate + 1;
        Console.WriteLine("Time: " + (Environment.TickCount - roundStart));
    }
    classifier.MakeImmutable();

    Console.WriteLine();
    Console.WriteLine("Sum time: " + (Environment.TickCount - learningStart));
    Console.WriteLine(
            "#Required vectors: " + trainingSet.Count + " / "
            + encodingResult.IdealVectorSet.Count);
    if (writer != null) {
        encodingResult.WriteResult(writer, trainingSet);
    }
    foreach (var groupPath in classifier.GroupPaths) {
        Console.WriteLine(groupPath);
    }
    classifier.Optimize(encodingResult.IdealRejectedVector2GroupPath.Keys, groupCache);

    var learningResult = new LearningResult {
        ClassificationResult = classificationResult,
        Classifier = classifier,
        EncodingResult = encodingResult,
        FeatureEncoder = featureEncoder,
    };
    return learningResult;
}
/// <summary>
/// Runs the classifier over every ideal vector, tallies the outcomes, prints a summary and
/// asks a <c>SuspiciousNodeSelector</c> for the nodes to examine next.
/// </summary>
/// <param name="count">Selection mode forwarded unchanged to <c>SelectSuspiciousNodes</c>.</param>
/// <param name="classifier">Classifier under evaluation.</param>
/// <param name="groupCache">Lookup from vector to group index.</param>
/// <param name="encodingResult">Provides the ideal vector sets and the per-vector node counts.</param>
/// <param name="trainingSet">
/// Vectors already revealed; they are still counted but never offered as candidates.
/// An empty set is used when omitted.
/// </param>
/// <returns>The selected nodes together with the misclassified vectors and error counts.</returns>
public ClassificationResult Classify(
        int count, Classifier classifier, GroupCache groupCache, EncodingResult encodingResult,
        RevealedVectorSet trainingSet = null) {
    trainingSet = trainingSet ?? new RevealedVectorSet();

    int correctlyAccepted = 0, correctlyRejected = 0;
    int wronglyAccepted = 0, wronglyRejected = 0;
    int correctlyRejectedInRejecting = 0, wronglyRejectedInRejecting = 0;

    // Candidate buckets, one inner list per group. The first word of each name is chosen by
    // IsAccepted (true -> "accept"), the second by IsRejected (true -> "Reject").
    var rejectAccept = new List<List<SuspiciousNode>>();
    var rejectReject = new List<List<SuspiciousNode>>();
    var acceptAccept = new List<List<SuspiciousNode>>();
    var acceptReject = new List<List<SuspiciousNode>>();
    for (int g = 0; g < classifier.GroupCount; g++) {
        rejectAccept.Add(new List<SuspiciousNode>());
        rejectReject.Add(new List<SuspiciousNode>());
        acceptAccept.Add(new List<SuspiciousNode>());
        acceptReject.Add(new List<SuspiciousNode>());
    }

    int wrongNodeCount = 0;
    var wronglyRejectedFeatures = new List<BigInteger>();
    var wronglyAcceptedFeatures = new List<BigInteger>();

    // Pass 1: vectors that should be accepted.
    foreach (var entry in encodingResult.IdealVectorSet.Accepted) {
        var vector = entry.Key;
        var groupPath = entry.Value;
        var groupIndex = groupCache.GetGroupIndex(vector);
        var rejected = classifier.IsRejected(vector, groupIndex);
        var accepted = classifier.IsAccepted(vector, groupIndex);

        var alreadyRevealed = trainingSet.Accepted.ContainsKey(vector)
                              || trainingSet.Rejected.ContainsKey(vector);
        if (!alreadyRevealed) {
            var target = new SuspiciousNode {
                Vector = vector,
                GroupKey = groupPath,
                Used = false,
            };
            var bucket = accepted
                    ? (rejected ? acceptReject : acceptAccept)
                    : (rejected ? rejectReject : rejectAccept);
            bucket[groupIndex].Add(target);
        }

        if (accepted && !rejected) {
            correctlyAccepted++;
        } else {
            if (accepted) {
                wronglyRejectedInRejecting++;
            } else {
                wronglyRejected++;
            }
            wrongNodeCount += encodingResult.Vector2Count[vector];
            wronglyRejectedFeatures.Add(vector);
        }
    }

    // Pass 2: vectors that should be rejected.
    foreach (var entry in encodingResult.IdealVectorSet.Rejected) {
        var vector = entry.Key;
        var groupPath = entry.Value;
        var groupIndex = groupCache.GetGroupIndex(vector);
        var rejected = classifier.IsRejected(vector, groupIndex);
        var accepted = classifier.IsAccepted(vector, groupIndex);

        var alreadyRevealed = trainingSet.Accepted.ContainsKey(vector)
                              || trainingSet.Rejected.ContainsKey(vector);
        if (!alreadyRevealed) {
            var target = new SuspiciousNode {
                Vector = vector,
                GroupKey = groupPath,
                Used = false,
            };
            var bucket = accepted
                    ? (rejected ? acceptReject : acceptAccept)
                    : (rejected ? rejectReject : rejectAccept);
            bucket[groupIndex].Add(target);
        }

        if (!accepted) {
            correctlyRejected++;
        } else if (rejected) {
            correctlyRejectedInRejecting++;
        } else {
            wronglyAccepted++;
            wrongNodeCount += encodingResult.Vector2Count[vector];
            wronglyAcceptedFeatures.Add(vector);
        }
    }

    Console.WriteLine("done");
    Console.WriteLine(
            "WA: " + wronglyAccepted + ", WR: " + wronglyRejected + "/"
            + wronglyRejectedInRejecting + ", CA: " + correctlyAccepted + ", CR: "
            + correctlyRejected + "/" + correctlyRejectedInRejecting);
    Console.WriteLine(
            "TR: " + trainingSet.Count + ", AF: "
            + String.Join(", ", classifier.CountAcceptingFeatures()) + ", RF: "
            + String.Join(", ", classifier.CountRejectingFeatures()));

    var wrongFeatureCount = wronglyAccepted + wronglyRejected + wronglyRejectedInRejecting;
    var selector = new SuspiciousNodeSelector(
            classifier.AcceptingFeatureBitMask, classifier.RejectingFeatureBitMask);
    var suspiciousNodes = selector.SelectSuspiciousNodes(
            count, classifier, acceptAccept, acceptReject, rejectAccept, rejectReject);

    return new ClassificationResult(
            suspiciousNodes, wronglyAcceptedFeatures, wronglyRejectedFeatures,
            wrongFeatureCount, wrongNodeCount, encodingResult);
}
/// <summary>
/// Chooses the per-group quota for the "Strongly" selectors.
/// </summary>
/// <param name="groupIndex">Position of the group in the classifier's accepting-feature counts.</param>
/// <param name="classifier">Classifier providing the accepting-feature counts.</param>
/// <returns>
/// <c>StronglyTargetCount</c> if the group's accepting-feature count is greater than
/// <c>ThresholdvectorCount</c>, otherwise <c>StronglyTargetCount / 2</c>.
/// </returns>
private static int DetermineStrongCount(int groupIndex, Classifier classifier) {
    var featureCounts = classifier.CountAcceptingFeatures();
    if (featureCounts.ElementAt(groupIndex) > ThresholdvectorCount) {
        return StronglyTargetCount;
    } else {
        return StronglyTargetCount / 2;
    }
}
/// <summary>
/// Chooses the per-group quota for the regular selectors.
/// </summary>
/// <param name="classifier">Classifier providing the accepting-feature counts.</param>
/// <param name="groupIndex">Position of the group in the classifier's accepting-feature counts.</param>
/// <returns>
/// <c>TargetCount</c> if the group's accepting-feature count is greater than
/// <c>ThresholdvectorCount</c>, otherwise <c>TargetCount / 2</c>.
/// </returns>
private static int DetermineCount(Classifier classifier, int groupIndex) {
    var featureCounts = classifier.CountAcceptingFeatures();
    if (featureCounts.ElementAt(groupIndex) > ThresholdvectorCount) {
        return TargetCount;
    } else {
        return TargetCount / 2;
    }
}
/// <summary>
/// Creates a cache bound to the given encoding result and fills it immediately.
/// </summary>
/// <param name="encodingResult">Encoding result stored for this and later cache updates.</param>
/// <param name="classifier">Classifier used for the initial call to <c>UpdateGroupCache</c>.</param>
public GroupCache(EncodingResult encodingResult, Classifier classifier) {
    // The encoding result must be stored first: UpdateGroupCache reads it.
    this._encodingResult = encodingResult;
    this.UpdateGroupCache(classifier);
}
/// <summary>
/// For every group, yields unused candidates in descending order of their bit count until
/// the per-group quota is exhausted.
/// </summary>
/// <param name="candidateNodesLists">Candidate nodes, one list per group index; each list is re-sorted in place.</param>
/// <param name="classifier">Supplies the per-group rejecting vector and the per-group quota.</param>
/// <returns>A lazy sequence of the chosen candidates; each one is flagged as used after it has been consumed.</returns>
public IEnumerable<SuspiciousNode> SelectNodesForFastRejectionLearning(
        IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, Classifier classifier) {
    for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
        var nodes = candidateNodesLists[groupIndex];
        var rejecting = classifier.Units[groupIndex].Rejecting;

        foreach (var node in nodes) {
            var maskedVector = node.Vector & _rejectingFeatureBitMask;
            node.BitsCount = LearningExperimentUtil.CountBits(maskedVector | rejecting);
        }
        nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

        var quota = DetermineCount(classifier, groupIndex);

        // Ascending sort + backwards walk = largest bit counts first.
        var position = nodes.Count;
        while (--position >= 0) {
            var node = nodes[position];
            if (node.Used) {
                continue;
            }
            yield return node;
            node.Used = true;
            quota--;
            if (quota == 0) {
                break;
            }
        }
    }
}
/// <summary>
/// Runs the classifier over every ideal vector, tallies the outcomes, prints a summary and
/// asks a <c>SuspiciousNodeSelector</c> for the nodes to examine next.
/// </summary>
/// <param name="count">Selection mode forwarded unchanged to <c>SelectSuspiciousNodes</c>.</param>
/// <param name="classifier">Classifier under evaluation.</param>
/// <param name="groupCache">Lookup from vector to group index.</param>
/// <param name="encodingResult">Provides the ideal vector sets and the per-vector node counts.</param>
/// <param name="trainingSet">
/// Vectors already revealed; they are still counted but never offered as candidates.
/// An empty set is used when omitted.
/// </param>
/// <returns>The selected nodes together with the misclassified vectors and error counts.</returns>
public ClassificationResult Classify(
        int count, Classifier classifier, GroupCache groupCache, EncodingResult encodingResult,
        RevealedVectorSet trainingSet = null) {
    trainingSet = trainingSet ?? new RevealedVectorSet();

    int correctlyAccepted = 0, correctlyRejected = 0;
    int wronglyAccepted = 0, wronglyRejected = 0;
    int correctlyRejectedInRejecting = 0, wronglyRejectedInRejecting = 0;

    // Candidate buckets, one inner list per group. The first word of each name is chosen by
    // IsAccepted (true -> "accept"), the second by IsRejected (true -> "Reject").
    var rejectAccept = new List<List<SuspiciousNode>>();
    var rejectReject = new List<List<SuspiciousNode>>();
    var acceptAccept = new List<List<SuspiciousNode>>();
    var acceptReject = new List<List<SuspiciousNode>>();
    for (int g = 0; g < classifier.GroupCount; g++) {
        rejectAccept.Add(new List<SuspiciousNode>());
        rejectReject.Add(new List<SuspiciousNode>());
        acceptAccept.Add(new List<SuspiciousNode>());
        acceptReject.Add(new List<SuspiciousNode>());
    }

    int wrongNodeCount = 0;
    var wronglyRejectedFeatures = new List<BigInteger>();
    var wronglyAcceptedFeatures = new List<BigInteger>();

    // Pass 1: vectors that should be accepted.
    foreach (var entry in encodingResult.IdealVectorSet.Accepted) {
        var vector = entry.Key;
        var groupPath = entry.Value;
        var groupIndex = groupCache.GetGroupIndex(vector);
        var rejected = classifier.IsRejected(vector, groupIndex);
        var accepted = classifier.IsAccepted(vector, groupIndex);

        var alreadyRevealed = trainingSet.Accepted.ContainsKey(vector)
                              || trainingSet.Rejected.ContainsKey(vector);
        if (!alreadyRevealed) {
            var target = new SuspiciousNode {
                Vector = vector,
                GroupKey = groupPath,
                Used = false,
            };
            var bucket = accepted
                    ? (rejected ? acceptReject : acceptAccept)
                    : (rejected ? rejectReject : rejectAccept);
            bucket[groupIndex].Add(target);
        }

        if (accepted && !rejected) {
            correctlyAccepted++;
        } else {
            if (accepted) {
                wronglyRejectedInRejecting++;
            } else {
                wronglyRejected++;
            }
            wrongNodeCount += encodingResult.Vector2Count[vector];
            wronglyRejectedFeatures.Add(vector);
        }
    }

    // Pass 2: vectors that should be rejected.
    foreach (var entry in encodingResult.IdealVectorSet.Rejected) {
        var vector = entry.Key;
        var groupPath = entry.Value;
        var groupIndex = groupCache.GetGroupIndex(vector);
        var rejected = classifier.IsRejected(vector, groupIndex);
        var accepted = classifier.IsAccepted(vector, groupIndex);

        var alreadyRevealed = trainingSet.Accepted.ContainsKey(vector)
                              || trainingSet.Rejected.ContainsKey(vector);
        if (!alreadyRevealed) {
            var target = new SuspiciousNode {
                Vector = vector,
                GroupKey = groupPath,
                Used = false,
            };
            var bucket = accepted
                    ? (rejected ? acceptReject : acceptAccept)
                    : (rejected ? rejectReject : rejectAccept);
            bucket[groupIndex].Add(target);
        }

        if (!accepted) {
            correctlyRejected++;
        } else if (rejected) {
            correctlyRejectedInRejecting++;
        } else {
            wronglyAccepted++;
            wrongNodeCount += encodingResult.Vector2Count[vector];
            wronglyAcceptedFeatures.Add(vector);
        }
    }

    Console.WriteLine("done");
    Console.WriteLine(
            "WA: " + wronglyAccepted + ", WR: " + wronglyRejected + "/"
            + wronglyRejectedInRejecting + ", CA: " + correctlyAccepted + ", CR: "
            + correctlyRejected + "/" + correctlyRejectedInRejecting);
    Console.WriteLine(
            "TR: " + trainingSet.Count + ", AF: "
            + String.Join(", ", classifier.CountAcceptingFeatures()) + ", RF: "
            + String.Join(", ", classifier.CountRejectingFeatures()));

    var wrongFeatureCount = wronglyAccepted + wronglyRejected + wronglyRejectedInRejecting;
    var selector = new SuspiciousNodeSelector(
            classifier.AcceptingFeatureBitMask, classifier.RejectingFeatureBitMask);
    var suspiciousNodes = selector.SelectSuspiciousNodes(
            count, classifier, acceptAccept, acceptReject, rejectAccept, rejectReject);

    return new ClassificationResult(
            suspiciousNodes, wronglyAcceptedFeatures, wronglyRejectedFeatures,
            wrongFeatureCount, wrongNodeCount, encodingResult);
}