Exemple #1
0
		/// <summary>
		/// Rebuilds the key-to-group-index lookup from the ideal accepted and ideal
		/// rejected entries of the encoding result, asking the classifier for the
		/// group index of each entry's value.
		/// </summary>
		/// <param name="classifier">Classifier used to resolve group indices.</param>
		public void UpdateGroupCache(Classifier classifier) {
			_vector2GroupIndex = new Dictionary<BigInteger, int>();
			// Accepted entries are registered first, then rejected ones. Add throws
			// when a key is already present in the dictionary.
			foreach (var acceptedEntry in _encodingResult.IdealAcceptedVector2GroupPath) {
				var acceptedGroup = classifier.GetGroupIndex(acceptedEntry.Value);
				_vector2GroupIndex.Add(acceptedEntry.Key, acceptedGroup);
			}
			foreach (var rejectedEntry in _encodingResult.IdealRejectedVector2GroupPath) {
				var rejectedGroup = classifier.GetGroupIndex(rejectedEntry.Value);
				_vector2GroupIndex.Add(rejectedEntry.Key, rejectedGroup);
			}
		}
		/// <summary>
		/// Gathers the nodes to inspect next from the four candidate buckets.
		/// </summary>
		/// <remarks>
		/// <paramref name="count"/> picks the strategy: 0 runs the regular selectors,
		/// 1 runs the "strongly" slow-learning selectors, and any other value makes
		/// the method return <c>null</c> without printing the node count. The elapsed
		/// tick count of the chosen strategy is written to the console.
		/// The selectors mark the nodes they hand out as used, so the order of the
		/// calls below must not be changed.
		/// </remarks>
		/// <returns>The selected nodes, or <c>null</c> when count is neither 0 nor 1.</returns>
		public HashSet<SuspiciousNode> SelectSuspiciousNodes(
				int count, Classifier classifier, List<List<SuspiciousNode>> acceptAccept,
				List<List<SuspiciousNode>> acceptReject, List<List<SuspiciousNode>> rejectAccept,
				List<List<SuspiciousNode>> rejectReject) {
			var selected = new HashSet<SuspiciousNode>();

			if (count == 0) {
				var startTick = Environment.TickCount;
				selected.AddRange(SelectSuspiciousAcceptedNodes(acceptAccept, classifier));
				selected.AddRange(SelectSuspiciousAcceptedNodes(acceptReject, classifier));
				selected.AddRange(SelectSuspiciousRejectedNodes(acceptAccept, classifier));
				selected.AddRange(SelectSuspiciousRejectedNodes(rejectAccept, classifier));

				// Only the fast learning selectors are active in this strategy.
				selected.AddRange(SelectNodesForFastAcceptanceLearning(rejectAccept, classifier));
				selected.AddRange(SelectNodesForFastAcceptanceLearning(rejectReject, classifier));

				selected.AddRange(SelectNodesForFastRejectionLearning(acceptReject, classifier));
				selected.AddRange(SelectNodesForFastRejectionLearning(rejectReject, classifier));
				var elapsed = Environment.TickCount - startTick;
				Console.WriteLine("SelectSuspiciousAcceptedNodes: " + elapsed);
			} else if (count == 1) {
				var startTick = Environment.TickCount;
				// Only the slow "strongly" learning selectors are active in this strategy.
				selected.AddRange(
						SelectNodesForSlowAcceptanceLearningStrongly(rejectAccept, classifier));
				selected.AddRange(
						SelectNodesForSlowAcceptanceLearningStrongly(rejectReject, classifier));

				selected.AddRange(
						SelectNodesForSlowRejectionLearningStrongly(acceptReject, classifier));
				selected.AddRange(
						SelectNodesForSlowRejectionLearningStrongly(rejectReject, classifier));
				var elapsed = Environment.TickCount - startTick;
				Console.WriteLine("SelectSuspiciousAcceptedNodesStrongly: " + elapsed);
			} else {
				return null;
			}

			Console.WriteLine("Suspicious nodes: " + selected.Count);
			return selected;
		}
        /// <summary>
        /// Encodes the given code files and classifies them with an existing
        /// classifier, printing the elapsed tick count of the classification.
        /// </summary>
        /// <param name="codePaths">Paths of the files to classify.</param>
        /// <param name="searchPattern">Not used by this method.</param>
        /// <param name="featureEncoder">Encoder that produces the encoding result.</param>
        /// <param name="classifier">Classifier to apply.</param>
        /// <param name="writer">Optional writer that receives the encoding result.</param>
        /// <returns>The result returned by <c>Classify</c>.</returns>
        public ClassificationResult Apply(
                ICollection<string> codePaths, string searchPattern, FeatureEncoder featureEncoder,
                Classifier classifier, StreamWriter writer = null) {
            var csts = GenerateValidCsts(codePaths);
            var encoded = featureEncoder.Encode(codePaths, csts, this);

            // Only group-cache construction and classification are timed.
            var startTick = Environment.TickCount;
            var classification = Classify(
                    int.MaxValue, classifier, new GroupCache(encoded, classifier), encoded);
            Console.WriteLine("Time: " + (Environment.TickCount - startTick));

            if (writer == null) {
                return classification;
            }
            encoded.WriteResult(writer);
            return classification;
        }
Exemple #4
0
        /// <summary>
        /// Encodes the given code files and classifies them with an existing
        /// classifier, printing the elapsed tick count of the classification.
        /// </summary>
        /// <param name="codePaths">Paths of the files to classify.</param>
        /// <param name="searchPattern">Not used by this method.</param>
        /// <param name="featureEncoder">Encoder that produces the encoding result.</param>
        /// <param name="classifier">Classifier to apply.</param>
        /// <param name="writer">Optional writer that receives the encoding result.</param>
        /// <returns>The result returned by <c>Classify</c>.</returns>
        public ClassificationResult Apply(
            ICollection<string> codePaths, string searchPattern, FeatureEncoder featureEncoder,
            Classifier classifier, StreamWriter writer = null)
        {
            var csts = GenerateValidCsts(codePaths);
            var encoded = featureEncoder.Encode(codePaths, csts, this);

            // Only group-cache construction and classification are timed.
            var startTick = Environment.TickCount;
            var cache = new GroupCache(encoded, classifier);
            var classification = Classify(int.MaxValue, classifier, cache, encoded);
            var elapsedTicks = Environment.TickCount - startTick;

            Console.WriteLine("Time: " + elapsedTicks);

            if (writer == null)
            {
                return classification;
            }

            encoded.WriteResult(writer);
            return classification;
        }
 /// <summary>
 /// For every group, walks its candidates in ascending order of the number of
 /// bits set in (unit accepting vector AND candidate vector) and lazily yields
 /// each candidate whose XOR-ed, masked vector adds at least one new bit to the
 /// bits accumulated so far for that group.
 /// </summary>
 /// <param name="candidateNodesLists">Candidates, one list per classifier unit.</param>
 /// <param name="xor">Value XOR-ed onto each candidate vector before merging.</param>
 /// <param name="mask">Mask applied to the merged vector.</param>
 /// <param name="classifier">Classifier whose units supply the accepting vectors.</param>
 private IEnumerable<SuspiciousNode> SelectSuspiciousElementsWithMaskForFastAcceptanceLearning(
     IReadOnlyList<List<SuspiciousNode>> candidateNodesLists, BigInteger xor, BigInteger mask,
     Classifier classifier)
 {
     for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++)
     {
         var unit = classifier.Units[groupIndex];
         var ordered = candidateNodesLists[groupIndex]
                       .OrderBy(node => LearningExperimentUtil.CountBits(unit.Accepting & node.Vector))
                       .ToList();

         // The accumulator starts empty for every group.
         var covered = BigInteger.Zero;
         foreach (var node in ordered)
         {
             var merged = (covered | (node.Vector ^ xor)) & mask;
             if (merged == covered)
             {
                 // Nothing new contributed; skip this candidate.
                 continue;
             }
             covered = merged;
             yield return node;
         }
     }
 }
        /// <summary>
        /// For every group, sorts the candidates in place by the bit count of
        /// ((vector AND rejecting mask) OR unit rejecting vector), then walks them
        /// from the highest count down and lazily yields each unused candidate
        /// whose vector clears at least one more bit of the running AND, which
        /// starts at the rejecting mask.
        /// </summary>
        /// <remarks>
        /// Each yielded candidate is marked as used after it has been handed to the
        /// consumer. At most <c>DetermineStrongCount</c> candidates are yielded per
        /// group when that count is positive.
        /// </remarks>
        /// <param name="candidateNodesLists">Candidates, one list per classifier unit.</param>
        /// <param name="classifier">Classifier whose units supply the rejecting vectors.</param>
        public IEnumerable<SuspiciousNode> SelectNodesForFastRejectionLearningStrongly(
            IReadOnlyList<List<SuspiciousNode>> candidateNodesLists,
            Classifier classifier)
        {
            for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++)
            {
                var nodes = candidateNodesLists[groupIndex];
                var unitRejecting = classifier.Units[groupIndex].Rejecting;
                foreach (var node in nodes)
                {
                    var maskedVector = node.Vector & _rejectingFeatureBitMask;
                    node.BitsCount = LearningExperimentUtil.CountBits(maskedVector | unitRejecting);
                }
                nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

                var narrowed = _rejectingFeatureBitMask;
                var remaining = DetermineStrongCount(groupIndex, classifier);
                var position = nodes.Count;
                while (--position >= 0)
                {
                    var node = nodes[position];
                    if (node.Used)
                    {
                        continue;
                    }
                    var intersection = narrowed & node.Vector;
                    if (intersection != narrowed)
                    {
                        narrowed = intersection;
                        yield return node;

                        node.Used = true;
                        remaining--;
                        if (remaining == 0)
                        {
                            break;
                        }
                    }
                }
            }
        }
        /// <summary>
        /// For every group, sorts the candidates in place by the bit count of
        /// (vector AND unit accepting vector), then walks them from the highest
        /// count down and lazily yields each unused candidate whose vector adds at
        /// least one new bit, within the accepting mask, to the running OR.
        /// </summary>
        /// <remarks>
        /// Each yielded candidate is marked as used after it has been handed to the
        /// consumer. At most <c>DetermineStrongCount</c> candidates are yielded per
        /// group when that count is positive.
        /// </remarks>
        /// <param name="candidateNodesLists">Candidates, one list per classifier unit.</param>
        /// <param name="classifier">Classifier whose units supply the accepting vectors.</param>
        public IEnumerable<SuspiciousNode> SelectNodesForSlowAcceptanceLearningStrongly(
            IReadOnlyList<List<SuspiciousNode>> candidateNodesLists,
            Classifier classifier)
        {
            for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++)
            {
                var nodes = candidateNodesLists[groupIndex];
                var unitAccepting = classifier.Units[groupIndex].Accepting;
                foreach (var node in nodes)
                {
                    var sharedBits = node.Vector & unitAccepting;
                    node.BitsCount = LearningExperimentUtil.CountBits(sharedBits);
                }
                nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

                var covered = BigInteger.Zero;
                var remaining = DetermineStrongCount(groupIndex, classifier);
                var position = nodes.Count;
                while (--position >= 0)
                {
                    var node = nodes[position];
                    if (node.Used)
                    {
                        continue;
                    }
                    var merged = (covered | node.Vector) & _acceptingFeatureBitMask;
                    if (merged != covered)
                    {
                        covered = merged;
                        yield return node;

                        node.Used = true;
                        remaining--;
                        if (remaining == 0)
                        {
                            break;
                        }
                    }
                }
            }
        }
        /// <summary>
        /// For every group, sorts the candidates in place by the bit count of
        /// (vector AND accepting mask), then walks them from the lowest count up
        /// and lazily yields each unused candidate whose vector adds at least one
        /// new bit, within the accepting mask, to the running OR.
        /// </summary>
        /// <remarks>
        /// Each yielded candidate is marked as used after it has been handed to the
        /// consumer. At most <c>DetermineStrongCount</c> candidates are yielded per
        /// group when that count is positive.
        /// </remarks>
        /// <param name="candidateNodesLists">Candidates, one list per group.</param>
        /// <param name="classifier">Classifier passed on to <c>DetermineStrongCount</c>.</param>
        public IEnumerable<SuspiciousNode> SelectSuspiciousAcceptedNodesStrongly(
            IReadOnlyList<List<SuspiciousNode>> candidateNodesLists,
            Classifier classifier)
        {
            for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++)
            {
                var nodes = candidateNodesLists[groupIndex];
                foreach (var node in nodes)
                {
                    var acceptingBits = node.Vector & _acceptingFeatureBitMask;
                    node.BitsCount = LearningExperimentUtil.CountBits(acceptingBits);
                }
                nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

                var covered = BigInteger.Zero;
                var remaining = DetermineStrongCount(groupIndex, classifier);
                foreach (var node in nodes)
                {
                    if (node.Used)
                    {
                        continue;
                    }
                    var merged = (covered | node.Vector) & _acceptingFeatureBitMask;
                    if (merged != covered)
                    {
                        covered = merged;
                        yield return node;

                        node.Used = true;
                        remaining--;
                        if (remaining == 0)
                        {
                            break;
                        }
                    }
                }
            }
        }
		/// <summary>
		/// For every group, sorts the candidates in place by the bit count of
		/// (vector AND accepting mask) and lazily yields the unused candidates from
		/// the lowest count up.
		/// </summary>
		/// <remarks>
		/// Each yielded candidate is marked as used after it has been handed to the
		/// consumer. At most <c>DetermineCount</c> candidates are yielded per group
		/// when that count is positive.
		/// </remarks>
		/// <param name="candidateNodesLists">Candidates, one list per group.</param>
		/// <param name="classifier">Classifier passed on to <c>DetermineCount</c>.</param>
		public IEnumerable<SuspiciousNode> SelectSuspiciousAcceptedNodes(
				IReadOnlyList<List<SuspiciousNode>> candidateNodesLists,
				Classifier classifier) {
			for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
				var nodes = candidateNodesLists[groupIndex];
				foreach (var node in nodes) {
					var acceptingBits = node.Vector & _acceptingFeatureBitMask;
					node.BitsCount = LearningExperimentUtil.CountBits(acceptingBits);
				}
				nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

				var remaining = DetermineCount(classifier, groupIndex);
				foreach (var node in nodes) {
					if (!node.Used) {
						yield return node;
						node.Used = true;
						remaining--;
						if (remaining == 0) {
							break;
						}
					}
				}
			}
		}
		/// <summary>
		/// For every group, orders its candidates by the number of bits set in
		/// (unit accepting vector AND the rejecting-mask bits that are NOT set in the
		/// candidate vector) and lazily yields each candidate whose XOR-ed, masked
		/// vector adds at least one new bit to the bits accumulated so far for that
		/// group.
		/// </summary>
		/// <param name="candidateNodesLists">Candidates, one list per classifier unit.</param>
		/// <param name="xor">Value XOR-ed onto each candidate vector before merging.</param>
		/// <param name="mask">Mask applied to the merged vector.</param>
		/// <param name="classifier">Classifier whose units supply the accepting vectors.</param>
		private IEnumerable<SuspiciousNode> SelectSuspiciousElementsWithMaskForFastRejectionLearning(
				List<List<SuspiciousNode>> candidateNodesLists, BigInteger xor, BigInteger mask,
				Classifier classifier) {
			for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
				var unit = classifier.Units[groupIndex];
				var ordered = candidateNodesLists[groupIndex]
						.OrderBy(
								node => LearningExperimentUtil.CountBits(
										unit.Accepting
										& ((node.Vector & _rejectingFeatureBitMask) ^ _rejectingFeatureBitMask)))
						.ToList();

				// The accumulator starts empty for every group.
				var covered = BigInteger.Zero;
				foreach (var node in ordered) {
					var merged = (covered | (node.Vector ^ xor)) & mask;
					if (merged != covered) {
						covered = merged;
						yield return node;
					}
				}
			}
		}
		/// <summary>
		/// Returns <c>StronglyTargetCount</c> when the group's accepting-feature count
		/// exceeds <c>ThresholdvectorCount</c>, and half of it (integer division)
		/// otherwise.
		/// </summary>
		private static int DetermineStrongCount(int groupIndex, Classifier classifier) {
			var acceptingFeatureCount = classifier.CountAcceptingFeatures().ElementAt(groupIndex);
			if (acceptingFeatureCount > ThresholdvectorCount) {
				return StronglyTargetCount;
			}
			return StronglyTargetCount / 2;
		}
		/// <summary>
		/// Returns <c>TargetCount</c> when the group's accepting-feature count exceeds
		/// <c>ThresholdvectorCount</c>, and half of it (integer division) otherwise.
		/// </summary>
		private static int DetermineCount(Classifier classifier, int groupIndex) {
			var acceptingFeatureCount = classifier.CountAcceptingFeatures().ElementAt(groupIndex);
			if (acceptingFeatureCount > ThresholdvectorCount) {
				return TargetCount;
			}
			return TargetCount / 2;
		}
        /// <summary>
        /// Builds a classifier from the seed files, then repeatedly classifies the
        /// encoded target files and feeds the suspicious nodes back into the
        /// classifier until <c>Classify</c> reports no suspicious nodes.
        /// </summary>
        /// <remarks>
        /// Progress, timings (in ticks) and statistics are written to the console.
        /// Throws when some vector is both ideally accepted and ideally rejected.
        /// </remarks>
        /// <param name="seedPaths">Paths of the seed files.</param>
        /// <param name="codePaths">Paths of the files to learn against.</param>
        /// <param name="searchPattern">Not used by this method.</param>
        /// <param name="writer">Optional writer that receives the encoding result.</param>
        /// <returns>The final classification, classifier, encoding result and encoder.</returns>
        public LearningResult Learn(
                ICollection<string> seedPaths, ICollection<string> codePaths, string searchPattern,
                StreamWriter writer = null) {
            var targetCsts = GenerateValidCsts(codePaths);
            var seedCsts = GenerateValidCsts(seedPaths).ToList();
            var oracleNodes = seedCsts
                    .SelectMany(
                            tree => LearningExperimentUtil.GetUppermostNodesByNames(tree, OracleNames))
                    .Where(ProtectedIsAcceptedUsingOracle)
                    .ToList();

            // Fragments are constructed from the first seed tree only.
            var firstSeedCst = seedCsts.First();
            var structuredCode = new StructuredCode(firstSeedCst.Code);
            var acceptingFragments =
                    ConstructAcceptingFragments(structuredCode, firstSeedCst, oracleNodes);
            var rejectingFragments = ConstructRejectingFragments(structuredCode, firstSeedCst);

            SeedNodeSet.Create(acceptingFragments, this);

            var preparationStart = Environment.TickCount;
            var extractor = CreateExtractor();
            var seedNodeSet = new SeedNodeSet(
                    acceptingFragments.Select(fragment => fragment.Node), seedCsts, this);
            var acceptedSummary = "#Accepted seed nodes: " + seedNodeSet.AcceptedNodes.Count
                                  + " (" + acceptingFragments.Count + ")";
            Console.WriteLine(acceptedSummary);
            var rejectedSummary = "#Rejected seed nodes: " + seedNodeSet.RejectedNodes.Count
                                  + " (" + rejectingFragments.Count + ")";
            Console.WriteLine(rejectedSummary);

            var featureSet = new FeatuerSet(
                    seedNodeSet, extractor, acceptingFragments, rejectingFragments);
            var classifier = new Classifier(
                    seedNodeSet.SelectedNodeNames.Select(name => ">" + name + ">"), featureSet);
            var featureSummary = "#Features: " + featureSet.AcceptingFeatureCount + ", "
                                 + featureSet.RejectingFeatureCount;
            Console.WriteLine(featureSummary);
            Console.WriteLine("Inner: " + extractor.IsInner);

            var featureEncoder = new FeatureEncoder(
                    seedNodeSet.SelectedNodeNames, extractor, featureSet);
            var encodingResult = featureEncoder.Encode(codePaths, targetCsts, this, seedNodeSet);
            Console.WriteLine("#Unique Elements: " + encodingResult.VectorCount);

            // A vector that is both ideally accepted and ideally rejected cannot be
            // classified; print the features of the first such vector and give up.
            var idealAcceptedKeys = encodingResult.IdealAcceptedVector2GroupPath.Keys.ToHashSet();
            var idealRejectedKeys = encodingResult.IdealRejectedVector2GroupPath.Keys.ToHashSet();
            if (idealAcceptedKeys.Overlaps(idealRejectedKeys)) {
                var rejectedMap = encodingResult.IdealRejectedVector2GroupPath;
                var conflicting =
                        encodingResult.IdealAcceptedVector2GroupPath.Keys.First(rejectedMap.ContainsKey);
                foreach (var feature in featureEncoder.GetFeatureStringsByVector(conflicting)) {
                    Console.WriteLine(Experiment.Beautify(feature));
                }
                throw new Exception("Master predicates can't classify elements!");
            }

            var groupCache = new GroupCache(encodingResult, classifier);
            var trainingSet = encodingResult.CreateTrainingVectorSet();
            classifier.Create(trainingSet, groupCache);
            Experiment.WriteFeatureStrings(Console.Out, classifier, featureEncoder);
            Console.WriteLine("Preparing time: " + (Environment.TickCount - preparationStart));

            // The value handed to Classify is reset to 0 whenever Update returns
            // true and is incremented otherwise.
            var strategy = 0;
            var learningStart = Environment.TickCount;
            ClassificationResult classificationResult;
            for (;;) {
                var roundStart = Environment.TickCount;
                classificationResult = Classify(
                        strategy, classifier, groupCache, encodingResult, trainingSet);
                if (classificationResult.SuspiciousNodes == null) {
                    break;
                }

                var revealedAccepted = RevealSuspiciousElements(
                        encodingResult.IdealAcceptedVector2GroupPath.Keys,
                        classificationResult.SuspiciousNodes, encodingResult, trainingSet);
                var updated = classifier.Update(revealedAccepted, trainingSet, groupCache);
                strategy = updated ? 0 : strategy + 1;

                Console.WriteLine("Time: " + (Environment.TickCount - roundStart));
            }
            classifier.MakeImmutable();
            Console.WriteLine();
            Console.WriteLine("Sum time: " + (Environment.TickCount - learningStart));
            var requiredSummary = "#Required vectors: " + trainingSet.Count + " / "
                                  + encodingResult.IdealVectorSet.Count;
            Console.WriteLine(requiredSummary);

            if (writer != null) {
                encodingResult.WriteResult(writer, trainingSet);
            }

            foreach (var path in classifier.GroupPaths) {
                Console.WriteLine(path);
            }

            classifier.Optimize(encodingResult.IdealRejectedVector2GroupPath.Keys, groupCache);

            var learningResult = new LearningResult {
                ClassificationResult = classificationResult,
                Classifier = classifier,
                EncodingResult = encodingResult,
                FeatureEncoder = featureEncoder,
            };
            return learningResult;
        }
		/// <summary>
		/// For every group, sorts the candidates in place by the bit count of
		/// ((vector AND rejecting mask) OR unit rejecting vector), then walks them
		/// from the lowest count up and lazily yields each unused candidate whose
		/// vector clears at least one more bit of the running AND, which starts at
		/// the rejecting mask.
		/// </summary>
		/// <remarks>
		/// Each yielded candidate is marked as used after it has been handed to the
		/// consumer. At most <c>DetermineStrongCount</c> candidates are yielded per
		/// group when that count is positive.
		/// </remarks>
		/// <param name="candidateNodesLists">Candidates, one list per classifier unit.</param>
		/// <param name="classifier">Classifier whose units supply the rejecting vectors.</param>
		public IEnumerable<SuspiciousNode> SelectNodesForSlowRejectionLearningStrongly(
				IReadOnlyList<List<SuspiciousNode>> candidateNodesLists,
				Classifier classifier) {
			for (int i = 0; i < candidateNodesLists.Count; i++) {
				var candidates = candidateNodesLists[i];
				var rejectingUnit = classifier.Units[i].Rejecting;
				foreach (var target in candidates) {
					target.BitsCount = LearningExperimentUtil.CountBits(
							(target.Vector & _rejectingFeatureBitMask) | rejectingUnit);
				}
				candidates.Sort((t1, t2) => t1.BitsCount.CompareTo(t2.BitsCount));
				var vector = _rejectingFeatureBitMask;
				var count = DetermineStrongCount(i, classifier);
				foreach (var candidate in candidates) {
					// Skip nodes that were already handed out, as every other
					// selector in this class does. The previous test was inverted
					// (!candidate.Used), so fresh candidates were never selected
					// and only already-returned nodes could be yielded again.
					if (candidate.Used) {
						continue;
					}
					var newVector = vector & candidate.Vector;
					if (newVector == vector) {
						// This candidate does not narrow the running AND any further.
						continue;
					}
					vector = newVector;
					yield return candidate;
					candidate.Used = true;
					if (--count == 0) {
						break;
					}
				}
			}
		}
		/// <summary>
		/// For every group, sorts the candidates in place by the bit count of
		/// (vector AND unit accepting vector), then walks them from the highest
		/// count down and lazily yields each unused candidate whose vector adds at
		/// least one new bit, within the accepting mask, to the running OR.
		/// </summary>
		/// <remarks>
		/// Each yielded candidate is marked as used after it has been handed to the
		/// consumer. At most <c>DetermineStrongCount</c> candidates are yielded per
		/// group when that count is positive.
		/// </remarks>
		/// <param name="candidateNodesLists">Candidates, one list per classifier unit.</param>
		/// <param name="classifier">Classifier whose units supply the accepting vectors.</param>
		public IEnumerable<SuspiciousNode> SelectNodesForSlowAcceptanceLearningStrongly(
				IReadOnlyList<List<SuspiciousNode>> candidateNodesLists,
				Classifier classifier) {
			for (int groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
				var nodes = candidateNodesLists[groupIndex];
				var unitAccepting = classifier.Units[groupIndex].Accepting;
				foreach (var node in nodes) {
					var sharedBits = node.Vector & unitAccepting;
					node.BitsCount = LearningExperimentUtil.CountBits(sharedBits);
				}
				nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

				var covered = BigInteger.Zero;
				var remaining = DetermineStrongCount(groupIndex, classifier);
				var position = nodes.Count;
				while (--position >= 0) {
					var node = nodes[position];
					if (node.Used) {
						continue;
					}
					var merged = (covered | node.Vector) & _acceptingFeatureBitMask;
					if (merged != covered) {
						covered = merged;
						yield return node;
						node.Used = true;
						remaining--;
						if (remaining == 0) {
							break;
						}
					}
				}
			}
		}
Exemple #16
0
        /// <summary>
        /// Builds a classifier from the seed files, then repeatedly classifies the
        /// encoded target files and feeds the suspicious nodes back into the
        /// classifier until <c>Classify</c> reports no suspicious nodes.
        /// </summary>
        /// <remarks>
        /// Progress, timings (in ticks) and statistics are written to the console.
        /// Throws when some vector is both ideally accepted and ideally rejected.
        /// </remarks>
        /// <param name="seedPaths">Paths of the seed files.</param>
        /// <param name="codePaths">Paths of the files to learn against.</param>
        /// <param name="searchPattern">Not used by this method.</param>
        /// <param name="writer">Optional writer that receives the encoding result.</param>
        /// <returns>The final classification, classifier, encoding result and encoder.</returns>
        public LearningResult Learn(
            ICollection<string> seedPaths, ICollection<string> codePaths, string searchPattern,
            StreamWriter writer = null)
        {
            var targetCsts = GenerateValidCsts(codePaths);
            var seedCsts = GenerateValidCsts(seedPaths).ToList();
            var oracleNodes = seedCsts
                .SelectMany(tree => LearningExperimentUtil.GetUppermostNodesByNames(tree, OracleNames))
                .Where(ProtectedIsAcceptedUsingOracle)
                .ToList();

            // Fragments are constructed from the first seed tree only.
            var firstSeedCst = seedCsts.First();
            var structuredCode = new StructuredCode(firstSeedCst.Code);
            var acceptingFragments =
                ConstructAcceptingFragments(structuredCode, firstSeedCst, oracleNodes);
            var rejectingFragments = ConstructRejectingFragments(structuredCode, firstSeedCst);

            SeedNodeSet.Create(acceptingFragments, this);

            var preparationStart = Environment.TickCount;
            var extractor = CreateExtractor();
            var seedNodeSet = new SeedNodeSet(
                acceptingFragments.Select(fragment => fragment.Node), seedCsts, this);

            var acceptedSummary = "#Accepted seed nodes: " + seedNodeSet.AcceptedNodes.Count
                                  + " (" + acceptingFragments.Count + ")";
            Console.WriteLine(acceptedSummary);
            var rejectedSummary = "#Rejected seed nodes: " + seedNodeSet.RejectedNodes.Count
                                  + " (" + rejectingFragments.Count + ")";
            Console.WriteLine(rejectedSummary);

            var featureSet = new FeatuerSet(
                seedNodeSet, extractor, acceptingFragments, rejectingFragments);
            var classifier = new Classifier(
                seedNodeSet.SelectedNodeNames.Select(name => ">" + name + ">"), featureSet);

            var featureSummary = "#Features: " + featureSet.AcceptingFeatureCount + ", "
                                 + featureSet.RejectingFeatureCount;
            Console.WriteLine(featureSummary);
            Console.WriteLine("Inner: " + extractor.IsInner);

            var featureEncoder = new FeatureEncoder(
                seedNodeSet.SelectedNodeNames, extractor, featureSet);
            var encodingResult = featureEncoder.Encode(codePaths, targetCsts, this, seedNodeSet);

            Console.WriteLine("#Unique Elements: " + encodingResult.VectorCount);

            // A vector that is both ideally accepted and ideally rejected cannot be
            // classified; print the features of the first such vector and give up.
            var idealAcceptedKeys = encodingResult.IdealAcceptedVector2GroupPath.Keys.ToHashSet();
            var idealRejectedKeys = encodingResult.IdealRejectedVector2GroupPath.Keys.ToHashSet();
            if (idealAcceptedKeys.Overlaps(idealRejectedKeys))
            {
                var rejectedMap = encodingResult.IdealRejectedVector2GroupPath;
                var conflicting =
                    encodingResult.IdealAcceptedVector2GroupPath.Keys.First(rejectedMap.ContainsKey);
                foreach (var feature in featureEncoder.GetFeatureStringsByVector(conflicting))
                {
                    Console.WriteLine(Experiment.Beautify(feature));
                }
                throw new Exception("Master predicates can't classify elements!");
            }

            var groupCache = new GroupCache(encodingResult, classifier);
            var trainingSet = encodingResult.CreateTrainingVectorSet();

            classifier.Create(trainingSet, groupCache);
            Experiment.WriteFeatureStrings(Console.Out, classifier, featureEncoder);
            Console.WriteLine("Preparing time: " + (Environment.TickCount - preparationStart));

            // The value handed to Classify is reset to 0 whenever Update returns
            // true and is incremented otherwise.
            var strategy = 0;
            var learningStart = Environment.TickCount;
            ClassificationResult classificationResult;

            for (;;)
            {
                var roundStart = Environment.TickCount;
                classificationResult = Classify(
                    strategy, classifier, groupCache, encodingResult, trainingSet);
                if (classificationResult.SuspiciousNodes == null)
                {
                    break;
                }

                var revealedAccepted = RevealSuspiciousElements(
                    encodingResult.IdealAcceptedVector2GroupPath.Keys,
                    classificationResult.SuspiciousNodes, encodingResult, trainingSet);
                var updated = classifier.Update(revealedAccepted, trainingSet, groupCache);
                strategy = updated ? 0 : strategy + 1;

                Console.WriteLine("Time: " + (Environment.TickCount - roundStart));
            }
            classifier.MakeImmutable();
            Console.WriteLine();
            Console.WriteLine("Sum time: " + (Environment.TickCount - learningStart));

            var requiredSummary = "#Required vectors: " + trainingSet.Count + " / "
                                  + encodingResult.IdealVectorSet.Count;
            Console.WriteLine(requiredSummary);

            if (writer != null)
            {
                encodingResult.WriteResult(writer, trainingSet);
            }

            foreach (var path in classifier.GroupPaths)
            {
                Console.WriteLine(path);
            }

            classifier.Optimize(encodingResult.IdealRejectedVector2GroupPath.Keys, groupCache);

            var learningResult = new LearningResult
            {
                ClassificationResult = classificationResult,
                Classifier = classifier,
                EncodingResult = encodingResult,
                FeatureEncoder = featureEncoder,
            };
            return learningResult;
        }
Exemple #17
0
        /// <summary>
        /// Runs the classifier over every ideal accepted and ideal rejected vector,
        /// tallies correct and wrong decisions, sorts the vectors that are not yet
        /// in the training set into four per-group buckets and lets a
        /// <c>SuspiciousNodeSelector</c> pick the suspicious nodes from them.
        /// </summary>
        /// <remarks>
        /// Bucket naming, with accepted = <c>IsAccepted</c> and rejected = <c>IsRejected</c>:
        /// acceptAccept = accepted and not rejected, acceptReject = accepted and rejected,
        /// rejectAccept = not accepted and not rejected, rejectReject = not accepted and rejected.
        /// The tallies are written to the console.
        /// </remarks>
        /// <param name="count">Strategy value forwarded to the selector.</param>
        /// <param name="classifier">Classifier under evaluation.</param>
        /// <param name="groupCache">Lookup from vector to group index.</param>
        /// <param name="encodingResult">Source of the ideal vectors and vector counts.</param>
        /// <param name="trainingSet">Already revealed vectors; an empty set is used when null.</param>
        public ClassificationResult Classify(
            int count, Classifier classifier, GroupCache groupCache,
            EncodingResult encodingResult,
            RevealedVectorSet trainingSet = null)
        {
            if (trainingSet == null)
            {
                trainingSet = new RevealedVectorSet();
            }

            int correctlyAccepted = 0, correctlyRejected = 0;
            int wronglyAccepted = 0, wronglyRejected = 0;
            int correctlyRejectedInRejecting = 0, wronglyRejectedInRejecting = 0;
            int wrongNodeCount = 0;

            // One bucket list per classifier group.
            var rejectAccept = new List<List<SuspiciousNode>>();
            var rejectReject = new List<List<SuspiciousNode>>();
            var acceptAccept = new List<List<SuspiciousNode>>();
            var acceptReject = new List<List<SuspiciousNode>>();
            for (int g = 0; g < classifier.GroupCount; g++)
            {
                rejectAccept.Add(new List<SuspiciousNode>());
                rejectReject.Add(new List<SuspiciousNode>());
                acceptAccept.Add(new List<SuspiciousNode>());
                acceptReject.Add(new List<SuspiciousNode>());
            }

            var wronglyRejectedFeatures = new List<BigInteger>();
            var wronglyAcceptedFeatures = new List<BigInteger>();

            // Vectors that ideally should be accepted.
            foreach (var entry in encodingResult.IdealVectorSet.Accepted)
            {
                var vector = entry.Key;
                var groupPath = entry.Value;
                var groupIndex = groupCache.GetGroupIndex(vector);
                var rejected = classifier.IsRejected(vector, groupIndex);
                var accepted = classifier.IsAccepted(vector, groupIndex);

                var alreadyRevealed = trainingSet.Accepted.ContainsKey(vector)
                                      || trainingSet.Rejected.ContainsKey(vector);
                if (!alreadyRevealed)
                {
                    var node = new SuspiciousNode
                    {
                        Vector = vector,
                        GroupKey = groupPath,
                        Used = false,
                    };
                    var bucket = accepted
                        ? (rejected ? acceptReject : acceptAccept)
                        : (rejected ? rejectReject : rejectAccept);
                    bucket[groupIndex].Add(node);
                }

                if (accepted && !rejected)
                {
                    correctlyAccepted++;
                }
                else
                {
                    if (accepted)
                    {
                        wronglyRejectedInRejecting++;
                    }
                    else
                    {
                        wronglyRejected++;
                    }
                    wrongNodeCount += encodingResult.Vector2Count[vector];
                    wronglyRejectedFeatures.Add(vector);
                }
            }

            // Vectors that ideally should be rejected.
            foreach (var entry in encodingResult.IdealVectorSet.Rejected)
            {
                var vector = entry.Key;
                var groupPath = entry.Value;
                var groupIndex = groupCache.GetGroupIndex(vector);
                var rejected = classifier.IsRejected(vector, groupIndex);
                var accepted = classifier.IsAccepted(vector, groupIndex);

                var alreadyRevealed = trainingSet.Accepted.ContainsKey(vector)
                                      || trainingSet.Rejected.ContainsKey(vector);
                if (!alreadyRevealed)
                {
                    var node = new SuspiciousNode
                    {
                        Vector = vector,
                        GroupKey = groupPath,
                        Used = false,
                    };
                    var bucket = accepted
                        ? (rejected ? acceptReject : acceptAccept)
                        : (rejected ? rejectReject : rejectAccept);
                    bucket[groupIndex].Add(node);
                }

                if (accepted && !rejected)
                {
                    wronglyAccepted++;
                    wrongNodeCount += encodingResult.Vector2Count[vector];
                    wronglyAcceptedFeatures.Add(vector);
                }
                else if (accepted)
                {
                    correctlyRejectedInRejecting++;
                }
                else
                {
                    correctlyRejected++;
                }
            }

            Console.WriteLine("done");
            var tallyLine = "WA: " + wronglyAccepted + ", WR: " + wronglyRejected + "/"
                            + wronglyRejectedInRejecting + ", CA: "
                            + correctlyAccepted + ", CR: " + correctlyRejected + "/"
                            + correctlyRejectedInRejecting;
            Console.WriteLine(tallyLine);
            var featureLine = "TR: "
                              + trainingSet.Count + ", AF: "
                              + String.Join(", ", classifier.CountAcceptingFeatures())
                              + ", RF: "
                              + String.Join(", ", classifier.CountRejectingFeatures());
            Console.WriteLine(featureLine);

            var wrongFeatureCount = wronglyAccepted + wronglyRejected
                                    + wronglyRejectedInRejecting;

            var selector = new SuspiciousNodeSelector(
                classifier.AcceptingFeatureBitMask, classifier.RejectingFeatureBitMask);
            var suspiciousNodes = selector.SelectSuspiciousNodes(
                count, classifier, acceptAccept, acceptReject, rejectAccept, rejectReject);

            return new ClassificationResult(
                suspiciousNodes, wronglyAcceptedFeatures, wronglyRejectedFeatures,
                wrongFeatureCount, wrongNodeCount, encodingResult);
        }
 /// <summary>
 /// Chooses the "strong" selection quota for one group.
 /// </summary>
 /// <param name="groupIndex">Position of the group within the classifier's accepting-feature counts.</param>
 /// <param name="classifier">Classifier whose accepting-feature counts are consulted.</param>
 /// <returns>
 /// <c>StronglyTargetCount</c> when the group's accepting-feature count is greater than
 /// <c>ThresholdvectorCount</c>; otherwise <c>StronglyTargetCount / 2</c>.
 /// </returns>
 private static int DetermineStrongCount(int groupIndex, Classifier classifier)
 {
     var acceptingFeatureCount = classifier.CountAcceptingFeatures().ElementAt(groupIndex);
     if (acceptingFeatureCount > ThresholdvectorCount)
     {
         return StronglyTargetCount;
     }

     // At or below the threshold: use only half of the quota.
     return StronglyTargetCount / 2;
 }
 /// <summary>
 /// Chooses the regular selection quota for one group.
 /// </summary>
 /// <param name="classifier">Classifier whose accepting-feature counts are consulted.</param>
 /// <param name="groupIndex">Position of the group within the classifier's accepting-feature counts.</param>
 /// <returns>
 /// <c>TargetCount</c> when the group's accepting-feature count is greater than
 /// <c>ThresholdvectorCount</c>; otherwise <c>TargetCount / 2</c>.
 /// </returns>
 private static int DetermineCount(Classifier classifier, int groupIndex)
 {
     var acceptingFeatureCount = classifier.CountAcceptingFeatures().ElementAt(groupIndex);
     if (acceptingFeatureCount > ThresholdvectorCount)
     {
         return TargetCount;
     }

     // At or below the threshold: use only half of the quota.
     return TargetCount / 2;
 }
Exemple #20
0
		/// <summary>
		/// Creates a cache for <paramref name="encodingResult"/> and fills it right away
		/// by calling <c>UpdateGroupCache</c> with <paramref name="classifier"/>.
		/// </summary>
		/// <param name="encodingResult">Encoding result stored in this cache.</param>
		/// <param name="classifier">Classifier passed on to <c>UpdateGroupCache</c>.</param>
		public GroupCache(EncodingResult encodingResult, Classifier classifier) {
			// Store the encoding result before the cache is populated.
			this._encodingResult = encodingResult;
			this.UpdateGroupCache(classifier);
		}
		/// <summary>
		/// Lazily yields, group by group, the unused candidates with the highest bit counts.
		/// </summary>
		/// <remarks>
		/// For each group every candidate's <c>BitsCount</c> is recomputed from its vector
		/// (masked with the rejecting feature bit mask and OR-ed with the group's rejecting
		/// vector), the group's list is sorted in place by ascending <c>BitsCount</c>, and the
		/// list is then walked from the end. A yielded candidate is marked <c>Used</c> only
		/// after the consumer asks for the next element.
		/// NOTE(review): if <c>DetermineCount</c> ever returns 0, the countdown never reaches
		/// zero and every unused candidate of the group is yielded - confirm this is intended.
		/// </remarks>
		/// <param name="candidateNodesLists">One candidate list per group; each list is re-sorted in place.</param>
		/// <param name="classifier">Supplies the per-group rejecting vector and the per-group quota.</param>
		/// <returns>The selected candidates, in selection order.</returns>
		public IEnumerable<SuspiciousNode> SelectNodesForFastRejectionLearning(
				IReadOnlyList<List<SuspiciousNode>> candidateNodesLists,
				Classifier classifier) {
			for (var groupIndex = 0; groupIndex < candidateNodesLists.Count; groupIndex++) {
				var bucket = candidateNodesLists[groupIndex];
				var rejecting = classifier.Units[groupIndex].Rejecting;

				// Score every candidate of this group, including already used ones.
				bucket.ForEach(scored => scored.BitsCount = LearningExperimentUtil.CountBits(
						(scored.Vector & _rejectingFeatureBitMask) | rejecting));
				bucket.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

				var remaining = DetermineCount(classifier, groupIndex);
				var position = bucket.Count;
				// Walk from the highest score downwards.
				while (--position >= 0) {
					var pick = bucket[position];
					if (!pick.Used) {
						yield return pick;
						pick.Used = true;
						remaining--;
						if (remaining == 0) {
							break;
						}
					}
				}
			}
		}
Exemple #22
0
 /// <summary>
 /// Creates a cache for <paramref name="encodingResult"/> and fills it right away
 /// by calling <c>UpdateGroupCache</c> with <paramref name="classifier"/>.
 /// </summary>
 /// <param name="encodingResult">Encoding result stored in this cache.</param>
 /// <param name="classifier">Classifier passed on to <c>UpdateGroupCache</c>.</param>
 public GroupCache(EncodingResult encodingResult, Classifier classifier)
 {
     // Store the encoding result before the cache is populated.
     this._encodingResult = encodingResult;
     this.UpdateGroupCache(classifier);
 }
        /// <summary>
        /// Runs <paramref name="classifier"/> over every ideal accepted and ideal rejected vector
        /// of <paramref name="encodingResult"/>, tallies the right and wrong decisions, prints the
        /// tallies to the console, and selects suspicious nodes among the vectors that are not
        /// yet part of <paramref name="trainingSet"/>.
        /// </summary>
        /// <param name="count">Passed through unchanged to <c>SelectSuspiciousNodes</c>.</param>
        /// <param name="classifier">Classifier whose accept/reject decisions are evaluated.</param>
        /// <param name="groupCache">Provides the group index of each vector.</param>
        /// <param name="encodingResult">Source of the ideal vectors and of the per-vector node counts.</param>
        /// <param name="trainingSet">Vectors already revealed; <c>null</c> is replaced by a new, empty set.</param>
        /// <returns>
        /// The selected suspicious nodes together with the wrongly accepted / wrongly rejected
        /// vectors, the number of wrong vectors and the summed node count of those vectors.
        /// </returns>
        public ClassificationResult Classify(
                int count, Classifier classifier, GroupCache groupCache,
                EncodingResult encodingResult,
                RevealedVectorSet trainingSet = null) {
            trainingSet = trainingSet ?? new RevealedVectorSet();

            // Decision tallies; the "...InRejecting" ones count vectors that pass the accepting
            // check but are also flagged by the rejecting check.
            int correctlyAccepted = 0, wronglyAccepted = 0;
            int correctlyRejected = 0, wronglyRejected = 0;
            int correctlyRejectedInRejecting = 0, wronglyRejectedInRejecting = 0;
            int wrongNodeCount = 0;

            // One candidate list per group for each (accepted?, rejected?) combination:
            // first word = outcome of IsAccepted, second word = outcome of IsRejected.
            var acceptAccept = new List<List<SuspiciousNode>>();
            var acceptReject = new List<List<SuspiciousNode>>();
            var rejectAccept = new List<List<SuspiciousNode>>();
            var rejectReject = new List<List<SuspiciousNode>>();
            for (var g = 0; g < classifier.GroupCount; g++) {
                acceptAccept.Add(new List<SuspiciousNode>());
                acceptReject.Add(new List<SuspiciousNode>());
                rejectAccept.Add(new List<SuspiciousNode>());
                rejectReject.Add(new List<SuspiciousNode>());
            }

            var wronglyAcceptedFeatures = new List<BigInteger>();
            var wronglyRejectedFeatures = new List<BigInteger>();

            // Pass 1: vectors that should be accepted.
            foreach (var entry in encodingResult.IdealVectorSet.Accepted) {
                var vector = entry.Key;
                var groupPath = entry.Value;
                var groupIndex = groupCache.GetGroupIndex(vector);
                var rejected = classifier.IsRejected(vector, groupIndex);
                var accepted = classifier.IsAccepted(vector, groupIndex);

                var alreadyRevealed = trainingSet.Accepted.ContainsKey(vector)
                                      || trainingSet.Rejected.ContainsKey(vector);
                if (!alreadyRevealed) {
                    var node = new SuspiciousNode {
                        Vector = vector,
                        GroupKey = groupPath,
                        Used = false
                    };
                    var bucket = accepted
                            ? (rejected ? acceptReject : acceptAccept)
                            : (rejected ? rejectReject : rejectAccept);
                    bucket[groupIndex].Add(node);
                }

                if (accepted && !rejected) {
                    correctlyAccepted++;
                } else {
                    // Either not accepted at all, or accepted but also flagged as rejected.
                    if (accepted) {
                        wronglyRejectedInRejecting++;
                    } else {
                        wronglyRejected++;
                    }
                    wrongNodeCount += encodingResult.Vector2Count[vector];
                    wronglyRejectedFeatures.Add(vector);
                }
            }

            // Pass 2: vectors that should be rejected.
            foreach (var entry in encodingResult.IdealVectorSet.Rejected) {
                var vector = entry.Key;
                var groupPath = entry.Value;
                var groupIndex = groupCache.GetGroupIndex(vector);
                var rejected = classifier.IsRejected(vector, groupIndex);
                var accepted = classifier.IsAccepted(vector, groupIndex);

                var alreadyRevealed = trainingSet.Accepted.ContainsKey(vector)
                                      || trainingSet.Rejected.ContainsKey(vector);
                if (!alreadyRevealed) {
                    var node = new SuspiciousNode {
                        Vector = vector,
                        GroupKey = groupPath,
                        Used = false
                    };
                    var bucket = accepted
                            ? (rejected ? acceptReject : acceptAccept)
                            : (rejected ? rejectReject : rejectAccept);
                    bucket[groupIndex].Add(node);
                }

                if (!accepted) {
                    correctlyRejected++;
                } else if (rejected) {
                    correctlyRejectedInRejecting++;
                } else {
                    wronglyAccepted++;
                    wrongNodeCount += encodingResult.Vector2Count[vector];
                    wronglyAcceptedFeatures.Add(vector);
                }
            }

            Console.WriteLine("done");
            var tallyLine = "WA: " + wronglyAccepted + ", WR: " + wronglyRejected + "/"
                            + wronglyRejectedInRejecting + ", CA: "
                            + correctlyAccepted + ", CR: " + correctlyRejected + "/"
                            + correctlyRejectedInRejecting;
            Console.WriteLine(tallyLine);
            var featureLine = "TR: "
                              + trainingSet.Count + ", AF: "
                              + String.Join(", ", classifier.CountAcceptingFeatures())
                              + ", RF: "
                              + String.Join(", ", classifier.CountRejectingFeatures());
            Console.WriteLine(featureLine);

            var wrongFeatureCount = wronglyAccepted + wronglyRejected
                                    + wronglyRejectedInRejecting;

            var selector = new SuspiciousNodeSelector(
                    classifier.AcceptingFeatureBitMask,
                    classifier.RejectingFeatureBitMask);
            var suspiciousNodes = selector.SelectSuspiciousNodes(
                    count, classifier, acceptAccept, acceptReject, rejectAccept, rejectReject);

            return new ClassificationResult(
                    suspiciousNodes, wronglyAcceptedFeatures, wronglyRejectedFeatures,
                    wrongFeatureCount, wrongNodeCount, encodingResult);
        }