Ejemplo n.º 1
0
 /// <summary>
 /// Creates an encoder that stores the given node names and extractor and builds
 /// its two lookup tables from <paramref name="featureSet"/>.
 /// </summary>
 /// <param name="selectedNodeNames">Stored as-is in <c>_selectedNodeNames</c>.</param>
 /// <param name="extractor">Stored as-is in <c>_extractor</c>.</param>
 /// <param name="featureSet">Passed to <c>CreateFeatureString2Bit</c>.</param>
 public FeatureEncoder(
         ISet<string> selectedNodeNames,
         FeatureExtractor extractor,
         FeatuerSet featureSet) {
     _extractor = extractor;
     _selectedNodeNames = selectedNodeNames;

     // The second table is built from the first, so this order must be kept.
     _featureString2Bit = CreateFeatureString2Bit(featureSet);
     _bit2FeatureString = CreateBit2FeatureString(_featureString2Bit);
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Initializes the encoder: keeps the supplied node names and extractor, then
 /// builds the feature-string-to-bit table and the table produced from it by
 /// <c>CreateBit2FeatureString</c>.
 /// </summary>
 /// <param name="selectedNodeNames">Stored as-is in <c>_selectedNodeNames</c>.</param>
 /// <param name="extractor">Stored as-is in <c>_extractor</c>.</param>
 /// <param name="featureSet">Passed to <c>CreateFeatureString2Bit</c>.</param>
 public FeatureEncoder(
     ISet<string> selectedNodeNames,
     FeatureExtractor extractor,
     FeatuerSet featureSet)
 {
     _extractor = extractor;
     _selectedNodeNames = selectedNodeNames;

     // _bit2FeatureString takes _featureString2Bit as input, so it is assigned last.
     _featureString2Bit = CreateFeatureString2Bit(featureSet);
     _bit2FeatureString = CreateBit2FeatureString(_featureString2Bit);
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates a classifier for the given group paths, copying the feature counts
 /// from <paramref name="featureSet"/> and deriving the three feature bit masks
 /// before calling <c>Initialize</c>.
 /// </summary>
 /// <param name="groupPaths">Copied into a new list and stored in <c>GroupPaths</c>.</param>
 /// <param name="featureSet">Source of the accepting, rejecting and total feature counts.</param>
 public Classifier(IEnumerable<string> groupPaths, FeatuerSet featureSet) {
     var one = BigInteger.One;

     AcceptingFeatureCount = featureSet.AcceptingFeatureCount;
     RejectingFeatureCount = featureSet.RejectingFeatureCount;
     AllFeatureCount = AcceptingFeatureCount + RejectingFeatureCount;

     // (1 << k) - 1 is the value whose k lowest bits are set.
     AllFeatureBitMask = (one << featureSet.FeatureCount) - one;
     AcceptingFeatureBitMask = (one << featureSet.AcceptingFeatureCount) - one;

     // XOR keeps the bits that are set in exactly one of the two masks.
     RejectingFeatureBitMask = AcceptingFeatureBitMask ^ AllFeatureBitMask;

     GroupPaths = groupPaths.ToList();
     Initialize();
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Builds a classifier: records the feature counts reported by
 /// <paramref name="featureSet"/>, computes the all/accepting/rejecting bit
 /// masks, stores a list copy of <paramref name="groupPaths"/> and finally
 /// calls <c>Initialize</c>.
 /// </summary>
 /// <param name="groupPaths">Materialized with <c>ToList</c> into <c>GroupPaths</c>.</param>
 /// <param name="featureSet">Provides AcceptingFeatureCount, RejectingFeatureCount and FeatureCount.</param>
 public Classifier(IEnumerable<string> groupPaths, FeatuerSet featureSet)
 {
     var one = BigInteger.One;

     AcceptingFeatureCount = featureSet.AcceptingFeatureCount;
     RejectingFeatureCount = featureSet.RejectingFeatureCount;
     AllFeatureCount = AcceptingFeatureCount + RejectingFeatureCount;

     // Low-bit masks: shifting 1 left by k and subtracting 1 sets the k lowest bits.
     AllFeatureBitMask = (one << featureSet.FeatureCount) - one;
     AcceptingFeatureBitMask = (one << featureSet.AcceptingFeatureCount) - one;

     // XOR keeps the bits that are set in exactly one of the two masks.
     RejectingFeatureBitMask = AcceptingFeatureBitMask ^ AllFeatureBitMask;

     GroupPaths = groupPaths.ToList();
     Initialize();
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Maps every feature string to a single-bit <see cref="BigInteger"/>.
        /// Accepting features receive bits 0, 1, 2, ... in enumeration order and
        /// rejecting features continue from the next free bit.
        /// </summary>
        /// <param name="featuerSet">Provides AcceptingFeatures and RejectingFeatures.</param>
        /// <returns>An immutable dictionary from feature string to its bit value.</returns>
        private static IDictionary<string, BigInteger> CreateFeatureString2Bit(
            FeatuerSet featuerSet)
        {
            var bitByFeature = new Dictionary<string, BigInteger>();
            var bitIndex = 0;

            foreach (var acceptingFeature in featuerSet.AcceptingFeatures)
            {
                bitByFeature.Add(acceptingFeature, BigInteger.One << bitIndex);
                bitIndex++;
            }

            // bitIndex is not reset, so rejecting bits start right after the accepting ones.
            foreach (var rejectingFeature in featuerSet.RejectingFeatures)
            {
                bitByFeature.Add(rejectingFeature, BigInteger.One << bitIndex);
                bitIndex++;
            }

            return bitByFeature.ToImmutableDictionary();
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds a classifier from the seed files, encodes the code files into
        /// feature vectors, and then alternates <c>Classify</c> and
        /// <c>classifier.Update</c> until <c>Classify</c> reports no suspicious nodes.
        /// Counts and timings are written to the console along the way.
        /// </summary>
        /// <param name="seedPaths">Paths handed to <c>GenerateValidCsts</c> to obtain the seed CSTs.</param>
        /// <param name="codePaths">Paths handed to <c>GenerateValidCsts</c> and to the feature encoder.</param>
        /// <param name="searchPattern">Not referenced anywhere in this method body.</param>
        /// <param name="writer">Optional; when non-null, <c>encodingResult.WriteResult</c> writes to it.</param>
        /// <returns>
        /// A <c>LearningResult</c> holding the last classification result, the classifier,
        /// the encoding result and the feature encoder.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when some vector is a key of both the ideal accepted map and the
        /// ideal rejected map of the encoding result.
        /// </exception>
        public LearningResult Learn(
            ICollection <string> seedPaths, ICollection <string> codePaths, string searchPattern,
            StreamWriter writer = null)
        {
            var allCsts   = GenerateValidCsts(codePaths);
            var seedCsts  = GenerateValidCsts(seedPaths).ToList();
            // From every seed CST take the nodes returned for OracleNames and keep
            // only those that pass ProtectedIsAcceptedUsingOracle.
            var seedNodes = seedCsts
                            .SelectMany(
                cst => LearningExperimentUtil.GetUppermostNodesByNames(cst, OracleNames))
                            .Where(ProtectedIsAcceptedUsingOracle)
                            .ToList();

            // Only the first seed CST (and its code) is used to construct the fragments.
            var seedCst        = seedCsts.First();
            var seedCode       = seedCst.Code;
            var structuredCode = new StructuredCode(seedCode);

            var acceptingFragments = ConstructAcceptingFragments(structuredCode, seedCst, seedNodes);
            var rejectingFragments = ConstructRejectingFragments(structuredCode, seedCst);

            // NOTE(review): the return value is discarded; presumably called for a
            // side effect or validation - confirm against SeedNodeSet.Create.
            SeedNodeSet.Create(acceptingFragments, this);

            // Start of the "Preparing time" measurement (Environment.TickCount is in milliseconds).
            var preparingTime = Environment.TickCount;
            var extractor     = CreateExtractor();
            var seedNodeSet   = new SeedNodeSet(acceptingFragments.Select(f => f.Node), seedCsts, this);

            // Each line prints the node count of the seed node set, then the fragment count in parentheses.
            Console.WriteLine("#Accepted seed nodes: " + seedNodeSet.AcceptedNodes.Count
                              + " (" + acceptingFragments.Count + ")");
            Console.WriteLine("#Rejected seed nodes: " + seedNodeSet.RejectedNodes.Count
                              + " (" + rejectingFragments.Count + ")");

            var featureSet = new FeatuerSet(seedNodeSet, extractor, acceptingFragments, rejectingFragments);
            // A group path is the selected node name wrapped in '>' on both sides.
            var groupPaths = seedNodeSet.SelectedNodeNames.Select(n => ">" + n + ">");
            var classifier = new Classifier(groupPaths, featureSet);

            Console.WriteLine(
                "#Features: " + featureSet.AcceptingFeatureCount + ", "
                + featureSet.RejectingFeatureCount);
            Console.WriteLine("Inner: " + extractor.IsInner);

            var featureEncoder = new FeatureEncoder(seedNodeSet.SelectedNodeNames, extractor,
                                                    featureSet);
            var encodingResult = featureEncoder.Encode(codePaths, allCsts, this, seedNodeSet);

            Console.WriteLine("#Unique Elements: " + encodingResult.VectorCount);
            // A vector present in both the accepted and the rejected map cannot be
            // told apart: print the feature strings of the first such vector and abort.
            if (encodingResult.IdealAcceptedVector2GroupPath.Keys.ToHashSet()
                .Overlaps(encodingResult.IdealRejectedVector2GroupPath.Keys.ToHashSet()))
            {
                var others = encodingResult.IdealRejectedVector2GroupPath;
                var vector =
                    encodingResult.IdealAcceptedVector2GroupPath.Keys.First(others.ContainsKey);
                foreach (var featureString in featureEncoder.GetFeatureStringsByVector(vector))
                {
                    Console.WriteLine(Experiment.Beautify(featureString));
                }
                throw new Exception("Master predicates can't classify elements!");
            }

            var groupCache  = new GroupCache(encodingResult, classifier);
            var trainingSet = encodingResult.CreateTrainingVectorSet();

            classifier.Create(trainingSet, groupCache);
            Experiment.WriteFeatureStrings(Console.Out, classifier, featureEncoder);
            Console.WriteLine("Preparing time: " + (Environment.TickCount - preparingTime));

            // count = number of consecutive iterations in which classifier.Update
            // returned false; it is passed to Classify on every iteration.
            var count   = 0;
            // Start of the "Sum time" measurement covering the whole loop below.
            var sumTime = Environment.TickCount;
            ClassificationResult classificationResult;

            // Repeat until Classify returns a result without suspicious nodes.
            while (true)
            {
                var time = Environment.TickCount;
                classificationResult = Classify(count, classifier, groupCache, encodingResult,
                                                trainingSet);
                if (classificationResult.SuspiciousNodes == null)
                {
                    break;
                }

                var additionalAcceptedSet = RevealSuspiciousElements(
                    encodingResult.IdealAcceptedVector2GroupPath.Keys,
                    classificationResult.SuspiciousNodes, encodingResult, trainingSet);
                // Update returning false bumps the counter; returning true resets it.
                if (!classifier.Update(additionalAcceptedSet, trainingSet, groupCache))
                {
                    count++;
                }
                else
                {
                    count = 0;
                }

                // Per-iteration elapsed time; not printed for the final, breaking iteration.
                Console.WriteLine("Time: " + (Environment.TickCount - time));
            }
            classifier.MakeImmutable();
            Console.WriteLine();
            Console.WriteLine("Sum time: " + (Environment.TickCount - sumTime));
            var trainingVectorCount = trainingSet.Count;
            var idealVectorCount    = encodingResult.IdealVectorSet.Count;

            Console.WriteLine("#Required vectors: " + trainingVectorCount + " / " + idealVectorCount);

            if (writer != null)
            {
                encodingResult.WriteResult(writer, trainingSet);
            }

            foreach (var groupPath in classifier.GroupPaths)
            {
                Console.WriteLine(groupPath);
            }

            // NOTE(review): Optimize runs after MakeImmutable - confirm this ordering is intended.
            classifier.Optimize(encodingResult.IdealRejectedVector2GroupPath.Keys, groupCache);

            return(new LearningResult {
                ClassificationResult = classificationResult,
                Classifier = classifier,
                EncodingResult = encodingResult,
                FeatureEncoder = featureEncoder,
            });
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Assigns each feature string its own single-bit <see cref="BigInteger"/>.
 /// Accepting features get bits 0, 1, 2, ... in enumeration order; rejecting
 /// features continue with the following bits.
 /// </summary>
 /// <param name="featuerSet">Provides AcceptingFeatures and RejectingFeatures.</param>
 /// <returns>An immutable dictionary from feature string to its bit value.</returns>
 private static IDictionary<string, BigInteger> CreateFeatureString2Bit(
         FeatuerSet featuerSet) {
     var bitByFeature = new Dictionary<string, BigInteger>();
     var bitIndex = 0;

     foreach (var acceptingFeature in featuerSet.AcceptingFeatures) {
         bitByFeature.Add(acceptingFeature, BigInteger.One << bitIndex);
         bitIndex++;
     }

     // bitIndex is not reset, so rejecting bits start right after the accepting ones.
     foreach (var rejectingFeature in featuerSet.RejectingFeatures) {
         bitByFeature.Add(rejectingFeature, BigInteger.One << bitIndex);
         bitIndex++;
     }

     return bitByFeature.ToImmutableDictionary();
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Builds a classifier from the seed files, encodes the code files into
        /// feature vectors, and then alternates <c>Classify</c> and
        /// <c>classifier.Update</c> until <c>Classify</c> reports no suspicious nodes.
        /// Counts and timings are written to the console along the way.
        /// </summary>
        /// <param name="seedPaths">Paths handed to <c>GenerateValidCsts</c> to obtain the seed CSTs.</param>
        /// <param name="codePaths">Paths handed to <c>GenerateValidCsts</c> and to the feature encoder.</param>
        /// <param name="searchPattern">Not referenced anywhere in this method body.</param>
        /// <param name="writer">Optional; when non-null, <c>encodingResult.WriteResult</c> writes to it.</param>
        /// <returns>
        /// A <c>LearningResult</c> holding the last classification result, the classifier,
        /// the encoding result and the feature encoder.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when some vector is a key of both the ideal accepted map and the
        /// ideal rejected map of the encoding result.
        /// </exception>
        public LearningResult Learn(
                ICollection<string> seedPaths, ICollection<string> codePaths, string searchPattern,
                StreamWriter writer = null) {
            var allCsts = GenerateValidCsts(codePaths);
            var seedCsts = GenerateValidCsts(seedPaths).ToList();
            // From every seed CST take the nodes returned for OracleNames and keep
            // only those that pass ProtectedIsAcceptedUsingOracle.
            var seedNodes = seedCsts
                    .SelectMany(
                            cst => LearningExperimentUtil.GetUppermostNodesByNames(cst, OracleNames))
                    .Where(ProtectedIsAcceptedUsingOracle)
                    .ToList();

            // Only the first seed CST (and its code) is used to construct the fragments.
            var seedCst = seedCsts.First();
            var seedCode = seedCst.Code;
            var structuredCode = new StructuredCode(seedCode);

            var acceptingFragments = ConstructAcceptingFragments(structuredCode, seedCst, seedNodes);
            var rejectingFragments = ConstructRejectingFragments(structuredCode, seedCst);

            // NOTE(review): the return value is discarded; presumably called for a
            // side effect or validation - confirm against SeedNodeSet.Create.
            SeedNodeSet.Create(acceptingFragments, this);

            // Start of the "Preparing time" measurement (Environment.TickCount is in milliseconds).
            var preparingTime = Environment.TickCount;
            var extractor = CreateExtractor();
            var seedNodeSet = new SeedNodeSet(acceptingFragments.Select(f => f.Node), seedCsts, this);
            // Each line prints the node count of the seed node set, then the fragment count in parentheses.
            Console.WriteLine("#Accepted seed nodes: " + seedNodeSet.AcceptedNodes.Count
                              + " (" + acceptingFragments.Count + ")");
            Console.WriteLine("#Rejected seed nodes: " + seedNodeSet.RejectedNodes.Count
                              + " (" + rejectingFragments.Count + ")");

            var featureSet = new FeatuerSet(seedNodeSet, extractor, acceptingFragments, rejectingFragments);
            // A group path is the selected node name wrapped in '>' on both sides.
            var groupPaths = seedNodeSet.SelectedNodeNames.Select(n => ">" + n + ">");
            var classifier = new Classifier(groupPaths, featureSet);
            Console.WriteLine(
                    "#Features: " + featureSet.AcceptingFeatureCount + ", "
                    + featureSet.RejectingFeatureCount);
            Console.WriteLine("Inner: " + extractor.IsInner);

            var featureEncoder = new FeatureEncoder(seedNodeSet.SelectedNodeNames, extractor,
                    featureSet);
            var encodingResult = featureEncoder.Encode(codePaths, allCsts, this, seedNodeSet);
            Console.WriteLine("#Unique Elements: " + encodingResult.VectorCount);
            // A vector present in both the accepted and the rejected map cannot be
            // told apart: print the feature strings of the first such vector and abort.
            if (encodingResult.IdealAcceptedVector2GroupPath.Keys.ToHashSet()
                    .Overlaps(encodingResult.IdealRejectedVector2GroupPath.Keys.ToHashSet())) {
                var others = encodingResult.IdealRejectedVector2GroupPath;
                var vector =
                        encodingResult.IdealAcceptedVector2GroupPath.Keys.First(others.ContainsKey);
                foreach (var featureString in featureEncoder.GetFeatureStringsByVector(vector)) {
                    Console.WriteLine(Experiment.Beautify(featureString));
                }
                throw new Exception("Master predicates can't classify elements!");
            }

            var groupCache = new GroupCache(encodingResult, classifier);
            var trainingSet = encodingResult.CreateTrainingVectorSet();
            classifier.Create(trainingSet, groupCache);
            Experiment.WriteFeatureStrings(Console.Out, classifier, featureEncoder);
            Console.WriteLine("Preparing time: " + (Environment.TickCount - preparingTime));

            // count = number of consecutive iterations in which classifier.Update
            // returned false; it is passed to Classify on every iteration.
            var count = 0;
            // Start of the "Sum time" measurement covering the whole loop below.
            var sumTime = Environment.TickCount;
            ClassificationResult classificationResult;
            // Repeat until Classify returns a result without suspicious nodes.
            while (true) {
                var time = Environment.TickCount;
                classificationResult = Classify(count, classifier, groupCache, encodingResult,
                        trainingSet);
                if (classificationResult.SuspiciousNodes == null) {
                    break;
                }

                var additionalAcceptedSet = RevealSuspiciousElements(
                        encodingResult.IdealAcceptedVector2GroupPath.Keys,
                        classificationResult.SuspiciousNodes, encodingResult, trainingSet);
                // Update returning false bumps the counter; returning true resets it.
                if (!classifier.Update(additionalAcceptedSet, trainingSet, groupCache)) {
                    count++;
                } else {
                    count = 0;
                }

                // Per-iteration elapsed time; not printed for the final, breaking iteration.
                Console.WriteLine("Time: " + (Environment.TickCount - time));
            }
            classifier.MakeImmutable();
            Console.WriteLine();
            Console.WriteLine("Sum time: " + (Environment.TickCount - sumTime));
            var trainingVectorCount = trainingSet.Count;
            var idealVectorCount = encodingResult.IdealVectorSet.Count;
            Console.WriteLine("#Required vectors: " + trainingVectorCount + " / " + idealVectorCount);

            if (writer != null) {
                encodingResult.WriteResult(writer, trainingSet);
            }

            foreach (var groupPath in classifier.GroupPaths) {
                Console.WriteLine(groupPath);
            }

            // NOTE(review): Optimize runs after MakeImmutable - confirm this ordering is intended.
            classifier.Optimize(encodingResult.IdealRejectedVector2GroupPath.Keys, groupCache);

            return new LearningResult {
                ClassificationResult = classificationResult,
                Classifier = classifier,
                EncodingResult = encodingResult,
                FeatureEncoder = featureEncoder,
            };
        }