/// <summary>
 /// Lazily selects, per candidate list, the nodes that contribute new masked bits.
 /// Candidates of list <c>i</c> are visited in ascending order of the number of bits in
 /// <c>classifier.Units[i].Accepting</c> that coincide with the rejecting-mask bits
 /// NOT set in the candidate's vector. A candidate is yielded only when
 /// <c>(Vector ^ xor) &amp; mask</c> adds at least one bit to the bits accumulated so far
 /// for that list.
 /// </summary>
 /// <param name="candidateNodesLists">Candidate lists; index i is paired with classifier.Units[i].</param>
 /// <param name="xor">Value XOR-ed onto each candidate vector before accumulation.</param>
 /// <param name="mask">Bits that take part in the accumulation.</param>
 /// <param name="classifier">Classifier whose units supply the accepting vectors used for ordering.</param>
 /// <returns>The selected candidates, produced lazily.</returns>
 private IEnumerable <SuspiciousNode> SelectSuspiciousElementsWithMaskForFastRejectionLearning(
     List <List <SuspiciousNode> > candidateNodesLists, BigInteger xor, BigInteger mask,
     Classifier classifier)
 {
     for (var listIndex = 0; listIndex < candidateNodesLists.Count; listIndex++)
     {
         var unit    = classifier.Units[listIndex];
         var covered = BigInteger.Zero;

         // (Vector & m) ^ m keeps exactly the bits of m that are missing from Vector.
         // OrderBy is a stable sort, so ties keep their original list order.
         var ordered = candidateNodesLists[listIndex]
                       .OrderBy(
             node => LearningExperimentUtil.CountBits(
                 unit.Accepting
                 & ((node.Vector & _rejectingFeatureBitMask) ^ _rejectingFeatureBitMask)))
                       .ToList();

         foreach (var node in ordered)
         {
             var merged = (covered | (node.Vector ^ xor)) & mask;
             if (merged != covered)
             {
                 // The node adds something new: remember its bits and hand it out.
                 covered = merged;
                 yield return(node);
             }
         }
     }
 }
        /// <summary>
        /// Lazily yields, for every candidate list, the not-yet-used nodes with the fewest
        /// bits set inside <c>_acceptingFeatureBitMask</c>.
        /// Side effects: each list is sorted in place, every candidate's <c>BitsCount</c> is
        /// overwritten, and each yielded node is flagged <c>Used</c> once the consumer asks
        /// for the next element.
        /// </summary>
        /// <param name="candidateNodesLists">Candidate lists; index i is passed to DetermineCount.</param>
        /// <param name="classifier">Classifier forwarded to DetermineCount.</param>
        /// <returns>The selected nodes, produced lazily.</returns>
        public IEnumerable <SuspiciousNode> SelectSuspiciousAcceptedNodes(
            IReadOnlyList <List <SuspiciousNode> > candidateNodesLists,
            Classifier classifier)
        {
            for (var listIndex = 0; listIndex < candidateNodesLists.Count; listIndex++)
            {
                var nodes = candidateNodesLists[listIndex];

                // Score every node by its number of accepting-feature bits.
                for (var k = 0; k < nodes.Count; k++)
                {
                    var scored = nodes[k];
                    scored.BitsCount =
                        LearningExperimentUtil.CountBits(scored.Vector & _acceptingFeatureBitMask);
                }

                // Ascending: nodes with the fewest bits come first.
                nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

                var remaining = DetermineCount(classifier, listIndex);
                foreach (var node in nodes)
                {
                    if (!node.Used)
                    {
                        yield return(node);

                        node.Used = true;
                        remaining--;
                        if (remaining == 0)
                        {
                            break;
                        }
                    }
                }
            }
        }
 /// <summary>
 /// Greedily selects nodes per candidate list so that the accumulated masked vector grows
 /// as slowly as possible: in every round the candidate that adds the smallest positive
 /// number of new bits (after XOR with <paramref name="xor"/> and masking with
 /// <paramref name="mask"/>) is yielded and its bits are merged into the accumulated vector.
 /// A list is finished when no candidate adds any new bit.
 /// </summary>
 /// <param name="candidateNodesLists">Candidate lists, processed independently of each other.</param>
 /// <param name="xor">Value XOR-ed onto each candidate vector before accumulation.</param>
 /// <param name="mask">Bits that take part in the accumulation.</param>
 /// <returns>The selected candidates, produced lazily.</returns>
 private IEnumerable <SuspiciousNode> SelectSuspiciousElementsWithMaskWithSmallGrowing(
     IReadOnlyList <List <SuspiciousNode> > candidateNodesLists, BigInteger xor, BigInteger mask)
 {
     for (int i = 0; i < candidateNodesLists.Count; i++)
     {
         var vector     = BigInteger.Zero;
         var candidates = candidateNodesLists[i];
         while (true)
         {
             var            minDiffCount = int.MaxValue;
             var            bestVector   = vector;
             SuspiciousNode newNode      = null;
             foreach (var candidate in candidates)
             {
                 // Every candidate is measured against the vector as it was at the start
                 // of this round. Committing a provisional winner into `vector` during the
                 // scan would skew later comparisons and mark bits as covered that belong
                 // to a node which is never yielded.
                 var newVector = (vector | (candidate.Vector ^ xor)) & mask;
                 var diffCount = LearningExperimentUtil.CountBits(newVector ^ vector);
                 if (diffCount > 0 && minDiffCount > diffCount)
                 {
                     minDiffCount = diffCount;
                     bestVector   = newVector;
                     newNode      = candidate;
                 }
             }
             if (newNode == null)
             {
                 // No candidate contributes a new bit any more.
                 break;
             }
             // Commit only the bits of the node that is actually returned.
             vector = bestVector;
             yield return(newNode);
         }
     }
 }
        /// <summary>
        /// Lazily yields, for every candidate list, the not-yet-used nodes with the highest
        /// bit count of <c>(Vector &amp; _rejectingFeatureBitMask) | classifier.Units[i].Rejecting</c>.
        /// Side effects: each list is sorted in place (ascending by that count), every
        /// candidate's <c>BitsCount</c> is overwritten, and each yielded node is flagged
        /// <c>Used</c> once the consumer asks for the next element.
        /// </summary>
        /// <param name="candidateNodesLists">Candidate lists; index i is paired with classifier.Units[i].</param>
        /// <param name="classifier">Classifier supplying the rejecting vectors and forwarded to DetermineCount.</param>
        /// <returns>The selected nodes, produced lazily.</returns>
        public IEnumerable <SuspiciousNode> SelectNodesForFastRejectionLearning(
            IReadOnlyList <List <SuspiciousNode> > candidateNodesLists,
            Classifier classifier)
        {
            for (var listIndex = 0; listIndex < candidateNodesLists.Count; listIndex++)
            {
                var nodes         = candidateNodesLists[listIndex];
                var unitRejecting = classifier.Units[listIndex].Rejecting;

                foreach (var scored in nodes)
                {
                    var combined = (scored.Vector & _rejectingFeatureBitMask) | unitRejecting;
                    scored.BitsCount = LearningExperimentUtil.CountBits(combined);
                }
                nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

                var remaining = DetermineCount(classifier, listIndex);

                // Walk the ascending list backwards so the largest counts are served first.
                for (var position = nodes.Count - 1; position >= 0; position--)
                {
                    var node = nodes[position];
                    if (!node.Used)
                    {
                        yield return(node);

                        node.Used = true;
                        remaining--;
                        if (remaining == 0)
                        {
                            break;
                        }
                    }
                }
            }
        }
        /// <summary>
        /// Lazily yields, for every candidate list, the not-yet-used nodes sharing the most
        /// bits with <c>classifier.Units[i].Accepting</c>.
        /// Side effects: each list is sorted in place (ascending by the shared-bit count),
        /// every candidate's <c>BitsCount</c> is overwritten, and each yielded node is
        /// flagged <c>Used</c> once the consumer asks for the next element.
        /// </summary>
        /// <param name="candidateNodesLists">Candidate lists; index i is paired with classifier.Units[i].</param>
        /// <param name="classifier">Classifier supplying the accepting vectors and forwarded to DetermineCount.</param>
        /// <returns>The selected nodes, produced lazily.</returns>
        public IEnumerable <SuspiciousNode> SelectNodesForSlowAcceptanceLearning(
            IReadOnlyList <List <SuspiciousNode> > candidateNodesLists,
            Classifier classifier)
        {
            for (var listIndex = 0; listIndex < candidateNodesLists.Count; listIndex++)
            {
                var nodes         = candidateNodesLists[listIndex];
                var unitAccepting = classifier.Units[listIndex].Accepting;

                for (var k = 0; k < nodes.Count; k++)
                {
                    var scored = nodes[k];
                    scored.BitsCount = LearningExperimentUtil.CountBits(scored.Vector & unitAccepting);
                }
                nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

                var remaining = DetermineCount(classifier, listIndex);

                // Walk the ascending list backwards so the largest counts are served first.
                for (var position = nodes.Count - 1; position >= 0; position--)
                {
                    var node = nodes[position];
                    if (!node.Used)
                    {
                        yield return(node);

                        node.Used = true;
                        remaining--;
                        if (remaining == 0)
                        {
                            break;
                        }
                    }
                }
            }
        }
Beispiel #6
0
        /// <summary>
        /// Creates seed nodes for the given fragments, re-targets each seed node to a
        /// descendant whose name belongs to the selected node names, and stores in
        /// <c>Path</c> the chain of ancestor names and rule ids between that node and the
        /// innermost node of the seed's surrounding range.
        /// </summary>
        /// <param name="structuredCode">The structured code forwarded to CreateSeedNodes.</param>
        /// <param name="cst">The concrete syntax tree the fragments refer to.</param>
        /// <param name="fragments">The selected fragments forwarded to CreateSeedNodes.</param>
        /// <returns>The seed nodes with their <c>Node</c> and <c>Path</c> properties updated.</returns>
        public static List <SeedNode> ConstructAcceptingFragments(StructuredCode structuredCode, CstNode cst, IList <SelectedFragment> fragments)
        {
            var seeds = CreateSeedNodes(structuredCode, cst, fragments);

            // A fragment selected by a user can correspond to several nodes, so there are
            // several possible node names to filter by. Pick names that do not filter out
            // the nodes the user wants.
            var uppermostNodes = seeds
                                 .Select(seed => seed.Node.AncestorWithSingleChild())
                                 .ToImmutableHashSet();
            var goodNames = LearningExperimentUtil.FindGoodNodeNames(uppermostNodes)
                            .ToImmutableHashSet();

            foreach (var seed in seeds)
            {
                // Switch to the first node (self or single-child descendant) carrying one of
                // the chosen names; First throws if there is none.
                var selected = seed.Node.DescendantsOfSingleAndSelf()
                               .First(e => goodNames.Contains(e.Name));
                seed.Node = selected;

                var boundary = seed.SurroundingRange.FindInnermostNode(cst);

                // Build "name<parentNameRuleId<grandParentNameRuleId..." up to, but not
                // including, the boundary node.
                var path = selected.Name;
                for (var ancestor = selected.Parent; ancestor != boundary; ancestor = ancestor.Parent)
                {
                    path = path + "<" + ancestor.Name + ancestor.RuleId;
                }
                seed.Path = path;
            }
            return(seeds);
        }
Beispiel #7
0
        /// <summary>
        /// Encodes the seed nodes (when a seed node set is given) and all uppermost target
        /// nodes of the given CSTs into an <c>EncodingResult</c>. When
        /// <paramref name="codePaths"/> is non-empty, the result is cached in a
        /// ".encoded" file in the current directory and a readable cache file is returned
        /// instead of re-encoding.
        /// </summary>
        /// <param name="codePaths">Source paths; only used to derive the cache file name.</param>
        /// <param name="allCsts">The CSTs whose uppermost nodes are encoded.</param>
        /// <param name="oracle">The experiment forwarded to the encoding helpers.</param>
        /// <param name="seedNodeSet">Optional seed nodes; skipped when null.</param>
        /// <returns>The immutable encoding result (fresh or loaded from the cache file).</returns>
        public EncodingResult Encode(
            ICollection <string> codePaths, IEnumerable <CstNode> allCsts,
            LearningExperiment oracle, SeedNodeSet seedNodeSet = null)
        {
            // NOTE(review): the cache name is built from string.GetHashCode values, which
            // are not guaranteed to be stable across runtimes/processes and can collide
            // for different path sets - confirm this is acceptable for the cache.
            string cacheFileName = null;
            if (codePaths.Count > 0)
            {
                var allPathsHash  = string.Join(",", codePaths).GetHashCode();
                var edgePathsHash = (codePaths.First() + "," + codePaths.Last() + ",").GetHashCode();
                cacheFileName = allPathsHash + "_" + edgePathsHash + "_"
                                + codePaths.Count + ".encoded";
            }

            // NOTE(review): BinaryFormatter is unsafe on untrusted files and is obsolete in
            // current .NET; the cache format depends on it, so it is left in place here.
            var serializer = new BinaryFormatter();

            if (cacheFileName != null && File.Exists(cacheFileName))
            {
                using (var input = new FileStream(cacheFileName, FileMode.Open, FileAccess.Read))
                {
                    try
                    {
                        var cached = ((EncodingResult)serializer.Deserialize(input)).MakeImmutable();
                        Console.WriteLine("############### Warning ###############");
                        Console.WriteLine("Cache file of encoded result is used.");
                        Console.WriteLine("#######################################");
                        return(cached);
                    }
                    catch (Exception e)
                    {
                        // Best effort: an unreadable cache is reported and then rebuilt below.
                        Console.Error.WriteLine(e);
                    }
                }
            }

            // Lazy sequence; it is enumerated by EncodeTargetNodes.
            var uppermostNodes = allCsts.SelectMany(
                cst => LearningExperimentUtil.GetUppermostNodesByNames(cst, _selectedNodeNames));

            var encoded = new EncodingResult();

            if (seedNodeSet != null)
            {
                encoded.SeedAcceptedNodeCount = seedNodeSet.AcceptedNodes.Count;
                encoded.SeedNodeCount         = encoded.SeedAcceptedNodeCount
                                                + seedNodeSet.RejectedNodes.Count;
                EncodeSeedNodes(
                    seedNodeSet.AcceptedNodes, encoded, encoded.IdealAcceptedVector2GroupPath,
                    encoded.SeedAcceptedVector2GroupPath, oracle);
                EncodeSeedNodes(
                    seedNodeSet.RejectedNodes, encoded, encoded.IdealRejectedVector2GroupPath,
                    encoded.SeedRejectedVector2GroupPath, oracle);
            }
            EncodeTargetNodes(uppermostNodes, encoded, oracle);

            if (cacheFileName != null)
            {
                // The mutable result is what gets written to the cache.
                using (var output = new FileStream(cacheFileName, FileMode.Create, FileAccess.Write))
                {
                    serializer.Serialize(output, encoded);
                }
            }
            return(encoded.MakeImmutable());
        }
Beispiel #8
0
        /// <summary>
        /// Records the group path of <paramref name="node"/> for <paramref name="vector"/>.
        /// A vector without a stored (non-null) path gets the node's group path; otherwise
        /// the stored path is narrowed to the common suffix of the stored path and the
        /// node's group path.
        /// </summary>
        /// <param name="vector2GroupPath">Map from vector to group path; updated in place.</param>
        /// <param name="vector">The vector used as key.</param>
        /// <param name="node">The node whose group path is merged in.</param>
        private void UpdateVector2GroupPath(
            IDictionary <BigInteger, string> vector2GroupPath, BigInteger vector, CstNode node)
        {
            var nodeGroupPath   = GetGroupPathFromNode(node);
            var storedGroupPath = vector2GroupPath.GetValueOrDefault(vector);

            if (storedGroupPath != null)
            {
                // Already known: keep only what both paths have in common at the end.
                vector2GroupPath[vector] = LearningExperimentUtil.GetCommonSuffix(
                    storedGroupPath, nodeGroupPath);
                return;
            }
            vector2GroupPath.Add(vector, nodeGroupPath);
        }
        /// <summary>
        /// Lazily yields, for every candidate list, not-yet-used nodes that each clear at
        /// least one further bit of <c>_rejectingFeatureBitMask</c> from a running
        /// intersection. Candidates are visited in ascending order of the bit count of
        /// <c>(Vector &amp; _rejectingFeatureBitMask) | classifier.Units[i].Rejecting</c>.
        /// Side effects: each list is sorted in place, every candidate's <c>BitsCount</c> is
        /// overwritten, and each yielded node is flagged <c>Used</c> once the consumer asks
        /// for the next element.
        /// </summary>
        /// <param name="candidateNodesLists">Candidate lists; index i is paired with classifier.Units[i].</param>
        /// <param name="classifier">Classifier supplying the rejecting vectors and forwarded to DetermineStrongCount.</param>
        /// <returns>The selected nodes, produced lazily.</returns>
        public IEnumerable <SuspiciousNode> SelectNodesForSlowRejectionLearningStrongly(
            IReadOnlyList <List <SuspiciousNode> > candidateNodesLists,
            Classifier classifier)
        {
            for (int i = 0; i < candidateNodesLists.Count; i++)
            {
                var candidates      = candidateNodesLists[i];
                var rejectingVector = classifier.Units[i].Rejecting;
                foreach (var target in candidates)
                {
                    target.BitsCount = LearningExperimentUtil.CountBits(
                        (target.Vector & _rejectingFeatureBitMask) | rejectingVector);
                }
                candidates.Sort((t1, t2) => t1.BitsCount.CompareTo(t2.BitsCount));

                // Running intersection of the rejecting bits of the nodes chosen so far.
                var vector = _rejectingFeatureBitMask;
                var count  = DetermineStrongCount(i, classifier);
                foreach (var candidate in candidates)
                {
                    // Skip nodes that were already handed out. The previous check was
                    // inverted (it skipped the unused nodes), so only nodes that were
                    // already used could ever be returned and re-marked as used.
                    if (candidate.Used)
                    {
                        continue;
                    }
                    var newVector = vector & candidate.Vector;
                    if (newVector == vector)
                    {
                        // The node clears no additional bit; it brings nothing new.
                        continue;
                    }
                    vector = newVector;
                    yield return(candidate);

                    candidate.Used = true;
                    if (--count == 0)
                    {
                        break;
                    }
                }
            }
        }
        /// <summary>
        /// Picks the candidate whose masked vector is farthest from the existing vectors,
        /// where the distance of a candidate is the minimum number of differing masked bits
        /// to any existing vector.
        /// </summary>
        /// <param name="existings">Vectors already selected; enumerated exactly once.</param>
        /// <param name="candidates">Nodes to choose from.</param>
        /// <param name="mask">Bits that take part in the comparison.</param>
        /// <returns>
        /// The first candidate (or null if there is none) when <paramref name="existings"/>
        /// is empty; otherwise the first candidate with the largest positive distance, or
        /// null when every candidate coincides with some existing vector under the mask.
        /// </returns>
        private SuspiciousNode SelectMostDifferentElement(
            IEnumerable <BigInteger> existings, IEnumerable <SuspiciousNode> candidates,
            BigInteger mask)
        {
            // Materialize once: avoids re-enumerating `existings` for every candidate and
            // hoists the loop-invariant masking out of the inner comparison.
            var maskedExistings = existings.Select(f => f & mask).ToList();
            if (maskedExistings.Count == 0)
            {
                return(candidates.FirstOrDefault());
            }
            var            maxDiff = 0;
            SuspiciousNode ret     = null;

            foreach (var candidate in candidates)
            {
                var vector = candidate.Vector & mask;
                var diff   = maskedExistings.Min(f => LearningExperimentUtil.CountBits(f ^ vector));
                // Strict comparison: ties keep the earlier candidate, distance 0 never wins.
                if (maxDiff < diff)
                {
                    maxDiff = diff;
                    ret     = candidate;
                }
            }
            return(ret);
        }
        /// <summary>
        /// Lazily yields, for every candidate list, not-yet-used nodes that each add at
        /// least one new bit inside <c>_acceptingFeatureBitMask</c> to a running union.
        /// Candidates are visited in ascending order of the number of bits they share with
        /// <c>classifier.Units[i].Accepting</c>.
        /// Side effects: each list is sorted in place, every candidate's <c>BitsCount</c> is
        /// overwritten, and each yielded node is flagged <c>Used</c> once the consumer asks
        /// for the next element.
        /// </summary>
        /// <param name="candidateNodesLists">Candidate lists; index i is paired with classifier.Units[i].</param>
        /// <param name="classifier">Classifier supplying the accepting vectors and forwarded to DetermineStrongCount.</param>
        /// <returns>The selected nodes, produced lazily.</returns>
        public IEnumerable <SuspiciousNode> SelectNodesForFastAcceptanceLearningStrongly(
            IReadOnlyList <List <SuspiciousNode> > candidateNodesLists,
            Classifier classifier)
        {
            for (var listIndex = 0; listIndex < candidateNodesLists.Count; listIndex++)
            {
                var nodes         = candidateNodesLists[listIndex];
                var unitAccepting = classifier.Units[listIndex].Accepting;

                for (var k = 0; k < nodes.Count; k++)
                {
                    var scored = nodes[k];
                    scored.BitsCount = LearningExperimentUtil.CountBits(scored.Vector & unitAccepting);
                }
                nodes.Sort((left, right) => left.BitsCount.CompareTo(right.BitsCount));

                var remaining = DetermineStrongCount(listIndex, classifier);

                // Union of the accepting-feature bits of the nodes chosen so far.
                var covered = BigInteger.Zero;
                foreach (var node in nodes)
                {
                    if (node.Used)
                    {
                        continue;
                    }
                    var merged = (covered | node.Vector) & _acceptingFeatureBitMask;
                    if (merged != covered)
                    {
                        covered = merged;
                        yield return(node);

                        node.Used = true;
                        remaining--;
                        if (remaining == 0)
                        {
                            break;
                        }
                    }
                }
            }
        }
Beispiel #12
0
        /// <summary>
        /// Runs the whole learning experiment: builds seed nodes from the seed files,
        /// creates the feature set and classifier, encodes all target code, and then
        /// repeatedly classifies and updates the classifier until Classify reports no more
        /// suspicious nodes. Progress and timings are written to the console.
        /// </summary>
        /// <param name="seedPaths">Paths of the seed files; the first valid CST is used for fragment construction.</param>
        /// <param name="codePaths">Paths of the code files to encode and classify.</param>
        /// <param name="searchPattern">Not referenced anywhere in this method body.</param>
        /// <param name="writer">Optional writer that receives the encoding result; skipped when null.</param>
        /// <returns>The final classification result together with classifier, encoding result and feature encoder.</returns>
        /// <exception cref="Exception">
        /// Thrown when an ideal accepted vector is also an ideal rejected vector.
        /// </exception>
        public LearningResult Learn(
            ICollection <string> seedPaths, ICollection <string> codePaths, string searchPattern,
            StreamWriter writer = null)
        {
            var allCsts   = GenerateValidCsts(codePaths);
            var seedCsts  = GenerateValidCsts(seedPaths).ToList();
            // Seed nodes: uppermost nodes with an oracle name that the oracle accepts.
            var seedNodes = seedCsts
                            .SelectMany(
                cst => LearningExperimentUtil.GetUppermostNodesByNames(cst, OracleNames))
                            .Where(ProtectedIsAcceptedUsingOracle)
                            .ToList();

            // Only the first seed CST is used below; First() throws if there is none.
            var seedCst        = seedCsts.First();
            var seedCode       = seedCst.Code;
            var structuredCode = new StructuredCode(seedCode);

            var acceptingFragments = ConstructAcceptingFragments(structuredCode, seedCst, seedNodes);
            var rejectingFragments = ConstructRejectingFragments(structuredCode, seedCst);

            // NOTE(review): the return value is discarded; presumably called for a side
            // effect - confirm against SeedNodeSet.Create.
            SeedNodeSet.Create(acceptingFragments, this);

            // Start of the "preparing" phase measured via Environment.TickCount.
            var preparingTime = Environment.TickCount;
            var extractor     = CreateExtractor();
            var seedNodeSet   = new SeedNodeSet(acceptingFragments.Select(f => f.Node), seedCsts, this);

            Console.WriteLine("#Accepted seed nodes: " + seedNodeSet.AcceptedNodes.Count
                              + " (" + acceptingFragments.Count + ")");
            Console.WriteLine("#Rejected seed nodes: " + seedNodeSet.RejectedNodes.Count
                              + " (" + rejectingFragments.Count + ")");

            var featureSet = new FeatuerSet(seedNodeSet, extractor, acceptingFragments, rejectingFragments);
            // One group path of the form ">name>" per selected node name.
            var groupPaths = seedNodeSet.SelectedNodeNames.Select(n => ">" + n + ">");
            var classifier = new Classifier(groupPaths, featureSet);

            Console.WriteLine(
                "#Features: " + featureSet.AcceptingFeatureCount + ", "
                + featureSet.RejectingFeatureCount);
            Console.WriteLine("Inner: " + extractor.IsInner);

            var featureEncoder = new FeatureEncoder(seedNodeSet.SelectedNodeNames, extractor,
                                                    featureSet);
            var encodingResult = featureEncoder.Encode(codePaths, allCsts, this, seedNodeSet);

            Console.WriteLine("#Unique Elements: " + encodingResult.VectorCount);
            // A vector that is both ideally accepted and ideally rejected cannot be
            // classified: print the features of the first such vector, then abort.
            if (encodingResult.IdealAcceptedVector2GroupPath.Keys.ToHashSet()
                .Overlaps(encodingResult.IdealRejectedVector2GroupPath.Keys.ToHashSet()))
            {
                var others = encodingResult.IdealRejectedVector2GroupPath;
                var vector =
                    encodingResult.IdealAcceptedVector2GroupPath.Keys.First(others.ContainsKey);
                foreach (var featureString in featureEncoder.GetFeatureStringsByVector(vector))
                {
                    Console.WriteLine(Experiment.Beautify(featureString));
                }
                throw new Exception("Master predicates can't classify elements!");
            }

            var groupCache  = new GroupCache(encodingResult, classifier);
            var trainingSet = encodingResult.CreateTrainingVectorSet();

            classifier.Create(trainingSet, groupCache);
            Experiment.WriteFeatureStrings(Console.Out, classifier, featureEncoder);
            Console.WriteLine("Preparing time: " + (Environment.TickCount - preparingTime));

            // count = number of consecutive rounds in which classifier.Update returned false;
            // it is passed to Classify on every round.
            var count   = 0;
            var sumTime = Environment.TickCount;
            ClassificationResult classificationResult;

            while (true)
            {
                var time = Environment.TickCount;
                classificationResult = Classify(count, classifier, groupCache, encodingResult,
                                                trainingSet);
                // Classify signals the end of learning with a null suspicious-node list.
                if (classificationResult.SuspiciousNodes == null)
                {
                    break;
                }

                var additionalAcceptedSet = RevealSuspiciousElements(
                    encodingResult.IdealAcceptedVector2GroupPath.Keys,
                    classificationResult.SuspiciousNodes, encodingResult, trainingSet);
                if (!classifier.Update(additionalAcceptedSet, trainingSet, groupCache))
                {
                    count++;
                }
                else
                {
                    count = 0;
                }

                Console.WriteLine("Time: " + (Environment.TickCount - time));
            }
            classifier.MakeImmutable();
            Console.WriteLine();
            Console.WriteLine("Sum time: " + (Environment.TickCount - sumTime));
            var trainingVectorCount = trainingSet.Count;
            var idealVectorCount    = encodingResult.IdealVectorSet.Count;

            Console.WriteLine("#Required vectors: " + trainingVectorCount + " / " + idealVectorCount);

            if (writer != null)
            {
                encodingResult.WriteResult(writer, trainingSet);
            }

            foreach (var groupPath in classifier.GroupPaths)
            {
                Console.WriteLine(groupPath);
            }

            // NOTE(review): Optimize is invoked after MakeImmutable above - confirm that
            // this ordering is intended.
            classifier.Optimize(encodingResult.IdealRejectedVector2GroupPath.Keys, groupCache);

            return(new LearningResult {
                ClassificationResult = classificationResult,
                Classifier = classifier,
                EncodingResult = encodingResult,
                FeatureEncoder = featureEncoder,
            });
        }
Beispiel #13
0
 /// <summary>
 /// Counts the uppermost nodes of <paramref name="cst"/> with one of the oracle names
 /// that IsAcceptedUsingOracle accepts.
 /// </summary>
 /// <param name="cst">The concrete syntax tree to inspect.</param>
 /// <returns>The number of accepted nodes.</returns>
 public int CountUsingOracle(CstNode cst)
 {
     var oracleNodes = LearningExperimentUtil.GetUppermostNodesByNames(cst, OracleNames);
     return(oracleNodes.Count(node => IsAcceptedUsingOracle(node)));
 }
Beispiel #14
0
 /// <summary>
 /// For every unit, counts the bits of its rejecting vector that remain after shifting
 /// the vector right by <c>AcceptingFeatureCount</c>.
 /// </summary>
 /// <returns>One count per unit, evaluated lazily.</returns>
 public IEnumerable <int> CountRejectingFeatures()
 {
     return(Units.Select(unit =>
     {
         // Drop the low AcceptingFeatureCount bits before counting.
         var shifted = unit.Rejecting >> AcceptingFeatureCount;
         return LearningExperimentUtil.CountBits(shifted);
     }));
 }
Beispiel #15
0
 /// <summary>
 /// For every unit, counts the bits of its accepting vector that lie inside
 /// <c>AcceptingFeatureBitMask</c>.
 /// </summary>
 /// <returns>One count per unit, evaluated lazily.</returns>
 public IEnumerable <int> CountAcceptingFeatures()
 {
     return(Units.Select(unit =>
     {
         // Keep only the bits covered by the accepting-feature mask.
         var masked = unit.Accepting & AcceptingFeatureBitMask;
         return LearningExperimentUtil.CountBits(masked);
     }));
 }