Пример #1
0
        /// <summary>
        /// Tokenizes the two source files, runs the GST comparison on the resulting
        /// token streams and records the outcome via
        /// <c>SQLFacade.Instance.CreateComparison</c>.
        /// </summary>
        /// <remarks>
        /// When a <c>template.c</c> file exists in the directory of
        /// <paramref name="sourcePath1"/>, both sources are first passed through
        /// <c>TemplatingHelper.StripTemplateFromSourceFile</c> together with that
        /// template, and the returned paths are tokenized instead.
        /// The elapsed time handed to the database call is measured from just before
        /// template stripping until just before the call itself.
        /// </remarks>
        /// <param name="name">Stored in <c>Name</c> and passed on to the persisted comparison.</param>
        /// <param name="evalRunID">Stored in <c>EvaluationRunID</c> and passed on to the persisted comparison.</param>
        /// <param name="sourcePath1">Path of the first source file; its directory is where <c>template.c</c> is looked for.</param>
        /// <param name="sourcePath2">Path of the second source file.</param>
        public ComparisonModel(string name, Int64 evalRunID, string sourcePath1, string sourcePath2)
        {
            Name = name;
            EvaluationRunID = evalRunID;

            var directory = Path.GetDirectoryName(Path.GetFullPath(sourcePath1));

            // Resolve the template path once and test that very path. The previous
            // second lookup through Directory.GetFiles(...).FirstOrDefault() could
            // return null while the existence flag was still true.
            var tmplFile = Path.Combine(directory, "template.c");
            bool tmplFileExists = File.Exists(tmplFile);

            var watch = Stopwatch.StartNew();

            var path1 = tmplFileExists ? TemplatingHelper.StripTemplateFromSourceFile(sourcePath1, tmplFile) : sourcePath1;
            var path2 = tmplFileExists ? TemplatingHelper.StripTemplateFromSourceFile(sourcePath2, tmplFile) : sourcePath2;

            var factory = new MutexTokenFactory();
            var tokens1 = factory.GetTokenWrapperListFromFile(path1);
            var tokens2 = factory.GetTokenWrapperListFromFile(path2);

            cLogger.DebugFormat("TokenStream Length: {0} -- {1}", tokens1.Count(), tokens2.Count());

            var algo = new GSTAlgorithm<GSTToken<TokenWrapper>>(
                tokens1.ToGSTTokenList<TokenWrapper>(),
                tokens2.ToGSTTokenList<TokenWrapper>());

            algo.RunToCompletion();
            Result = algo.Similarity;

            // The source models are named after the ORIGINAL files, not the
            // template-stripped intermediates.
            Source1 = new SourceModel(Path.GetFileNameWithoutExtension(sourcePath1), factory.GetJoinedTokenString(tokens1));
            Source2 = new SourceModel(Path.GetFileNameWithoutExtension(sourcePath2), factory.GetJoinedTokenString(tokens2));
            SQLFacade.Instance.CreateComparison(name, Result, watch.ElapsedMilliseconds, evalRunID, Source1.ID, Source2.ID);
        }
Пример #2
0
        /// <summary>
        /// Builds a comparison of two source files: tokenizes both, runs the GST
        /// algorithm to completion, stores the similarity in <c>Result</c> and
        /// persists the comparison through <c>SQLFacade.Instance.CreateComparison</c>.
        /// </summary>
        /// <remarks>
        /// If the directory containing <paramref name="sourcePath1"/> holds a
        /// <c>template.c</c>, both sources are handed to
        /// <c>TemplatingHelper.StripTemplateFromSourceFile</c> with it, and the
        /// paths it returns are tokenized in place of the originals.
        /// </remarks>
        /// <param name="name">Assigned to <c>Name</c> and forwarded to the database call.</param>
        /// <param name="evalRunID">Assigned to <c>EvaluationRunID</c> and forwarded to the database call.</param>
        /// <param name="sourcePath1">Path of the first source file; also determines the directory checked for <c>template.c</c>.</param>
        /// <param name="sourcePath2">Path of the second source file.</param>
        public ComparisonModel(string name, Int64 evalRunID, string sourcePath1, string sourcePath2)
        {
            Name            = name;
            EvaluationRunID = evalRunID;

            var directory = Path.GetDirectoryName(Path.GetFullPath(sourcePath1));

            // One lookup only: the existence check and the path handed to the
            // templating helper now refer to the same file. The earlier
            // Directory.GetFiles(...).FirstOrDefault() could produce null even
            // though File.Exists had just reported true.
            var  tmplFile       = Path.Combine(directory, "template.c");
            bool tmplFileExists = File.Exists(tmplFile);

            var watch = Stopwatch.StartNew();

            var path1 = sourcePath1;
            var path2 = sourcePath2;

            if (tmplFileExists)
            {
                path1 = TemplatingHelper.StripTemplateFromSourceFile(sourcePath1, tmplFile);
                path2 = TemplatingHelper.StripTemplateFromSourceFile(sourcePath2, tmplFile);
            }

            var factory = new MutexTokenFactory();
            var tokens1 = factory.GetTokenWrapperListFromFile(path1);
            var tokens2 = factory.GetTokenWrapperListFromFile(path2);

            cLogger.DebugFormat("TokenStream Length: {0} -- {1}", tokens1.Count(), tokens2.Count());

            var algo = new GSTAlgorithm<GSTToken<TokenWrapper>>(
                tokens1.ToGSTTokenList<TokenWrapper>(),
                tokens2.ToGSTTokenList<TokenWrapper>());

            algo.RunToCompletion();
            Result = algo.Similarity;

            // Display names come from the caller-supplied paths, not from the
            // (possibly template-stripped) files that were actually tokenized.
            Source1 = new SourceModel(Path.GetFileNameWithoutExtension(sourcePath1), factory.GetJoinedTokenString(tokens1));
            Source2 = new SourceModel(Path.GetFileNameWithoutExtension(sourcePath2), factory.GetJoinedTokenString(tokens2));

            SQLFacade.Instance.CreateComparison(name, Result, watch.ElapsedMilliseconds, evalRunID, Source1.ID, Source2.ID);
        }
Пример #3
0
        /// <summary>
        /// Checks that <c>Algorithm.RunToCompletion()</c> moves the algorithm from
        /// "not finished" to "finished".
        /// </summary>
        public void RunToCompletion()
        {
            // Precondition: the algorithm under test must not report Finished yet.
            Assert.False(Algorithm.Finished);
            Algorithm.RunToCompletion();

            // Postcondition: after the run the Finished flag must be set.
            Assert.True(Algorithm.Finished);
        }
Пример #4
0
        /// <summary>
        /// Evaluation of accuracy only: compares the <c>main.c</c> of every pair of
        /// sub-directories and prints one <c>A-B:similarity</c> line per pair.
        /// DO NOT TRY THIS AT HOME!
        /// </summary>
        /// <remarks>
        /// The root directory is the first command line argument when one is given,
        /// otherwise <c>myDir</c>. Sub-directories are processed in name order and
        /// each unordered pair is compared exactly once. The method ends the process
        /// with exit code 0.
        /// </remarks>
        private static void doEvaluate()
        {
            var args = Environment.GetCommandLineArgs();

            // args[0] is the program itself, so a user-supplied directory is args[1].
            var dir = new DirectoryInfo(args.Length > 1 ? args[1] : @"myDir");

            Console.WriteLine("directory: {0}", dir.FullName);

            // List and sort the sub-directories once instead of re-reading the
            // directory for every outer iteration.
            var subdirs = dir.GetDirectories().OrderBy(sd => sd.Name).ToArray();

            for (int a = 0; a < subdirs.Length; a++)
            {
                var fileA   = new FileInfo(Path.Combine(subdirs[a].FullName, "main.c"));
                var tokensA = GetTokens(fileA);

                // Pairing by index (b > a) visits every unordered pair once. The
                // former "nameA-nameB" string keys were scanned linearly and could
                // mistake one pair for another when a directory name contains '-'.
                for (int b = a + 1; b < subdirs.Length; b++)
                {
                    var fileB   = new FileInfo(Path.Combine(subdirs[b].FullName, "main.c"));
                    var tokensB = GetTokens(fileB);

                    var algo = new GSTAlgorithm<GSTToken<TokenWrapper>>(tokensA, tokensB)
                    {
                        MinimumMatchLength = 5
                    };
                    algo.RunToCompletion();
                    Console.WriteLine("{0}-{1}:{2}", fileA.Directory.Name, fileB.Directory.Name, algo.Similarity);
                }
            }

            Console.WriteLine("FINISHED EVAL");
            Environment.Exit(0);
        }
Пример #5
0
        /// <summary>
        /// Times <c>HashingGSTAlgorithm</c> against <c>GSTAlgorithm</c> on the first
        /// <paramref name="len"/> characters of <c>src1</c> and <c>src2</c>, prints
        /// the per-run averages and their ratio, and appends a
        /// <c>len;plain;hashing;ratio</c> line to <c>ResultList</c>.
        /// </summary>
        /// <param name="len">Number of leading characters taken from each source.</param>
        private static void Calculate(int len)
        {
            var string1 = src1.Substring(0, len);
            var string2 = src2.Substring(0, len);

            var watch = Stopwatch.StartNew();

            // Exactly testRuns iterations, so dividing by testRuns below gives a
            // true per-run average (the loop used to run testRuns + 1 times).
            for (int i = 0; i < testRuns; i++)
            {
                var algo = new HashingGSTAlgorithm<GSTToken<char>>(
                    GSTHelper.FromString(string1),
                    GSTHelper.FromString(string2));

                algo.RunToCompletion();
            }
            var runtimeHashing = watch.Elapsed;

            watch = Stopwatch.StartNew();

            for (int i = 0; i < testRuns; i++)
            {
                var algo = new GSTAlgorithm<GSTToken<char>>(
                    GSTHelper.FromString(string1),
                    GSTHelper.FromString(string2));

                algo.RunToCompletion();
            }
            var runtimePlain = watch.Elapsed;

            // Both operands are TimeSpan ticks. Stopwatch.ElapsedTicks is counted in
            // units of Stopwatch.Frequency and must not be mixed with TimeSpan.Ticks.
            var ratio = (double)runtimePlain.Ticks / runtimeHashing.Ticks;

            // Fractional milliseconds for both columns; the plain column used to be
            // truncated by integer division.
            var plainMs   = runtimePlain.TotalMilliseconds / testRuns;
            var hashingMs = runtimeHashing.TotalMilliseconds / testRuns;

            Console.WriteLine("runtimes: {0}, {1}, ratio: {2} for {3}",
                              plainMs,
                              hashingMs,
                              ratio,
                              len);

            // NOTE(review): EvaluateSpeed starts three worker threads; assuming they
            // end up here, the shared list must not be mutated concurrently.
            lock (ResultList)
            {
                // Add first, then sort, so the newest entry is ordered as well.
                ResultList.Add(string.Format("{3:000};{0};{1};{2}",
                                             plainMs,
                                             hashingMs,
                                             ratio,
                                             len));
                ResultList.Sort(String.CompareOrdinal);
            }
        }
Пример #6
0
        /// <summary>
        /// Evaluation of GST speed only: runs <c>GSTAlgorithm</c> over every ordered
        /// pair of tokenized <c>main.c</c> files found below <c>C:\Quelltexte</c>,
        /// repeats that a fixed number of times and prints the average duration of
        /// one full pass.
        /// DO NOT TRY THIS AT HOME!
        /// </summary>
        /// <remarks>
        /// Only the time spent inside <c>RunToCompletion</c> is measured. The method
        /// waits for a line on standard input and then ends the process with exit
        /// code 0.
        /// </remarks>
        private static void EvaluateGSTSpeed()
        {
            // Number of full passes; also the divisor of the reported average.
            // The loop used to run 101 times while the total was divided by 100.
            const int runs = 100;

            var files = Directory.GetFiles(@"C:\Quelltexte", @"main.c", SearchOption.AllDirectories).Select(GetTokens).ToList();

            long elapsedTicks = 0;
            int  similarity   = 0;

            for (int run = 0; run < runs; run++)
            {
                GSTAlgorithm<GSTToken<TokenWrapper>> last = null;

                // Full cartesian product, self-pairs included.
                foreach (var first in files)
                {
                    foreach (var second in files)
                    {
                        var alg = new GSTAlgorithm<GSTToken<TokenWrapper>>(first, second)
                        {
                            MinimumMatchLength = 5
                        };
                        last = alg;

                        var watch = Stopwatch.StartNew();
                        alg.RunToCompletion();
                        watch.Stop();

                        // Elapsed.Ticks are TimeSpan ticks. Stopwatch.ElapsedTicks is
                        // counted in Stopwatch.Frequency units and would make
                        // TimeSpan.FromTicks report a wrong duration.
                        elapsedTicks += watch.Elapsed.Ticks;
                    }
                }

                // Only the last pair of each pass contributes. With no input files
                // there is no algorithm to read from.
                if (last != null)
                {
                    similarity += last.Similarity;
                }
            }

            // Size of the cartesian product, without enumerating it again.
            Console.WriteLine("Number: {0}, Similarity: {1}", files.Count * files.Count, similarity);

            Console.WriteLine("finished in {0} seconds", TimeSpan.FromTicks(elapsedTicks).TotalSeconds / runs);
            Console.ReadLine();
            Environment.Exit(0);
        }
Пример #7
0
        /// <summary>
        /// Evaluation of accuracy only: for every pair of sub-directories below the
        /// root directory, compares their <c>main.c</c> files with
        /// <c>GSTAlgorithm</c> and prints <c>A-B:similarity</c>.
        /// DO NOT TRY THIS AT HOME!
        /// </summary>
        /// <remarks>
        /// The root directory is <c>myDir</c> unless a first command line argument
        /// overrides it. Pairs are visited in directory-name order, each unordered
        /// pair once. The process is terminated with exit code 0 at the end.
        /// </remarks>
        private static void doEvaluate()
        {
            var args = Environment.GetCommandLineArgs();

            var dir = new DirectoryInfo(@"myDir");

            // Element 0 is the program name; the first real argument is element 1.
            if (args.Length > 1)
            {
                dir = new DirectoryInfo(args[1]);
            }

            Console.WriteLine("directory: {0}", dir.FullName);

            // Read the directory a single time; the listing is loop-invariant.
            var ordered = dir.GetDirectories().OrderBy(sd => sd.Name).ToArray();

            for (int i = 0; i < ordered.Length; i++)
            {
                var fileA = new FileInfo(Path.Combine(ordered[i].FullName, "main.c"));

                var tokensA = GetTokens(fileA);

                // Starting at i + 1 skips the pair (j, i) that an earlier outer
                // iteration has already handled. This replaces the string-keyed
                // "done" list, which was searched linearly per pair and whose
                // "A-B" keys are ambiguous for names that themselves contain '-'.
                for (int j = i + 1; j < ordered.Length; j++)
                {
                    var fileB = new FileInfo(Path.Combine(ordered[j].FullName, "main.c"));

                    var tokensB = GetTokens(fileB);

                    var algo = new GSTAlgorithm<GSTToken<TokenWrapper>>(tokensA, tokensB) { MinimumMatchLength = 5 };
                    algo.RunToCompletion();
                    Console.WriteLine("{0}-{1}:{2}", fileA.Directory.Name, fileB.Directory.Name, algo.Similarity);
                }
            }

            Console.WriteLine("FINISHED EVAL");
            Environment.Exit(0);
        }
Пример #8
0
        /// <summary>
        /// Compares the two algorithms against each other over a rather large file
        /// (~8000 chars): prefix lengths 100, 200, ... are distributed over three
        /// worker threads and the collected results are written to
        /// <c>test\default_set\performance.txt</c>.
        /// </summary>
        /// <remarks>
        /// Reads both sources into <c>src1</c>/<c>src2</c>, fills
        /// <c>Length1</c>..<c>Length3</c>, starts <c>Start1</c>..<c>Start3</c> on
        /// their own threads and blocks until all three have finished. Finally waits
        /// for a line on standard input.
        /// </remarks>
        private static void EvaluateSpeed()
        {
            XmlConfigurator.Configure(new FileInfo("log4net.xml"));
            src1 = File.ReadAllText(@"test\default_set\TPLV04-S01-02\main-01.c");
            src2 = File.ReadAllText(@"test\default_set\TPLV04-S01-02\main-02.c");

            // NOTE(review): an unused local `testRuns = 2` used to be declared here.
            // It shadowed the `testRuns` member that Calculate reads and had no
            // effect. If the run count was meant to be configured at this point,
            // assign the member instead.

            const int step = 100;

            // JIT compile both algorithms before we start. The prefix is clamped so
            // that a source shorter than one step does not make Substring throw.
            var warmUp1 = src1.Substring(0, Math.Min(step, src1.Length));
            var warmUp2 = src2.Substring(0, Math.Min(step, src2.Length));

            AbstractGSTAlgorithm<GSTToken<char>> initalgo = new HashingGSTAlgorithm<GSTToken<char>>(
                GSTHelper.FromString(warmUp1),
                GSTHelper.FromString(warmUp2));
            initalgo.RunToCompletion();

            initalgo = new GSTAlgorithm<GSTToken<char>>(
                GSTHelper.FromString(warmUp1),
                GSTHelper.FromString(warmUp2));
            initalgo.RunToCompletion();

            // Deal the prefix lengths round-robin into three buckets, one per thread.
            var buckets = new[] { new List<int>(), new List<int>(), new List<int>() };
            int limit   = Math.Min(src1.Length, src2.Length);
            int next    = 0;

            for (int len = step; len < limit; len += step)
            {
                buckets[next].Add(len);
                next = (next + 1) % buckets.Length;
            }

            Length1 = buckets[0].ToArray();
            Length2 = buckets[1].ToArray();
            Length3 = buckets[2].ToArray();

            var t1 = new Thread(Start1);
            t1.Start();

            var t2 = new Thread(Start2);
            t2.Start();

            var t3 = new Thread(Start3);
            t3.Start();

            Console.WriteLine("Main thread joining");
            t1.Join();
            t2.Join();
            t3.Join();

            // All workers are done. Sort once more so the file is fully ordered even
            // if the most recently added entry was appended after the last sort.
            ResultList.Sort(String.CompareOrdinal);
            File.WriteAllLines(@"test\default_set\performance.txt", ResultList);

            Console.WriteLine("finished all runs");
            Console.ReadLine();
        }
Пример #9
0
        /// <summary>
        /// Evaluation of GST speed only: every <c>main.c</c> below
        /// <c>C:\Quelltexte</c> is passed through <c>GetTokens</c>, then
        /// <c>GSTAlgorithm</c> is run on every ordered pair of the results. The whole
        /// pass is repeated and the average time per pass is printed.
        /// DO NOT TRY THIS AT HOME!
        /// </summary>
        /// <remarks>
        /// The stopwatch covers <c>RunToCompletion</c> only, not algorithm
        /// construction. After printing, the method waits for a line on standard
        /// input and exits the process with code 0.
        /// </remarks>
        private static void EvaluateGSTSpeed()
        {
            // Pass count and averaging divisor in one place. Previously the loop
            // executed 101 passes while the sum was divided by 100.
            const int passes = 100;

            var files = Directory.GetFiles(@"C:\Quelltexte", @"main.c", SearchOption.AllDirectories).Select(GetTokens).ToList();

            var total = TimeSpan.Zero;
            int similarity = 0;

            for (int pass = 0; pass < passes; pass++)
            {
                GSTAlgorithm<GSTToken<TokenWrapper>> algorithm = null;

                // Every file against every file, including itself.
                foreach (var first in files)
                {
                    foreach (var second in files)
                    {
                        var alg = new GSTAlgorithm<GSTToken<TokenWrapper>>(first, second) { MinimumMatchLength = 5 };
                        algorithm = alg;

                        var watch = Stopwatch.StartNew();
                        alg.RunToCompletion();
                        watch.Stop();

                        // Accumulate a TimeSpan. Raw Stopwatch.ElapsedTicks use the
                        // Stopwatch.Frequency unit and are not valid input for
                        // TimeSpan.FromTicks.
                        total += watch.Elapsed;
                    }
                }

                // Guard against an empty file list, which leaves algorithm null.
                if (algorithm != null)
                {
                    similarity += algorithm.Similarity;
                }
            }

            // Pair count of the cartesian product, computed directly.
            Console.WriteLine("Number: {0}, Similarity: {1}", files.Count * files.Count, similarity);

            Console.WriteLine("finished in {0} seconds", total.TotalSeconds / passes);
            Console.ReadLine();
            Environment.Exit(0);
        }
Пример #10
0
        /// <summary>
        /// Benchmarks <c>HashingGSTAlgorithm</c> and <c>GSTAlgorithm</c> on the first
        /// <paramref name="len"/> characters of <c>src1</c> and <c>src2</c>. Prints
        /// the average runtime per run for both plus their ratio, and records the
        /// same figures as one line in <c>ResultList</c>.
        /// </summary>
        /// <param name="len">Prefix length, in characters, taken from both sources.</param>
        private static void Calculate(int len)
        {
            var string1 = src1.Substring(0, len);
            var string2 = src2.Substring(0, len);

            // Hashing variant. The loop count matches the divisor used for the
            // average; it used to be one iteration higher.
            var watch = Stopwatch.StartNew();
            for (int run = 0; run < testRuns; run++)
            {
                var algo = new HashingGSTAlgorithm<GSTToken<char>>(
                    GSTHelper.FromString(string1),
                    GSTHelper.FromString(string2));

                algo.RunToCompletion();
            }
            watch.Stop();
            var runtimeHashing = watch.Elapsed;

            // Plain variant, same number of runs.
            watch = Stopwatch.StartNew();
            for (int run = 0; run < testRuns; run++)
            {
                var algo = new GSTAlgorithm<GSTToken<char>>(
                    GSTHelper.FromString(string1),
                    GSTHelper.FromString(string2));

                algo.RunToCompletion();
            }
            watch.Stop();
            var runtimePlain = watch.Elapsed;

            // Compare like with like: TimeSpan ticks on both sides.
            // Stopwatch.ElapsedTicks is in a hardware-dependent unit.
            var ratio = (double)runtimePlain.Ticks / runtimeHashing.Ticks;

            // Both averages as fractional milliseconds; the first one was
            // previously truncated by integer division.
            double avgPlain   = runtimePlain.TotalMilliseconds / testRuns;
            double avgHashing = runtimeHashing.TotalMilliseconds / testRuns;

            Console.WriteLine("runtimes: {0}, {1}, ratio: {2} for {3}",
                avgPlain,
                avgHashing,
                ratio,
                len);

            var line = string.Format("{3:000};{0};{1};{2}",
                avgPlain,
                avgHashing,
                ratio,
                len);

            // NOTE(review): EvaluateSpeed launches three threads; presumably each of
            // them calls this method, so access to the shared list is serialized.
            lock (ResultList)
            {
                // Sort after adding so the new line is ordered too.
                // NOTE(review): {3:000} pads len to three digits only, so ordinal
                // order stops being numeric once len reaches 1000.
                ResultList.Add(line);
                ResultList.Sort(String.CompareOrdinal);
            }
        }
Пример #11
0
        /// <summary>
        /// Compares the two algorithms against each other over a rather large file
        /// (~8000 chars). Prefix lengths in steps of 100 are split across three
        /// threads, and the gathered result lines are written to
        /// <c>test\default_set\performance.txt</c>.
        /// </summary>
        /// <remarks>
        /// Side effects visible here: configures log4net from <c>log4net.xml</c>,
        /// assigns <c>src1</c>, <c>src2</c> and <c>Length1</c>..<c>Length3</c>, runs
        /// <c>Start1</c>..<c>Start3</c> on separate threads and joins them, writes
        /// the results file and then waits for a line on standard input.
        /// </remarks>
        private static void EvaluateSpeed()
        {
            XmlConfigurator.Configure(new FileInfo("log4net.xml"));
            src1 = File.ReadAllText(@"test\default_set\TPLV04-S01-02\main-01.c");
            src2 = File.ReadAllText(@"test\default_set\TPLV04-S01-02\main-02.c");

            // NOTE(review): the dead local `var testRuns = 2;` was removed. It hid
            // the `testRuns` member used by Calculate and was never read. Assign the
            // member here if two runs were actually intended.

            const int increment = 100;
            int shortest = Math.Min(src1.Length, src2.Length);

            // JIT compile both algorithms before we start. Clamping the warm-up
            // length avoids an ArgumentOutOfRangeException for very short sources.
            int warmUpLength = Math.Min(increment, shortest);

            AbstractGSTAlgorithm<GSTToken<char>> initalgo = new HashingGSTAlgorithm<GSTToken<char>>(
                        GSTHelper.FromString(src1.Substring(0, warmUpLength)),
                        GSTHelper.FromString(src2.Substring(0, warmUpLength)));

            initalgo.RunToCompletion();

            initalgo = new GSTAlgorithm<GSTToken<char>>(
                        GSTHelper.FromString(src1.Substring(0, warmUpLength)),
                        GSTHelper.FromString(src2.Substring(0, warmUpLength)));
            initalgo.RunToCompletion();

            var liLen1 = new List<int>();
            var liLen2 = new List<int>();
            var liLen3 = new List<int>();

            // Round-robin: 100 -> list 1, 200 -> list 2, 300 -> list 3, 400 -> list 1, ...
            int c = 0;
            for (int len = increment; len < shortest; len += increment)
            {
                switch (c++ % 3)
                {
                    case 0:
                        liLen1.Add(len);
                        break;
                    case 1:
                        liLen2.Add(len);
                        break;
                    default:
                        liLen3.Add(len);
                        break;
                }
            }

            Length1 = liLen1.ToArray();
            Length2 = liLen2.ToArray();
            Length3 = liLen3.ToArray();

            var t1 = new Thread(Start1);
            t1.Start();

            var t2 = new Thread(Start2);
            t2.Start();

            var t3 = new Thread(Start3);
            t3.Start();

            Console.WriteLine("Main thread joining");
            t1.Join();
            t2.Join();
            t3.Join();

            // Final ordering pass after all workers have finished. Without it the
            // entry added last can sit out of order, because Calculate sorts before
            // it adds.
            ResultList.Sort(String.CompareOrdinal);
            File.WriteAllLines(@"test\default_set\performance.txt", ResultList);

            Console.WriteLine("finished all runs");
            Console.ReadLine();
        }