Esempio n. 1
0
        /// <summary>
        /// Starts the comparison of <c>Tokens</c> against every entry of <c>ReferenceData</c>.
        /// After this method has completed you can retrieve the maximum found similarity
        /// from <c>MaximumSimilarity</c> and the matching student from <c>MaxSimilarityStudentID</c>.
        /// </summary>
        /// <returns>
        /// The highest similarity measured during this call
        /// (0 if no comparison produced a higher value).
        /// </returns>
        public int Calculate()
        {
            int max = 0;

            cLogger.DebugFormat("calculating similarity");

            foreach (var data in ReferenceData)
            {
                // create the lists here, because this way they are local to this run
                // ==> more functional and separated
                var sourceTokens    = Tokens.ToGSTTokenList();
                var referenceTokens = Factory.GetTokenWrapperEnumerable(data.Tokens).ToGSTTokenList();
                var algorithm       = new HashingGSTAlgorithm<GSTToken<TokenWrapper>>(sourceTokens, referenceTokens)
                {
                    MinimumMatchLength = DEFAULT_MML
                };

                algorithm.RunToCompletion();

                var similarity = algorithm.Similarity;
                cLogger.DebugFormat("similarity compared to {0}:{1}", data.StudentIdentifier, similarity);

                // Track the best value of this run; without this the method always returned 0.
                if (max < similarity)
                {
                    max = similarity;
                }

                // MaximumSimilarity is only ever raised here, never reset.
                if (MaximumSimilarity < similarity)
                {
                    MaximumSimilarity      = similarity;
                    MaxSimilarityStudentID = data.StudentIdentifier;
                }
            }

            return max;
        }
Esempio n. 2
0
        // Performs a single run on two inputs that have the three-character run "LAT"
        // in common and expects exactly one entry in each hash collection afterwards.
        public void MinimizesHashes()
        {
            var tokensA = GSTHelper.FromString("XeLATst");
            var tokensB = GSTHelper.FromString("LATunik");

            var algorithm = new HashingGSTAlgorithm<GSTToken<char>>(tokensA, tokensB);
            algorithm.MinimumMatchLength = 3;
            Algorithm = algorithm;

            Algorithm.DoOneRun();

            var hashCountA = Algorithm.HashesA.Count;
            Assert.AreEqual(1, hashCountA, string.Format("expected 1 after minimize, but: A = {0}", hashCountA));

            var hashCountB = Algorithm.HashesB.Count;
            Assert.AreEqual(1, hashCountB, string.Format("expected 1 after minimize, but: B = {0}", hashCountB));
        }
Esempio n. 3
0
        // After one run with a minimum match length of 3, each of the two hash
        // collections is expected to contain exactly one entry.
        public void MinimizesHashes()
        {
            var left  = GSTHelper.FromString("XeLATst");
            var right = GSTHelper.FromString("LATunik");

            var configured = new HashingGSTAlgorithm<GSTToken<char>>(left, right);
            configured.MinimumMatchLength = 3;

            Algorithm = configured;
            Algorithm.DoOneRun();

            var remainingA = Algorithm.HashesA.Count;
            var messageA   = string.Format("expected 1 after minimize, but: A = {0}", remainingA);
            Assert.AreEqual(1, remainingA, messageA);

            var remainingB = Algorithm.HashesB.Count;
            var messageB   = string.Format("expected 1 after minimize, but: B = {0}", remainingB);
            Assert.AreEqual(1, remainingB, messageB);
        }
Esempio n. 4
0
        // Prepares the fixture: a helper test object plus a hashing algorithm that
        // compares the text "Hallo" with itself using a minimum match length of 3.
        public void SetUp()
        {
            // NOTE(review): the helper receives typeof(GSTAlgorithm<...>) although the
            // algorithm created below is a HashingGSTAlgorithm<...> - confirm this is intended.
            abstractGSTAlgorithmTest = new AbstractGSTAlgorithmTest(typeof(GSTAlgorithm<GSTToken<char>>));

            var firstTokens  = GSTHelper.FromString("Hallo");
            var secondTokens = GSTHelper.FromString("Hallo");

            var algorithm = new HashingGSTAlgorithm<GSTToken<char>>(firstTokens, secondTokens);
            algorithm.MinimumMatchLength = 3;
            Algorithm = algorithm;
        }
Esempio n. 5
0
        // Builds the hash map for "Hallo" twice and checks, entry by entry, that the keys
        // are equal and that the first entity of each entry is equal per HashingEntity.Comparer.
        public void EqualityComparerTest()
        {
            var first  = HashingGSTAlgorithm<GSTToken<char>>.CreateHashMap(GSTHelper.FromString("Hallo"), 3).ToArray();
            var second = HashingGSTAlgorithm<GSTToken<char>>.CreateHashMap(GSTHelper.FromString("Hallo"), 3).ToArray();

            var entityComparer = HashingGSTAlgorithm<GSTToken<char>>.HashingEntity.Comparer;

            for (int index = 0; index < first.Length; index++)
            {
                var entryA = first[index];
                var entryB = second[index];

                // Only the first entity stored under each key is compared.
                var entityA = entryA.Value[0];
                var entityB = entryB.Value[0];

                Assert.AreEqual(entryA.Key, entryB.Key, string.Format("dict1 {0}, dict2 {1}", entityA, entityB));
                Assert.IsTrue(entityComparer.Equals(entityA, entityB));
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Tokenizes both files through a <c>MutexTokenFactory</c>, compares the two token
        /// lists with a <c>HashingGSTAlgorithm</c> (minimum match length 8) and returns
        /// the resulting similarity.
        /// </summary>
        /// <param name="path1">Path passed to the token factory for the first file.</param>
        /// <param name="path2">Path passed to the token factory for the second file.</param>
        /// <returns>The <c>Similarity</c> of the algorithm after it ran to completion.</returns>
        public static Int32 CompareFiles(string path1, string path2)
        {
            var tokenFactory = new MutexTokenFactory();

            var wrappers1 = tokenFactory.GetTokenWrapperListFromFile(path1);
            var wrappers2 = tokenFactory.GetTokenWrapperListFromFile(path2);

            var gstTokens1 = wrappers1.ToGSTTokenList<TokenWrapper>();
            var gstTokens2 = wrappers2.ToGSTTokenList<TokenWrapper>();

            var comparison = new HashingGSTAlgorithm<GSTToken<TokenWrapper>>(gstTokens1, gstTokens2);
            comparison.MinimumMatchLength = 8;
            comparison.RunToCompletion();

            return comparison.Similarity;
        }
Esempio n. 7
0
        /// <summary>
        /// Times <c>HashingGSTAlgorithm</c> and <c>GSTAlgorithm</c> on the first
        /// <paramref name="len"/> characters of <c>src1</c> and <c>src2</c>, prints the
        /// average runtimes and their ratio, and records them in <c>ResultList</c>.
        /// </summary>
        /// <param name="len">Number of leading characters taken from each source text.</param>
        private static void Calculate(int len)
        {
            var string1 = src1.Substring(0, len);
            var string2 = src2.Substring(0, len);

            // Each algorithm runs exactly testRuns times so that the averages below,
            // which divide by testRuns, match the number of measured runs.
            var watch = Stopwatch.StartNew();
            for (int i = 0; i < testRuns; i++)
            {
                var algo = new HashingGSTAlgorithm<GSTToken<char>>(
                    GSTHelper.FromString(string1),
                    GSTHelper.FromString(string2));

                algo.RunToCompletion();
            }
            watch.Stop();
            var runtimeHashing = watch.Elapsed;

            watch = Stopwatch.StartNew();
            for (int i = 0; i < testRuns; i++)
            {
                var algo = new GSTAlgorithm<GSTToken<char>>(
                    GSTHelper.FromString(string1),
                    GSTHelper.FromString(string2));

                algo.RunToCompletion();
            }
            watch.Stop();
            var runtimePlain = watch.Elapsed;

            // Divide TimeSpan ticks by TimeSpan ticks. Stopwatch.ElapsedTicks counts in units
            // of Stopwatch.Frequency, which need not match the 100 ns tick of a TimeSpan.
            var ratio = (double)runtimePlain.Ticks / runtimeHashing.Ticks;

            // Console and result line report the same fractional per-run averages.
            var averagePlain   = runtimePlain.TotalMilliseconds / testRuns;
            var averageHashing = runtimeHashing.TotalMilliseconds / testRuns;

            Console.WriteLine("runtimes: {0}, {1}, ratio: {2} for {3}",
                              averagePlain,
                              averageHashing,
                              ratio,
                              len);

            // EvaluateSpeed starts several worker threads.
            // NOTE(review): presumably they call this method concurrently - List<T> is not
            // safe for concurrent writes, so the shared list is guarded here.
            lock (ResultList)
            {
                // Add first, then sort, so the newest entry ends up in order as well.
                ResultList.Add(string.Format("{3:000};{0};{1};{2}",
                                             averagePlain,
                                             averageHashing,
                                             ratio,
                                             len));
                ResultList.Sort(String.CompareOrdinal);
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Compares two files and returns their similarity.
        /// Tokens come from a <c>MutexTokenFactory</c>; the comparison is done by a
        /// <c>HashingGSTAlgorithm</c> with a minimum match length of 8.
        /// </summary>
        /// <param name="path1">Path given to the token factory for the first file.</param>
        /// <param name="path2">Path given to the token factory for the second file.</param>
        /// <returns>The <c>Similarity</c> value of the completed algorithm run.</returns>
        public static Int32 CompareFiles(string path1, string path2)
        {
            var mutexFactory = new MutexTokenFactory();

            var fileTokens1 = mutexFactory.GetTokenWrapperListFromFile(path1);
            var fileTokens2 = mutexFactory.GetTokenWrapperListFromFile(path2);

            var listA = fileTokens1.ToGSTTokenList<TokenWrapper>();
            var listB = fileTokens2.ToGSTTokenList<TokenWrapper>();

            var gst = new HashingGSTAlgorithm<GSTToken<TokenWrapper>>(listA, listB);
            gst.MinimumMatchLength = 8;

            gst.RunToCompletion();

            return gst.Similarity;
        }
Esempio n. 9
0
        // Checks that tokens obtained from source text and tokens built from token names
        // have pairwise equal hash codes and are reported as 100 similar by the algorithm.
        public void ANTLRITokenVersusMutexTokenImpl()
        {
            var tokenFactory = new MutexTokenFactory();

            // Tokens produced from the source text.
            var lexedTokens = tokenFactory.GetTokenWrapperListFromSource("void main(int argc, char** argv)").ToGSTTokenList();

            // Tokens built directly from token names.
            string[] typeNames = { "VOID", "IDENTIFIER", "INTEGER_DATATYPE", "IDENTIFIER", "POINTER_DATATYPE", "IDENTIFIER" };
            var namedTokens = tokenFactory.GetTokenWrapperEnumerable(typeNames).ToGSTTokenList();

            for (int position = 0; position < lexedTokens.Count; position++)
            {
                int lexedHash = lexedTokens[position].GetHashCode();
                int namedHash = namedTokens[position].GetHashCode();
                Assert.AreEqual(lexedHash, namedHash);
            }

            var gst = new HashingGSTAlgorithm<GSTToken<TokenWrapper>>(lexedTokens, namedTokens);
            gst.MinimumMatchLength = 3;
            gst.RunToCompletion();

            Assert.AreEqual(100, gst.Similarity);
        }
Esempio n. 10
0
        // Compares a token list created from source text with one created from token
        // names: hash codes must match position by position and the similarity must be 100.
        public void ANTLRITokenVersusMutexTokenImpl()
        {
            var mutexFactory = new MutexTokenFactory();

            var fromSource = mutexFactory
                .GetTokenWrapperListFromSource("void main(int argc, char** argv)")
                .ToGSTTokenList();

            string[] names = { "VOID", "IDENTIFIER", "INTEGER_DATATYPE", "IDENTIFIER", "POINTER_DATATYPE", "IDENTIFIER" };
            var fromNames = mutexFactory
                .GetTokenWrapperEnumerable(names)
                .ToGSTTokenList();

            int index = 0;
            while (index < fromSource.Count)
            {
                Assert.AreEqual(fromSource[index].GetHashCode(), fromNames[index].GetHashCode());
                index++;
            }

            var comparison = new HashingGSTAlgorithm<GSTToken<TokenWrapper>>(fromSource, fromNames);
            comparison.MinimumMatchLength = 3;

            comparison.RunToCompletion();

            Assert.AreEqual(100, comparison.Similarity);
        }
Esempio n. 11
0
        /// <summary>
        /// compares the two algorithms against each other over a rather large file (~8000 chars).
        /// Loads both source files, warms up both algorithms, distributes the prefix lengths
        /// to measure over <c>Length1</c>, <c>Length2</c> and <c>Length3</c>, runs the three
        /// worker threads and finally writes <c>ResultList</c> to the performance file.
        /// </summary>
        private static void EvaluateSpeed()
        {
            const int WarmupLength = 100;
            const int LengthStep   = 100;

            XmlConfigurator.Configure(new FileInfo("log4net.xml"));
            src1 = File.ReadAllText(@"test\default_set\TPLV04-S01-02\main-01.c");
            src2 = File.ReadAllText(@"test\default_set\TPLV04-S01-02\main-02.c");

            // NOTE(review): a local "testRuns = 2" used to be declared here but was never read;
            // Calculate reads the class-level testRuns instead. Confirm that member holds the
            // intended number of runs.

            // JIT compile both algorithms before we start
            AbstractGSTAlgorithm<GSTToken<char>> initalgo = new HashingGSTAlgorithm<GSTToken<char>>(
                GSTHelper.FromString(src1.Substring(0, WarmupLength)),
                GSTHelper.FromString(src2.Substring(0, WarmupLength)));
            initalgo.RunToCompletion();

            initalgo = new GSTAlgorithm<GSTToken<char>>(
                GSTHelper.FromString(src1.Substring(0, WarmupLength)),
                GSTHelper.FromString(src2.Substring(0, WarmupLength)));
            initalgo.RunToCompletion();

            // Deal the lengths 100, 200, ... round-robin into three buckets, one per worker.
            var buckets = new[] { new List<int>(), new List<int>(), new List<int>() };
            int c = 0;

            for (int len = LengthStep; len < src1.Length && len < src2.Length; len += LengthStep)
            {
                buckets[c++ % 3].Add(len);
            }

            Length1 = buckets[0].ToArray();
            Length2 = buckets[1].ToArray();
            Length3 = buckets[2].ToArray();

            var workers = new[] { new Thread(Start1), new Thread(Start2), new Thread(Start3) };

            foreach (var worker in workers)
            {
                worker.Start();
            }

            Console.WriteLine("Main thread joining");
            foreach (var worker in workers)
            {
                worker.Join();
            }

            File.WriteAllLines(@"test\default_set\performance.txt", ResultList);

            Console.WriteLine("finished all runs");

            // Blocks until the user presses Enter.
            Console.ReadLine();
        }
Esempio n. 12
0
        /// <summary>
        /// Starts the comparison of <c>Tokens</c> against every entry of <c>ReferenceData</c>.
        /// After this method has completed you can retrieve the maximum found similarity
        /// from <c>MaximumSimilarity</c> and the matching student from <c>MaxSimilarityStudentID</c>.
        /// </summary>
        /// <returns>
        /// The highest similarity measured during this call
        /// (0 if no comparison produced a higher value).
        /// </returns>
        public int Calculate()
        {
            int max = 0;

            cLogger.DebugFormat("calculating similarity");

            foreach (var data in ReferenceData)
            {
                // create the lists here, because this way they are local to this run
                // ==> more functional and separated
                var sourceTokens = Tokens.ToGSTTokenList();
                var referenceTokens = Factory.GetTokenWrapperEnumerable(data.Tokens).ToGSTTokenList();
                var algorithm = new HashingGSTAlgorithm<GSTToken<TokenWrapper>>(sourceTokens, referenceTokens)
                {
                    MinimumMatchLength = DEFAULT_MML
                };

                algorithm.RunToCompletion();

                var similarity = algorithm.Similarity;
                cLogger.DebugFormat("similarity compared to {0}:{1}", data.StudentIdentifier, similarity);

                // Track the best value of this run; without this the method always returned 0.
                if (max < similarity)
                {
                    max = similarity;
                }

                // MaximumSimilarity is only ever raised here, never reset.
                if (MaximumSimilarity < similarity)
                {
                    MaximumSimilarity = similarity;
                    MaxSimilarityStudentID = data.StudentIdentifier;
                }
            }

            return max;
        }
Esempio n. 13
0
        // Creates the helper test object and a hashing algorithm whose two inputs are
        // both tokenized from "Hallo", with a minimum match length of 3.
        public void SetUp()
        {
            // NOTE(review): typeof(GSTAlgorithm<...>) is passed here while the algorithm
            // under test is a HashingGSTAlgorithm<...> - confirm this is intended.
            abstractGSTAlgorithmTest = new AbstractGSTAlgorithmTest(typeof(GSTAlgorithm<GSTToken<char>>));

            var left = GSTHelper.FromString("Hallo");
            var right = GSTHelper.FromString("Hallo");

            var hashing = new HashingGSTAlgorithm<GSTToken<char>>(left, right);
            hashing.MinimumMatchLength = 3;

            Algorithm = hashing;
        }
Esempio n. 14
0
        /// <summary>
        /// Times <c>HashingGSTAlgorithm</c> and <c>GSTAlgorithm</c> on the first
        /// <paramref name="len"/> characters of <c>src1</c> and <c>src2</c>, prints the
        /// average runtimes and their ratio, and records them in <c>ResultList</c>.
        /// </summary>
        /// <param name="len">Number of leading characters taken from each source text.</param>
        private static void Calculate(int len)
        {
            var string1 = src1.Substring(0, len);
            var string2 = src2.Substring(0, len);

            // Each algorithm runs exactly testRuns times so that the averages below,
            // which divide by testRuns, match the number of measured runs.
            var watch = Stopwatch.StartNew();
            for (int i = 0; i < testRuns; i++)
            {
                var algo = new HashingGSTAlgorithm<GSTToken<char>>(
                    GSTHelper.FromString(string1),
                    GSTHelper.FromString(string2));

                algo.RunToCompletion();
            }
            watch.Stop();
            var runtimeHashing = watch.Elapsed;

            watch = Stopwatch.StartNew();
            for (int i = 0; i < testRuns; i++)
            {
                var algo = new GSTAlgorithm<GSTToken<char>>(
                    GSTHelper.FromString(string1),
                    GSTHelper.FromString(string2));

                algo.RunToCompletion();
            }
            watch.Stop();
            var runtimePlain = watch.Elapsed;

            // Divide TimeSpan ticks by TimeSpan ticks. Stopwatch.ElapsedTicks counts in units
            // of Stopwatch.Frequency, which need not match the 100 ns tick of a TimeSpan.
            var ratio = (double)runtimePlain.Ticks / runtimeHashing.Ticks;

            // Console and result line report the same fractional per-run averages.
            var averagePlain = runtimePlain.TotalMilliseconds / testRuns;
            var averageHashing = runtimeHashing.TotalMilliseconds / testRuns;

            Console.WriteLine("runtimes: {0}, {1}, ratio: {2} for {3}",
                averagePlain,
                averageHashing,
                ratio,
                len);

            // EvaluateSpeed starts several worker threads.
            // NOTE(review): presumably they call this method concurrently - List<T> is not
            // safe for concurrent writes, so the shared list is guarded here.
            lock (ResultList)
            {
                // Add first, then sort, so the newest entry ends up in order as well.
                ResultList.Add(string.Format("{3:000};{0};{1};{2}",
                    averagePlain,
                    averageHashing,
                    ratio,
                    len));
                ResultList.Sort(String.CompareOrdinal);
            }
        }
Esempio n. 15
0
        /// <summary>
        /// compares the two algorithms against each other over a rather large file (~8000 chars).
        /// Loads both source files, warms up both algorithms, distributes the prefix lengths
        /// to measure over <c>Length1</c>, <c>Length2</c> and <c>Length3</c>, runs the three
        /// worker threads and finally writes <c>ResultList</c> to the performance file.
        /// </summary>
        private static void EvaluateSpeed()
        {
            const int WarmupLength = 100;
            const int LengthStep = 100;

            XmlConfigurator.Configure(new FileInfo("log4net.xml"));
            src1 = File.ReadAllText(@"test\default_set\TPLV04-S01-02\main-01.c");
            src2 = File.ReadAllText(@"test\default_set\TPLV04-S01-02\main-02.c");

            // NOTE(review): a local "testRuns = 2" used to be declared here but was never read;
            // Calculate reads the class-level testRuns instead. Confirm that member holds the
            // intended number of runs.

            // JIT compile both algorithms before we start
            AbstractGSTAlgorithm<GSTToken<char>> initalgo = new HashingGSTAlgorithm<GSTToken<char>>(
                GSTHelper.FromString(src1.Substring(0, WarmupLength)),
                GSTHelper.FromString(src2.Substring(0, WarmupLength)));
            initalgo.RunToCompletion();

            initalgo = new GSTAlgorithm<GSTToken<char>>(
                GSTHelper.FromString(src1.Substring(0, WarmupLength)),
                GSTHelper.FromString(src2.Substring(0, WarmupLength)));
            initalgo.RunToCompletion();

            // Deal the lengths 100, 200, ... round-robin into three buckets, one per worker.
            var buckets = new[] { new List<int>(), new List<int>(), new List<int>() };
            int c = 0;

            for (int len = LengthStep; len < src1.Length && len < src2.Length; len += LengthStep)
            {
                buckets[c++ % 3].Add(len);
            }

            Length1 = buckets[0].ToArray();
            Length2 = buckets[1].ToArray();
            Length3 = buckets[2].ToArray();

            var workers = new[] { new Thread(Start1), new Thread(Start2), new Thread(Start3) };

            foreach (var worker in workers)
            {
                worker.Start();
            }

            Console.WriteLine("Main thread joining");
            foreach (var worker in workers)
            {
                worker.Join();
            }

            File.WriteAllLines(@"test\default_set\performance.txt", ResultList);

            Console.WriteLine("finished all runs");

            // Blocks until the user presses Enter.
            Console.ReadLine();
        }