/// <summary>
/// Tokenizes the two sources, runs the GST algorithm on the token streams to completion
/// and stores the comparison through <c>SQLFacade</c>.
/// If a <c>template.c</c> file sits in the directory of <paramref name="sourcePath1"/>,
/// both sources are first passed through <c>TemplatingHelper.StripTemplateFromSourceFile</c>.
/// </summary>
/// <param name="name">Name assigned to this comparison and written to the database.</param>
/// <param name="evalRunID">ID of the evaluation run this comparison belongs to.</param>
/// <param name="sourcePath1">Path of the first source file; its directory is searched for <c>template.c</c>.</param>
/// <param name="sourcePath2">Path of the second source file.</param>
public ComparisonModel(string name, Int64 evalRunID, string sourcePath1, string sourcePath2)
{
    Name = name;
    EvaluationRunID = evalRunID;

    // Resolve the template path exactly once so that the existence flag and the path
    // handed to the templating helper can never disagree.
    var directory = Path.GetDirectoryName(Path.GetFullPath(sourcePath1));
    var tmplFile = Path.Combine(directory, "template.c");
    bool tmplFileExists = File.Exists(tmplFile);

    // The measured time covers template stripping, tokenizing, the GST run and model creation.
    var watch = Stopwatch.StartNew();

    var path1 = tmplFileExists
        ? TemplatingHelper.StripTemplateFromSourceFile(sourcePath1, tmplFile)
        : sourcePath1;
    var path2 = tmplFileExists
        ? TemplatingHelper.StripTemplateFromSourceFile(sourcePath2, tmplFile)
        : sourcePath2;

    var factory = new MutexTokenFactory();
    var tokens1 = factory.GetTokenWrapperListFromFile(path1);
    var tokens2 = factory.GetTokenWrapperListFromFile(path2);

    cLogger.DebugFormat("TokenStream Length: {0} -- {1}", tokens1.Count(), tokens2.Count());

    var algo = new GSTAlgorithm<GSTToken<TokenWrapper>>(
        tokens1.ToGSTTokenList<TokenWrapper>(),
        tokens2.ToGSTTokenList<TokenWrapper>());
    algo.RunToCompletion();
    Result = algo.Similarity;

    // The source models are named after the original files, not the template-stripped ones.
    Source1 = new SourceModel(Path.GetFileNameWithoutExtension(sourcePath1), factory.GetJoinedTokenString(tokens1));
    Source2 = new SourceModel(Path.GetFileNameWithoutExtension(sourcePath2), factory.GetJoinedTokenString(tokens2));

    SQLFacade.Instance.CreateComparison(name, Result, watch.ElapsedMilliseconds, evalRunID, Source1.ID, Source2.ID);
}
/// <summary>
/// Tokenizes the two sources, runs the GST algorithm on the token streams to completion
/// and stores the comparison through <c>SQLFacade</c>.
/// If a <c>template.c</c> file sits in the directory of <paramref name="sourcePath1"/>,
/// both sources are first passed through <c>TemplatingHelper.StripTemplateFromSourceFile</c>.
/// </summary>
/// <param name="name">Name assigned to this comparison and written to the database.</param>
/// <param name="evalRunID">ID of the evaluation run this comparison belongs to.</param>
/// <param name="sourcePath1">Path of the first source file; its directory is searched for <c>template.c</c>.</param>
/// <param name="sourcePath2">Path of the second source file.</param>
public ComparisonModel(string name, Int64 evalRunID, string sourcePath1, string sourcePath2)
{
    Name = name;
    EvaluationRunID = evalRunID;

    // Resolve the template path exactly once so that the existence flag and the path
    // handed to the templating helper can never disagree.
    var directory = Path.GetDirectoryName(Path.GetFullPath(sourcePath1));
    var tmplFile = Path.Combine(directory, "template.c");
    bool tmplFileExists = File.Exists(tmplFile);

    // The measured time covers template stripping, tokenizing, the GST run and model creation.
    var watch = Stopwatch.StartNew();

    var path1 = tmplFileExists
        ? TemplatingHelper.StripTemplateFromSourceFile(sourcePath1, tmplFile)
        : sourcePath1;
    var path2 = tmplFileExists
        ? TemplatingHelper.StripTemplateFromSourceFile(sourcePath2, tmplFile)
        : sourcePath2;

    var factory = new MutexTokenFactory();
    var tokens1 = factory.GetTokenWrapperListFromFile(path1);
    var tokens2 = factory.GetTokenWrapperListFromFile(path2);

    cLogger.DebugFormat("TokenStream Length: {0} -- {1}", tokens1.Count(), tokens2.Count());

    var algo = new GSTAlgorithm<GSTToken<TokenWrapper>>(
        tokens1.ToGSTTokenList<TokenWrapper>(),
        tokens2.ToGSTTokenList<TokenWrapper>());
    algo.RunToCompletion();
    Result = algo.Similarity;

    // The source models are named after the original files, not the template-stripped ones.
    Source1 = new SourceModel(Path.GetFileNameWithoutExtension(sourcePath1), factory.GetJoinedTokenString(tokens1));
    Source2 = new SourceModel(Path.GetFileNameWithoutExtension(sourcePath2), factory.GetJoinedTokenString(tokens2));

    SQLFacade.Instance.CreateComparison(name, Result, watch.ElapsedMilliseconds, evalRunID, Source1.ID, Source2.ID);
}
/// <summary>
/// Event handler that builds a new character-based GST algorithm from the contents of
/// <c>TextABox</c> and <c>TextBBox</c>, stores it in <c>Algorithm</c> and then calls <c>Reset()</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void StartAlgorithm(object sender, RoutedEventArgs e)
{
    // Any remaining "\r\n" sequence is replaced by the two-character placeholder "AA".
    var tokensA = GSTHelper.FromString(TextABox.GetTextWithoutLineBreaks().Replace("\r\n", "AA"));
    var tokensB = GSTHelper.FromString(TextBBox.GetTextWithoutLineBreaks().Replace("\r\n", "AA"));

    var freshAlgorithm = new GSTAlgorithm<GSTToken<char>>(tokensA, tokensB);

    // The value entered in MMLTB is reduced by one before it is used as minimum match length.
    freshAlgorithm.MinimumMatchLength = Int32.Parse(MMLTB.Text) - 1;
    Algorithm = freshAlgorithm;

    Reset();
}
/// <summary>
/// Prepares the state used by the tests: a shared <c>AbstractGSTAlgorithmTest</c> bound to the
/// <c>GSTAlgorithm</c> type and an algorithm instance comparing "Hallo" with "Hallo"
/// using a minimum match length of 3.
/// </summary>
public void SetUp()
{
    var algorithmType = typeof(GSTAlgorithm<GSTToken<char>>);
    abstractGSTAlgorithmTest = new AbstractGSTAlgorithmTest(algorithmType);

    // Both inputs are tokenized from the same literal.
    var first = GSTHelper.FromString("Hallo");
    var second = GSTHelper.FromString("Hallo");

    var underTest = new GSTAlgorithm<GSTToken<char>>(first, second);
    underTest.MinimumMatchLength = 3;
    Algorithm = underTest;
}
/// <summary>
/// Accuracy evaluation only, not meant for regular use.
/// Compares the <c>main.c</c> of every sub-directory with the <c>main.c</c> of every other
/// sub-directory (each unordered pair once), prints the similarity of each pair
/// and then terminates the process with exit code 0.
/// </summary>
/// <remarks>
/// The root directory is the first command line argument if one is given, otherwise <c>myDir</c>.
/// </remarks>
private static void doEvaluate()
{
    var args = Environment.GetCommandLineArgs();
    var dir = args.Length > 1
        ? new DirectoryInfo(args[1])
        : new DirectoryInfo(@"myDir");

    Console.WriteLine("directory: {0}", dir.FullName);

    // Take one sorted snapshot of the sub-directories. Walking index pairs (a < b) visits every
    // unordered pair exactly once, in the same order as before, without building string keys
    // that become ambiguous when a directory name itself contains '-'.
    var subdirs = dir.GetDirectories().OrderBy(sd => sd.Name).ToList();

    for (int a = 0; a < subdirs.Count; a++)
    {
        var fileA = new FileInfo(Path.Combine(subdirs[a].FullName, "main.c"));
        var tokensA = GetTokens(fileA);

        for (int b = a + 1; b < subdirs.Count; b++)
        {
            var fileB = new FileInfo(Path.Combine(subdirs[b].FullName, "main.c"));
            var tokensB = GetTokens(fileB);

            var algo = new GSTAlgorithm<GSTToken<TokenWrapper>>(tokensA, tokensB)
            {
                MinimumMatchLength = 5
            };
            algo.RunToCompletion();

            Console.WriteLine("{0}-{1}:{2}", subdirs[a].Name, subdirs[b].Name, algo.Similarity);
        }
    }

    Console.WriteLine("FINISHED EVAL");
    Environment.Exit(0);
}
/// <summary>
/// Times <c>HashingGSTAlgorithm</c> and <c>GSTAlgorithm</c> on the first <paramref name="len"/>
/// characters of <c>src1</c> and <c>src2</c>, prints the per-run averages and their ratio,
/// and appends a semicolon-separated result line to <c>ResultList</c>.
/// </summary>
/// <param name="len">Number of leading characters to compare; must not exceed the length of either source.</param>
private static void Calculate(int len)
{
    var string1 = src1.Substring(0, len);
    var string2 = src2.Substring(0, len);

    // Each loop runs exactly testRuns times so that dividing by testRuns yields a true average.
    var watch = Stopwatch.StartNew();
    for (int i = 0; i < testRuns; i++)
    {
        var algo = new HashingGSTAlgorithm<GSTToken<char>>(
            GSTHelper.FromString(string1),
            GSTHelper.FromString(string2));
        algo.RunToCompletion();
    }
    var runtimeHashing = watch.Elapsed;

    watch = Stopwatch.StartNew();
    for (int i = 0; i < testRuns; i++)
    {
        var algo = new GSTAlgorithm<GSTToken<char>>(
            GSTHelper.FromString(string1),
            GSTHelper.FromString(string2));
        algo.RunToCompletion();
    }
    // Freeze the measurement so every value reported below refers to the same elapsed time.
    watch.Stop();

    // Compare TimeSpan ticks with TimeSpan ticks: Stopwatch.ElapsedTicks is expressed in
    // Stopwatch.Frequency units, which need not match the 100 ns ticks of a TimeSpan.
    var ratio = (double)watch.Elapsed.Ticks / runtimeHashing.Ticks;

    Console.WriteLine("runtimes: {0}, {1}, ratio: {2} for {3}",
        watch.ElapsedMilliseconds / testRuns,
        runtimeHashing.TotalMilliseconds / testRuns,
        ratio,
        len);

    var resultLine = string.Format("{3:000};{0};{1};{2}",
        (watch.Elapsed.TotalMilliseconds / testRuns),
        (runtimeHashing.TotalMilliseconds / testRuns),
        ratio,
        len);

    // NOTE(review): EvaluateSpeed starts several worker threads; presumably they call this
    // method concurrently, so access to the shared list is serialized - confirm against Start1..3.
    lock (ResultList)
    {
        // Add first, then sort, so the newest entry is included in the ordering.
        ResultList.Add(resultLine);
        ResultList.Sort(String.CompareOrdinal);
    }
}
/// <summary>
/// Speed evaluation of the GST algorithm only, not meant for regular use.
/// Tokenizes every <c>main.c</c> below <c>C:\Quelltexte</c>, runs the algorithm on every ordered
/// pair of token lists for a fixed number of passes, prints the average time per pass,
/// waits for a key press and then terminates the process with exit code 0.
/// </summary>
private static void EvaluateGSTSpeed()
{
    // Number of passes over the full cartesian product; also the divisor of the reported average.
    const int runs = 100;

    var files = Directory.GetFiles(@"C:\Quelltexte", @"main.c", SearchOption.AllDirectories)
        .Select(GetTokens)
        .ToList();

    long elapsedTicks = 0; // accumulated in TimeSpan ticks (100 ns)
    int similarity = 0;

    for (int run = 0; run < runs; run++)
    {
        GSTAlgorithm<GSTToken<TokenWrapper>> lastAlgorithm = null;

        foreach (var first in files)
        {
            foreach (var second in files)
            {
                var alg = new GSTAlgorithm<GSTToken<TokenWrapper>>(first, second)
                {
                    MinimumMatchLength = 5
                };
                lastAlgorithm = alg;

                // Only the algorithm run itself is timed, not the construction.
                var watch = Stopwatch.StartNew();
                alg.RunToCompletion();
                watch.Stop();

                // Elapsed.Ticks is in TimeSpan units; Stopwatch.ElapsedTicks is in
                // Stopwatch.Frequency units and must not be fed to TimeSpan.FromTicks.
                elapsedTicks += watch.Elapsed.Ticks;
            }
        }

        // With no input files there is no algorithm instance to read a similarity from.
        if (lastAlgorithm != null)
        {
            similarity += lastAlgorithm.Similarity;
        }
    }

    // The cartesian product of a list with itself has Count * Count elements.
    Console.WriteLine("Number: {0}, Similarity: {1}", files.Count * files.Count, similarity);
    Console.WriteLine("finished in {0} seconds", TimeSpan.FromTicks(elapsedTicks).TotalSeconds / runs);
    Console.ReadLine();
    Environment.Exit(0);
}
/// <summary>
/// Accuracy evaluation only, not meant for regular use.
/// Compares the <c>main.c</c> of every sub-directory with the <c>main.c</c> of every other
/// sub-directory (each unordered pair once), prints the similarity of each pair
/// and then terminates the process with exit code 0.
/// </summary>
/// <remarks>
/// The root directory is the first command line argument if one is given, otherwise <c>myDir</c>.
/// </remarks>
private static void doEvaluate()
{
    var args = Environment.GetCommandLineArgs();
    var dir = args.Length > 1
        ? new DirectoryInfo(args[1])
        : new DirectoryInfo(@"myDir");

    Console.WriteLine("directory: {0}", dir.FullName);

    // Take one sorted snapshot of the sub-directories. Walking index pairs (a < b) visits every
    // unordered pair exactly once, in the same order as before, without building string keys
    // that become ambiguous when a directory name itself contains '-'.
    var subdirs = dir.GetDirectories().OrderBy(sd => sd.Name).ToList();

    for (int a = 0; a < subdirs.Count; a++)
    {
        var fileA = new FileInfo(Path.Combine(subdirs[a].FullName, "main.c"));
        var tokensA = GetTokens(fileA);

        for (int b = a + 1; b < subdirs.Count; b++)
        {
            var fileB = new FileInfo(Path.Combine(subdirs[b].FullName, "main.c"));
            var tokensB = GetTokens(fileB);

            var algo = new GSTAlgorithm<GSTToken<TokenWrapper>>(tokensA, tokensB)
            {
                MinimumMatchLength = 5
            };
            algo.RunToCompletion();

            Console.WriteLine("{0}-{1}:{2}", subdirs[a].Name, subdirs[b].Name, algo.Similarity);
        }
    }

    Console.WriteLine("FINISHED EVAL");
    Environment.Exit(0);
}
/// <summary>
/// Event handler that builds a new character-based GST algorithm from the contents of
/// <c>TextABox</c> and <c>TextBBox</c>, stores it in <c>Algorithm</c> and then calls <c>Reset()</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void StartAlgorithm(object sender, RoutedEventArgs e)
{
    // Any remaining "\r\n" sequence is replaced by the two-character placeholder "AA".
    var tokensA = GSTHelper.FromString(TextABox.GetTextWithoutLineBreaks().Replace("\r\n", "AA"));
    var tokensB = GSTHelper.FromString(TextBBox.GetTextWithoutLineBreaks().Replace("\r\n", "AA"));

    var freshAlgorithm = new GSTAlgorithm<GSTToken<char>>(tokensA, tokensB);

    // The value entered in MMLTB is reduced by one before it is used as minimum match length.
    freshAlgorithm.MinimumMatchLength = Int32.Parse(MMLTB.Text) - 1;
    Algorithm = freshAlgorithm;

    Reset();
}
/// <summary>
/// Compares the two algorithms against each other over a rather large file (~8000 chars).
/// Loads both sources, warms up both algorithm implementations, distributes the prefix lengths
/// (100, 200, ...) round-robin over <c>Length1</c>, <c>Length2</c> and <c>Length3</c>, runs the
/// three worker threads, and finally writes <c>ResultList</c> to <c>performance.txt</c>.
/// </summary>
private static void EvaluateSpeed()
{
    XmlConfigurator.Configure(new FileInfo("log4net.xml"));
    src1 = File.ReadAllText(@"test\default_set\TPLV04-S01-02\main-01.c");
    src2 = File.ReadAllText(@"test\default_set\TPLV04-S01-02\main-02.c");

    // Prefix lengths grow in steps of this size; the warm-up uses one step as well.
    const int step = 100;
    int shortest = Math.Min(src1.Length, src2.Length);

    // JIT compile both algorithms before we start. The warm-up length is clamped so that
    // sources shorter than one step do not make Substring throw.
    int warmUpLength = Math.Min(step, shortest);
    AbstractGSTAlgorithm<GSTToken<char>> initalgo = new HashingGSTAlgorithm<GSTToken<char>>(
        GSTHelper.FromString(src1.Substring(0, warmUpLength)),
        GSTHelper.FromString(src2.Substring(0, warmUpLength)));
    initalgo.RunToCompletion();

    initalgo = new GSTAlgorithm<GSTToken<char>>(
        GSTHelper.FromString(src1.Substring(0, warmUpLength)),
        GSTHelper.FromString(src2.Substring(0, warmUpLength)));
    initalgo.RunToCompletion();

    // Deal the lengths out round-robin into three work lists.
    var liLen1 = new List<int>();
    var liLen2 = new List<int>();
    var liLen3 = new List<int>();
    int c = 0;
    for (int len = step; len < shortest; len += step)
    {
        switch (c++ % 3)
        {
            case 0:
                liLen1.Add(len);
                break;
            case 1:
                liLen2.Add(len);
                break;
            default:
                liLen3.Add(len);
                break;
        }
    }

    Length1 = liLen1.ToArray();
    Length2 = liLen2.ToArray();
    Length3 = liLen3.ToArray();

    // NOTE(review): Start1..Start3 are not visible here; presumably each one processes the
    // matching LengthN array - confirm.
    var t1 = new Thread(Start1);
    t1.Start();
    var t2 = new Thread(Start2);
    t2.Start();
    var t3 = new Thread(Start3);
    t3.Start();

    Console.WriteLine("Main thread joining");
    t1.Join();
    t2.Join();
    t3.Join();

    // All workers have finished, so the result list can be written out.
    File.WriteAllLines(@"test\default_set\performance.txt", ResultList);
    Console.WriteLine("finished all runs");
    Console.ReadLine();
}
/// <summary>
/// Speed evaluation of the GST algorithm only, not meant for regular use.
/// Tokenizes every <c>main.c</c> below <c>C:\Quelltexte</c>, runs the algorithm on every ordered
/// pair of token lists for a fixed number of passes, prints the average time per pass,
/// waits for a key press and then terminates the process with exit code 0.
/// </summary>
private static void EvaluateGSTSpeed()
{
    // Number of passes over the full cartesian product; also the divisor of the reported average.
    const int runs = 100;

    var files = Directory.GetFiles(@"C:\Quelltexte", @"main.c", SearchOption.AllDirectories)
        .Select(GetTokens)
        .ToList();

    long elapsedTicks = 0; // accumulated in TimeSpan ticks (100 ns)
    int similarity = 0;

    for (int run = 0; run < runs; run++)
    {
        GSTAlgorithm<GSTToken<TokenWrapper>> lastAlgorithm = null;

        foreach (var first in files)
        {
            foreach (var second in files)
            {
                var alg = new GSTAlgorithm<GSTToken<TokenWrapper>>(first, second)
                {
                    MinimumMatchLength = 5
                };
                lastAlgorithm = alg;

                // Only the algorithm run itself is timed, not the construction.
                var watch = Stopwatch.StartNew();
                alg.RunToCompletion();
                watch.Stop();

                // Elapsed.Ticks is in TimeSpan units; Stopwatch.ElapsedTicks is in
                // Stopwatch.Frequency units and must not be fed to TimeSpan.FromTicks.
                elapsedTicks += watch.Elapsed.Ticks;
            }
        }

        // With no input files there is no algorithm instance to read a similarity from.
        if (lastAlgorithm != null)
        {
            similarity += lastAlgorithm.Similarity;
        }
    }

    // The cartesian product of a list with itself has Count * Count elements.
    Console.WriteLine("Number: {0}, Similarity: {1}", files.Count * files.Count, similarity);
    Console.WriteLine("finished in {0} seconds", TimeSpan.FromTicks(elapsedTicks).TotalSeconds / runs);
    Console.ReadLine();
    Environment.Exit(0);
}
/// <summary>
/// Times <c>HashingGSTAlgorithm</c> and <c>GSTAlgorithm</c> on the first <paramref name="len"/>
/// characters of <c>src1</c> and <c>src2</c>, prints the per-run averages and their ratio,
/// and appends a semicolon-separated result line to <c>ResultList</c>.
/// </summary>
/// <param name="len">Number of leading characters to compare; must not exceed the length of either source.</param>
private static void Calculate(int len)
{
    var string1 = src1.Substring(0, len);
    var string2 = src2.Substring(0, len);

    // Each loop runs exactly testRuns times so that dividing by testRuns yields a true average.
    var watch = Stopwatch.StartNew();
    for (int i = 0; i < testRuns; i++)
    {
        var algo = new HashingGSTAlgorithm<GSTToken<char>>(
            GSTHelper.FromString(string1),
            GSTHelper.FromString(string2));
        algo.RunToCompletion();
    }
    var runtimeHashing = watch.Elapsed;

    watch = Stopwatch.StartNew();
    for (int i = 0; i < testRuns; i++)
    {
        var algo = new GSTAlgorithm<GSTToken<char>>(
            GSTHelper.FromString(string1),
            GSTHelper.FromString(string2));
        algo.RunToCompletion();
    }
    // Freeze the measurement so every value reported below refers to the same elapsed time.
    watch.Stop();

    // Compare TimeSpan ticks with TimeSpan ticks: Stopwatch.ElapsedTicks is expressed in
    // Stopwatch.Frequency units, which need not match the 100 ns ticks of a TimeSpan.
    var ratio = (double)watch.Elapsed.Ticks / runtimeHashing.Ticks;

    Console.WriteLine("runtimes: {0}, {1}, ratio: {2} for {3}",
        watch.ElapsedMilliseconds / testRuns,
        runtimeHashing.TotalMilliseconds / testRuns,
        ratio,
        len);

    var resultLine = string.Format("{3:000};{0};{1};{2}",
        (watch.Elapsed.TotalMilliseconds / testRuns),
        (runtimeHashing.TotalMilliseconds / testRuns),
        ratio,
        len);

    // NOTE(review): EvaluateSpeed starts several worker threads; presumably they call this
    // method concurrently, so access to the shared list is serialized - confirm against Start1..3.
    lock (ResultList)
    {
        // Add first, then sort, so the newest entry is included in the ordering.
        ResultList.Add(resultLine);
        ResultList.Sort(String.CompareOrdinal);
    }
}
/// <summary>
/// Compares the two algorithms against each other over a rather large file (~8000 chars).
/// Loads both sources, warms up both algorithm implementations, distributes the prefix lengths
/// (100, 200, ...) round-robin over <c>Length1</c>, <c>Length2</c> and <c>Length3</c>, runs the
/// three worker threads, and finally writes <c>ResultList</c> to <c>performance.txt</c>.
/// </summary>
private static void EvaluateSpeed()
{
    XmlConfigurator.Configure(new FileInfo("log4net.xml"));
    src1 = File.ReadAllText(@"test\default_set\TPLV04-S01-02\main-01.c");
    src2 = File.ReadAllText(@"test\default_set\TPLV04-S01-02\main-02.c");

    // Prefix lengths grow in steps of this size; the warm-up uses one step as well.
    const int step = 100;
    int shortest = Math.Min(src1.Length, src2.Length);

    // JIT compile both algorithms before we start. The warm-up length is clamped so that
    // sources shorter than one step do not make Substring throw.
    int warmUpLength = Math.Min(step, shortest);
    AbstractGSTAlgorithm<GSTToken<char>> initalgo = new HashingGSTAlgorithm<GSTToken<char>>(
        GSTHelper.FromString(src1.Substring(0, warmUpLength)),
        GSTHelper.FromString(src2.Substring(0, warmUpLength)));
    initalgo.RunToCompletion();

    initalgo = new GSTAlgorithm<GSTToken<char>>(
        GSTHelper.FromString(src1.Substring(0, warmUpLength)),
        GSTHelper.FromString(src2.Substring(0, warmUpLength)));
    initalgo.RunToCompletion();

    // Deal the lengths out round-robin into three work lists.
    var liLen1 = new List<int>();
    var liLen2 = new List<int>();
    var liLen3 = new List<int>();
    int c = 0;
    for (int len = step; len < shortest; len += step)
    {
        switch (c++ % 3)
        {
            case 0:
                liLen1.Add(len);
                break;
            case 1:
                liLen2.Add(len);
                break;
            default:
                liLen3.Add(len);
                break;
        }
    }

    Length1 = liLen1.ToArray();
    Length2 = liLen2.ToArray();
    Length3 = liLen3.ToArray();

    // NOTE(review): Start1..Start3 are not visible here; presumably each one processes the
    // matching LengthN array - confirm.
    var t1 = new Thread(Start1);
    t1.Start();
    var t2 = new Thread(Start2);
    t2.Start();
    var t3 = new Thread(Start3);
    t3.Start();

    Console.WriteLine("Main thread joining");
    t1.Join();
    t2.Join();
    t3.Join();

    // All workers have finished, so the result list can be written out.
    File.WriteAllLines(@"test\default_set\performance.txt", ResultList);
    Console.WriteLine("finished all runs");
    Console.ReadLine();
}
/// <summary>
/// Prepares the state used by the tests: a shared <c>AbstractGSTAlgorithmTest</c> bound to the
/// <c>GSTAlgorithm</c> type and an algorithm instance comparing "Hallo" with "Hallo"
/// using a minimum match length of 3.
/// </summary>
public void SetUp()
{
    var algorithmType = typeof(GSTAlgorithm<GSTToken<char>>);
    abstractGSTAlgorithmTest = new AbstractGSTAlgorithmTest(algorithmType);

    // Both inputs are tokenized from the same literal.
    var first = GSTHelper.FromString("Hallo");
    var second = GSTHelper.FromString("Hallo");

    var underTest = new GSTAlgorithm<GSTToken<char>>(first, second);
    underTest.MinimumMatchLength = 3;
    Algorithm = underTest;
}