public ValueTuple <double, double> GetSimilarityTo(CodeAnalyzer analyzer) { int lineToCompareCount = analyzer.lines.Count; bool[] isSet = new bool[lineToCompareCount]; int[] highestSimIdxes = new int[lineToCompareCount]; float[] highestSims = new float[lineToCompareCount]; Queue <int> queue = new Queue <int>(); bool reEstimate = false; Action <int> EstimateSimilarity = idx => { var str1 = lines[idx]; CCore.Log("EstimateSimilarity: {0}", str1); float highestSim = 0.0f; int highestIdx = -1; // get most similar line for (int j = 0; j < analyzer.lines.Count; j++) { // skip if already find the same line if (highestSims[j] == 1.0f) { continue; } var str2 = analyzer.lines[j]; (int min, int max)pair = (Math.Min(str1.Length, str2.Length), Math.Max(str1.Length, str2.Length)); if (reEstimate) { // skip estimate if highestSim absolutely < highestSims[j] if (pair.min / (float)pair.max < highestSims[j]) { continue; } } else { // skip estimate if longer string length > 2 * shorter && already has >50% sim string if (pair.max >> 1 >= pair.min && highestSims[j] >= 0.5f) { continue; } } float strSim = Levenshtein(str1, str2); if (highestSim < strSim && strSim > highestSims[j]) { highestSim = strSim; highestIdx = j; } // find same line, quit loop if (highestSim == 1.0f) { break; } }
/// <summary>
/// Strips redundant filler from a piece of code text.
/// </summary>
/// <remarks>
/// Works in three passes:
/// 1. back-to-back repeats of a space, a semicolon, an empty string literal (<c>""</c>)
///    or a tab are collapsed to a single occurrence;
/// 2. every semicolon-delimited segment that consists only of spaces, tabs and double
///    quotes is emptied in place;
/// 3. the repeated semicolons left behind by pass 2 are collapsed again.
/// When the result differs from the input the change is reported through <c>CCore.Log</c>.
/// </remarks>
/// <param name="str">Text to clean up. Null or empty input is returned unchanged.</param>
/// <returns>The cleaned text.</returns>
string RemoveRedundancy(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return str;
    }

    // Characters that make a segment "filler only". A segment can never contain ';'
    // because the text is split on it.
    const string fillerChars = " \"\t";

    // Tokens whose immediate repetition is reduced to a single occurrence.
    string[] ignoreList = { " ", ";", "\"\"", "\t" };

    string tmp = str;

    // Repeatedly drops one copy of `token` wherever it appears twice in a row.
    // Ordinal search: these are literal code characters, so culture-sensitive
    // matching rules must not influence where a match is found.
    void Collapse(string token)
    {
        string doubled = token + token;
        int at;
        while ((at = tmp.IndexOf(doubled, StringComparison.Ordinal)) >= 0)
        {
            tmp = tmp.Remove(at, token.Length);
        }
    }

    foreach (var token in ignoreList)
    {
        Collapse(token);
    }

    // Empty the filler-only segments *in place*. Looking the segment text up with
    // IndexOf would hit its first occurrence anywhere in the string (for example the
    // space inside "int a") and delete that instead of the segment itself.
    string[] segments = tmp.Split(';');
    for (int i = 0; i < segments.Length; i++)
    {
        if (segments[i].All(c => fillerChars.IndexOf(c) >= 0))
        {
            segments[i] = string.Empty;
        }
    }
    tmp = string.Join(";", segments);

    // Emptied segments leave adjacent semicolons behind.
    Collapse(";");

    if (str != tmp)
    {
        CCore.Log("RemoveRedundancy: {0} -> {1}", str, tmp);
    }
    return tmp;
}
/// <summary>
/// Tries to interpret one command-line argument as an option and applies it.
/// </summary>
/// <remarks>
/// Matching is case-insensitive. Recognised forms:
/// an argument containing <c>suspectStr</c> (the text following it is parsed as the
/// similarity threshold and stored in <c>suspectedSim</c>), <c>-ignoreredundancy</c>
/// and <c>-ignorecommend</c>. Every recognised argument is logged.
/// </remarks>
/// <param name="arg">The raw argument.</param>
/// <returns><c>true</c> when the argument was recognised as an option; otherwise <c>false</c>.</returns>
/// <exception cref="FormatException">The text after <c>suspectStr</c> is not a valid number.</exception>
static bool Parse(string arg)
{
    // Invariant lowering: culture-sensitive ToLower() maps 'I' to a dotless 'ı' under
    // Turkish cultures, which would stop "-IGNOREREDUNDANCY" from matching below.
    var lower = arg.ToLowerInvariant();

    // One ordinal search serves as both the containment test and the position lookup,
    // so the two can never disagree.
    int suspectAt = lower.IndexOf(suspectStr, StringComparison.Ordinal);
    if (suspectAt >= 0)
    {
        CCore.Log("Parse: {0}", arg);
        // NOTE(review): the number is parsed with the current culture, so the accepted
        // decimal separator depends on the machine's locale - confirm this is intended.
        suspectedSim = double.Parse(lower.Substring(suspectAt + suspectStr.Length));
        return true;
    }

    switch (lower)
    {
        case "-ignoreredundancy":
            CCore.Log("Parse: {0}", arg);
            ignoreRedundancy = true;
            return true;

        case "-ignorecommend":
            CCore.Log("Parse: {0}", arg);
            ignoreCommend = true;
            return true;

        default:
            return false;
    }
}
/*
 * Disabled console entry point.
 *
 * static void Main(string[] args)
 * {
 * #if DEBUG
 *     Console.ReadKey();
 * #endif
 *     Console.WriteLine();
 *
 *     List<string> fileName = new List<string>();
 *
 *     for (int i = 0; i < args.Length; i++)
 *     {
 *         if (Parse(args[i]))
 *         {
 *             continue;
 *         }
 *         else
 *         {
 *             fileName.Add(args[i]);
 *         }
 *     }
 *
 *     try
 *     {
 *         for (int i = 0; i < fileName.Count - 1; i++)
 *         {
 *             using (FileStream file1 = new FileStream(fileName[i], FileMode.Open))
 *             {
 *                 var analyze1 = new CodeAnalyzer(file1);
 *                 for (int j = i + 1; j < fileName.Count; j++)
 *                 {
 *                     using (FileStream file2 = new FileStream(fileName[j], FileMode.Open))
 *                     {
 *                         var analyze2 = new CodeAnalyzer(file2);
 *                         Console.WriteLine("Compare {0} to {1}", fileName[i], fileName[j]);
 *                         var sim = analyze1.GetSimilarityTo(analyze2);
 *                         Console.WriteLine("Similarity: {0}", sim);
 *                         if (sim >= suspectedSim)
 *                         {
 *                             Warning();
 *                         }
 *                         Console.WriteLine();
 *                     }
 *                 }
 *             }
 *         }
 *     }
 *     catch (FileNotFoundException e)
 *     {
 *         Console.WriteLine("[ERROR] {0} file not found!", e.FileName);
 *         throw;
 *     }
 * #if DEBUG
 *     Console.ReadLine();
 * #endif
 * }
 */

/// <summary>
/// Compares two files: each is opened read-only and wrapped in a <c>CodeAnalyzer</c>,
/// then the first analyzer's <c>GetSimilarityTo</c> is evaluated against the second.
/// </summary>
/// <remarks>
/// The comparison and its result are logged. <c>Warning()</c> is raised when either
/// component of the result reaches <c>suspectedSim</c>.
/// </remarks>
/// <param name="path1">Path of the file whose analyzer performs the comparison.</param>
/// <param name="path2">Path of the file it is compared against.</param>
/// <returns>The tuple produced by <c>GetSimilarityTo</c>.</returns>
/// <exception cref="FileNotFoundException">A file is missing; the error is logged and rethrown.</exception>
static public ValueTuple <double, double> CompareFile(string path1, string path2)
{
    try
    {
        using (FileStream firstStream = new FileStream(path1, FileMode.Open, FileAccess.Read))
        {
            CodeAnalyzer firstAnalyzer = new CodeAnalyzer(firstStream);

            // The second file is opened only after the first analyzer has been built.
            using (FileStream secondStream = new FileStream(path2, FileMode.Open, FileAccess.Read))
            {
                CodeAnalyzer secondAnalyzer = new CodeAnalyzer(secondStream);

                CCore.Log("Compare {0} to {1}", path1, path2);
                ValueTuple<double, double> similarity = firstAnalyzer.GetSimilarityTo(secondAnalyzer);
                CCore.Log("Similarity: {0}", similarity);

                bool suspicious = similarity.Item1 >= suspectedSim
                                  || similarity.Item2 >= suspectedSim;
                if (suspicious)
                {
                    Warning();
                }

                CCore.Log();
                return similarity;
            }
        }
    }
    catch (FileNotFoundException missing)
    {
        CCore.Log("[ERROR] {0} file not found!", missing.FileName);
        throw;
    }
}
/// <summary>
/// Logs the "similarity too high" warning.
/// </summary>
// A message-box variant is kept here, disabled:
// MessageBox.Show("[WARNING] too high similarity!", "Compare Result");
static void Warning() => CCore.Log("[WARNING] too high similarity!");