public ValueTuple <double, double> GetSimilarityTo(CodeAnalyzer analyzer) { int lineToCompareCount = analyzer.lines.Count; bool[] isSet = new bool[lineToCompareCount]; int[] highestSimIdxes = new int[lineToCompareCount]; float[] highestSims = new float[lineToCompareCount]; Queue <int> queue = new Queue <int>(); bool reEstimate = false; Action <int> EstimateSimilarity = idx => { var str1 = lines[idx]; CCore.Log("EstimateSimilarity: {0}", str1); float highestSim = 0.0f; int highestIdx = -1; // get most similar line for (int j = 0; j < analyzer.lines.Count; j++) { // skip if already find the same line if (highestSims[j] == 1.0f) { continue; } var str2 = analyzer.lines[j]; (int min, int max)pair = (Math.Min(str1.Length, str2.Length), Math.Max(str1.Length, str2.Length)); if (reEstimate) { // skip estimate if highestSim absolutely < highestSims[j] if (pair.min / (float)pair.max < highestSims[j]) { continue; } } else { // skip estimate if longer string length > 2 * shorter && already has >50% sim string if (pair.max >> 1 >= pair.min && highestSims[j] >= 0.5f) { continue; } } float strSim = Levenshtein(str1, str2); if (highestSim < strSim && strSim > highestSims[j]) { highestSim = strSim; highestIdx = j; } // find same line, quit loop if (highestSim == 1.0f) { break; } }
/*
 * static void Main(string[] args)
 * {
 * #if DEBUG
 *     Console.ReadKey();
 #endif
 *     Console.WriteLine();
 *
 *     List<string> fileName = new List<string>();
 *
 *     for (int i = 0; i < args.Length; i++)
 *     {
 *         if (Parse(args[i]))
 *         {
 *             continue;
 *         }
 *         else
 *         {
 *             fileName.Add(args[i]);
 *         }
 *     }
 *
 *     try
 *     {
 *         for (int i = 0; i < fileName.Count - 1; i++)
 *         {
 *             using (FileStream file1 = new FileStream(fileName[i], FileMode.Open))
 *             {
 *                 var analyze1 = new CodeAnalyzer(file1);
 *                 for (int j = i + 1; j < fileName.Count; j++)
 *                 {
 *                     using (FileStream file2 = new FileStream(fileName[j], FileMode.Open))
 *                     {
 *                         var analyze2 = new CodeAnalyzer(file2);
 *                         Console.WriteLine("Compare {0} to {1}", fileName[i], fileName[j]);
 *                         var sim = analyze1.GetSimilarityTo(analyze2);
 *                         Console.WriteLine("Similarity: {0}", sim);
 *                         if (sim >= suspectedSim)
 *                         {
 *                             Warning();
 *                         }
 *                         Console.WriteLine();
 *                     }
 *                 }
 *             }
 *         }
 *     }
 *     catch (FileNotFoundException e)
 *     {
 *         Console.WriteLine("[ERROR] {0} file not found!", e.FileName);
 *         throw;
 *     }
 #if DEBUG
 *     Console.ReadLine();
 #endif
 * }
 */

/// <summary>
/// Opens the two given files read-only, wraps each in a <c>CodeAnalyzer</c>, and
/// returns the result of comparing the first analyzer against the second.
/// The comparison and its result are logged through <c>CCore.Log</c>, and
/// <c>Warning()</c> is invoked when either component of the result reaches
/// <c>suspectedSim</c>.
/// </summary>
/// <param name="path1">Path of the file the comparison is made from.</param>
/// <param name="path2">Path of the file the comparison is made against.</param>
/// <returns>
/// The pair returned by <c>GetSimilarityTo</c>; what each component means is
/// defined by that method.
/// </returns>
/// <exception cref="FileNotFoundException">
/// Logged and then rethrown unchanged if raised while opening or analyzing the files.
/// </exception>
static public ValueTuple<double, double> CompareFile(string path1, string path2)
{
    try
    {
        using (var firstStream = new FileStream(path1, FileMode.Open, FileAccess.Read))
        {
            // The first analyzer is built before the second file is even opened.
            var firstAnalyzer = new CodeAnalyzer(firstStream);

            using (var secondStream = new FileStream(path2, FileMode.Open, FileAccess.Read))
            {
                var secondAnalyzer = new CodeAnalyzer(secondStream);

                CCore.Log("Compare {0} to {1}", path1, path2);
                ValueTuple<double, double> similarity = firstAnalyzer.GetSimilarityTo(secondAnalyzer);
                CCore.Log("Similarity: {0}", similarity);

                // Either component reaching the threshold is enough to raise the warning.
                bool reachesThreshold = similarity.Item1 >= suspectedSim
                                        || similarity.Item2 >= suspectedSim;
                if (reachesThreshold)
                {
                    Warning();
                }

                CCore.Log();
                return similarity;
            }
        }
    }
    catch (FileNotFoundException missing)
    {
        CCore.Log("[ERROR] {0} file not found!", missing.FileName);
        throw;
    }
}