Пример #1
0
        public ValueTuple <double, double> GetSimilarityTo(CodeAnalyzer analyzer)
        {
            int lineToCompareCount = analyzer.lines.Count;

            bool[]      isSet           = new bool[lineToCompareCount];
            int[]       highestSimIdxes = new int[lineToCompareCount];
            float[]     highestSims     = new float[lineToCompareCount];
            Queue <int> queue           = new Queue <int>();

            bool reEstimate = false;

            Action <int> EstimateSimilarity = idx =>
            {
                var str1 = lines[idx];
                CCore.Log("EstimateSimilarity: {0}", str1);

                float highestSim = 0.0f;
                int   highestIdx = -1;
                // get most similar line
                for (int j = 0; j < analyzer.lines.Count; j++)
                {
                    // skip if already find the same line
                    if (highestSims[j] == 1.0f)
                    {
                        continue;
                    }

                    var str2 = analyzer.lines[j];

                    (int min, int max)pair = (Math.Min(str1.Length, str2.Length), Math.Max(str1.Length, str2.Length));
                    if (reEstimate)
                    {
                        // skip estimate if highestSim absolutely < highestSims[j]
                        if (pair.min / (float)pair.max < highestSims[j])
                        {
                            continue;
                        }
                    }
                    else
                    {
                        // skip estimate if longer string length > 2 * shorter && already has >50% sim string
                        if (pair.max >> 1 >= pair.min && highestSims[j] >= 0.5f)
                        {
                            continue;
                        }
                    }

                    float strSim = Levenshtein(str1, str2);
                    if (highestSim < strSim && strSim > highestSims[j])
                    {
                        highestSim = strSim;
                        highestIdx = j;
                    }

                    // find same line, quit loop
                    if (highestSim == 1.0f)
                    {
                        break;
                    }
                }
Пример #2
0
        /*
         * static void Main(string[] args)
         * {
         *
         #if DEBUG
         *  Console.ReadKey();
         #endif
         *  Console.WriteLine();
         *
         *  List<string> fileName = new List<string>();
         *
         *  for (int i = 0; i < args.Length; i++)
         *  {
         *      if (Parse(args[i]))
         *      {
         *          continue;
         *      }
         *      else
         *      {
         *          fileName.Add(args[i]);
         *      }
         *  }
         *
         *  try
         *  {
         *      for (int i = 0; i < fileName.Count - 1; i++)
         *      {
         *          using (FileStream file1 = new FileStream(fileName[i], FileMode.Open))
         *          {
         *              var analyze1 = new CodeAnalyzer(file1);
         *              for (int j = i + 1; j < fileName.Count; j++)
         *              {
         *                  using (FileStream file2 = new FileStream(fileName[j], FileMode.Open))
         *                  {
         *                      var analyze2 = new CodeAnalyzer(file2);
         *                      Console.WriteLine("Compare {0} to {1}", fileName[i], fileName[j]);
         *                      var sim = analyze1.GetSimilarityTo(analyze2);
         *                      Console.WriteLine("Similarity: {0}", sim);
         *                      if (sim >= suspectedSim)
         *                      {
         *                          Warning();
         *                      }
         *                      Console.WriteLine();
         *                  }
         *              }
         *          }
         *      }
         *  }
         *  catch (FileNotFoundException e)
         *  {
         *      Console.WriteLine("[ERROR] {0} file not found!", e.FileName);
         *      throw;
         *  }
         #if DEBUG
         *  Console.ReadLine();
         #endif
         * }
         */
        /// <summary>
        /// Opens both files read-only, wraps each stream in a <see cref="CodeAnalyzer"/> and
        /// returns what the first analyzer's <c>GetSimilarityTo</c> yields for the second.
        /// </summary>
        /// <param name="path1">Path of the file whose analyzer drives the comparison.</param>
        /// <param name="path2">Path of the file handed to <c>GetSimilarityTo</c> as the comparison target.</param>
        /// <returns>
        /// The value pair produced by <c>GetSimilarityTo</c>, returned unchanged.
        /// </returns>
        /// <exception cref="FileNotFoundException">
        /// Logged through <c>CCore.Log</c> and then rethrown when raised while opening or analyzing the files.
        /// </exception>
        static public ValueTuple <double, double> CompareFile(string path1, string path2)
        {
            try
            {
                using (var firstStream = new FileStream(path1, FileMode.Open, FileAccess.Read))
                {
                    var firstAnalyzer = new CodeAnalyzer(firstStream);

                    using (var secondStream = new FileStream(path2, FileMode.Open, FileAccess.Read))
                    {
                        var secondAnalyzer = new CodeAnalyzer(secondStream);

                        CCore.Log("Compare {0} to {1}", path1, path2);
                        ValueTuple <double, double> similarity = firstAnalyzer.GetSimilarityTo(secondAnalyzer);
                        CCore.Log("Similarity: {0}", similarity);

                        // Either component reaching the threshold is enough to raise the warning.
                        bool reachesThreshold = similarity.Item1 >= suspectedSim ||
                                                similarity.Item2 >= suspectedSim;
                        if (reachesThreshold)
                        {
                            Warning();
                        }

                        // Parameterless Log call, kept exactly as the original emits it after each comparison.
                        CCore.Log();

                        // Both streams are disposed (inner first) as control leaves the using blocks.
                        return similarity;
                    }
                }
            }
            catch (FileNotFoundException missing)
            {
                CCore.Log("[ERROR] {0} file not found!", missing.FileName);
                // Rethrow with the original stack trace so callers still observe the failure.
                throw;
            }
        }