Beispiel #1
0
        public ValueTuple <double, double> GetSimilarityTo(CodeAnalyzer analyzer)
        {
            int lineToCompareCount = analyzer.lines.Count;

            bool[]      isSet           = new bool[lineToCompareCount];
            int[]       highestSimIdxes = new int[lineToCompareCount];
            float[]     highestSims     = new float[lineToCompareCount];
            Queue <int> queue           = new Queue <int>();

            bool reEstimate = false;

            Action <int> EstimateSimilarity = idx =>
            {
                var str1 = lines[idx];
                CCore.Log("EstimateSimilarity: {0}", str1);

                float highestSim = 0.0f;
                int   highestIdx = -1;
                // get most similar line
                for (int j = 0; j < analyzer.lines.Count; j++)
                {
                    // skip if already find the same line
                    if (highestSims[j] == 1.0f)
                    {
                        continue;
                    }

                    var str2 = analyzer.lines[j];

                    (int min, int max)pair = (Math.Min(str1.Length, str2.Length), Math.Max(str1.Length, str2.Length));
                    if (reEstimate)
                    {
                        // skip estimate if highestSim absolutely < highestSims[j]
                        if (pair.min / (float)pair.max < highestSims[j])
                        {
                            continue;
                        }
                    }
                    else
                    {
                        // skip estimate if longer string length > 2 * shorter && already has >50% sim string
                        if (pair.max >> 1 >= pair.min && highestSims[j] >= 0.5f)
                        {
                            continue;
                        }
                    }

                    float strSim = Levenshtein(str1, str2);
                    if (highestSim < strSim && strSim > highestSims[j])
                    {
                        highestSim = strSim;
                        highestIdx = j;
                    }

                    // find same line, quit loop
                    if (highestSim == 1.0f)
                    {
                        break;
                    }
                }
Beispiel #2
0
        /// <summary>
        /// Strips redundant filler from a line of code: runs of repeated spaces,
        /// semicolons, empty string literals (<c>""</c>) and tabs are collapsed to a
        /// single occurrence, and any ';'-delimited segment made up solely of filler
        /// characters (space, tab, double quote) is emptied.
        /// </summary>
        /// <param name="str">The text to clean up.</param>
        /// <returns>
        /// The cleaned text. When it differs from <paramref name="str"/>, the change
        /// is reported through <c>CCore.Log</c>.
        /// </returns>
        string RemoveRedundancy(string str)
        {
            string tmp = str;

            // Repeatedly drops one copy of `token` wherever it appears twice in a row,
            // until no doubled occurrence is left. Ordinal search: these are literal
            // code characters, not linguistic text.
            void Collapse(string token)
            {
                string doubled = token + token;
                int hit;
                while ((hit = tmp.IndexOf(doubled, StringComparison.Ordinal)) >= 0)
                {
                    tmp = tmp.Remove(hit, token.Length);
                }
            }

            string[] ignoreList = { " ", ";", "\"\"", "\t" };
            foreach (var s in ignoreList)
            {
                Collapse(s);
            }

            // A character is filler when it occurs in any entry of ignoreList.
            bool IsFiller(char c) => ignoreList.Any(token => token.Contains(c));

            // Empty every segment that consists only of filler characters. The text is
            // rebuilt segment by segment so that exactly the classified segment is
            // dropped; looking the segment text up with IndexOf would hit the first
            // matching character anywhere in the string (e.g. the space in "int a").
            tmp = string.Join(";", tmp.Split(';').Select(segment => segment.All(IsFiller) ? string.Empty : segment));

            // Emptied segments leave adjacent semicolons behind; collapse them again.
            Collapse(";");

            if (str != tmp)
            {
                CCore.Log("RemoveRedundancy: {0} -> {1}", str, tmp);
            }

            return(tmp);
        }
Beispiel #3
0
        /// <summary>
        /// Tries to interpret a command-line argument as one of the known options.
        /// </summary>
        /// <remarks>
        /// Matching is case-insensitive. The argument is lowered with the invariant
        /// culture so that recognition of the option names does not depend on the
        /// machine's locale (a culture-sensitive lowering maps 'I' differently under
        /// Turkish cultures and would miss the lowercase labels below).
        /// </remarks>
        /// <param name="arg">The raw argument text.</param>
        /// <returns>
        /// <c>true</c> when the argument was recognised and the matching setting was
        /// updated; <c>false</c> when it is not a known option.
        /// </returns>
        /// <exception cref="FormatException">
        /// The text following the <c>suspectStr</c> marker is not a valid number.
        /// </exception>
        static bool Parse(string arg)
        {
            var lower = arg.ToLowerInvariant();

            // Locate the threshold marker once, with an ordinal search, instead of an
            // ordinal Contains followed by a second, culture-sensitive IndexOf.
            int suspectIdx = lower.IndexOf(suspectStr, StringComparison.Ordinal);
            if (suspectIdx >= 0)
            {
                CCore.Log("Parse: {0}", arg);
                // Everything after the marker is the threshold value.
                // NOTE(review): double.Parse uses the current culture here, so the
                // accepted decimal separator depends on the machine's locale — confirm
                // whether that is intended.
                suspectedSim = double.Parse(lower.Substring(suspectIdx + suspectStr.Length));
                return(true);
            }
            switch (lower)
            {
            case "-ignoreredundancy":
                CCore.Log("Parse: {0}", arg);
                ignoreRedundancy = true;
                return(true);

            case "-ignorecommend":
                CCore.Log("Parse: {0}", arg);
                ignoreCommend = true;
                return(true);

            default:
                return(false);
            }
        }
Beispiel #4
0
        /*
         * static void Main(string[] args)
         * {
         *
         #if DEBUG
         *  Console.ReadKey();
         #endif
         *  Console.WriteLine();
         *
         *  List<string> fileName = new List<string>();
         *
         *  for (int i = 0; i < args.Length; i++)
         *  {
         *      if (Parse(args[i]))
         *      {
         *          continue;
         *      }
         *      else
         *      {
         *          fileName.Add(args[i]);
         *      }
         *  }
         *
         *  try
         *  {
         *      for (int i = 0; i < fileName.Count - 1; i++)
         *      {
         *          using (FileStream file1 = new FileStream(fileName[i], FileMode.Open))
         *          {
         *              var analyze1 = new CodeAnalyzer(file1);
         *              for (int j = i + 1; j < fileName.Count; j++)
         *              {
         *                  using (FileStream file2 = new FileStream(fileName[j], FileMode.Open))
         *                  {
         *                      var analyze2 = new CodeAnalyzer(file2);
         *                      Console.WriteLine("Compare {0} to {1}", fileName[i], fileName[j]);
         *                      var sim = analyze1.GetSimilarityTo(analyze2);
         *                      Console.WriteLine("Similarity: {0}", sim);
         *                      if (sim >= suspectedSim)
         *                      {
         *                          Warning();
         *                      }
         *                      Console.WriteLine();
         *                  }
         *              }
         *          }
         *      }
         *  }
         *  catch (FileNotFoundException e)
         *  {
         *      Console.WriteLine("[ERROR] {0} file not found!", e.FileName);
         *      throw;
         *  }
         #if DEBUG
         *  Console.ReadLine();
         #endif
         * }
         */
        /// <summary>
        /// Opens both files read-only, builds a <c>CodeAnalyzer</c> for each and
        /// returns the similarity pair produced by comparing the first against the
        /// second. <c>Warning()</c> is invoked when either component of the pair
        /// reaches <c>suspectedSim</c>.
        /// </summary>
        /// <param name="path1">Path of the first file.</param>
        /// <param name="path2">Path of the second file.</param>
        /// <returns>The pair returned by <c>GetSimilarityTo</c>.</returns>
        /// <exception cref="FileNotFoundException">
        /// One of the files does not exist; the error is logged and rethrown.
        /// </exception>
        static public ValueTuple <double, double> CompareFile(string path1, string path2)
        {
            try
            {
                using (var firstStream = new FileStream(path1, FileMode.Open, FileAccess.Read))
                {
                    // The first analyzer is built before the second file is opened.
                    var firstAnalyzer = new CodeAnalyzer(firstStream);

                    using (var secondStream = new FileStream(path2, FileMode.Open, FileAccess.Read))
                    {
                        var secondAnalyzer = new CodeAnalyzer(secondStream);

                        CCore.Log("Compare {0} to {1}", path1, path2);
                        ValueTuple <double, double> similarity = firstAnalyzer.GetSimilarityTo(secondAnalyzer);
                        CCore.Log("Similarity: {0}", similarity);

                        bool suspicious = similarity.Item1 >= suspectedSim
                                          || similarity.Item2 >= suspectedSim;
                        if (suspicious)
                        {
                            Warning();
                        }

                        CCore.Log();
                        return(similarity);
                    }
                }
            }
            catch (FileNotFoundException missing)
            {
                CCore.Log("[ERROR] {0} file not found!", missing.FileName);
                throw;
            }
        }
Beispiel #5
0
 /// <summary>
 /// Writes the "similarity too high" warning to the log.
 /// </summary>
 static void Warning()
 {
     const string notice = "[WARNING] too high similarity!";

     CCore.Log(notice);
     //MessageBox.Show("[WARNING] too high similarity!", "Compare Result");
 }