Esempio n. 1
0
        /// <summary>
        /// Combines the Winnowing similarity and the file-properties similarity into one value
        /// by taking their weighted average. The weights are <c>task.Pwinnowing</c> and
        /// <c>task.Pproperties</c>, each normalized by the sum of the two.
        /// </summary>
        /// <param name="winnowSim">The similarity of two files using the Winnow algorithm.</param>
        /// <param name="propSim">The similarity of two files using the file properties comparison algorithm.</param>
        /// <param name="task">The task which supplies the two weights.</param>
        /// <returns>
        /// The weighted similarity of the two files. When the weights sum to zero the plain
        /// average of both similarities is returned, because the normalization is undefined.
        /// </returns>
        private double CalculateSimilarity(double winnowSim, double propSim, Model.Models.Task task)
        {
            double weightSum = task.Pproperties + task.Pwinnowing;

            // A zero sum would turn the normalization below into a division by zero and
            // leak NaN/Infinity into the stored results, so fall back to equal weighting.
            if (weightSum == 0)
            {
                return (propSim + winnowSim) / 2;
            }

            double propCoeff   = (double)task.Pproperties / weightSum;
            double winnowCoeff = (double)task.Pwinnowing / weightSum;

            return propCoeff * propSim + winnowCoeff * winnowSim;
        }
Esempio n. 2
0
        /// <summary>
        /// Compares every pair of files belonging to a given task and returns the results
        /// ordered by descending total similarity.
        /// </summary>
        /// <remarks>
        /// If <c>FileComparisonNeeded</c> reports that no comparison is required, the previously
        /// stored results are returned as they are. Otherwise each pair of files is handled as follows:
        /// a pair that already has stored results reuses them; a pair written by the same author
        /// or having different file extensions is skipped; any other pair is compared, and the new
        /// result is stored through the comparison result controller.
        /// </remarks>
        /// <param name="task">A task whose files need to be compared.</param>
        /// <returns>A collection of comparison results, most similar pairs first.</returns>
        public List <ComparisonResult> CompareFiles(Model.Models.Task task)
        {
            var previousResults = _comparisonResultController.GetComparisonResultsByTask(task.Id);

            if (!FileComparisonNeeded(task.Id))
            {
                return previousResults.OrderByDescending(r => r.Similarity).ToList();
            }

            // The files are only needed when a comparison actually runs, so they are
            // loaded after the early return above.
            var files   = _fileEntryController.GetFilesByTask(task.Id);
            var results = new List<ComparisonResult>();

            for (int i = 0; i < files.Count; i++)
            {
                var first = files[i];

                for (int j = i + 1; j < files.Count; j++)
                {
                    var second = files[j];

                    // A stored result may reference the pair in either order.
                    var storedResults = previousResults
                                        .Where(r => (r.File1Id == first.Id && r.File2Id == second.Id) ||
                                                    (r.File1Id == second.Id && r.File2Id == first.Id))
                                        .ToList();
                    if (storedResults.Count != 0)
                    {
                        results.AddRange(storedResults);
                        continue;
                    }

                    // Static string.Equals is null-safe, unlike calling Equals on a possibly null property.
                    if (string.Equals(first.FileAuthor, second.FileAuthor, StringComparison.Ordinal))
                    {
                        continue;
                    }
                    if (!string.Equals(first.FileExtension, second.FileExtension, StringComparison.Ordinal))
                    {
                        continue;
                    }

                    double hashSimilarity, winnowSimilarity, propertiesSimilarity, similarity;
                    DetermineAllSimilarities(task, first, second, out hashSimilarity, out winnowSimilarity, out propertiesSimilarity, out similarity);

                    // Similarities are multiplied by 100 before being stored.
                    ComparisonResult comparisonResult = new ComparisonResult()
                    {
                        Jmbag1               = first.FileAuthor,
                        Jmbag2               = second.FileAuthor,
                        TaskId               = task.Id,
                        File1Id              = first.Id,
                        File2Id              = second.Id,
                        Similarity           = similarity * 100,
                        PropertiesSimilarity = propertiesSimilarity * 100,
                        CheckSumSimilarity   = hashSimilarity * 100,
                        WinnowingSimilarity  = winnowSimilarity * 100
                    };
                    _comparisonResultController.AddComparisonResult(comparisonResult);
                    results.Add(comparisonResult);
                }
            }

            return results.OrderByDescending(r => r.Similarity).ToList();
        }
Esempio n. 3
0
        /// <summary>
        /// Computes the per-algorithm similarities and the total similarity for a pair of files.
        /// </summary>
        /// <remarks>
        /// The checksums of the preprocessed files are compared first. If that comparison yields
        /// a non-zero value, the other algorithms are not run: the Winnow and properties
        /// similarities are reported as 0 and the total similarity as 1. Otherwise the Winnow and
        /// properties comparisons are run and combined by <c>CalculateSimilarity</c>.
        /// </remarks>
        /// <param name="task">The task which contains the two files being compared.</param>
        /// <param name="file1">The first file of the pair.</param>
        /// <param name="file2">The second file of the pair.</param>
        /// <param name="hashSim">Receives the similarity from the CheckSum comparison.</param>
        /// <param name="winnowSim">Receives the similarity from the Winnow algorithm.</param>
        /// <param name="propertiesSim">Receives the similarity from the file properties comparison.</param>
        /// <param name="totalSim">Receives the total similarity derived from the values above.</param>
        private void DetermineAllSimilarities(Model.Models.Task task, FileEntry file1, FileEntry file2,
                                              out double hashSim, out double winnowSim, out double propertiesSim, out double totalSim)
        {
            hashSim = CompareHashValues(file1.FileCheckSumAfterPreprocessing, file2.FileCheckSumAfterPreprocessing);

            // Non-zero checksum similarity: skip the remaining algorithms.
            if (hashSim != 0)
            {
                winnowSim     = 0;
                propertiesSim = 0;
                totalSim      = 1;
                return;
            }

            winnowSim     = CompareFilesWithWinnow(file1.FileContentAfterLexAnalysis, file2.FileContentAfterLexAnalysis, task);
            propertiesSim = CompareFileProperties(file1, file2);
            totalSim      = CalculateSimilarity(winnowSim, propertiesSim, task);
        }
Esempio n. 4
0
        /// <summary>
        /// Copies each file with a correct extension into the cleared-files location, collects
        /// its metrics, strips comments and unnecessary characters, runs the lexical analyzer
        /// and appends the resulting <see cref="FileEntry"/> to <paramref name="taskFiles"/>.
        /// </summary>
        /// <remarks>
        /// When a file with the same name already exists at the destination, the copy gets a
        /// numeric suffix ("(1)", "(2)", ...). Content and size are read from the copy that was
        /// actually written, not from the older file that caused the collision.
        /// </remarks>
        /// <param name="correctFiles">A collection containing paths of files with the correct extensions.</param>
        /// <param name="clearedFilesPath">The location the files are copied to; the file name is appended
        /// directly, so it is expected to end with a directory separator.</param>
        /// <param name="taskFiles">The collection that receives one entry per processed file.</param>
        /// <param name="task">The task for which the files were loaded.</param>
        /// <param name="author">The author of the correct files that are being processed.</param>
        public void ProcessCorrectFiles(List <string> correctFiles, string clearedFilesPath, List <FileEntry> taskFiles, Model.Models.Task task, string author)
        {
            foreach (var cf in correctFiles)
            {
                string fileName = Path.GetFileName(cf);
                string path     = clearedFilesPath + fileName;

                // Pick a destination that does not exist yet instead of relying on File.Copy
                // throwing. The first collision still produces the "(1)" suffix.
                string copyPath = path;
                for (int suffix = 1; File.Exists(copyPath); suffix++)
                {
                    copyPath = path + "(" + suffix + ")";
                }
                File.Copy(cf, copyPath);

                // Read from the copy that was just written; on a name collision `path`
                // refers to a different, older file.
                string fileContent             = File.ReadAllText(copyPath);
                string hashBeforePreprocessing = CalculateHash(fileContent);
                long   fileSize      = new System.IO.FileInfo(copyPath).Length;
                int    noOfLines     = File.ReadLines(cf).Count();
                string fileExtension = DetermineFileExtension(Path.GetExtension(fileName));

                int noOfEmptyLines = Regex.Matches(fileContent, "\n\n").Count
                                     + Regex.Matches(fileContent, "\r\r").Count
                                     + Regex.Matches(fileContent, "\r\n\r\n").Count;

                // The comment counters are reset for every file before comments are removed.
                // NOTE(review): presumably RemoveComments updates them - confirm.
                noOfLineComments  = 0;
                noOfBlockComments = 0;
                // NOTE(review): RemoveComments still receives the un-suffixed path, as before;
                // presumably it only uses it to detect the file type - confirm.
                fileContent       = RemoveComments(path, fileContent);
                fileContent       = RemoveUnnecessaryCharsFromFile(fileContent);

                // Tokenization runs on the content before PrepareInputForAnalysis is applied to it.
                ILexicalAnalyzer lexAnalyzer           = LexerFactory.GetLexicalAnalyzer(fileExtension);
                string           fileContentAfterLexer = lexAnalyzer.Tokenize(fileContent);
                fileContent           = PrepareInputForAnalysis(fileContent);
                fileContentAfterLexer = PrepareInputForAnalysis(fileContentAfterLexer);
                string hashAfterPreprocessing = CalculateHash(fileContent);

                taskFiles.Add(new FileEntry()
                {
                    FileName   = fileName,
                    FileAuthor = author,
                    FileCheckSumBeforePreprocessing = hashBeforePreprocessing,
                    FileCheckSumAfterPreprocessing  = hashAfterPreprocessing,
                    FileSize                    = fileSize,
                    FileNoOfLines               = noOfLines,
                    FileNoOfEmptyLines          = noOfEmptyLines,
                    FileNoOfLineComments        = noOfLineComments,
                    FileNoOfBlockComments       = noOfBlockComments,
                    FileContent                 = fileContent,
                    FileContentAfterLexAnalysis = fileContentAfterLexer,
                    FileExtension               = fileExtension,
                    TaskId = task.Id
                });
            }
        }
Esempio n. 5
0
 /// <summary>
 /// Runs the Winnow comparison on two inputs, using the k-gram length and window size
 /// configured on the task.
 /// </summary>
 /// <param name="input1">The content of the first file.</param>
 /// <param name="input2">The content of the second file.</param>
 /// <param name="task">The task which supplies <c>KgramLength</c> and <c>WindowSize</c>.</param>
 /// <returns>The value returned by <c>WinnowAlgorithm.Compare</c> for the two inputs.</returns>
 private double CompareFilesWithWinnow(string input1, string input2, Model.Models.Task task)
 {
     var kgramLength = task.KgramLength;
     var windowSize  = task.WindowSize;

     return WinnowAlgorithm.Compare(input1, input2, kgramLength, windowSize);
 }