/// <summary>
/// Combines the Winnowing similarity and the file properties similarity into a single
/// total similarity, weighting each one by the corresponding parameter of the task.
/// </summary>
/// <param name="winnowSim">The similarity of two files using the Winnow algorithm.</param>
/// <param name="propSim">The similarity of two files using the file properties comparison algorithm.</param>
/// <param name="task">The task which contains the two files being compared; supplies the weights
/// <c>Pproperties</c> and <c>Pwinnowing</c>.</param>
/// <returns>
/// The weighted total similarity of the two files. When both weights sum to zero the
/// unweighted mean of the two similarities is returned instead.
/// </returns>
private double CalculateSimilarity(double winnowSim, double propSim, Model.Models.Task task)
{
    double algParamsSum = task.Pproperties + task.Pwinnowing;

    // Exact comparison is intentional: only a sum of exactly zero makes the
    // normalisation below evaluate 0/0 and produce NaN for every pair of files.
    if (algParamsSum == 0)
    {
        // NOTE(review): falling back to equal weights is an assumption about the
        // desired behaviour when no weights are configured - confirm with the owner.
        return (propSim + winnowSim) / 2;
    }

    // Normalise the two weights so that the coefficients add up to 1.
    double propCoeff = (double)task.Pproperties / algParamsSum;
    double winnowCoeff = (double)task.Pwinnowing / algParamsSum;

    return propCoeff * propSim + winnowCoeff * winnowSim;
}
/// <summary>
/// Produces the pairwise comparison results for all files that belong to a task.
/// </summary>
/// <remarks>
/// When no new comparison is needed the stored results are returned as they are. Otherwise
/// every pair of files is visited once: stored results for the pair are reused, pairs with
/// the same author or with different extensions are skipped, and every remaining pair is
/// compared, stored through the comparison result controller and added to the output.
/// </remarks>
/// <param name="task">A task whose files need to be compared.</param>
/// <returns>A collection of comparison results ordered by descending total similarity.</returns>
public List<ComparisonResult> CompareFiles(Model.Models.Task task)
{
    var files = _fileEntryController.GetFilesByTask(task.Id);
    var previousResults = _comparisonResultController.GetComparisonResultsByTask(task.Id);

    if (!FileComparisonNeeded(task.Id))
    {
        return previousResults.OrderByDescending(r => r.Similarity).ToList();
    }

    var results = new List<ComparisonResult>();

    for (int i = 0; i < files.Count; i++)
    {
        for (int j = i + 1; j < files.Count; j++)
        {
            var first = files[i];
            var second = files[j];

            // A stored result matches the pair regardless of the order of the two file ids.
            var storedForPair = previousResults
                .Where(r => (r.File1Id == first.Id && r.File2Id == second.Id)
                         || (r.File1Id == second.Id && r.File2Id == first.Id))
                .ToList();

            if (storedForPair.Count > 0)
            {
                results.AddRange(storedForPair);
                continue;
            }

            // Files of the same author, or of different extensions, are not compared.
            if (first.FileAuthor.Equals(second.FileAuthor) || !first.FileExtension.Equals(second.FileExtension))
            {
                continue;
            }

            double hashSimilarity;
            double winnowSimilarity;
            double propertiesSimilarity;
            double similarity;
            DetermineAllSimilarities(task, first, second, out hashSimilarity, out winnowSimilarity, out propertiesSimilarity, out similarity);

            // The similarities are multiplied by 100 before being stored.
            var pairResult = new ComparisonResult()
            {
                Jmbag1 = first.FileAuthor,
                Jmbag2 = second.FileAuthor,
                TaskId = task.Id,
                File1Id = first.Id,
                File2Id = second.Id,
                Similarity = similarity * 100,
                PropertiesSimilarity = propertiesSimilarity * 100,
                CheckSumSimilarity = hashSimilarity * 100,
                WinnowingSimilarity = winnowSimilarity * 100
            };

            _comparisonResultController.AddComparisonResult(pairResult);
            results.Add(pairResult);
        }
    }

    return results.OrderByDescending(r => r.Similarity).ToList();
}
/// <summary>
/// Runs the comparison algorithms on two files and reports the similarity obtained from each
/// of them together with the total similarity.
/// </summary>
/// <remarks>
/// The checksum comparison runs first. If it yields a non-zero similarity the remaining
/// algorithms are skipped: the Winnow and properties similarities are reported as 0 and the
/// total similarity as 1.
/// </remarks>
/// <param name="task">The task which contains the two files being compared.</param>
/// <param name="file1">The first file that is being compared.</param>
/// <param name="file2">The second file that is being compared.</param>
/// <param name="hashSim">Calculated similarity of two files using the CheckSum algorithm.</param>
/// <param name="winnowSim">Calculated similarity of two files using the Winnow algorithm.</param>
/// <param name="propertiesSim">Calculated similarity of two files using the file properties comparison algorithm.</param>
/// <param name="totalSim">Value of the total similarity considering the calculated similarities
/// from all of the algorithms being used.</param>
private void DetermineAllSimilarities(Model.Models.Task task, FileEntry file1, FileEntry file2, out double hashSim, out double winnowSim, out double propertiesSim, out double totalSim)
{
    hashSim = CompareHashValues(file1.FileCheckSumAfterPreprocessing, file2.FileCheckSumAfterPreprocessing);

    // Checksum comparison reported a similarity: skip the other algorithms.
    if (hashSim != 0)
    {
        winnowSim = 0;
        propertiesSim = 0;
        totalSim = 1;
        return;
    }

    winnowSim = CompareFilesWithWinnow(file1.FileContentAfterLexAnalysis, file2.FileContentAfterLexAnalysis, task);
    propertiesSim = CompareFileProperties(file1, file2);
    totalSim = CalculateSimilarity(winnowSim, propertiesSim, task);
}
/// <summary>
/// Copies every file with a correct extension into the cleared-files location, strips comments
/// and unnecessary characters from its content, tokenizes it, and appends a
/// <see cref="FileEntry"/> describing the file to <paramref name="taskFiles"/>.
/// </summary>
/// <param name="correctFiles">A collection containing file names with the correct extensions.</param>
/// <param name="clearedFilesPath">The path where the files with the correct extensions are located.
/// The file name is appended to it directly, without inserting a separator.</param>
/// <param name="taskFiles">A collection containing files of a given task; new entries are added to it.</param>
/// <param name="task">The task for which the files were loaded.</param>
/// <param name="author">The author of the correct files that are being processed.</param>
public void ProcessCorrectFiles(List<string> correctFiles, string clearedFilesPath, List<FileEntry> taskFiles, Model.Models.Task task, string author)
{
    foreach (var cf in correctFiles)
    {
        string fileName = Path.GetFileName(cf);
        string path = clearedFilesPath + fileName;

        try
        {
            File.Copy(cf, path);
        }
        catch (IOException)
        {
            // File.Copy refuses to overwrite an existing destination; keep the copy
            // under a suffixed name instead. Other failures are not retried.
            File.Copy(cf, path + "(1)");
        }

        // Read the content and size from the source file itself. When the fallback name
        // was used above, the file at 'path' is an older, different file, so reading from
        // it would attribute the wrong content and checksums to this entry.
        string fileContent = File.ReadAllText(cf);
        string hashBeforePreprocessing = CalculateHash(fileContent);
        long fileSize = new System.IO.FileInfo(cf).Length;
        int noOfLines = File.ReadLines(cf).Count();
        string fileExtension = DetermineFileExtension(Path.GetExtension(fileName));

        // Counts non-overlapping occurrences of two consecutive line breaks,
        // for each of the three line-ending conventions.
        int noOfEmptyLines = Regex.Matches(fileContent, "\n\n").Count
                             + Regex.Matches(fileContent, "\r\r").Count
                             + Regex.Matches(fileContent, "\r\n\r\n").Count;

        // Reset the comment counters for this file.
        // NOTE(review): presumably RemoveComments updates them - confirm.
        noOfLineComments = 0;
        noOfBlockComments = 0;

        // NOTE(review): 'path' is passed unchanged; how RemoveComments uses it is not
        // visible here - confirm it does not read the file at that location.
        fileContent = RemoveComments(path, fileContent);
        fileContent = RemoveUnnecessaryCharsFromFile(fileContent);

        ILexicalAnalyzer lexAnalyzer = LexerFactory.GetLexicalAnalyzer(fileExtension);
        string fileContentAfterLexer = lexAnalyzer.Tokenize(fileContent);

        fileContent = PrepareInputForAnalysis(fileContent);
        fileContentAfterLexer = PrepareInputForAnalysis(fileContentAfterLexer);
        string hashAfterPreprocessing = CalculateHash(fileContent);

        taskFiles.Add(new FileEntry()
        {
            FileName = fileName,
            FileAuthor = author,
            FileCheckSumBeforePreprocessing = hashBeforePreprocessing,
            FileCheckSumAfterPreprocessing = hashAfterPreprocessing,
            FileSize = fileSize,
            FileNoOfLines = noOfLines,
            FileNoOfEmptyLines = noOfEmptyLines,
            FileNoOfLineComments = noOfLineComments,
            FileNoOfBlockComments = noOfBlockComments,
            FileContent = fileContent,
            FileContentAfterLexAnalysis = fileContentAfterLexer,
            FileExtension = fileExtension,
            TaskId = task.Id
        });
    }
}
/// <summary>
/// Computes the Winnow similarity of two file contents, using the k-gram length and the
/// window size configured on the task.
/// </summary>
/// <param name="input1">The content of the first file.</param>
/// <param name="input2">The content of the second file.</param>
/// <param name="task">The task which contains the two files being compared.</param>
/// <returns>The Winnow similarity of two files.</returns>
private double CompareFilesWithWinnow(string input1, string input2, Model.Models.Task task)
{
    var kgramLength = task.KgramLength;
    var windowSize = task.WindowSize;

    double similarity = WinnowAlgorithm.Compare(input1, input2, kgramLength, windowSize);
    return similarity;
}