/// <summary>
/// Persists a most-similar-submission record inside an ambient transaction.
/// </summary>
/// <remarks>
/// The record is handed to <c>db.AddOrUpdate</c> together with a predicate that matches
/// rows having the same <c>SubmissionId</c>, then the context changes are saved and the
/// transaction scope is marked complete.
/// </remarks>
/// <param name="mostSimilarSubmission">Record to add or update.</param>
public async Task SaveMostSimilarSubmissionAsync(MostSimilarSubmission mostSimilarSubmission)
{
    // The scope is created with a 30-second timeout and with async flow enabled,
    // so it stays ambient across the await below.
    var transactionTimeout = TimeSpan.FromSeconds(30);

    using (var transactionScope = new TransactionScope(
        TransactionScopeOption.Required,
        transactionTimeout,
        TransactionScopeAsyncFlowOption.Enabled))
    {
        db.AddOrUpdate(
            mostSimilarSubmission,
            existing => existing.SubmissionId == mostSimilarSubmission.SubmissionId);

        await db.SaveChangesAsync().ConfigureAwait(false);

        // Reached only when saving did not throw; otherwise the scope is disposed uncompleted.
        transactionScope.Complete();
    }
}
/// <summary>
/// Persists a most-similar-submission record inside a transaction, running the whole
/// operation through <c>FuncUtils.TrySeveralTimesAsync</c>.
/// </summary>
/// <remarks>
/// <c>TrySeveralTimesAsync</c> receives the save operation, the number 3 and a delegate that
/// awaits a 30 ms <see cref="Task.Delay(int)"/>.
/// NOTE(review): presumably these are the attempt count and the pause between attempts;
/// confirm against FuncUtils.
/// </remarks>
/// <param name="mostSimilarSubmission">Record to add or update.</param>
public async Task SaveMostSimilarSubmissionAsync(MostSimilarSubmission mostSimilarSubmission)
{
    // One complete attempt: open a transaction scope, add/update, save, complete.
    Func<Task<int>> saveInTransaction = async () =>
    {
        var transactionTimeout = TimeSpan.FromSeconds(30);

        using (var transactionScope = new TransactionScope(
            TransactionScopeOption.Required,
            transactionTimeout,
            TransactionScopeAsyncFlowOption.Enabled))
        {
            db.AddOrUpdate(
                mostSimilarSubmission,
                existing => existing.SubmissionId == mostSimilarSubmission.SubmissionId);

            await db.SaveChangesAsync().ConfigureAwait(false);
            transactionScope.Complete();

            // The value itself is unused; the helper is called with a result-returning delegate.
            return 0;
        }
    };

    await FuncUtils.TrySeveralTimesAsync(saveInTransaction, 3, () => Task.Delay(30));
}
/// <summary>
/// Best-effort save of a most-similar-submission record: the transactional save is executed
/// through an <c>NpgsqlRetryingExecutionStrategy</c> created with a retry count of 3, and an
/// <see cref="InvalidOperationException"/> is logged as a warning instead of being rethrown.
/// </summary>
/// <param name="mostSimilarSubmission">Record to add or update.</param>
public async Task TrySaveMostSimilarSubmissionAsync(MostSimilarSubmission mostSimilarSubmission)
{
    // One complete attempt: open a transaction scope, add/update, save, complete.
    Func<Task<int>> saveInTransaction = async () =>
    {
        var transactionTimeout = TimeSpan.FromSeconds(30);

        using (var transactionScope = new TransactionScope(
            TransactionScopeOption.Required,
            transactionTimeout,
            TransactionScopeAsyncFlowOption.Enabled))
        {
            db.AddOrUpdate(
                mostSimilarSubmission,
                existing => existing.SubmissionId == mostSimilarSubmission.SubmissionId);

            await db.SaveChangesAsync().ConfigureAwait(false);
            transactionScope.Complete();

            // The value itself is unused; a result-returning delegate is passed to ExecuteAsync.
            return 0;
        }
    };

    try
    {
        var retryingStrategy = new NpgsqlRetryingExecutionStrategy(db, 3);
        await retryingStrategy.ExecuteAsync(saveInTransaction);
    }
    catch (InvalidOperationException ex)
    {
        // Only InvalidOperationException is swallowed; any other exception propagates to the caller.
        log.Warn(ex);
    }
}
/// <summary>
/// Looks for submissions that share snippets with <paramref name="submission"/> and builds a
/// plagiarism entry for every one whose link weight is at least
/// <c>suspicionLevels.FaintSuspicion</c>. The candidate with the highest weight, if any, is also
/// passed to <c>mostSimilarSubmissionsRepo.TrySaveMostSimilarSubmissionAsync</c>.
/// </summary>
/// <remarks>
/// How this method works is described in the PlagiarismDetectorConfiguration class.
/// The link weight of a candidate is: (matched token count in this submission + matched token
/// count in the candidate, summed over all snippet types) / (token count of this submission +
/// token count of the candidate), divided by the number of snippet types.
/// </remarks>
/// <param name="submission">Submission being checked.</param>
/// <param name="suspicionLevels">Thresholds; only <c>FaintSuspicion</c> is read here.</param>
/// <param name="submissionInfluenceLimitInMonths">
/// Only submissions added later than "now minus this many months" are considered in the first
/// search; the value is also passed to <c>submissionsRepo.GetAuthorsCountAsync</c>.
/// </param>
/// <returns>Plagiarism entries for candidates whose weight reaches the faint-suspicion level.</returns>
public async Task<List<Plagiarism>> GetPlagiarismsAsync(Submission submission, SuspicionLevels suspicionLevels, int submissionInfluenceLimitInMonths)
{
    /* Dictionaries by submission id and snippet type */
    var tokensMatchedInThisSubmission = new DefaultDictionary<Tuple<int, SnippetType>, HashSet<int>>();
    var tokensMatchedInOtherSubmissions = new DefaultDictionary<Tuple<int, SnippetType>, HashSet<int>>();

    var maxSnippetsCountFirstSearch = configuration.AntiPlagiarism.PlagiarismDetector.CountOfColdestSnippetsUsedToFirstSearch;
    var maxSnippetsCountSecondSearch = configuration.AntiPlagiarism.PlagiarismDetector.CountOfColdestSnippetsUsedToSecondSearch;
    var maxSubmissionsAfterFirstSearch = configuration.AntiPlagiarism.PlagiarismDetector.MaxSubmissionsAfterFirstSearch;
    var authorsCountThreshold = configuration.AntiPlagiarism.PlagiarismDetector.SnippetAuthorsCountThreshold;

    /* We make two queries for finding suspicion submissions: first query is more limited by snippets count (`maxSnippetsCountFirstSearch` from configuration).
     * For the first query we are looking for all submissions which are similar to our submission and filter only top-`maxSubmissionsAfterFirstSearch` by matched snippets count */
    var snippetsOccurrencesFirstSearch = await snippetsRepo.GetSnippetsOccurrencesForSubmissionAsync(
        submission,
        maxSnippetsCountFirstSearch,
        authorsCountMinThreshold: 2,
        authorsCountMaxThreshold: authorsCountThreshold
    ).ConfigureAwait(false);
    var snippetsIdsFirstSearch = new HashSet<int>(snippetsOccurrencesFirstSearch.Select(o => o.SnippetId));
    log.Info($"Found following snippets after first search: {string.Join(", ", snippetsIdsFirstSearch)}");

    var useSubmissionsFromDate = DateTime.Now.AddMonths(-submissionInfluenceLimitInMonths);
    var suspicionSubmissionIds = await snippetsRepo.GetSubmissionIdsWithSameSnippets(
        snippetsIdsFirstSearch,
        /* Filter only submissions BY THIS client, THIS task, THIS language and NOT BY THIS author */
        o => o.Submission.ClientId == submission.ClientId &&
            o.Submission.TaskId == submission.TaskId &&
            o.Submission.Language == submission.Language &&
            o.Submission.AuthorId != submission.AuthorId &&
            o.Submission.AddingTime > useSubmissionsFromDate,
        maxSubmissionsAfterFirstSearch
    ).ConfigureAwait(false);
    log.Info($"Found following submissions after first search: {string.Join(", ", suspicionSubmissionIds)}");

    /* Second query: a wider set of this submission's snippets, matched only against the submissions selected above */
    var snippetsOccurrences = await snippetsRepo.GetSnippetsOccurrencesForSubmissionAsync(submission, maxSnippetsCountSecondSearch, 0, authorsCountThreshold).ConfigureAwait(false);
    var snippetsIds = new HashSet<int>(snippetsOccurrences.Select(o => o.SnippetId));

    var allOtherOccurrences = (await snippetsRepo.GetSnippetsOccurrences(
        snippetsIds,
        /* Filter only snippet occurrences in submissions BY THIS client, THIS task, THIS language and NOT BY THIS author */
        o => o.Submission.ClientId == submission.ClientId &&
            o.Submission.TaskId == submission.TaskId &&
            o.Submission.Language == submission.Language &&
            o.Submission.AuthorId != submission.AuthorId &&
            /* ... and only in submissions filtered by first query */
            suspicionSubmissionIds.Contains(o.SubmissionId)
    ).ConfigureAwait(false)).GroupBy(o => o.SnippetId).ToDictionary(kvp => kvp.Key, kvp => kvp.ToList());

    var snippetsStatistics = await snippetsRepo.GetSnippetsStatisticsAsync(submission.ClientId, submission.TaskId, submission.Language, snippetsIds).ConfigureAwait(false);

    var matchedSnippets = new DefaultDictionary<int, List<MatchedSnippet>>();
    var authorsCount = await submissionsRepo.GetAuthorsCountAsync(submission.ClientId, submission.TaskId, submission.Language, submissionInfluenceLimitInMonths).ConfigureAwait(false);

    foreach (var snippetOccurrence in snippetsOccurrences)
    {
        var otherOccurrences = allOtherOccurrences.GetOrDefault(snippetOccurrence.SnippetId, new List<SnippetOccurence>());

        var snippet = snippetOccurrence.Snippet;
        var snippetType = snippet.SnippetType;

        foreach (var otherOccurence in otherOccurrences)
        {
            /* Mark every token covered by the snippet as matched, on both sides of the pair */
            for (var i = 0; i < snippet.TokensCount; i++)
            {
                var tokenIndexInThisSubmission = snippetOccurrence.FirstTokenIndex + i;
                var tokenIndexInOtherSubmission = otherOccurence.FirstTokenIndex + i;
                tokensMatchedInThisSubmission[Tuple.Create(otherOccurence.SubmissionId, snippetType)].Add(tokenIndexInThisSubmission);
                tokensMatchedInOtherSubmissions[Tuple.Create(otherOccurence.SubmissionId, snippetType)].Add(tokenIndexInOtherSubmission);
            }

            matchedSnippets[otherOccurence.SubmissionId].Add(new MatchedSnippet
            {
                SnippetType = snippetType,
                TokensCount = snippet.TokensCount,
                OriginalSubmissionFirstTokenIndex = snippetOccurrence.FirstTokenIndex,
                PlagiarismSubmissionFirstTokenIndex = otherOccurence.FirstTokenIndex,
                SnippetFrequency = GetSnippetFrequency(snippetsStatistics[snippet.Id], authorsCount),
            });
        }
    }

    /* Keys are (submission id, snippet type) pairs, so the same submission id appears once per snippet type: deduplicate before loading */
    var plagiarismSubmissionIds = tokensMatchedInOtherSubmissions.Keys.Select(tuple => tuple.Item1).Distinct().ToList();
    var plagiarismSubmissions = await submissionsRepo.GetSubmissionsByIdsAsync(plagiarismSubmissionIds).ConfigureAwait(false);

    var plagiarisms = new List<Plagiarism>();

    var allSnippetTypes = GetAllSnippetTypes();
    var thisSubmissionLength = submission.TokensCount;
    MostSimilarSubmission mostSimilarSubmission = null;
    foreach (var plagiarismSubmission in plagiarismSubmissions)
    {
        var unionLength = 0;
        foreach (var snippetType in allSnippetTypes)
        {
            var submissionIdWithSnippetType = Tuple.Create(plagiarismSubmission.Id, snippetType);
            if (!tokensMatchedInThisSubmission.ContainsKey(submissionIdWithSnippetType))
            {
                continue;
            }

            unionLength += tokensMatchedInThisSubmission[submissionIdWithSnippetType].Count;
            unionLength += tokensMatchedInOtherSubmissions[submissionIdWithSnippetType].Count;
        }

        var plagiarismSubmissionLength = plagiarismSubmission.TokensCount;
        var totalLength = thisSubmissionLength + plagiarismSubmissionLength;
        var weight = totalLength == 0 ? 0 : ((double)unionLength) / totalLength;
        /* Normalize weight */
        weight /= allSnippetTypes.Count;

        /* Track the candidate with the highest weight, even if it is below the suspicion threshold */
        if (mostSimilarSubmission == null || mostSimilarSubmission.Weight < weight)
        {
            mostSimilarSubmission = new MostSimilarSubmission
            {
                Weight = weight,
                SubmissionId = submission.Id,
                SimilarSubmissionId = plagiarismSubmission.Id,
                Timestamp = DateTime.Now
            };
        }

        log.Info($"Link weight between submisions {submission.Id} and {plagiarismSubmission.Id} is {weight}. Union length is {unionLength}.");

        if (weight < suspicionLevels.FaintSuspicion)
        {
            continue;
        }

        plagiarisms.Add(BuildPlagiarismInfo(plagiarismSubmission, weight, matchedSnippets[plagiarismSubmission.Id]));
    }

    if (mostSimilarSubmission != null)
    {
        await mostSimilarSubmissionsRepo.TrySaveMostSimilarSubmissionAsync(mostSimilarSubmission).ConfigureAwait(false);
    }

    return plagiarisms;
}