public static async Task <PlagiarismInLocalDB> Find(string text) { var simplifiedText = TextManager.SimplifyText(text).Replace("\r\n", " "); var words = TextManager.WordsFromText(simplifiedText).ToArray(); var wordCount = words.Length; var plagiarismResult = new Dictionary <int, HashSet <(int DBDocIndex, int initialDocIndex)> >(); double vodnost = 0; for (int i = 0; i <= words.Length - Shingle.Lenght; i++) { if (TextManager.StopWords.Contains(words[i])) { vodnost++; } var documentIdToWordsPositions = SQLLoader.GetDocuments(Shingle.ListFromWords(words, i)); var plagiarismForShingle = Logic.FindPlagiarism(documentIdToWordsPositions, i); foreach (var kvp in plagiarismForShingle) { if (plagiarismResult.TryGetValue(kvp.Key, out HashSet <(int DBDocIndex, int initialDocIndex)> plagiarizedPositions)) { plagiarizedPositions.UnionWith(kvp.Value); }
/// <summary>
/// Builds a <see cref="PlagiarismInLocalDB"/> report for <paramref name="text"/>:
/// the word list, the word count, and two text-quality ratios.
/// </summary>
/// <param name="text">Raw input text; it is stored unchanged in the result's <c>Text</c>.</param>
/// <returns>
/// A report in which <c>Vodnost</c> is the share of words found in
/// <c>TextManager.StopWords</c> and <c>Toshnotnost</c> is the share of words that
/// repeat an earlier word (<c>(total - distinct) / total</c>). Both are 0 for a text
/// with no words.
/// </returns>
/// <remarks>
/// The method contains no <c>await</c> and therefore runs synchronously; <c>async</c>
/// is kept so that exceptions keep surfacing through the returned task.
/// </remarks>
public static async Task<PlagiarismInLocalDB> Find(string text)
{
    var simplifiedText = TextManager.SimplifyText(text).Replace("\r\n", " ");
    var words = TextManager.WordsFromText(simplifiedText).ToArray();
    var wordCount = words.Length;
    var plagiarismResult = new Dictionary<int, HashSet<(int DBDocIndex, int initialDocIndex)>>();

    for (int i = 0; i <= words.Length - Shingle.Lenght; i++)
    {
        // NOTE(review): the step that ran Logic.FindPlagiarism on this query result and
        // merged it into plagiarismResult is disabled in this version, so the result of
        // the query is discarded and PlagiarismResult is always returned empty.
        // The query and the progress output are kept to preserve current behaviour —
        // confirm whether the matching step should be restored or the query removed.
        SQLLoader.GetDocuments(Shingle.ListFromWords(words, i));
        Console.WriteLine(i);
    }

    // Count stop words in a single pass over every word. The previous two-loop split
    // started its second loop at a negative index for texts shorter than
    // Shingle.Lenght - 1 words and crashed with IndexOutOfRangeException.
    int stopWordCount = 0;
    foreach (var word in words)
    {
        if (TextManager.StopWords.Contains(word))
        {
            stopWordCount++;
        }
    }

    // Guard the divisions: an empty text would otherwise produce NaN ratios.
    double vodnost = 0;
    double toshnotnost = 0;
    if (words.Length > 0)
    {
        double total = words.Length;
        vodnost = stopWordCount / total;
        toshnotnost = (words.Length - words.Distinct().Count()) / total;
    }

    return new PlagiarismInLocalDB
    {
        Words = words,
        WordCount = wordCount,
        PlagiarismResult = plagiarismResult,
        Vodnost = vodnost,
        Toshnotnost = toshnotnost,
        Text = text
    };
}
/// <summary>
/// Looks up every shingle (window of <c>Shingle.Lenght</c> consecutive entries of
/// <paramref name="initialDocIndexes"/>) via <c>SQLLoader.GetDocuments</c>, one task per
/// shingle, and merges the per-shingle results into a single <see cref="Plagiarism{T}"/>.
/// </summary>
/// <param name="initialWords">Words passed to <c>TextManager.ComposeHtmlText</c> when building the per-source HTML.</param>
/// <param name="initialDocIndexToSimplifiedWord">Maps a value from <paramref name="initialDocIndexes"/> to the word sent to the database lookup.</param>
/// <param name="initialDocIndexes">Word indexes from which the shingles are formed, in order.</param>
/// <param name="simplifiedWords">Not used by this method; kept for signature compatibility.</param>
/// <returns>
/// The merged result, with <c>SourceIdToInitialDocumentHtml</c> filled for every key of
/// <c>SourceIdToInitialWordsIndexes</c> and the lookup time recorded in <c>DebugLogs</c>.
/// When there are fewer indexes than one shingle, no lookups are made.
/// </returns>
/// <remarks>
/// The shingle tasks are awaited instead of blocked on, so a failing shingle now surfaces
/// as its own exception rather than wrapped in an <c>AggregateException</c>.
/// </remarks>
public static async Task<Plagiarism<int>> FindAsync(string[] initialWords, Dictionary<int, string> initialDocIndexToSimplifiedWord, int[] initialDocIndexes, string[] simplifiedWords)
{
    string debugLog = "";
    var plagiarismDB = new Plagiarism<int>();
    Stopwatch stopwatch = Stopwatch.StartNew();

    // Clamp to zero: inputs shorter than Shingle.Lenght - 1 used to request a
    // negative array size and throw OverflowException.
    int shingleTotal = initialDocIndexes.Length - Shingle.Lenght + 1;
    if (shingleTotal < 0)
    {
        shingleTotal = 0;
    }

    var tasks = new Task<Plagiarism<int>>[shingleTotal];
    for (int i = 0; i < shingleTotal; i++)
    {
        // Copy the loop variable so each closure sees its own shingle start.
        int shingleStart = i;
        tasks[i] = Task<Plagiarism<int>>.Factory.StartNew(() =>
        {
            var wordsList = new List<string>();
            var initialDocIndexesForShingle = new List<int>();
            for (int j = 0; j < Shingle.Lenght; j++)
            {
                int docIndex = initialDocIndexes[shingleStart + j];
                wordsList.Add(initialDocIndexToSimplifiedWord[docIndex]);
                initialDocIndexesForShingle.Add(docIndex);
            }

            var documentIdToDBDocWordsPositionsForShingle = SQLLoader.GetDocuments(wordsList);
            return Plagiarism<int>.FindPlagiarism(documentIdToDBDocWordsPositionsForShingle, initialDocIndexesForShingle);
        });
    }

    // Await rather than Task.WaitAll so no thread is blocked while the lookups run;
    // ConfigureAwait(false) keeps callers that block on the returned task from
    // deadlocking on a captured synchronization context.
    var shingleResults = await Task.WhenAll(tasks).ConfigureAwait(false);

    // Task.WhenAll returns results in task order, so merging order is unchanged.
    foreach (var plagiarismDBForShingle in shingleResults)
    {
        plagiarismDB.Add(plagiarismDBForShingle);
    }

    stopwatch.Stop();
    debugLog += "DB PLAG TIME " + stopwatch.ElapsedMilliseconds + " ";

    foreach (var kvp in plagiarismDB.SourceIdToInitialWordsIndexes)
    {
        plagiarismDB.SourceIdToInitialDocumentHtml.Add(kvp.Key, TextManager.ComposeHtmlText(initialWords, kvp.Value));
    }

    plagiarismDB.DebugLogs = debugLog;
    return plagiarismDB;
}