Ejemplo n.º 1
0
        /// <summary>
        /// Scans <paramref name="text"/> one shingle at a time: each shingle is looked up through
        /// <c>SQLLoader.GetDocuments</c>, evaluated by <c>Logic.FindPlagiarism</c>, and the reported
        /// positions are merged into a single dictionary.
        /// </summary>
        /// <param name="text">Input text; it is passed through <c>TextManager.SimplifyText</c> and split into words.</param>
        public static async Task <PlagiarismInLocalDB> Find(string text)
        {
            // Simplify the text and turn Windows line breaks into spaces before splitting into words.
            var simplifiedText = TextManager.SimplifyText(text).Replace("\r\n", " ");
            var words          = TextManager.WordsFromText(simplifiedText).ToArray();
            var wordCount      = words.Length;

            // Accumulated matches; the key is whatever Logic.FindPlagiarism keys its result by
            // (presumably a database document id - TODO confirm).
            var    plagiarismResult = new Dictionary <int, HashSet <(int DBDocIndex, int initialDocIndex)> >();
            // Running count of stop words seen so far.
            double vodnost          = 0;

            // i is the index of the first word of the current shingle; the bound keeps every
            // shingle of Shingle.Lenght words inside the array.
            for (int i = 0; i <= words.Length - Shingle.Lenght; i++)
            {
                if (TextManager.StopWords.Contains(words[i]))
                {
                    vodnost++;
                }

                // Look up the shingle that starts at word i.
                var documentIdToWordsPositions = SQLLoader.GetDocuments(Shingle.ListFromWords(words, i));

                var plagiarismForShingle = Logic.FindPlagiarism(documentIdToWordsPositions, i);
                foreach (var kvp in plagiarismForShingle)
                {
                    // Key already present: extend its position set in place with this shingle's positions.
                    if (plagiarismResult.TryGetValue(kvp.Key, out HashSet <(int DBDocIndex, int initialDocIndex)> plagiarizedPositions))
                    {
                        plagiarizedPositions.UnionWith(kvp.Value);
                    }
Ejemplo n.º 2
0
        /// <summary>
        /// Splits <paramref name="text"/> into words, queries <c>SQLLoader.GetDocuments</c> once per
        /// shingle, and computes two ratios over the words: the share of stop words
        /// (<c>Vodnost</c>) and the share of repeated words (<c>Toshnotnost</c>).
        /// </summary>
        /// <remarks>
        /// The method contains no <c>await</c>, so it runs synchronously and returns an already
        /// completed task. Merging of per-shingle matches is currently disabled (see the
        /// commented-out block), so <c>PlagiarismResult</c> is always returned empty.
        /// </remarks>
        /// <param name="text">Input text; it is passed through <c>TextManager.SimplifyText</c> and split into words.</param>
        /// <returns>
        /// A <c>PlagiarismInLocalDB</c> carrying the words, their count, the (empty) match
        /// dictionary, both ratios and the original text. Both ratios are 0 when the text
        /// yields no words.
        /// </returns>
        public static async Task <PlagiarismInLocalDB> Find(string text)
        {
            var simplifiedText = TextManager.SimplifyText(text).Replace("\r\n", " ");
            var words          = TextManager.WordsFromText(simplifiedText).ToArray();
            var wordCount      = words.Length;

            var plagiarismResult = new Dictionary <int, HashSet <(int DBDocIndex, int initialDocIndex)> >();

            // i is the index of the first word of the current shingle. The loop does not run at
            // all when the text has fewer words than one shingle.
            for (int i = 0; i <= words.Length - Shingle.Lenght; i++)
            {
                var documentIdToWordsPositions = SQLLoader.GetDocuments(Shingle.ListFromWords(words, i));

                // NOTE(review): match merging is disabled; the lookup above is still executed
                // but its result is not used. Confirm whether this is intentional.
                //var plagiarismForShingle = Logic.FindPlagiarism(documentIdToWordsPositions, i);
                //foreach (var kvp in plagiarismForShingle)
                //{
                //    if (plagiarismResult.TryGetValue(kvp.Key, out HashSet<(int DBDocIndex, int initialDocIndex)> plagiarizedPositions))
                //    {
                //        plagiarizedPositions.UnionWith(kvp.Value);
                //    }
                //    else
                //    {
                //        plagiarismResult.Add(kvp.Key, new HashSet<(int, int)>(kvp.Value));
                //    }
                //}

                // Progress trace: index of the shingle just processed.
                Console.WriteLine(i);
            }

            double vodnost     = 0;
            double toshnotnost = 0;

            // Guard against an empty word list: dividing by zero would yield NaN for both ratios.
            if (words.Length > 0)
            {
                double totalWords = Convert.ToDouble(words.Length);

                // Count stop words over ALL words in one pass. The previous two-loop version
                // started its tail loop at a negative index when the text was shorter than a
                // shingle and threw IndexOutOfRangeException.
                int stopWordCount = words.Count(word => TextManager.StopWords.Contains(word));

                vodnost     = stopWordCount / totalWords;
                toshnotnost = (words.Length - words.Distinct().Count()) / totalWords;
            }

            return(new PlagiarismInLocalDB
            {
                Words = words,
                WordCount = wordCount,
                PlagiarismResult = plagiarismResult,
                Vodnost = vodnost,
                Toshnotnost = toshnotnost,
                Text = text
            });
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Looks up every shingle of the initial document concurrently, merges the per-shingle
        /// results into one <c>Plagiarism&lt;int&gt;</c>, and composes an HTML rendering of the
        /// initial document for each matched source.
        /// </summary>
        /// <param name="initialWords">Words of the initial document, passed to <c>TextManager.ComposeHtmlText</c>.</param>
        /// <param name="initialDocIndexToSimplifiedWord">Maps an initial-document index to its simplified word.</param>
        /// <param name="initialDocIndexes">Initial-document indexes; every run of <c>Shingle.Lenght</c> consecutive entries forms one shingle.</param>
        /// <param name="simplifiedWords">Not used by this method.</param>
        /// <returns>
        /// The merged result, with <c>DebugLogs</c> holding the elapsed lookup time. When there
        /// are fewer indexes than one shingle, no lookups are made and the result holds no
        /// per-shingle data.
        /// </returns>
        /// <remarks>
        /// A failure in any shingle lookup propagates through the returned task when awaited.
        /// </remarks>
        public static async Task <Plagiarism <int> > FindAsync(string[] initialWords, Dictionary <int, string> initialDocIndexToSimplifiedWord, int[] initialDocIndexes, string[] simplifiedWords)
        {
            var plagiarismDB = new Plagiarism <int>();
            var stopwatch    = Stopwatch.StartNew();

            // Number of shingles that fit in the index array. Clamp to zero: a document much
            // shorter than one shingle previously produced a negative array size and an
            // OverflowException on allocation.
            int shingleTotal = initialDocIndexes.Length - Shingle.Lenght + 1;
            if (shingleTotal < 0)
            {
                shingleTotal = 0;
            }

            var tasks = new Task <Plagiarism <int> > [shingleTotal];

            for (int i = 0; i < shingleTotal; i++)
            {
                // Copy the loop variable so each lambda captures its own start position.
                int shingleStart = i;

                tasks[i] = Task.Run(() =>
                {
                    var shingleWords   = new List <string>(Shingle.Lenght);
                    var shingleIndexes = new List <int>(Shingle.Lenght);

                    for (int j = 0; j < Shingle.Lenght; j++)
                    {
                        int docIndex = initialDocIndexes[shingleStart + j];
                        shingleWords.Add(initialDocIndexToSimplifiedWord[docIndex]);
                        shingleIndexes.Add(docIndex);
                    }

                    var documentIdToDBDocWordsPositionsForShingle = SQLLoader.GetDocuments(shingleWords);

                    return Plagiarism <int> .FindPlagiarism(documentIdToDBDocWordsPositionsForShingle, shingleIndexes);
                });
            }

            // Await instead of blocking with Task.WaitAll/.Result so the calling thread is
            // released while lookups run. ConfigureAwait(false) keeps the continuation off any
            // captured synchronization context, so a caller that blocks on the returned task
            // does not deadlock.
            Plagiarism <int>[] shingleResults = await Task.WhenAll(tasks).ConfigureAwait(false);

            // Merge in shingle order, matching the order of the task array.
            foreach (var plagiarismDBForShingle in shingleResults)
            {
                plagiarismDB.Add(plagiarismDBForShingle);
            }

            stopwatch.Stop();
            string debugLog = "DB PLAG TIME " + stopwatch.ElapsedMilliseconds + " ";

            foreach (var kvp in plagiarismDB.SourceIdToInitialWordsIndexes)
            {
                plagiarismDB.SourceIdToInitialDocumentHtml.Add(kvp.Key, TextManager.ComposeHtmlText(initialWords, kvp.Value));
            }

            plagiarismDB.DebugLogs = debugLog;
            return(plagiarismDB);
        }