/// <summary>
        /// Tallies how often every word occurs in the left and in the right document, discounts the occurrences
        /// that also belong to the sample (when one is set) and scores how similar both tallies are for each word.
        /// </summary>
        /// <returns>The matching's results, holding one detail row per remaining word.</returns>
        public override ComparatorMatchingScore Run()
        {
            //Per-word tallies: slot 0 is the left document count, slot 1 is the right document count.
            Dictionary<string, int[]> tallies = new Dictionary<string, int[]>();
            int[] tally;

            foreach (string term in this.Left.WordAppearances.Select(entry => entry.Key))
            {
                if (!tallies.TryGetValue(term, out tally))
                {
                    tally = new int[] { 0, 0 };
                    tallies.Add(term, tally);
                }

                tally[0] += this.Left.WordAppearances[term];
            }

            foreach (string term in this.Right.WordAppearances.Select(entry => entry.Key))
            {
                if (!tallies.TryGetValue(term, out tally))
                {
                    tally = new int[] { 0, 0 };
                    tallies.Add(term, tally);
                }

                tally[1] += this.Right.WordAppearances[term];
            }

            //Sample occurrences are subtracted from both sides (never going below zero); a word that ends up
            //with no occurrences on either side is dropped from the results.
            if (this.Sample != null)
            {
                foreach (string term in this.Sample.WordAppearances.Select(entry => entry.Key))
                {
                    if (!tallies.TryGetValue(term, out tally))
                    {
                        continue;
                    }

                    tally[0] = Math.Max(0, tally[0] - this.Sample.WordAppearances[term]);
                    tally[1] = Math.Max(0, tally[1] - this.Sample.WordAppearances[term]);

                    if (tally[0] == 0 && tally[1] == 0)
                    {
                        tallies.Remove(term);
                    }
                }
            }

            //Defining the results headers
            ComparatorMatchingScore result = new ComparatorMatchingScore(this.Left.Name, this.Right.Name, "Document Word Counter", DisplayLevel.FULL);
            result.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
            result.DetailsFormat  = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

            //The match of a word is the ratio between its smaller and its bigger count (0 when one side lacks it).
            foreach (KeyValuePair<string, int[]> entry in tallies)
            {
                int left  = entry.Value[0];
                int right = entry.Value[1];

                float match = 0f;
                if (left != 0 && right != 0)
                {
                    match = (left < right) ? (float)left / (float)right : (float)right / (float)left;
                }

                result.AddMatch(match);
                result.DetailsData.Add(new object[] { entry.Key, left, right, match });
            }

            return result;
        }
        /// <summary>
        /// Compares the sample with the given document and removes from the document the paragraphs that would
        /// produce a false positive match because they come from the sample.
        /// </summary>
        /// <param name="doc">The document that will be compared with the sample; its paragraphs are removed in place.</param>
        private void ExcludeSampleMatches(Document doc)
        {
            //Nothing can be excluded when no sample has been set.
            if (this.Sample == null)
            {
                return;
            }

            //Paragraphs whose total match against the sample reaches this ratio are treated as sample content.
            //TODO: allowing to use totalMatch value or the length + word matches (used to compute the total match).
            //TODO: testing and tweaking necessary, also config loading from a settings file.
            const float threshold = 0.70f;

            //In order to improve the performance, the paragraphs that are exactly equal to a sample one are removed first.
            foreach (string paragraph in this.Sample.Paragraphs.Select(x => x.Key))
            {
                doc.Paragraphs.Remove(paragraph);
            }

            ComparatorMatchingScore sampleScore = ComputeMatching(CompareParagraphs(this.Sample, doc));

            for (int i = 0; i < sampleScore.DetailsData.Count; i++)
            {
                //Only the total match is read: the length and word columns were previously unboxed into locals that
                //were never used, which tied this method to the exact column layout of the details for no benefit.
                if (sampleScore.DetailsMatch[i] >= threshold)
                {
                    //Column 1 is used as the key of the document's paragraph (the sample is passed as the left side above).
                    doc.Paragraphs.Remove((string)sampleScore.DetailsData[i][1]);
                }
            }
        }
        /// <summary>
        /// Counts how many words and how many times they appear within each document, and checks the matching percentage.
        /// </summary>
        /// <returns>The matching's results.</returns>
        public override ComparatorMatchingScore Run()
        {
            //Word appearances for each document: position 0 belongs to the left one, position 1 to the right one.
            Dictionary<string, int[]> appearances = new Dictionary<string, int[]>();
            int[] counts;

            foreach (string key in this.Left.WordAppearances.Select(pair => pair.Key))
            {
                if (!appearances.TryGetValue(key, out counts))
                {
                    counts = new int[] { 0, 0 };
                    appearances.Add(key, counts);
                }

                counts[0] += this.Left.WordAppearances[key];
            }

            foreach (string key in this.Right.WordAppearances.Select(pair => pair.Key))
            {
                if (!appearances.TryGetValue(key, out counts))
                {
                    counts = new int[] { 0, 0 };
                    appearances.Add(key, counts);
                }

                counts[1] += this.Right.WordAppearances[key];
            }

            //The sample appearances are discounted from both documents; words left without appearances are ignored.
            if (this.Sample != null)
            {
                foreach (string key in this.Sample.WordAppearances.Select(pair => pair.Key))
                {
                    if (appearances.TryGetValue(key, out counts))
                    {
                        counts[0] = Math.Max(0, counts[0] - this.Sample.WordAppearances[key]);
                        counts[1] = Math.Max(0, counts[1] - this.Sample.WordAppearances[key]);

                        bool exhausted = (counts[0] == 0 && counts[1] == 0);
                        if (exhausted)
                        {
                            appearances.Remove(key);
                        }
                    }
                }
            }

            //Defining the results headers
            ComparatorMatchingScore score = new ComparatorMatchingScore(this.Left.Name, this.Right.Name, "Document Word Counter", DisplayLevel.FULL);
            score.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
            score.DetailsFormat  = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

            //Calculate the matching for each individual word: smaller count divided by bigger count.
            foreach (KeyValuePair<string, int[]> item in appearances)
            {
                int left  = item.Value[0];
                int right = item.Value[1];

                float match;
                if (left == 0 || right == 0)
                {
                    match = 0;
                }
                else
                {
                    match = (float)Math.Min(left, right) / (float)Math.Max(left, right);
                }

                score.AddMatch(match);
                score.DetailsData.Add(new object[] { item.Key, left, right, match });
            }

            return score;
        }
Beispiel #4
0
        /// <summary>
        /// Counts which words appear in each document and how many times, and checks the matching percentage.
        /// </summary>
        /// <returns>The matching's results.</returns>
        public override ComparatorMatchingScore Run()
        {
            //Counting the words that appear in each document ([0] = left, [1] = right).
            Dictionary<string, int[]> wordCounts = new Dictionary<string, int[]>();
            int[] pair;

            foreach (string current in this.Left.WordAppearances.Select(x => x.Key))
            {
                if (!wordCounts.TryGetValue(current, out pair))
                {
                    pair = new int[] { 0, 0 };
                    wordCounts.Add(current, pair);
                }

                pair[0] += this.Left.WordAppearances[current];
            }

            foreach (string current in this.Right.WordAppearances.Select(x => x.Key))
            {
                if (!wordCounts.TryGetValue(current, out pair))
                {
                    pair = new int[] { 0, 0 };
                    wordCounts.Add(current, pair);
                }

                pair[1] += this.Right.WordAppearances[current];
            }

            //Discounting the sample file word appearances from both documents, in order to ignore them.
            if (this.Sample != null)
            {
                foreach (string current in this.Sample.WordAppearances.Select(x => x.Key))
                {
                    if (!wordCounts.TryGetValue(current, out pair))
                    {
                        continue;
                    }

                    pair[0] = Math.Max(0, pair[0] - this.Sample.WordAppearances[current]);
                    pair[1] = Math.Max(0, pair[1] - this.Sample.WordAppearances[current]);

                    //A word that no longer appears in any document is not reported.
                    if (pair[0] == 0 && pair[1] == 0)
                    {
                        wordCounts.Remove(current);
                    }
                }
            }

            //Defining the results headers
            ComparatorMatchingScore output = new ComparatorMatchingScore(this.Left.Name, this.Right.Name, "Document Word Counter", DisplayLevel.FULL);
            output.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
            output.DetailsFormat  = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

            //Calculate the matching of each individual word.
            foreach (KeyValuePair<string, int[]> counted in wordCounts)
            {
                int left  = counted.Value[0];
                int right = counted.Value[1];

                float match = 0f;
                if (left != 0 && right != 0)
                {
                    if (left < right)
                    {
                        match = (float)left / (float)right;
                    }
                    else
                    {
                        match = (float)right / (float)left;
                    }
                }

                output.AddMatch(match);
                output.DetailsData.Add(new object[] { counted.Key, left, right, match });
            }

            return output;
        }
Beispiel #5
0
        /// <summary>
        /// Compares the sample with the given document and removes from the document the paragraphs that would
        /// produce a false positive match because they come from the sample.
        /// </summary>
        /// <param name="doc">The document that will be compared with the sample; its paragraphs are removed in place.</param>
        private void ExcludeSampleMatches(Document doc)
        {
            //Nothing can be excluded when no sample has been set.
            if (this.Sample == null)
            {
                return;
            }

            //Paragraphs whose total match against the sample reaches this ratio are treated as sample content.
            //TODO: allowing to use the total match value or the length + word matches (used to compute the total match).
            //TODO: testing and tweaking necessary, also config loading from a settings file.
            const float threshold = 0.70f;

            //In order to improve the performance, the paragraphs that are exactly equal to a sample one are removed first.
            foreach (string paragraph in this.Sample.Paragraphs.Select(x => x.Key))
            {
                doc.Paragraphs.Remove(paragraph);
            }

            ComparatorMatchingScore sampleScore = ComputeMatching(CompareParagraphs(this.Sample, doc));

            for (int i = 0; i < sampleScore.DetailsData.Count; i++)
            {
                //Only the total match is read: the length and word columns were previously unboxed into locals that
                //were never used, which tied this method to the exact column layout of the details for no benefit.
                if (sampleScore.DetailsMatch[i] >= threshold)
                {
                    //Column 1 is used as the key of the document's paragraph (the sample is passed as the left side above).
                    doc.Paragraphs.Remove((string)sampleScore.DetailsData[i][1]);
                }
            }
        }
Beispiel #6
0
        /// <summary>
        /// Counts how many words and how many times they appear in each document and checks the matching percentage.
        /// </summary>
        /// <returns>The matching's results.</returns>
        public override ComparatorMatchingScore Run()
        {
            //Counting the word appearances for each document (index 0 = left, index 1 = right).
            Dictionary<string, int[]> totals = new Dictionary<string, int[]>();
            int[] slot;

            foreach (string w in this.Left.WordAppearances.Select(kv => kv.Key))
            {
                if (!totals.TryGetValue(w, out slot))
                {
                    slot = new int[] { 0, 0 };
                    totals.Add(w, slot);
                }

                slot[0] += this.Left.WordAppearances[w];
            }

            foreach (string w in this.Right.WordAppearances.Select(kv => kv.Key))
            {
                if (!totals.TryGetValue(w, out slot))
                {
                    slot = new int[] { 0, 0 };
                    totals.Add(w, slot);
                }

                slot[1] += this.Right.WordAppearances[w];
            }

            //Counting the sample file word appearances, in order to ignore them within both documents.
            if (this.Sample != null)
            {
                foreach (string w in this.Sample.WordAppearances.Select(kv => kv.Key))
                {
                    if (totals.TryGetValue(w, out slot))
                    {
                        slot[0] = Math.Max(0, slot[0] - this.Sample.WordAppearances[w]);
                        slot[1] = Math.Max(0, slot[1] - this.Sample.WordAppearances[w]);

                        //Words fully explained by the sample are removed from the results.
                        if (slot[0] == 0 && slot[1] == 0)
                        {
                            totals.Remove(w);
                        }
                    }
                }
            }

            //Defining the results headers
            ComparatorMatchingScore summary = new ComparatorMatchingScore(this.Left.Name, this.Right.Name, "Document Word Counter", DisplayLevel.FULL);
            summary.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
            summary.DetailsFormat  = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

            //Calculate the matching for each individual word.
            foreach (KeyValuePair<string, int[]> row in totals)
            {
                int left  = row.Value[0];
                int right = row.Value[1];

                //Ratio between the lowest and the highest count; a word missing on one side scores 0.
                bool missing = (left == 0 || right == 0);
                float match  = missing ? 0 : (float)Math.Min(left, right) / (float)Math.Max(left, right);

                summary.AddMatch(match);
                summary.DetailsData.Add(new object[] { row.Key, left, right, match });
            }

            return summary;
        }
        /// <summary>
        /// Compares the sample with the given document and removes from the document the paragraphs that would
        /// produce a false positive match because they come from the sample.
        /// </summary>
        /// <param name="doc">The document that will be compared with the sample; its paragraphs are removed in place.</param>
        private void ExcludeSampleMatches(Document doc)
        {
            //Nothing can be excluded when no sample has been set.
            if (this.Sample == null)
            {
                return;
            }

            //Paragraphs whose total match against the sample reaches this ratio are treated as sample content.
            //TODO: allowing to use totalMatch value or the length + word matches (used to compute the total match).
            //TODO: testing and tweaking necessary, also config loading from a settings file.
            const float threshold = 0.70f;

            //In order to improve the performance, the paragraphs that are exactly equal to a sample one are removed first.
            foreach (string paragraph in this.Sample.Paragraphs.Select(x => x.Key))
            {
                doc.Paragraphs.Remove(paragraph);
            }

            ComparatorMatchingScore sampleScore = ComputeMatching(CompareParagraphs(this.Sample, doc));

            for (int i = 0; i < sampleScore.DetailsData.Count; i++)
            {
                //Only the total match is read: the length and word columns were previously unboxed into locals that
                //were never used, which tied this method to the exact column layout of the details for no benefit.
                if (sampleScore.DetailsMatch[i] >= threshold)
                {
                    //Column 1 is used as the key of the document's paragraph (the sample is passed as the left side above).
                    doc.Paragraphs.Remove((string)sampleScore.DetailsData[i][1]);
                }
            }
        }
        /// <summary>
        /// Compares the sample with the given document and removes from the document the paragraphs that would
        /// produce a false positive match because they come from the sample.
        /// </summary>
        /// <param name="doc">The document that will be compared with the sample; its paragraphs are removed in place.</param>
        private void ExcludeSampleMatches(Document doc)
        {
            //Nothing can be excluded when no sample has been set.
            if (this.Sample == null)
            {
                return;
            }

            //Paragraphs whose total match against the sample reaches this ratio are treated as sample content.
            //TODO: allowing to use the totalMatch value or the length + word match (used to compute the total match).
            //TODO: testing and tweaking necessary, also config loading from a settings file.
            const float threshold = 0.70f;

            //In order to improve the performance, the paragraphs that are exactly equal to a sample one are removed first.
            foreach (string paragraph in this.Sample.Paragraphs.Select(x => x.Key))
            {
                doc.Paragraphs.Remove(paragraph);
            }

            ComparatorMatchingScore sampleScore = ComputeMatching(CompareParagraphs(this.Sample, doc));

            for (int i = 0; i < sampleScore.DetailsData.Count; i++)
            {
                //Only the total match is read: the length and word columns were previously unboxed into locals that
                //were never used, which tied this method to the exact column layout of the details for no benefit.
                if (sampleScore.DetailsMatch[i] >= threshold)
                {
                    //Column 1 is used as the key of the document's paragraph (the sample is passed as the left side above).
                    doc.Paragraphs.Remove((string)sampleScore.DetailsData[i][1]);
                }
            }
        }
        /// <summary>
        /// Uses the settings values for comparing a set of files between each other: every file found in the
        /// settings folder (the sample excluded) is compared against every other one with each available comparator.
        /// The results are stored into <c>MatchingResults</c>.
        /// </summary>
        /// <exception cref="Exceptions.FolderNotFoundException">Thrown when the settings folder does not exist.</exception>
        public void CompareFiles()
        {
            //Initial Checks
            if (!Directory.Exists(this.Settings.Folder))
            {
                throw new Exceptions.FolderNotFoundException();
            }

            //Initial vars. including the set of files (the sample file is left out of the comparison).
            Dictionary<string, ComparatorMatchingScore> results = new Dictionary<string, ComparatorMatchingScore>();
            SearchOption scope = (this.Settings.Recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly);
            List<string> files = Directory.GetFiles(this.Settings.Folder, string.Format("*.{0}", this.Settings.Extension), scope)
                                          .Where(x => !x.Equals(this.Settings.Sample))
                                          .ToList();
            List<Type> comparatorTypes = GetComparatorTypes().ToList();

            //A file is never compared with itself, so there are n * (n - 1) ordered pairs (not n * n) per comparator.
            //Counting the skipped self-pairs kept the computed counter below the total until it was forced at the end.
            _total    = files.Count * (files.Count - 1) * comparatorTypes.Count;
            _computed = 0;

            //Loops over each pair of files (the files must be compared between each other in a relation "1 to many").
            for (int i = 0; i < files.Count; i++)
            {
                string leftFilePath = files[i];

                for (int j = 0; j < files.Count; j++)
                {
                    string rightFilePath = files[j];

                    //Avoids comparing a file with itself.
                    if (rightFilePath == leftFilePath)
                    {
                        continue;
                    }

                    foreach (Type t in comparatorTypes)
                    {
                        ComparatorMatchingScore cms = null;
                        ComparatorMatchingScore mirrored;

                        if (results.TryGetValue(GetComparatorKey(rightFilePath, leftFilePath, t), out mirrored))
                        {
                            //The opposite pair has been already computed: its results are copied swapping the left and right files.
                            cms = mirrored.Copy(mirrored.RightFileName, mirrored.LeftFileName);
                        }
                        else
                        {
                            //New comparisons for left and right files must be performed using the current comparer.
                            //NOTE: Run is invoked through reflection, so its exceptions arrive wrapped into a TargetInvocationException.
                            var        comp   = Activator.CreateInstance(t, leftFilePath, rightFilePath, this.Settings);
                            MethodInfo method = comp.GetType().GetMethod("Run");
                            cms = (ComparatorMatchingScore)method.Invoke(comp, null);
                        }

                        _computed++;
                        results.Add(GetComparatorKey(leftFilePath, rightFilePath, t), cms);
                    }
                }
            }

            _computed            = _total;
            this.MatchingResults = results.Values.ToList();
        }
Beispiel #10
0
        /// <summary>
        /// Uses the settings values for comparing a set of files between each other: every unordered pair of files
        /// found in the settings folder (the sample excluded) is checked with each available comparator.
        /// The results are stored into <c>MatchingResults</c>.
        /// </summary>
        /// <exception cref="FolderNotFoundException">Thrown when the settings folder does not exist.</exception>
        public void CompareFiles()
        {
            //Initial Checks
            var folder = Settings.Instance.Get(Setting.GLOBAL_FOLDER);
            if (!Directory.Exists(folder))
            {
                throw new FolderNotFoundException();
            }

            //Initial vars. including the set of files (the sample file is left out of the comparison).
            //The settings are read once here instead of once per file and per comparator.
            var sample = Settings.Instance.Get(Setting.GLOBAL_SAMPLE);
            SearchOption scope = (Settings.Instance.Get(Setting.GLOBAL_RECURSIVE) == "true" ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly);

            List<FileMatchingScore> results = new List<FileMatchingScore>();
            List<string> files = Directory.GetFiles(folder, string.Format("*.{0}", Settings.Instance.Get(Setting.GLOBAL_EXTENSION)), scope)
                                          .Where(x => !x.Equals(sample))
                                          .ToList();
            List<Type> comparatorTypes = GetComparatorTypes().ToList();

            //The total combinations to calculate are the combinations without repetition of 2 elements over a set of n:
            //(n over 2) = n! / (2! (n-2)!) = n * (n - 1) / 2. The simplified form is used because the factorial based one
            //was evaluated as (n! / 2) * (n-2)! due to operator precedence, overflows for small sets and needs the
            //factorial of a negative number when there are fewer than 2 files.
            //For that total of combinations, a check is performed for every comparator.
            _total   = (files.Count * (files.Count - 1) / 2) * comparatorTypes.Count;
            _current = 0;

            //Loops over each pair of files (the files must be compared between each other in a relation "1 to many").
            for (int i = 0; i < files.Count; i++)
            {
                string leftFilePath = files[i];

                for (int j = i + 1; j < files.Count; j++)
                {
                    string rightFilePath = files[j];

                    //Create the score for the given file pair
                    FileMatchingScore fpr = new FileMatchingScore(Path.GetFullPath(leftFilePath), Path.GetFullPath(rightFilePath));

                    //Instantiate and run every Comparator
                    foreach (Type t in comparatorTypes)
                    {
                        var        comp   = Activator.CreateInstance(t, leftFilePath, rightFilePath, sample);
                        MethodInfo method = comp.GetType().GetMethod("Run");

                        //Once the object is instantiated, the Run method is invoked.
                        ComparatorMatchingScore cms = (ComparatorMatchingScore)method.Invoke(comp, null);
                        fpr.ComparatorResults.Add(cms);
                        _current++;
                    }

                    results.Add(fpr);
                }
            }

            this.MatchingResults = results;
        }
Beispiel #11
0
        /// <summary>
        /// Compares the sample with the given document and removes from the document every paragraph whose
        /// match against the sample is equal to or higher than the given threshold.
        /// </summary>
        /// <param name="doc">The document that will be compared with the sample; its paragraphs are removed in place.</param>
        /// <param name="threshold">The minimum match value that makes a paragraph be removed.</param>
        private void ExcludeSamplePartialMatches(Document doc, float threshold)
        {
            //Without a sample there is nothing to compare with.
            if (this.Sample != null)
            {
                ComparatorMatchingScore score = ComputeMatching(CompareParagraphs(this.Sample, doc));

                int row = 0;
                while (row < score.DetailsData.Count)
                {
                    bool belongsToSample = (score.DetailsMatch[row] >= threshold);
                    if (belongsToSample)
                    {
                        //Column 1 is used as the key of the paragraph to remove from the document.
                        string paragraph = (string)score.DetailsData[row][1];
                        doc.Paragraphs.Remove(paragraph);
                    }

                    row++;
                }
            }
        }
Beispiel #12
0
        /// <summary>
        /// Builds the matching score for a set of paragraph pairs using the word counters computed for each pair.
        /// </summary>
        /// <param name="paragraphCounter">
        /// For each pair of paragraphs (key: [0] = left, [1] = right), the words counted within them
        /// (value: word -> [0] = left count, [1] = right count).
        /// </param>
        /// <returns>The matching's results, holding one detail row per pair of paragraphs.</returns>
        private ComparatorMatchingScore ComputeMatching(Dictionary <string[], Dictionary <string, int[]> > paragraphCounter)
        {
            //Defining the results headers
            ComparatorMatchingScore score = new ComparatorMatchingScore(this.Left.Name, this.Right.Name, "Paragraph Word Counter", DisplayLevel.DETAILED);
            score.DetailsCaption = new string[] { "Left paragraph", "Right paragraph", "Match" };
            score.DetailsFormat  = new string[] { "{0:L50}", "{0:L50}", "{0:P2}" };

            //Calculate the matching for each individual word within each pair of paragraphs.
            foreach (KeyValuePair<string[], Dictionary<string, int[]>> pair in paragraphCounter)
            {
                string[] paragraphs = pair.Key;

                //The child is replaced on every iteration, so it ends up holding the details of the last pair only.
                score.Child = new DetailsMatchingScore();
                score.Child.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
                score.Child.DetailsFormat  = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

                foreach (KeyValuePair<string, int[]> wordEntry in pair.Value)
                {
                    int countLeft  = wordEntry.Value[0];
                    int countRight = wordEntry.Value[1];

                    //Ratio between the lowest and the highest count; a word missing on one side scores 0.
                    float match;
                    if (countLeft == 0 || countRight == 0)
                    {
                        match = 0;
                    }
                    else if (countLeft < countRight)
                    {
                        match = (float)countLeft / (float)countRight;
                    }
                    else
                    {
                        match = (float)countRight / (float)countLeft;
                    }

                    //Adding the details for each word
                    score.Child.AddMatch(match);
                    score.Child.DetailsData.Add(new object[] { wordEntry.Key, countLeft, countRight, match });
                }

                //Adding the details for each pair of paragraphs
                score.AddMatch(score.Child.Matching);
                score.DetailsData.Add(new object[] { paragraphs[0], paragraphs[1], score.Child.Matching });
            }

            return score;
        }
        /// <summary>
        /// Computes the matching between pairs of paragraphs from the per-word counters of each pair.
        /// </summary>
        /// <param name="paragraphCounter">
        /// For each pair of paragraphs (key: [0] = left, [1] = right), the words counted within them
        /// (value: word -> [0] = left count, [1] = right count).
        /// </param>
        /// <returns>The matching's results, holding one detail row per pair of paragraphs.</returns>
        private ComparatorMatchingScore ComputeMatching(Dictionary <string[], Dictionary <string, int[]> > paragraphCounter)
        {
            //Defining the results headers
            ComparatorMatchingScore result = new ComparatorMatchingScore(this.Left.Name, this.Right.Name, "Paragraph Word Counter", DisplayLevel.DETAILED);
            result.DetailsCaption = new string[] { "Left paragraph", "Right paragraph", "Match" };
            result.DetailsFormat  = new string[] { "{0:L50}", "{0:L50}", "{0:P2}" };

            //Calculate the matching of each word within each pair of paragraphs.
            foreach (KeyValuePair<string[], Dictionary<string, int[]>> item in paragraphCounter)
            {
                //Counting for each word inside a specific pair of paragraphs (the previous child is replaced).
                result.Child = new DetailsMatchingScore();
                result.Child.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
                result.Child.DetailsFormat  = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

                foreach (KeyValuePair<string, int[]> counted in item.Value)
                {
                    int countLeft  = counted.Value[0];
                    int countRight = counted.Value[1];

                    //Matching with the word appearances: lowest count divided by the highest one, 0 when a side has none.
                    float match = 0f;
                    if (countLeft != 0 && countRight != 0)
                    {
                        match = (float)Math.Min(countLeft, countRight) / (float)Math.Max(countLeft, countRight);
                    }

                    //Adding the details of each word
                    result.Child.AddMatch(match);
                    result.Child.DetailsData.Add(new object[] { counted.Key, countLeft, countRight, match });
                }

                //Adding the details of each pair of paragraphs
                result.AddMatch(result.Child.Matching);
                result.DetailsData.Add(new object[] { item.Key[0], item.Key[1], result.Child.Matching });
            }

            return result;
        }
        /// <summary>
        /// Scores how much each pair of paragraphs matches, based on the words counted within both of them.
        /// </summary>
        /// <param name="paragraphCounter">
        /// For each pair of paragraphs (key: [0] = left, [1] = right), the words counted within them
        /// (value: word -> [0] = left count, [1] = right count).
        /// </param>
        /// <returns>The matching's results, holding one detail row per pair of paragraphs.</returns>
        private ComparatorMatchingScore ComputeMatching(Dictionary <string[], Dictionary <string, int[]> > paragraphCounter)
        {
            //Defining the results headers
            ComparatorMatchingScore total = new ComparatorMatchingScore(this.Left.Name, this.Right.Name, "Paragraph Word Counter", DisplayLevel.DETAILED);
            total.DetailsCaption = new string[] { "Left paragraph", "Right paragraph", "Match" };
            total.DetailsFormat  = new string[] { "{0:L50}", "{0:L50}", "{0:P2}" };

            //Calculate the matching for each individual word of each pair of paragraphs.
            foreach (KeyValuePair<string[], Dictionary<string, int[]>> entry in paragraphCounter)
            {
                string leftParagraph  = entry.Key[0];
                string rightParagraph = entry.Key[1];
                Dictionary<string, int[]> words = entry.Value;

                //Counting each word inside a specific pair of paragraphs (the previous child is replaced).
                total.Child = new DetailsMatchingScore();
                total.Child.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
                total.Child.DetailsFormat  = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

                foreach (KeyValuePair<string, int[]> word in words)
                {
                    int countLeft  = word.Value[0];
                    int countRight = word.Value[1];

                    //Match of the word appearances: 0 when a side has none, otherwise lowest count / highest count.
                    bool missing = (countLeft == 0 || countRight == 0);
                    float match  = 0f;
                    if (!missing)
                    {
                        match = (countLeft < countRight) ? (float)countLeft / (float)countRight : (float)countRight / (float)countLeft;
                    }

                    //Adding the details for each word
                    total.Child.AddMatch(match);
                    total.Child.DetailsData.Add(new object[] { word.Key, countLeft, countRight, match });
                }

                //Adding the details for each pair of paragraphs
                total.AddMatch(total.Child.Matching);
                total.DetailsData.Add(new object[] { leftParagraph, rightParagraph, total.Child.Matching });
            }

            return total;
        }
        /// <summary>
        /// Builds the "Paragraph Word Counter" score. Each pair of paragraphs gets a total match that blends
        /// the per-word appearance match (75%) with a length match (25%).
        /// </summary>
        /// <param name="paragraphCounter">
        /// Keyed by a paragraph pair (element 0 is reported as the left paragraph, element 1 as the right one);
        /// each value maps a word to its counts, with the left count at index 0 and the right count at index 1.
        /// </param>
        /// <returns>The score containing one detail row per paragraph pair.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="paragraphCounter"/> is null.</exception>
        private ComparatorMatchingScore ComputeMatching(Dictionary <string[], Dictionary <string, int[]> > paragraphCounter)
        {
            if (paragraphCounter == null)
            {
                throw new ArgumentNullException(nameof(paragraphCounter));
            }

            // Weights of the total match; the original note says they still need testing and tweaking.
            const float WordWeight   = 0.75f;
            const float LengthWeight = 0.25f;

            // Result container plus the headers and formats of its per-paragraph detail rows.
            ComparatorMatchingScore cr = new ComparatorMatchingScore("Paragraph Word Counter");

            cr.DetailsCaption = new string[] { "Left paragraph", "Right paragraph", "Left length", "Right length", "Length match", "Word match", "Total match" };
            cr.DetailsFormat  = new string[] { "{0:L50}", "{0:L50}", "{0}", "{0}", "{0:P2}", "{0:P2}", "{0:P2}" };

            // Walk the entries directly: key and value arrive together, so no second lookup per key is needed.
            foreach (KeyValuePair <string[], Dictionary <string, int[]> > paragraphEntry in paragraphCounter)
            {
                string[] paragraphs = paragraphEntry.Key;
                Dictionary <string, int[]> wordCounter = paragraphEntry.Value;

                // "Length" here is the number of distinct words with a positive count on that side.
                int leftLength  = wordCounter.Values.Count(x => x[0] > 0);
                int rightLength = wordCounter.Values.Count(x => x[1] > 0);

                // Ratio of the shorter side to the longer one; 0 when either side has no words.
                float matchLength = 0f;
                if (leftLength != 0 && rightLength != 0)
                {
                    matchLength = (float)Math.Min(leftLength, rightLength) / (float)Math.Max(leftLength, rightLength);
                }

                // A fresh child is attached for every pair, so once the loop ends cr.Child refers
                // only to the last pair that was processed.
                cr.Child = new DetailsMatchingScore();
                cr.Child.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
                cr.Child.DetailsFormat  = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

                foreach (KeyValuePair <string, int[]> wordEntry in wordCounter)
                {
                    int countLeft  = wordEntry.Value[0];
                    int countRight = wordEntry.Value[1];

                    // Ratio of the smaller count to the larger one; a zero count on either side scores 0.
                    float matchWord = 0f;
                    if (countLeft != 0 && countRight != 0)
                    {
                        matchWord = (float)Math.Min(countLeft, countRight) / (float)Math.Max(countLeft, countRight);
                    }

                    // Per-word detail row.
                    cr.Child.AddMatch(matchWord);
                    cr.Child.DetailsData.Add(new object[] { wordEntry.Key, countLeft, countRight, matchWord });
                }

                // Per-paragraph detail row: blend the word match and the length match into the total.
                float match = (cr.Child.Matching * WordWeight + matchLength * LengthWeight);
                cr.AddMatch(match);
                cr.DetailsData.Add(new object[] { paragraphs[0], paragraphs[1], leftLength, rightLength, matchLength, cr.Child.Matching, match });
            }

            return cr;
        }
Beispiel #16
0
        /// <summary>
        /// Builds the "Paragraph Word Counter" score. Each pair of paragraphs gets a total match that blends
        /// the per-word appearance match (75%) with a length match (25%).
        /// </summary>
        /// <param name="paragraphCounter">
        /// Keyed by a paragraph pair (element 0 is reported as the left paragraph, element 1 as the right one);
        /// each value maps a word to its counts, with the left count at index 0 and the right count at index 1.
        /// </param>
        /// <returns>The score containing one detail row per paragraph pair.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="paragraphCounter"/> is null.</exception>
        private ComparatorMatchingScore ComputeMatching(Dictionary <string[], Dictionary <string, int[]> > paragraphCounter)
        {
            if (paragraphCounter == null)
            {
                throw new ArgumentNullException(nameof(paragraphCounter));
            }

            // Weights of the total match; the original note says they still need testing and tweaking.
            const float WordWeight   = 0.75f;
            const float LengthWeight = 0.25f;

            // Definition of the result headers.
            ComparatorMatchingScore cr = new ComparatorMatchingScore("Paragraph Word Counter");

            cr.DetailsCaption = new string[] { "Left paragraph", "Right paragraph", "Left length", "Right length", "Length match", "Word match", "Total match" };
            cr.DetailsFormat  = new string[] { "{0:L50}", "{0:L50}", "{0}", "{0}", "{0:P2}", "{0:P2}", "{0:P2}" };

            // Compute the matching for each individual word within each paragraph pair. Entries are
            // enumerated directly so the value does not have to be looked up again by key.
            foreach (KeyValuePair <string[], Dictionary <string, int[]> > paragraphEntry in paragraphCounter)
            {
                string[] paragraphs = paragraphEntry.Key;
                Dictionary <string, int[]> wordCounter = paragraphEntry.Value;

                // Matching by paragraph length, where "length" is the number of distinct words
                // with a positive count on that side.
                int leftLength  = wordCounter.Values.Count(x => x[0] > 0);
                int rightLength = wordCounter.Values.Count(x => x[1] > 0);

                // Ratio of the shorter side to the longer one; 0 when either side has no words.
                float matchLength = 0f;
                if (leftLength != 0 && rightLength != 0)
                {
                    matchLength = (float)Math.Min(leftLength, rightLength) / (float)Math.Max(leftLength, rightLength);
                }

                // Counting each word inside a specific paragraph pair. The child is replaced on every
                // iteration, so after the loop cr.Child refers only to the last pair processed.
                cr.Child = new DetailsMatchingScore();
                cr.Child.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
                cr.Child.DetailsFormat  = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

                foreach (KeyValuePair <string, int[]> wordEntry in wordCounter)
                {
                    int countLeft  = wordEntry.Value[0];
                    int countRight = wordEntry.Value[1];

                    // Matching by word appearances: ratio of the smaller count to the larger one,
                    // or 0 when the count is zero on either side.
                    float matchWord = 0f;
                    if (countLeft != 0 && countRight != 0)
                    {
                        matchWord = (float)Math.Min(countLeft, countRight) / (float)Math.Max(countLeft, countRight);
                    }

                    // Adding the details for each word.
                    cr.Child.AddMatch(matchWord);
                    cr.Child.DetailsData.Add(new object[] { wordEntry.Key, countLeft, countRight, matchWord });
                }

                // Adding the details for each paragraph pair: the total match blends the word match
                // and the length match using the weights declared above.
                float match = (cr.Child.Matching * WordWeight + matchLength * LengthWeight);
                cr.AddMatch(match);
                cr.DetailsData.Add(new object[] { paragraphs[0], paragraphs[1], leftLength, rightLength, matchLength, cr.Child.Matching, match });
            }

            return cr;
        }