/// <summary>
/// Builds a per-word appearance table for the left and right documents, discounts the appearances
/// that also occur in the sample (when a sample is set) and scores every remaining word.
/// </summary>
/// <returns>The matching's results, with one detail row per word.</returns>
public override ComparatorMatchingScore Run()
{
    //Appearances per word: index 0 holds the left document count and index 1 the right document count.
    Dictionary<string, int[]> tally = new Dictionary<string, int[]>();
    int[] counts;

    foreach (string term in this.Left.WordAppearances.Select(x => x.Key))
    {
        if (!tally.TryGetValue(term, out counts))
        {
            counts = new int[] { 0, 0 };
            tally.Add(term, counts);
        }

        counts[0] += Left.WordAppearances[term];
    }

    foreach (string term in this.Right.WordAppearances.Select(x => x.Key))
    {
        if (!tally.TryGetValue(term, out counts))
        {
            counts = new int[] { 0, 0 };
            tally.Add(term, counts);
        }

        counts[1] += Right.WordAppearances[term];
    }

    //The sample appearances are subtracted from both sides (never going below zero), and words
    //left without appearances on both sides are dropped from the table.
    if (this.Sample != null)
    {
        foreach (string term in this.Sample.WordAppearances.Select(x => x.Key))
        {
            if (!tally.TryGetValue(term, out counts))
            {
                continue;
            }

            counts[0] = Math.Max(0, counts[0] - Sample.WordAppearances[term]);
            counts[1] = Math.Max(0, counts[1] - Sample.WordAppearances[term]);

            if (counts[0] == 0 && counts[1] == 0)
            {
                tally.Remove(term);
            }
        }
    }

    //Results headers.
    ComparatorMatchingScore score = new ComparatorMatchingScore(this.Left.Name, this.Right.Name, "Document Word Counter", DisplayLevel.FULL);
    score.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
    score.DetailsFormat = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

    //Per-word match: smaller count divided by the bigger one, or zero when the word is missing on one side.
    foreach (KeyValuePair<string, int[]> entry in tally)
    {
        int leftCount = entry.Value[0];
        int rightCount = entry.Value[1];

        float ratio = 0;
        if (leftCount != 0 && rightCount != 0)
        {
            ratio = (float)Math.Min(leftCount, rightCount) / (float)Math.Max(leftCount, rightCount);
        }

        score.AddMatch(ratio);
        score.DetailsData.Add(new object[] { entry.Key, leftCount, rightCount, ratio });
    }

    return score;
}
/// <summary>
/// Compares the sample with the given document and removes from the document the paragraphs that
/// would produce a false positive match because they come from the sample.
/// Nothing is done when no sample has been set.
/// </summary>
/// <param name="doc">The document that will be compared with the sample.</param>
private void ExcludeSampleMatches(Document doc)
{
    if (this.Sample == null)
    {
        return;
    }

    //Minimum total match (inclusive) for a document paragraph to be considered as coming from the sample.
    //TODO: testing and tweaking necessary, also config loading from a settings file.
    const float threshold = 0.70f;

    //In order to improve the performance, the sample paragraphs are excluded first from the document (exact match only).
    foreach (string paragraph in this.Sample.Paragraphs.Select(x => x.Key))
    {
        doc.Paragraphs.Remove(paragraph);
    }

    //The remaining paragraphs are scored against the sample ones; those similar enough are excluded too.
    ComparatorMatchingScore sampleScore = ComputeMatching(CompareParagraphs(this.Sample, doc));
    for (int i = 0; i < sampleScore.DetailsData.Count; i++)
    {
        //Only the total match is read, so this loop does not depend on the layout of the other detail columns.
        //TODO: allow using the length and word matches (used to compute the total match) instead of the total match.
        if (sampleScore.DetailsMatch[i] >= threshold)
        {
            //Column 1 is the paragraph removed from the document (presumably the document-side paragraph of the pair - confirm against ComputeMatching).
            doc.Paragraphs.Remove((string)sampleScore.DetailsData[i][1]);
        }
    }
}
/// <summary>
/// Counts how many times each word appears within the left and right documents, ignores the
/// appearances that belong to the sample (if any) and computes the matching for every word.
/// </summary>
/// <returns>The matching's results.</returns>
public override ComparatorMatchingScore Run()
{
    //For every word: [0] = appearances within the left document, [1] = appearances within the right one.
    Dictionary<string, int[]> appearances = new Dictionary<string, int[]>();
    int[] sides;

    foreach (string key in this.Left.WordAppearances.Select(x => x.Key))
    {
        if (!appearances.TryGetValue(key, out sides))
        {
            sides = new int[] { 0, 0 };
            appearances.Add(key, sides);
        }

        sides[0] += Left.WordAppearances[key];
    }

    foreach (string key in this.Right.WordAppearances.Select(x => x.Key))
    {
        if (!appearances.TryGetValue(key, out sides))
        {
            sides = new int[] { 0, 0 };
            appearances.Add(key, sides);
        }

        sides[1] += Right.WordAppearances[key];
    }

    //Sample words are discounted from both documents; a word with nothing left on both sides is discarded.
    if (this.Sample != null)
    {
        foreach (string key in this.Sample.WordAppearances.Select(x => x.Key))
        {
            if (appearances.TryGetValue(key, out sides))
            {
                sides[0] = Math.Max(0, sides[0] - Sample.WordAppearances[key]);
                sides[1] = Math.Max(0, sides[1] - Sample.WordAppearances[key]);

                bool exhausted = (sides[0] == 0 && sides[1] == 0);
                if (exhausted)
                {
                    appearances.Remove(key);
                }
            }
        }
    }

    //Defining the results headers.
    ComparatorMatchingScore output = new ComparatorMatchingScore(this.Left.Name, this.Right.Name, "Document Word Counter", DisplayLevel.FULL)
    {
        DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" },
        DetailsFormat = new string[] { "{0}", "{0}", "{0}", "{0:P2}" }
    };

    //The match of a word is the ratio between its lowest and highest count (zero if one side has none).
    foreach (KeyValuePair<string, int[]> item in appearances)
    {
        int onLeft = item.Value[0];
        int onRight = item.Value[1];

        float similarity;
        if (onLeft == 0 || onRight == 0)
        {
            similarity = 0;
        }
        else
        {
            similarity = (float)Math.Min(onLeft, onRight) / (float)Math.Max(onLeft, onRight);
        }

        output.AddMatch(similarity);
        output.DetailsData.Add(new object[] { item.Key, onLeft, onRight, similarity });
    }

    return output;
}
/// <summary>
/// Counts how many words, and how many times each one, appear within each document and checks
/// the matching percentage word by word. Sample appearances (when a sample is set) are ignored.
/// </summary>
/// <returns>The matching's results, one detail row per counted word.</returns>
public override ComparatorMatchingScore Run()
{
    //Word counting table: position 0 is the left document, position 1 is the right document.
    Dictionary<string, int[]> table = new Dictionary<string, int[]>();
    int[] pair;

    //Left document appearances.
    foreach (string w in this.Left.WordAppearances.Select(x => x.Key))
    {
        if (!table.TryGetValue(w, out pair))
        {
            pair = new int[] { 0, 0 };
            table.Add(w, pair);
        }

        pair[0] += Left.WordAppearances[w];
    }

    //Right document appearances.
    foreach (string w in this.Right.WordAppearances.Select(x => x.Key))
    {
        if (!table.TryGetValue(w, out pair))
        {
            pair = new int[] { 0, 0 };
            table.Add(w, pair);
        }

        pair[1] += Right.WordAppearances[w];
    }

    //Subtracting the sample file word appearances from both documents, clamping at zero.
    if (this.Sample != null)
    {
        foreach (string w in this.Sample.WordAppearances.Select(x => x.Key))
        {
            if (!table.TryGetValue(w, out pair))
            {
                continue;
            }

            pair[0] = Math.Max(0, pair[0] - Sample.WordAppearances[w]);
            pair[1] = Math.Max(0, pair[1] - Sample.WordAppearances[w]);

            //Words fully explained by the sample are not reported at all.
            if (pair[0] == 0 && pair[1] == 0)
            {
                table.Remove(w);
            }
        }
    }

    //Defining the results headers.
    ComparatorMatchingScore report = new ComparatorMatchingScore(this.Left.Name, this.Right.Name, "Document Word Counter", DisplayLevel.FULL);
    report.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
    report.DetailsFormat = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

    //Calculating the matching for each individual word.
    foreach (KeyValuePair<string, int[]> row in table)
    {
        int l = row.Value[0];
        int r = row.Value[1];

        float m = 0;
        if (l != 0 && r != 0)
        {
            m = (float)Math.Min(l, r) / (float)Math.Max(l, r);
        }

        report.AddMatch(m);
        report.DetailsData.Add(new object[] { row.Key, l, r, m });
    }

    return report;
}
/// <summary>
/// Compares the sample with the given document and removes from the document the paragraphs that
/// would produce a false positive match because they come from the sample.
/// Nothing is done when no sample has been set.
/// </summary>
/// <param name="doc">The document that will be compared with the sample.</param>
private void ExcludeSampleMatches(Document doc)
{
    if (this.Sample == null)
    {
        return;
    }

    //Minimum total match (inclusive) for a document paragraph to be considered as coming from the sample.
    //TODO: testing and tweaking necessary, also config loading from a settings file.
    const float threshold = 0.70f;

    //In order to improve the performance, the sample paragraphs are excluded first from the document (exact match only).
    foreach (string paragraph in this.Sample.Paragraphs.Select(x => x.Key))
    {
        doc.Paragraphs.Remove(paragraph);
    }

    //The remaining paragraphs are scored against the sample ones; those similar enough are excluded too.
    ComparatorMatchingScore sampleScore = ComputeMatching(CompareParagraphs(this.Sample, doc));
    for (int i = 0; i < sampleScore.DetailsData.Count; i++)
    {
        //Only the total match is read, so this loop does not depend on the layout of the other detail columns.
        //TODO: allow using the length and word matches (used to compute the total match) instead of the total match.
        if (sampleScore.DetailsMatch[i] >= threshold)
        {
            //Column 1 is the paragraph removed from the document (presumably the document-side paragraph of the pair - confirm against ComputeMatching).
            doc.Paragraphs.Remove((string)sampleScore.DetailsData[i][1]);
        }
    }
}
/// <summary>
/// Counts how many words, and how many times each one, appear in each document and checks the
/// matching percentage. Appearances that belong to the sample (when one is set) are discounted.
/// </summary>
/// <returns>The matching's results.</returns>
public override ComparatorMatchingScore Run()
{
    //Counting the word appearances for each document: slot 0 = left, slot 1 = right.
    Dictionary<string, int[]> wordTotals = new Dictionary<string, int[]>();
    int[] slots;

    foreach (string token in this.Left.WordAppearances.Select(x => x.Key))
    {
        if (!wordTotals.TryGetValue(token, out slots))
        {
            slots = new int[] { 0, 0 };
            wordTotals.Add(token, slots);
        }

        slots[0] += Left.WordAppearances[token];
    }

    foreach (string token in this.Right.WordAppearances.Select(x => x.Key))
    {
        if (!wordTotals.TryGetValue(token, out slots))
        {
            slots = new int[] { 0, 0 };
            wordTotals.Add(token, slots);
        }

        slots[1] += Right.WordAppearances[token];
    }

    //Discounting the sample file word appearances from both documents (never below zero).
    if (this.Sample != null)
    {
        foreach (string token in this.Sample.WordAppearances.Select(x => x.Key))
        {
            if (wordTotals.TryGetValue(token, out slots))
            {
                slots[0] = Math.Max(0, slots[0] - Sample.WordAppearances[token]);
                slots[1] = Math.Max(0, slots[1] - Sample.WordAppearances[token]);

                //A word that no longer appears on any side is removed from the results.
                if (slots[0] == 0 && slots[1] == 0)
                {
                    wordTotals.Remove(token);
                }
            }
        }
    }

    //Defining the results headers.
    ComparatorMatchingScore outcome = new ComparatorMatchingScore(this.Left.Name, this.Right.Name, "Document Word Counter", DisplayLevel.FULL)
    {
        DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" },
        DetailsFormat = new string[] { "{0}", "{0}", "{0}", "{0:P2}" }
    };

    //Calculating the matching for each individual word: lowest count over highest count, zero if a side is empty.
    foreach (KeyValuePair<string, int[]> total in wordTotals)
    {
        int leftTimes = total.Value[0];
        int rightTimes = total.Value[1];
        bool missingOnOneSide = (leftTimes == 0 || rightTimes == 0);

        float score = missingOnOneSide
            ? 0
            : (float)Math.Min(leftTimes, rightTimes) / (float)Math.Max(leftTimes, rightTimes);

        outcome.AddMatch(score);
        outcome.DetailsData.Add(new object[] { total.Key, leftTimes, rightTimes, score });
    }

    return outcome;
}
/// <summary>
/// Compares the sample with the given document and removes from the document the paragraphs that
/// would produce a false positive match because they come from the sample.
/// Nothing is done when no sample has been set.
/// </summary>
/// <param name="doc">The document that will be compared with the sample.</param>
private void ExcludeSampleMatches(Document doc)
{
    if (this.Sample == null)
    {
        return;
    }

    //Minimum total match (inclusive) for a document paragraph to be considered as coming from the sample.
    //TODO: testing and tweaking necessary, also config loading from a settings file.
    const float threshold = 0.70f;

    //In order to improve the performance, the sample paragraphs are excluded first from the document (exact match only).
    foreach (string paragraph in this.Sample.Paragraphs.Select(x => x.Key))
    {
        doc.Paragraphs.Remove(paragraph);
    }

    //The remaining paragraphs are scored against the sample ones; those similar enough are excluded too.
    ComparatorMatchingScore sampleScore = ComputeMatching(CompareParagraphs(this.Sample, doc));
    for (int i = 0; i < sampleScore.DetailsData.Count; i++)
    {
        //Only the total match is read, so this loop does not depend on the layout of the other detail columns.
        //TODO: allow using the length and word matches (used to compute the total match) instead of the total match.
        if (sampleScore.DetailsMatch[i] >= threshold)
        {
            //Column 1 is the paragraph removed from the document (presumably the document-side paragraph of the pair - confirm against ComputeMatching).
            doc.Paragraphs.Remove((string)sampleScore.DetailsData[i][1]);
        }
    }
}
/// <summary>
/// Compares the sample with the given document and removes from the document the paragraphs that
/// would produce a false positive match because they come from the sample.
/// Nothing is done when no sample has been set.
/// </summary>
/// <param name="doc">The document that will be compared with the sample.</param>
private void ExcludeSampleMatches(Document doc)
{
    if (this.Sample == null)
    {
        return;
    }

    //Minimum total match (inclusive) for a document paragraph to be considered as coming from the sample.
    //TODO: testing and tweaking necessary, also config loading from a settings file.
    const float threshold = 0.70f;

    //In order to improve the performance, the sample paragraphs are excluded first from the document (exact match only).
    foreach (string paragraph in this.Sample.Paragraphs.Select(x => x.Key))
    {
        doc.Paragraphs.Remove(paragraph);
    }

    //The remaining paragraphs are scored against the sample ones; those similar enough are excluded too.
    ComparatorMatchingScore sampleScore = ComputeMatching(CompareParagraphs(this.Sample, doc));
    for (int i = 0; i < sampleScore.DetailsData.Count; i++)
    {
        //Only the total match is read, so this loop does not depend on the layout of the other detail columns.
        //TODO: allow using the length and word matches (used to compute the total match) instead of the total match.
        if (sampleScore.DetailsMatch[i] >= threshold)
        {
            //Column 1 is the paragraph removed from the document (presumably the document-side paragraph of the pair - confirm against ComputeMatching).
            doc.Paragraphs.Remove((string)sampleScore.DetailsData[i][1]);
        }
    }
}
/// <summary>
/// Uses the settings values for comparing a set of files between each other.
/// Every file is compared with every other file by each available comparator; when the mirrored
/// pair (right vs left) has already been computed, its result is copied with the files swapped
/// instead of running the comparator again.
/// </summary>
/// <exception cref="Exceptions.FolderNotFoundException">Thrown when the folder given by the settings does not exist.</exception>
public void CompareFiles()
{
    //Initial checks.
    if (!Directory.Exists(this.Settings.Folder))
    {
        throw new Exceptions.FolderNotFoundException();
    }

    //Initial vars. including the set of files (the sample file is never compared).
    Dictionary<string, ComparatorMatchingScore> results = new Dictionary<string, ComparatorMatchingScore>();
    List<string> files = Directory.GetFiles(
        this.Settings.Folder,
        string.Format("*.{0}", this.Settings.Extension),
        (this.Settings.Recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly)
    ).Where(x => !x.Equals(this.Settings.Sample)).ToList();
    List<Type> comparatorTypes = GetComparatorTypes().ToList();

    //A file is never compared with itself, so each file produces (files - 1) pairs and every pair
    //is processed once per comparator. Counting files * files would leave the progress short of the total.
    _total = files.Count * (files.Count - 1) * comparatorTypes.Count;
    _computed = 0;

    //Loops over each pair of files (the files must be compared between each other in a relation "1 to many").
    for (int i = 0; i < files.Count; i++)
    {
        string leftFilePath = files[i];

        for (int j = 0; j < files.Count; j++)
        {
            string rightFilePath = files[j];

            //Avoids comparing a file with itself.
            if (rightFilePath == leftFilePath)
            {
                continue;
            }

            foreach (Type t in comparatorTypes)
            {
                ComparatorMatchingScore cms;
                ComparatorMatchingScore mirrored;

                if (results.TryGetValue(GetComparatorKey(rightFilePath, leftFilePath, t), out mirrored))
                {
                    //The existing results will be copied swapping the left and right files and reusing the already computed data.
                    cms = mirrored.Copy(mirrored.RightFileName, mirrored.LeftFileName);
                }
                else
                {
                    //New comparisons for left and right files must be performed using the current comparer.
                    //The comparer is created and run through reflection, so it only needs a matching constructor and a "Run" method.
                    object comp = Activator.CreateInstance(t, leftFilePath, rightFilePath, this.Settings);
                    MethodInfo method = comp.GetType().GetMethod("Run");
                    cms = (ComparatorMatchingScore)method.Invoke(comp, null);
                }

                _computed++;
                results.Add(GetComparatorKey(leftFilePath, rightFilePath, t), cms);
            }
        }
    }

    //Makes sure the progress is reported as completed.
    _computed = _total;
    this.MatchingResults = results.Values.ToList();
}
/// <summary>
/// Uses the settings values for comparing a set of files between each other.
/// Every unordered pair of files is processed once by each available comparator, and the results
/// of a pair are grouped into a single file matching score.
/// </summary>
/// <exception cref="FolderNotFoundException">Thrown when the folder given by the settings does not exist.</exception>
public void CompareFiles()
{
    //Initial checks.
    string folder = Settings.Instance.Get(Setting.GLOBAL_FOLDER);
    if (!Directory.Exists(folder))
    {
        throw new FolderNotFoundException();
    }

    //Initial vars. including the set of files (the sample file is never compared).
    string sample = Settings.Instance.Get(Setting.GLOBAL_SAMPLE);
    SearchOption scope = (Settings.Instance.Get(Setting.GLOBAL_RECURSIVE) == "true" ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly);

    List<FileMatchingScore> results = new List<FileMatchingScore>();
    List<string> files = Directory.GetFiles(folder, string.Format("*.{0}", Settings.Instance.Get(Setting.GLOBAL_EXTENSION)), scope)
        .Where(x => !x.Equals(sample))
        .ToList();
    List<Type> comparatorTypes = GetComparatorTypes().ToList();

    //The total combinations to calculate are the combinations without repetition of 2 elements over a set of n:
    //(n over 2) = n! / (2! * (n-2)!) = n * (n-1) / 2. The simplified form is used because it needs no factorials,
    //so it cannot overflow for regular file sets and it yields zero when there are fewer than two files.
    //For each combination, there will be performed a check for every comparator.
    _total = (files.Count * (files.Count - 1) / 2) * comparatorTypes.Count;
    _current = 0;

    //Loops over each pair of files (the files must be compared between each other in a relation "1 to many").
    for (int i = 0; i < files.Count; i++)
    {
        string leftFilePath = files[i];

        for (int j = i + 1; j < files.Count; j++)
        {
            string rightFilePath = files[j];

            //Create the score for the given file pair.
            FileMatchingScore fpr = new FileMatchingScore(Path.GetFullPath(leftFilePath), Path.GetFullPath(rightFilePath));

            //Instantiate and run every comparator through reflection: each one only needs a matching constructor and a "Run" method.
            foreach (Type t in comparatorTypes)
            {
                object comp = Activator.CreateInstance(t, leftFilePath, rightFilePath, sample);
                MethodInfo method = comp.GetType().GetMethod("Run");

                //Once the object is instantiated, the Run method is invoked.
                ComparatorMatchingScore cms = (ComparatorMatchingScore)method.Invoke(comp, null);
                fpr.ComparatorResults.Add(cms);
                _current++;
            }

            results.Add(fpr);
        }
    }

    this.MatchingResults = results;
}
/// <summary>
/// Scores the paragraphs of the given document against the sample ones and removes from the
/// document those whose match reaches the threshold. Nothing is done when no sample has been set.
/// </summary>
/// <param name="doc">The document whose paragraphs will be compared with the sample.</param>
/// <param name="threshold">Minimum match value (inclusive) for a paragraph to be removed.</param>
private void ExcludeSamplePartialMatches(Document doc, float threshold)
{
    if (this.Sample != null)
    {
        ComparatorMatchingScore scored = ComputeMatching(CompareParagraphs(this.Sample, doc));

        int row = 0;
        while (row < scored.DetailsData.Count)
        {
            bool similarEnough = scored.DetailsMatch[row] >= threshold;
            if (similarEnough)
            {
                //Column 1 of the details row is the paragraph text removed from the document.
                doc.Paragraphs.Remove((string)scored.DetailsData[row][1]);
            }

            row++;
        }
    }
}
/// <summary>
/// Turns the per-paragraph word counters into a matching score: every word within a paragraph
/// pair is scored into a child score, and the child's matching becomes the match of the pair.
/// </summary>
/// <param name="paragraphCounter">
/// For each paragraph pair (key: left paragraph at index 0, right paragraph at index 1), the
/// appearances of each word (value: left count at index 0, right count at index 1).
/// </param>
/// <returns>The matching's results, with one detail row per paragraph pair.</returns>
private ComparatorMatchingScore ComputeMatching(Dictionary<string[], Dictionary<string, int[]>> paragraphCounter)
{
    //Defining the results headers.
    ComparatorMatchingScore result = new ComparatorMatchingScore(this.Left.Name, this.Right.Name, "Paragraph Word Counter", DisplayLevel.DETAILED);
    result.DetailsCaption = new string[] { "Left paragraph", "Right paragraph", "Match" };
    result.DetailsFormat = new string[] { "{0:L50}", "{0:L50}", "{0:P2}" };

    //Calculating the matching for each individual word within each paragraph pair.
    foreach (KeyValuePair<string[], Dictionary<string, int[]>> pair in paragraphCounter)
    {
        //NOTE(review): the child is replaced on every iteration, so after the loop only the one
        //created for the last pair stays attached to the result - confirm this is intended.
        result.Child = new DetailsMatchingScore();
        result.Child.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
        result.Child.DetailsFormat = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

        foreach (KeyValuePair<string, int[]> wordEntry in pair.Value)
        {
            int onLeft = wordEntry.Value[0];
            int onRight = wordEntry.Value[1];

            //Matching with word appearances: lowest count over highest count, zero if one side has none.
            float ratio;
            if (onLeft == 0 || onRight == 0)
            {
                ratio = 0;
            }
            else if (onLeft < onRight)
            {
                ratio = (float)onLeft / (float)onRight;
            }
            else
            {
                ratio = (float)onRight / (float)onLeft;
            }

            //Adding the details for each word.
            result.Child.AddMatch(ratio);
            result.Child.DetailsData.Add(new object[] { wordEntry.Key, onLeft, onRight, ratio });
        }

        //Adding the details for each paragraph pair.
        result.AddMatch(result.Child.Matching);
        result.DetailsData.Add(new object[] { pair.Key[0], pair.Key[1], result.Child.Matching });
    }

    return result;
}
/// <summary>
/// Computes the matching of every paragraph pair from its word counters: each word of the pair
/// is scored into a child score, whose matching is then used as the match of the pair.
/// </summary>
/// <param name="paragraphCounter">
/// For each paragraph pair (key: left paragraph at index 0, right paragraph at index 1), the
/// appearances of each word (value: left count at index 0, right count at index 1).
/// </param>
/// <returns>The matching's results, with one detail row per paragraph pair.</returns>
private ComparatorMatchingScore ComputeMatching(Dictionary<string[], Dictionary<string, int[]>> paragraphCounter)
{
    //Defining the results headers.
    ComparatorMatchingScore summary = new ComparatorMatchingScore(this.Left.Name, this.Right.Name, "Paragraph Word Counter", DisplayLevel.DETAILED)
    {
        DetailsCaption = new string[] { "Left paragraph", "Right paragraph", "Match" },
        DetailsFormat = new string[] { "{0:L50}", "{0:L50}", "{0:P2}" }
    };

    //Calculating the matching for each individual word within each paragraph pair.
    foreach (KeyValuePair<string[], Dictionary<string, int[]>> block in paragraphCounter)
    {
        string[] texts = block.Key;

        //Counting for each word inside a specific paragraph pair.
        //NOTE(review): the child is replaced on every iteration, so after the loop only the one
        //created for the last pair stays attached to the result - confirm this is intended.
        summary.Child = new DetailsMatchingScore();
        summary.Child.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
        summary.Child.DetailsFormat = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

        foreach (KeyValuePair<string, int[]> counted in block.Value)
        {
            int leftTimes = counted.Value[0];
            int rightTimes = counted.Value[1];

            //Matching with word appearances: lowest count over highest count, zero if one side has none.
            float wordMatch = 0;
            if (leftTimes != 0 && rightTimes != 0)
            {
                if (leftTimes < rightTimes)
                {
                    wordMatch = (float)leftTimes / (float)rightTimes;
                }
                else
                {
                    wordMatch = (float)rightTimes / (float)leftTimes;
                }
            }

            //Adding the details for each word.
            summary.Child.AddMatch(wordMatch);
            summary.Child.DetailsData.Add(new object[] { counted.Key, leftTimes, rightTimes, wordMatch });
        }

        //Adding the details for each paragraph pair.
        summary.AddMatch(summary.Child.Matching);
        summary.DetailsData.Add(new object[] { texts[0], texts[1], summary.Child.Matching });
    }

    return summary;
}
/// <summary>
/// Builds the matching score out of the per-paragraph word counters: the words of every paragraph
/// pair are scored into a child score and the child's matching is reported as the pair's match.
/// </summary>
/// <param name="paragraphCounter">
/// For each paragraph pair (key: left paragraph at index 0, right paragraph at index 1), the
/// appearances of each word (value: left count at index 0, right count at index 1).
/// </param>
/// <returns>The matching's results, with one detail row per paragraph pair.</returns>
private ComparatorMatchingScore ComputeMatching(Dictionary<string[], Dictionary<string, int[]>> paragraphCounter)
{
    //Defining the results headers.
    ComparatorMatchingScore total = new ComparatorMatchingScore(this.Left.Name, this.Right.Name, "Paragraph Word Counter", DisplayLevel.DETAILED);
    total.DetailsCaption = new string[] { "Left paragraph", "Right paragraph", "Match" };
    total.DetailsFormat = new string[] { "{0:L50}", "{0:L50}", "{0:P2}" };

    //Calculating the matching for each individual word of each paragraph pair.
    foreach (KeyValuePair<string[], Dictionary<string, int[]>> current in paragraphCounter)
    {
        Dictionary<string, int[]> words = current.Value;

        //Counting each word inside a specific paragraph pair.
        //NOTE(review): the child is replaced on every iteration, so after the loop only the one
        //created for the last pair stays attached to the result - confirm this is intended.
        total.Child = new DetailsMatchingScore();
        total.Child.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
        total.Child.DetailsFormat = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

        foreach (KeyValuePair<string, int[]> w in words)
        {
            int a = w.Value[0];
            int b = w.Value[1];
            bool missing = (a == 0 || b == 0);

            //Matching with word appearances: lowest count over highest count, zero if one side has none.
            float m;
            if (missing)
            {
                m = 0;
            }
            else if (a < b)
            {
                m = (float)a / (float)b;
            }
            else
            {
                m = (float)b / (float)a;
            }

            //Adding the details for each word.
            total.Child.AddMatch(m);
            total.Child.DetailsData.Add(new object[] { w.Key, a, b, m });
        }

        //Adding the details for each paragraph pair.
        total.AddMatch(total.Child.Matching);
        total.DetailsData.Add(new object[] { current.Key[0], current.Key[1], total.Child.Matching });
    }

    return total;
}
/// <summary>
/// Turns the per-paragraph word counters into a matching score. For every paragraph pair a length
/// match (distinct words present on each side) and a word match (per-word appearances, collected
/// into a child score) are computed and blended into the total match of the pair.
/// </summary>
/// <param name="paragraphCounter">
/// For each paragraph pair (key: left paragraph at index 0, right paragraph at index 1), the
/// appearances of each word (value: left count at index 0, right count at index 1).
/// </param>
/// <returns>The matching's results, with one detail row per paragraph pair.</returns>
private ComparatorMatchingScore ComputeMatching(Dictionary<string[], Dictionary<string, int[]>> paragraphCounter)
{
    //Defining the results headers.
    ComparatorMatchingScore result = new ComparatorMatchingScore("Paragraph Word Counter");
    result.DetailsCaption = new string[] { "Left paragraph", "Right paragraph", "Left legth", "Right length", "Length match", "Word match", "Total match" };
    result.DetailsFormat = new string[] { "{0:L50}", "{0:L50}", "{0}", "{0}", "{0:P2}", "{0:P2}", "{0:P2}" };

    //Calculating the matching for each individual word within each paragraph pair.
    foreach (KeyValuePair<string[], Dictionary<string, int[]>> pair in paragraphCounter)
    {
        Dictionary<string, int[]> words = pair.Value;

        //Matching with paragraph length: the length of a side is the amount of words appearing at least once on it.
        int leftSize = words.Values.Count(x => x[0] > 0);
        int rightSize = words.Values.Count(x => x[1] > 0);
        float sizeRatio = (leftSize == 0 || rightSize == 0)
            ? 0
            : (leftSize < rightSize ? (float)leftSize / (float)rightSize : (float)rightSize / (float)leftSize);

        //Counting for each word inside a specific paragraph pair.
        //NOTE(review): the child is replaced on every iteration, so after the loop only the one
        //created for the last pair stays attached to the result - confirm this is intended.
        result.Child = new DetailsMatchingScore();
        result.Child.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
        result.Child.DetailsFormat = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

        foreach (KeyValuePair<string, int[]> wordEntry in words)
        {
            int onLeft = wordEntry.Value[0];
            int onRight = wordEntry.Value[1];

            //Matching with word appearances: lowest count over highest count, zero if one side has none.
            float wordRatio = (onLeft == 0 || onRight == 0)
                ? 0
                : (onLeft < onRight ? (float)onLeft / (float)onRight : (float)onRight / (float)onLeft);

            //Adding the details for each word.
            result.Child.AddMatch(wordRatio);
            result.Child.DetailsData.Add(new object[] { wordEntry.Key, onLeft, onRight, wordRatio });
        }

        //The total match is: 75% for words - 25% for length (must be tested in order to tweak).
        float blended = (result.Child.Matching * 0.75f + sizeRatio * 0.25f);

        //Adding the details for each paragraph pair.
        result.AddMatch(blended);
        result.DetailsData.Add(new object[] { pair.Key[0], pair.Key[1], leftSize, rightSize, sizeRatio, result.Child.Matching, blended });
    }

    return result;
}
/// <summary>
/// Computes the matching of every paragraph pair from its word counters. Each pair gets a length
/// match (distinct words present on each side) and a word match (per-word appearances, collected
/// into a child score), which are blended into the total match of the pair.
/// </summary>
/// <param name="paragraphCounter">
/// For each paragraph pair (key: left paragraph at index 0, right paragraph at index 1), the
/// appearances of each word (value: left count at index 0, right count at index 1).
/// </param>
/// <returns>The matching's results, with one detail row per paragraph pair.</returns>
private ComparatorMatchingScore ComputeMatching(Dictionary<string[], Dictionary<string, int[]>> paragraphCounter)
{
    //Defining the results headers.
    ComparatorMatchingScore summary = new ComparatorMatchingScore("Paragraph Word Counter")
    {
        DetailsCaption = new string[] { "Left paragraph", "Right paragraph", "Left legth", "Right length", "Length match", "Word match", "Total match" },
        DetailsFormat = new string[] { "{0:L50}", "{0:L50}", "{0}", "{0}", "{0:P2}", "{0:P2}", "{0:P2}" }
    };

    //Calculating the matching for each individual word within each paragraph pair.
    foreach (KeyValuePair<string[], Dictionary<string, int[]>> block in paragraphCounter)
    {
        string[] texts = block.Key;
        Dictionary<string, int[]> counted = block.Value;

        //Matching with the paragraph length: a side's length is the amount of words appearing at least once on it.
        int leftWords = counted.Values.Count(x => x[0] > 0);
        int rightWords = counted.Values.Count(x => x[1] > 0);

        float lengthRatio = 0;
        if (leftWords != 0 && rightWords != 0)
        {
            lengthRatio = (leftWords < rightWords ? (float)leftWords / (float)rightWords : (float)rightWords / (float)leftWords);
        }

        //Counting for each word inside a specific paragraph pair.
        //NOTE(review): the child is replaced on every iteration, so after the loop only the one
        //created for the last pair stays attached to the result - confirm this is intended.
        summary.Child = new DetailsMatchingScore();
        summary.Child.DetailsCaption = new string[] { "Word", "Left count", "Right count", "Match" };
        summary.Child.DetailsFormat = new string[] { "{0}", "{0}", "{0}", "{0:P2}" };

        foreach (KeyValuePair<string, int[]> item in counted)
        {
            int leftTimes = item.Value[0];
            int rightTimes = item.Value[1];

            //Matching with word appearances: lowest count over highest count, zero if one side has none.
            float wordRatio = 0;
            if (leftTimes != 0 && rightTimes != 0)
            {
                wordRatio = (leftTimes < rightTimes ? (float)leftTimes / (float)rightTimes : (float)rightTimes / (float)leftTimes);
            }

            //Adding the details of each word.
            summary.Child.AddMatch(wordRatio);
            summary.Child.DetailsData.Add(new object[] { item.Key, leftTimes, rightTimes, wordRatio });
        }

        //The total match is: 75% for the words - 25% for the length (must be tested in order to tweak).
        float overall = (summary.Child.Matching * 0.75f + lengthRatio * 0.25f);

        //Adding the details of each paragraph pair.
        summary.AddMatch(overall);
        summary.DetailsData.Add(new object[] { texts[0], texts[1], leftWords, rightWords, lengthRatio, summary.Child.Matching, overall });
    }

    return summary;
}