/// <summary>
/// Classifies the already tokenized input as language A, language B or neither, and stores the
/// outcome in <c>resultMode</c>, <c>isLanguageA</c>/<c>isLanguageB</c>, the three score fields
/// and <c>ratioA</c>.
/// </summary>
/// <param name="table">
/// Optional weight table. When not <c>null</c>, every classified token contributes its TF-IDF value
/// plus 0.1 to the matching score; when <c>null</c>, the scores are plain token counts.
/// </param>
/// <param name="sortByFrequency">
/// When <c>true</c>, the tokens are registered in <c>tokenFrequency</c> and the test sample is taken
/// from <c>tokenFrequency.getSorted()</c>; otherwise the sample is taken in input order.
/// </param>
protected void evaluate(IWeightTable table, bool sortByFrequency = true)
{
    // Nothing to evaluate: report it and leave the remaining state untouched.
    if (inputTokens.Count == 0)
    {
        ratioA = 0;
        resultMode = textEvaluationResultEnum.notEnoughInformation;
        return;
    }

    List<string> sort;
    if (sortByFrequency)
    {
        // Register the tokens exactly once. The previous code repeated this call for every
        // token, multiplying each count by the number of tokens and doing quadratic work.
        tokenFrequency.AddInstanceRange(inputTokens);
        sort = tokenFrequency.getSorted(); // presumably ordered by frequency - confirm against getSorted()
    }
    else
    {
        sort = new List<string>();
        sort.AddRange(inputTokens);
    }

    // Only the first testSize tokens (or fewer, if the input is shorter) are examined.
    int take = Math.Min(sort.Count, testSize);
    testTokens.AddRange(sort.Take(take));

    foreach (string tkn in testTokens)
    {
        bool testA = basicLanguageTools.testBoolean(languageA, tkn, basicLanguageCheck.spellCheck);
        bool testB = basicLanguageTools.testBoolean(languageB, tkn, basicLanguageCheck.spellCheck);
        bool testNotAB = (!testA) && (!testB);

        // Spell check rejected the token for both languages: fall back to the caches kept on
        // the parent and, failing that, to a lexicon query. Tokens already cached as
        // "neither language" are not re-examined.
        if (testNotAB && !parent.langNotABTokens.Contains(tkn))
        {
            testA = parent.langATokens.Contains(tkn);
            testB = parent.langBTokens.Contains(tkn);

            if ((!testA) && (!testB))
            {
                lexiconResponse lemmas = parent.manager.getLexiconItems(tkn);
                if (lemmas.type != lexiconResponse.responseType.failedQueries)
                {
                    // Any lexicon response other than failedQueries counts the token as
                    // language A and caches that verdict on the parent.
                    testA = true;
                    parent.langATokens.AddUnique(tkn);
                }
            }

            testNotAB = (!testA) && (!testB);
            if (testNotAB)
            {
                parent.langNotABTokens.AddUnique(tkn);
            }
        }

        if (testA)
        {
            langATokens.Add(tkn);
        }

        if (testB)
        {
            langBTokens.Add(tkn);
        }

        if (testA && testB)
        {
            langABTokens.Add(tkn);
        }

        if (testNotAB)
        {
            langNotABTokens.AddUnique(tkn);
        }
    }

    if (table != null)
    {
        // Weighted scoring: TF-IDF of each token plus a constant 0.1 per token.
        foreach (string tkA in langATokens)
        {
            double sc = table.GetTF_IDF(tkA);
            scoreForA += sc + 0.1;
        }

        foreach (string tkB in langBTokens)
        {
            double sc = table.GetTF_IDF(tkB);
            scoreForB += sc + 0.1;
        }

        foreach (string tkN in langNotABTokens)
        {
            double sc = table.GetTF_IDF(tkN);
            scoreForNotAB += sc + 0.1;
        }
    }
    else
    {
        // Unweighted scoring: one point per token.
        scoreForA = langATokens.Count();
        scoreForB = langBTokens.Count();
        scoreForNotAB = langNotABTokens.Count();
    }

    // A language wins only when its score is strictly greater than both other scores.
    if ((scoreForA > scoreForB) && (scoreForA > scoreForNotAB))
    {
        isLanguageA = true;
        resultMode = textEvaluationResultEnum.languageA;
    }

    if ((scoreForB > scoreForA) && (scoreForB > scoreForNotAB))
    {
        isLanguageB = true;
        resultMode = textEvaluationResultEnum.languageB;
    }

    if ((!isLanguageA) && (!isLanguageB))
    {
        resultMode = textEvaluationResultEnum.noneOfBoth;
    }
    else if (scoreForA == scoreForB)
    {
        // NOTE(review): a flag set above implies the scores differ, so this branch is only
        // reachable if isLanguageA/isLanguageB were already true on entry. An A/B tie
        // therefore ends up as noneOfBoth - confirm whether "uncertain" was intended.
        resultMode = textEvaluationResultEnum.uncertain;
    }

    // ratioA is the share of the A score in the sum of all three scores.
    if (scoreForA == 0)
    {
        ratioA = 0;
    }
    else
    {
        double div = (double)(scoreForA + scoreForB + scoreForNotAB);
        if (div == 0)
        {
            ratioA = 1;
        }
        else
        {
            ratioA = (double)scoreForA / div;
        }
    }
}
/// <summary>
/// Reads one score per term from <paramref name="table"/> and reduces the collected values to a
/// single number.
/// </summary>
/// <param name="terms">Terms to look up in the table.</param>
/// <param name="table">Weight table queried for every term.</param>
/// <param name="scoreToUse">
/// Table column to read for each term. <c>none</c>, and any column not handled below, collects
/// no values.
/// </param>
/// <param name="aggregation">Reduction applied to the collected values.</param>
/// <returns>
/// The aggregated score. When no values were collected, every supported aggregation returns 0.
/// </returns>
/// <exception cref="NotImplementedException">
/// <paramref name="terms"/> is not empty and <paramref name="scoreToUse"/> is one of
/// <c>words</c>, <c>normalizedSemanticDistance</c>, <c>semanticDistance</c>, <c>termLemma</c> or
/// <c>termName</c>.
/// </exception>
/// <exception cref="dataException"><paramref name="aggregation"/> is not supported.</exception>
public static double GetScoreAggregate(this IEnumerable<IWeightTableTerm> terms, IWeightTable table, termTableColumns scoreToUse = termTableColumns.tf_idf, dataPointAggregationType aggregation = dataPointAggregationType.sum)
{
    List<double> output = new List<double>();

    foreach (IWeightTableTerm term in terms)
    {
        switch (scoreToUse)
        {
            case termTableColumns.cw:
                output.Add(table.GetWeight(term));
                break;

            case termTableColumns.df:
                output.Add(table.GetBDFreq(term));
                break;

            case termTableColumns.freqAbs:
                output.Add(table.GetAFreq(term));
                break;

            case termTableColumns.freqNorm:
                output.Add(table.GetNFreq(term));
                break;

            case termTableColumns.idf:
                output.Add(table.GetIDF(term));
                break;

            case termTableColumns.ncw:
                output.Add(table.GetNWeight(term));
                break;

            case termTableColumns.none:
                break;

            case termTableColumns.words:
            case termTableColumns.normalizedSemanticDistance:
            case termTableColumns.semanticDistance:
            case termTableColumns.termLemma:
            case termTableColumns.termName:
                throw new NotImplementedException();

            case termTableColumns.tf_idf:
                output.Add(table.GetTF_IDF(term));
                break;
        }
    }

    // Average/Max/Min throw InvalidOperationException on an empty sequence, so the empty case
    // (no terms, or scoreToUse == none) is answered with 0, matching what sum and count yield.
    bool isEmpty = output.Count == 0;

    switch (aggregation)
    {
        case dataPointAggregationType.avg:
            return isEmpty ? 0 : output.Average();

        case dataPointAggregationType.count:
            return output.Count;

        case dataPointAggregationType.max:
            return isEmpty ? 0 : output.Max();

        case dataPointAggregationType.min:
            return isEmpty ? 0 : output.Min();

        case dataPointAggregationType.range:
            return isEmpty ? 0 : output.Max() - output.Min();

        case dataPointAggregationType.sum:
            return output.Sum();

        default:
            throw new dataException("Operation not supported [" + aggregation.toString() + "]", null, table, "Aggregation operation not supported");
    }
}