/// <summary>
/// Queries the table for the specified terms and returns an aggregated score over the
/// matched entries. Only numeric columns are supported as the score source.
/// </summary>
/// <param name="table">The table to query.</param>
/// <param name="queryTerms">Terms to test against the table; only terms found are used in the calculation.</param>
/// <param name="scoreToUse">What numeric property of each matched term to use for aggregation.</param>
/// <param name="aggregation">The aggregation type.</param>
/// <returns>The aggregated score of the matched terms.</returns>
public static double GetScoreForMatch(this IWeightTable table, IEnumerable<string> queryTerms, termTableColumns scoreToUse = termTableColumns.tf_idf, dataPointAggregationType aggregation = dataPointAggregationType.sum)
{
    // GetMatches already builds the result list; the original allocated a throw-away
    // List<IWeightTableTerm> first and immediately overwrote the reference.
    List<IWeightTableTerm> matches = table.GetMatches(queryTerms);
    return matches.GetScoreAggregate(table, scoreToUse, aggregation);
}
/// <summary>
/// Adds the specified document as a new member table and copies all terms it contains.
/// </summary>
/// <param name="document">The document whose terms are copied.</param>
/// <returns>The newly created table holding the copied terms.</returns>
public IWeightTable Add(IWeightTable document)
{
    // Derive a name that is unique within this collection before creating the table.
    string uniqueName = document.name.makeUniqueName(documents.ContainsKey, "D4", 10000);
    TWeightTable target = (TWeightTable)AddTable(uniqueName);

    var compiled = document as weightTableCompiled;
    if (compiled != null)
    {
        // Compiled tables expose their precomputed term entries directly.
        foreach (weightTableTermCompiled compiledTerm in compiled.GetList())
        {
            target.Add(compiledTerm);
        }
    }
    else
    {
        // Generic path: copy each term together with its absolute frequency points.
        foreach (IWeightTableTerm term in document)
        {
            target.Add(term, term.AFreqPoints);
        }
    }
    return target;
}
/// <summary>
/// Gets the cross section of matched terms between this table and <paramref name="secondTable"/>.
/// </summary>
/// <param name="secondTable">The second table.</param>
/// <param name="thisAgainstSecond">if set to <c>true</c>, this table's terms are matched against
/// the second table; otherwise the second table's terms are matched against this one.</param>
/// <returns>The list of matched terms (taken from the table being queried).</returns>
public List<IWeightTableTerm> GetCrossSection(IWeightTable secondTable, bool thisAgainstSecond = false)
{
    List<IWeightTableTerm> matched = new List<IWeightTableTerm>();
    if (thisAgainstSecond)
    {
        foreach (IWeightTableTerm term in this.ToList())
        {
            // FIX: the original called GetMatchTerm(term) on this table first and
            // immediately discarded the result — a wasted lookup per term.
            var match = secondTable.GetMatchTerm(term);
            if (match != null)
            {
                matched.Add(match);
            }
        }
    }
    else
    {
        foreach (IWeightTableTerm term in secondTable.ToList())
        {
            var match = GetMatchTerm(term);
            if (match != null)
            {
                matched.Add(match);
            }
        }
    }
    return matched;
}
/// <summary>
/// Returns the table entries matching any surface form of the query terms.
/// </summary>
/// <param name="table">The table to query.</param>
/// <param name="queryTerms">The query terms; each is expanded into all of its forms.</param>
/// <returns>The matching term entries.</returns>
public static List<IWeightTableTerm> GetMatches(this IWeightTable table, IEnumerable<IWeightTableTerm> queryTerms)
{
    // FIX: the original allocated an unused List<IWeightTableTerm> named output — removed.
    // Expand each query term into all of its forms, de-duplicated.
    List<string> expandedQuery = new List<string>();
    foreach (IWeightTableTerm qt in queryTerms)
    {
        expandedQuery.AddUnique(qt.GetAllForms());
    }
    return table.GetMatches(expandedQuery);
}
/// <summary>
/// Returns the term entries of the table that match the given query strings.
/// </summary>
/// <param name="table">The table to query.</param>
/// <param name="queryTerms">The query strings.</param>
/// <returns>The matching term entries, without duplicates.</returns>
public static List<IWeightTableTerm> GetMatches(this IWeightTable table, IEnumerable<string> queryTerms)
{
    var matches = new List<IWeightTableTerm>();
    foreach (string query in queryTerms)
    {
        var hit = table.GetMatchByString(query);
        if (hit != null)
        {
            collectionExtensions.AddUnique(matches, hit);
        }
    }
    return matches;
}
/// <summary>
/// Adds an item to the collection. Only <see cref="IWeightTable"/> instances are
/// registered (keyed by their name); any other item type is silently ignored.
/// </summary>
/// <param name="item">The item to add.</param>
public void Add(object item)
{
    var asTable = item as IWeightTable;
    if (asTable != null)
    {
        documents.Add(asTable.name, asTable);
        return;
    }
    // NOTE(review): the original also tested for IWeightTableTerm but left that
    // branch empty — individual terms are not handled here.
}
/// <summary>
/// Non-semantic matching: registers the term into the aggregate document and votes for it.
/// </summary>
/// <param name="table">The table the term came from.</param>
/// <param name="term">The term.</param>
/// <param name="callTableLevelAdd">Also add the term to the supplied table; usually you don't want that.</param>
public void Add(IWeightTable table, IWeightTableTerm term, bool callTableLevelAdd = false)
{
    if (callTableLevelAdd)
    {
        table.Add(term);
    }

    // Terms coming from the aggregate document itself are not re-aggregated.
    if (table == AggregateDocument)
    {
        return;
    }

    var aggregated = AggregateDocument.Add(term);
    counter.AddVote(table, aggregated);
}
/// <summary>
/// Copies terms from an external document: adds new entries if needed and, when
/// <paramref name="CopyFrequencies"/> is set, carries over their absolute frequencies.
/// </summary>
/// <param name="source">The source document.</param>
/// <param name="CopyFrequencies">if set to <c>true</c>, the source absolute frequencies are copied.</param>
public void AddExternalDocument(IWeightTable source, bool CopyFrequencies)
{
    // FIX: the original built an unused List<string> via source.GetAllTermString() and
    // computed unused before/after counts (c, a) — removed as dead code.
    // (Assumes GetAllTermString() has no side effects — TODO confirm.)
    foreach (var nt in source.GetAllTerms())
    {
        if (CopyFrequencies)
        {
            Add(source.GetMatchTermByName(nt), source.GetAFreq(nt));
        }
        else
        {
            Add(source.GetMatchTermByName(nt));
        }
    }
}
/// <summary>
/// Evaluates the collected <c>inputTokens</c> against language A and language B and sets
/// the result fields (<c>resultMode</c>, <c>isLanguageA</c>/<c>isLanguageB</c>, the three
/// score fields and <c>ratioA</c>).
/// </summary>
/// <param name="table">Optional weight table; when supplied, per-token TF-IDF scores are
/// used instead of plain token counts.</param>
/// <param name="sortByFrequency">If <c>true</c>, tokens are ranked by frequency before the
/// top <c>testSize</c> of them are tested.</param>
protected void evaluate(IWeightTable table, bool sortByFrequency = true)
{
    // Nothing to evaluate -> bail out with a "not enough information" verdict.
    if (inputTokens.Count == 0)
    {
        ratioA = 0;
        resultMode = textEvaluationResultEnum.notEnoughInformation;
        return;
    }
    var sort = new List<string>();
    if (sortByFrequency)
    {
        // NOTE(review): AddInstanceRange(inputTokens) is invoked once per token, so every
        // token is counted Count times. Relative ordering is unaffected (all counts scale
        // equally) but this is O(n^2) work — probably meant AddInstance(s). TODO confirm.
        foreach (var s in inputTokens)
        {
            tokenFrequency.AddInstanceRange(inputTokens);
        }
        sort = tokenFrequency.getSorted();
    }
    else
    {
        sort.AddRange(inputTokens);
    }
    // Test at most testSize tokens.
    int take = Math.Min(sort.Count(), testSize);
    testTokens.AddRange(sort.Take(take));
    foreach (string tkn in testTokens)
    {
        bool testA = false;
        bool testB = false;
        // Spell-check the token against both languages.
        testA = basicLanguageTools.testBoolean(languageA, tkn, basicLanguageCheck.spellCheck);
        testB = basicLanguageTools.testBoolean(languageB, tkn, basicLanguageCheck.spellCheck);
        bool testAB = testA && testB;
        bool testNotAB = (!testA) && (!testB);
        // Token unknown to both spell-checkers: consult the parent caches and the lexicon.
        // The loop runs at most twice — a token added to parent.langNotABTokens below is
        // caught by the Contains() check on the next pass and breaks out.
        while (testNotAB)
        {
            if (parent.langNotABTokens.Contains(tkn))
            {
                testNotAB = true;
                break;
            }
            else
            {
                testA = parent.langATokens.Contains(tkn);
                testB = parent.langBTokens.Contains(tkn);
                testNotAB = (!testA) && (!testB);
                if (testNotAB)
                {
                    // Last resort: lexicon lookup. Any non-failed response is treated as
                    // evidence for language A only — presumably the lexicon covers
                    // language A; TODO confirm.
                    lexiconResponse lemmas = parent.manager.getLexiconItems(tkn);
                    if (lemmas.type != lexiconResponse.responseType.failedQueries)
                    {
                        testA = true;
                        parent.langATokens.AddUnique(tkn);
                    }
                }
                testNotAB = (!testA) && (!testB);
                if (testNotAB)
                {
                    parent.langNotABTokens.AddUnique(tkn);
                }
            }
            testNotAB = (!testA) && (!testB);
        }
        // Classify the token into the per-instance buckets.
        if (testA)
        {
            langATokens.Add(tkn);
        }
        if (testB)
        {
            langBTokens.Add(tkn);
        }
        testAB = testA && testB;
        if (testAB)
        {
            langABTokens.Add(tkn);
        }
        if (testNotAB)
        {
            langNotABTokens.AddUnique(tkn);
        }
    }
    if (table != null)
    {
        // Weighted scoring: TF-IDF per token, +0.1 so every classified token contributes.
        foreach (string tkA in langATokens)
        {
            double sc = table.GetTF_IDF(tkA);
            scoreForA += sc + 0.1;
        }
        foreach (string tkB in langBTokens)
        {
            double sc = table.GetTF_IDF(tkB);
            scoreForB += sc + 0.1;
        }
        foreach (string tkN in langNotABTokens)
        {
            double sc = table.GetTF_IDF(tkN);
            scoreForNotAB += sc + 0.1;
        }
    }
    else
    {
        // No table: fall back to simple token counts.
        scoreForA = langATokens.Count();
        scoreForB = langBTokens.Count();
        scoreForNotAB = langNotABTokens.Count();
    }
    if ((scoreForA > scoreForB) && (scoreForA > scoreForNotAB))
    {
        isLanguageA = true;
        resultMode = textEvaluationResultEnum.languageA;
    }
    if ((scoreForB > scoreForA) && (scoreForB > scoreForNotAB))
    {
        isLanguageB = true;
        resultMode = textEvaluationResultEnum.languageB;
    }
    if ((!isLanguageA) && (!isLanguageB))
    {
        resultMode = textEvaluationResultEnum.noneOfBoth;
    }
    else
    {
        // NOTE(review): floating-point equality — a tie is only detected on exact equality.
        if (scoreForA == scoreForB)
        {
            resultMode = textEvaluationResultEnum.uncertain;
        }
    }
    // ratioA = share of language A in the total score; 0 when A scored nothing.
    if (scoreForA == 0)
    {
        ratioA = 0;
    }
    else
    {
        double div = (double)(scoreForA + scoreForB + scoreForNotAB);
        if (div == 0)
        {
            ratioA = 1;
        }
        else
        {
            ratioA = (double)scoreForA / div;
        }
    }
}
/// <summary>
/// Stores the supplied tokens as the evaluation input and runs the evaluation.
/// </summary>
/// <param name="tokens">Tokens to evaluate; the list reference is kept, not copied.</param>
/// <param name="table">Optional weight table forwarded to the evaluation.</param>
/// <param name="sortByFrequency">Forwarded to the evaluation.</param>
public void evaluateTokens(List<string> tokens, IWeightTable table, bool sortByFrequency = true)
{
    inputTokens = tokens;
    evaluate(table, sortByFrequency);
}
/// <summary>
/// Extracts the selected numeric score (<paramref name="scoreToUse"/>) of each term from
/// <paramref name="table"/> and aggregates the collected values with <paramref name="aggregation"/>.
/// </summary>
/// <param name="terms">Terms whose scores are collected.</param>
/// <param name="table">Table providing the scores.</param>
/// <param name="scoreToUse">Which numeric column of the table to read.</param>
/// <param name="aggregation">Aggregation applied over the collected values.</param>
/// <returns>The aggregated score.</returns>
/// <exception cref="NotImplementedException">Thrown for non-numeric columns.</exception>
/// <exception cref="dataException">Thrown for unsupported aggregation types.</exception>
public static double GetScoreAggregate(this IEnumerable<IWeightTableTerm> terms, IWeightTable table, termTableColumns scoreToUse = termTableColumns.tf_idf, dataPointAggregationType aggregation = dataPointAggregationType.sum)
{
    // FIX: removed the unreachable break statements after throw/return and the
    // unreachable trailing return(0) paths from the original.
    List<double> output = new List<double>();
    foreach (IWeightTableTerm term in terms)
    {
        switch (scoreToUse)
        {
            case termTableColumns.cw:
                output.Add(table.GetWeight(term));
                break;
            case termTableColumns.df:
                output.Add(table.GetBDFreq(term));
                break;
            case termTableColumns.freqAbs:
                output.Add(table.GetAFreq(term));
                break;
            case termTableColumns.freqNorm:
                output.Add(table.GetNFreq(term));
                break;
            case termTableColumns.idf:
                output.Add(table.GetIDF(term));
                break;
            case termTableColumns.ncw:
                output.Add(table.GetNWeight(term));
                break;
            case termTableColumns.none:
                // Deliberately collects no score for this term.
                break;
            case termTableColumns.words:
            case termTableColumns.normalizedSemanticDistance:
            case termTableColumns.semanticDistance:
            case termTableColumns.termLemma:
            case termTableColumns.termName:
                // Non-numeric columns cannot be aggregated.
                throw new NotImplementedException();
            case termTableColumns.tf_idf:
                output.Add(table.GetTF_IDF(term));
                break;
        }
    }
    // NOTE: avg/min/max/range throw InvalidOperationException on an empty value list
    // (e.g. when scoreToUse == none) — same as the original behavior.
    switch (aggregation)
    {
        case dataPointAggregationType.avg:
            return output.Average();
        case dataPointAggregationType.count:
            return output.Count();
        case dataPointAggregationType.max:
            return output.Max();
        case dataPointAggregationType.min:
            return output.Min();
        case dataPointAggregationType.range:
            return output.Max() - output.Min();
        case dataPointAggregationType.sum:
            return output.Sum();
        default:
            throw new dataException("Operation not supported [" + aggregation.toString() + "]", null, table, "Aggregation operation not supported");
    }
}
/// <summary>
/// Adds the term's absolute-frequency points to the vote count for the given table
/// and returns the updated count.
/// </summary>
/// <param name="targetTable">The table the vote is attributed to.</param>
/// <param name="term">The term supplying the points.</param>
/// <returns>The updated vote count for the term/table pair.</returns>
public int AddVote(IWeightTable targetTable, IWeightTableTerm term)
{
    // FIX: the original read the [term, targetTable] indexer three times (get,
    // get inside the set expression, get again for the return). Compute once.
    // (Assumes the indexer setter stores the value unmodified — TODO confirm.)
    int updated = this[term, targetTable] + term.AFreqPoints;
    this[term, targetTable] = updated;
    return updated;
}
/// <summary>
/// Creates a match collection over the two tables being compared.
/// </summary>
/// <param name="__first">The first (left-hand) table.</param>
/// <param name="__second">The second (right-hand) table.</param>
public weightTableMatchCollection(IWeightTable __first, IWeightTable __second)
{
    first = __first;
    second = __second;
}
/// <summary>
/// Not supported by this type — copying terms from an external document is not implemented here.
/// </summary>
/// <param name="source">The source document (unused).</param>
/// <param name="CopyFrequencies">Ignored.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public void AddExternalDocument(IWeightTable source, bool CopyFrequencies)
{
    throw new NotImplementedException();
}
/// <summary>
/// Not supported by this type — cross-section matching is not implemented here.
/// </summary>
/// <param name="secondTable">The second table (unused).</param>
/// <param name="thisAgainstSecond">Ignored.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public List<IWeightTableTerm> GetCrossSection(IWeightTable secondTable, bool thisAgainstSecond = false)
{
    throw new NotImplementedException();
}