/// <summary>
/// Resolves <c>queryTerms</c> against the <c>table</c> via <c>GetLemmasSorted</c> and returns the <c>lemmaForm</c> string of every matched lemma, in the order that call produced.
/// </summary>
/// <param name="table">webLemmaTermTable to be queried</param>
/// <param name="queryTerms">The query terms, strings looked up in <c>table</c></param>
/// <param name="takeTopN">Number of top ranked lemmas, by <see cref="termLemmaBase.weight"/>, descending. Passed through unchanged to <c>GetLemmasSorted</c>.</param>
/// <returns>List with the <c>lemmaForm</c> of each matched lemma</returns>
public static List<String> GetLemmasInStringSorted(this webLemmaTermTable table, IEnumerable<String> queryTerms, Int32 takeTopN = -1)
{
    List<String> lemmaForms = new List<String>();

    // Project each ranked lemma onto its textual form, preserving the ranking order.
    foreach (var rankedLemma in table.GetLemmasSorted(queryTerms, takeTopN))
    {
        lemmaForms.Add(rankedLemma.lemmaForm);
    }

    return lemmaForms;
}
/// <summary>
/// Collects the lemmas of this table that also resolve in the specified <c>tableB</c>.
/// Each lemma returned by <c>GetList()</c> is looked up in <c>tableB</c> by its <c>nominalForm</c>;
/// when a lemma is found there, the (this-table lemma, tableB lemma) pair is added to the result.
/// </summary>
/// <param name="tableB">The table to match against.</param>
/// <param name="logger">The logger. Not used by this method.</param>
/// <returns>Collection of lemma pairs found in both tables; empty when nothing matches.</returns>
public webLemmaTermPairCollection GetMatchingTerms(webLemmaTermTable tableB, ILogBuilder logger = null)
{
    var matches = new webLemmaTermPairCollection();

    foreach (webLemmaTerm own in GetList())
    {
        // Look up the counterpart in the other table using the nominal form as the key term.
        webLemmaTerm counterpart = tableB.ResolveLemmaForTerm(own.nominalForm);

        if (counterpart != null)
        {
            matches.Add(own, counterpart);
        }
    }

    return matches;
}
/// <summary>
/// Builds a report table from <c>table.GetDataTable()</c> with rows ordered by <c>termFrequency</c>, descending.
/// The rows are copied into a schema clone of the source table; when <c>limit</c> is positive an extra
/// note stating the row limit is attached to the result.
/// </summary>
/// <param name="table">The table whose data table is sorted.</param>
/// <param name="limit">Row limit handed to <c>CopyRowsFrom</c>; values of zero or less add no note to the result.</param>
/// <returns>New data table holding the sorted rows.</returns>
public static DataTable GetDataTableSorted(this webLemmaTermTable table, Int32 limit = -1)
{
    DataTable source = table.GetDataTable();

    // Sort through the table's default view, then materialise the ordered rows.
    DataView view = source.DefaultView;
    view.Sort = "termFrequency desc";
    DataTable ordered = view.ToTable();

    // NOTE(review): presumably CopyRowsFrom copies every row when limit is -1 - confirm against the helper.
    DataTable report = source.GetClonedShema<DataTable>(true);
    report.CopyRowsFrom(ordered, 0, limit);

    if (limit <= 0)
    {
        return report;
    }

    String note = "The report contains first [" + limit.ToString("D5") + "] rows";
    report.AddExtra(note);
    return report;
}
/// <summary>
/// Merges several web lemma term tables into a single new table. The lemmas come from
/// <c>GetMergedLemmaDictionary</c>; the resulting table should be recomputed afterwards because
/// only absolute values are set for the lemmas.
/// </summary>
/// <param name="tables">The tables to merge.</param>
/// <param name="name">The name given to the merged table.</param>
/// <param name="logger">The logger; receives a message before and after the merge.</param>
/// <returns>New table containing every lemma of the merged dictionary.</returns>
public static webLemmaTermTable GetMergedLemmaTable(this List<webLemmaTermTable> tables, String name, ILogBuilder logger)
{
    logger.log("Merging [" + tables.Count + "]");

    Dictionary<String, webLemmaTerm> mergedByKey = GetMergedLemmaDictionary(tables, logger);

    webLemmaTermTable merged = new webLemmaTermTable(name);

    // Only the merged lemma entries are needed; the dictionary keys are not used here.
    foreach (webLemmaTerm lemma in mergedByKey.Values)
    {
        merged.Add(lemma);
    }

    logger.log("Merged lemma table created [" + merged.name + "] with [" + merged.Count + "] entries.");

    return merged;
}
/// <summary>
/// Gets the lemmas, matching <c>queryTerms</c>, sorted by weight (in the <c>table</c>), descending.
/// </summary>
/// <remarks>
/// Every query term is resolved independently, so a lemma is listed once for each query term that resolves to it.
/// </remarks>
/// <param name="table">webLemmaTermTable to be queried</param>
/// <param name="queryTerms">The query terms, strings to query <c>table</c>; terms that resolve to no lemma are skipped</param>
/// <param name="takeTopN">Number of top ranked lemmas, by <see cref="termLemmaBase.weight"/>, descending. Any negative value (default <c>-1</c>) means "no limit"; <c>0</c> yields an empty list.</param>
/// <returns>List of matched webLemmaTerms, highest weight first</returns>
public static List<webLemmaTerm> GetLemmasSorted(this webLemmaTermTable table, IEnumerable<String> queryTerms, Int32 takeTopN = -1)
{
    // ------------ selection of key terms
    List<webLemmaTerm> terms = new List<webLemmaTerm>();

    foreach (String tkn in queryTerms)
    {
        var lm = table.ResolveLemmaForTerm(tkn);
        if (lm != null)
        {
            terms.Add(lm);
        }
    }

    // Descending by weight: the comparison operands are swapped on purpose.
    terms.Sort((x, y) => y.weight.CompareTo(x.weight));

    // Treat every negative value as "no limit". Previously only -1 was recognised, so e.g. -2
    // reached Take() with a negative count and silently produced an empty result.
    if (takeTopN < 0)
    {
        return terms;
    }

    // Take() already stops at the end of the list, so no explicit clamp to terms.Count is needed.
    return terms.Take(takeTopN).ToList();
}