Exemple #1
0
        /// <summary>
        /// Resolves <c>queryTerms</c> against the <c>table</c> via <c>GetLemmasSorted</c> and returns the
        /// <c>lemmaForm</c> of every lemma it yields, in the same order.
        /// </summary>
        /// <param name="table">webLemmaTermTable to be queried</param>
        /// <param name="queryTerms">The query terms, strings to look up in <c>table</c></param>
        /// <param name="takeTopN">Number of top ranked lemmas (by <see cref="termLemmaBase.weight"/>) to keep; forwarded unchanged to <c>GetLemmasSorted</c>.</param>
        /// <returns>List with the <c>lemmaForm</c> string of each matched lemma</returns>
        public static List <String> GetLemmasInStringSorted(this webLemmaTermTable table, IEnumerable <String> queryTerms, Int32 takeTopN = -1)
        {
            var matchedLemmas = table.GetLemmasSorted(queryTerms, takeTopN);

            List <String> lemmaForms = new List <String>();
            foreach (var matched in matchedLemmas)
            {
                // Only the textual lemma form is exposed to the caller.
                lemmaForms.Add(matched.lemmaForm);
            }

            return lemmaForms;
        }
        /// <summary>
        /// Pairs every lemma of this table with the lemma that <c>tableB</c> resolves for the same nominal form.
        /// Lemmas for which <c>tableB</c> resolves nothing are skipped.
        /// </summary>
        /// <param name="tableB">The table to look up each nominal form in.</param>
        /// <param name="logger">The logger; not used by this method's body.</param>
        /// <returns>Collection of (lemma from this table, lemma from <c>tableB</c>) pairs</returns>
        public webLemmaTermPairCollection GetMatchingTerms(webLemmaTermTable tableB, ILogBuilder logger = null)
        {
            webLemmaTermPairCollection pairs = new webLemmaTermPairCollection();

            foreach (webLemmaTerm own in GetList())
            {
                // The nominal form is the key used to find the counterpart in the other table.
                webLemmaTerm counterpart = tableB.ResolveLemmaForTerm(own.nominalForm);

                if (counterpart != null)
                {
                    pairs.Add(own, counterpart);
                }
            }

            return pairs;
        }
Exemple #3
0
        /// <summary>
        /// Builds a data table from the lemma <c>table</c> with rows ordered by <c>termFrequency</c>, descending.
        /// </summary>
        /// <param name="table">The lemma table whose data table is exported.</param>
        /// <param name="limit">
        /// Forwarded to <c>CopyRowsFrom</c> as its third argument; when greater than zero, an extra note stating
        /// the row limit is attached to the result. NOTE(review): presumably -1 means "copy all rows" - confirm
        /// against <c>CopyRowsFrom</c>.
        /// </param>
        /// <returns>A schema clone of the source data table, filled with the sorted rows</returns>
        public static DataTable GetDataTableSorted(this webLemmaTermTable table, Int32 limit = -1)
        {
            DataTable source = table.GetDataTable();

            // The ordering is applied on the source table's default view; ToTable() materializes it.
            DataView sortingView = source.DefaultView;
            sortingView.Sort = "termFrequency desc";
            DataTable ordered = sortingView.ToTable();

            DataTable report = source.GetClonedShema <DataTable>(true);
            report.CopyRowsFrom(ordered, 0, limit);

            if (limit > 0)
            {
                String limitNote = "The report contains first [" + limit.ToString("D5") + "] rows";
                report.AddExtra(limitNote);
            }

            return report;
        }
Exemple #4
0
        /// <summary>
        /// Combines several web lemma term tables into one new table. The merged lemmas come from
        /// <c>GetMergedLemmaDictionary</c>; the resulting table should be recomputed afterwards because only
        /// absolute values are set for the lemmas.
        /// </summary>
        /// <param name="tables">The tables to merge.</param>
        /// <param name="name">The name given to the newly created table.</param>
        /// <param name="logger">The logger that receives the progress messages; it is dereferenced without a null check.</param>
        /// <returns>New table containing every lemma of the merged dictionary</returns>
        public static webLemmaTermTable GetMergedLemmaTable(this List <webLemmaTermTable> tables, String name, ILogBuilder logger)
        {
            logger.log("Merging [" + tables.Count + "]");

            Dictionary <String, webLemmaTerm> mergedLemmas = GetMergedLemmaDictionary(tables, logger);
            webLemmaTermTable                 merged       = new webLemmaTermTable(name);

            // Only the merged lemma objects are needed; the dictionary keys are not used here.
            foreach (webLemmaTerm lemma in mergedLemmas.Values)
            {
                merged.Add(lemma);
            }

            logger.log("Merged lemma table created [" + merged.name + "] with [" + merged.Count + "] entries.");

            return merged;
        }
Exemple #5
0
        /// <summary>
        /// Gets the lemmas, matching <c>queryTerms</c>, sorted by weight (in the <c>table</c>), descending.
        /// </summary>
        /// <remarks>
        /// Each query term is resolved independently, so a lemma appears once per query term that resolves to it.
        /// Lemmas of equal weight keep the order in which their query terms were supplied.
        /// </remarks>
        /// <param name="table">webLemmaTermTable to be queried</param>
        /// <param name="queryTerms">The query terms, strings to query <c>table</c>; <c>null</c> yields an empty result</param>
        /// <param name="takeTopN">
        /// Number of top ranked lemmas, by <see cref="termLemmaBase.weight"/>, descending.
        /// Any negative value (including the default -1) means "no limit"; zero yields an empty list.
        /// </param>
        /// <returns>List of matched webLemmaTerms, never <c>null</c></returns>
        public static List <webLemmaTerm> GetLemmasSorted(this webLemmaTermTable table, IEnumerable <String> queryTerms, Int32 takeTopN = -1)
        {
            List <webLemmaTerm> matched = new List <webLemmaTerm>();

            if (queryTerms == null)
            {
                return matched;
            }

            // ------------ selection of key terms
            foreach (String term in queryTerms)
            {
                webLemmaTerm lemma = table.ResolveLemmaForTerm(term);
                if (lemma != null)
                {
                    matched.Add(lemma);
                }
            }

            // OrderByDescending is a stable sort, so ties on weight are returned in a deterministic order
            // (List.Sort gives no such guarantee).
            IEnumerable <webLemmaTerm> ranked = matched.OrderByDescending(x => x.weight);

            // Previously only -1 disabled the limit and any other negative value silently produced an empty list.
            // Take() already caps at the sequence length, so no explicit Math.Min is needed.
            if (takeTopN >= 0)
            {
                ranked = ranked.Take(takeTopN);
            }

            return ranked.ToList();
        }