Beispiel #1
0
        /// <summary>
        /// Copies every term of an external weight table into this table by calling <c>Add</c> once per source term.
        /// </summary>
        /// <remarks>
        /// According to the original note on this method, a term that is not yet present is added and
        /// the absolute frequency of an already present term is raised. That behaviour is implemented
        /// by <c>Add</c>, which is not visible from this method.
        /// </remarks>
        /// <param name="source">The table whose terms are copied.</param>
        /// <param name="CopyFrequencies">
        /// If set to <c>true</c>, the absolute frequency reported by <paramref name="source"/> is passed
        /// along with each term; otherwise only the term itself is passed to <c>Add</c>.
        /// </param>
        public void AddExternalDocument(IWeightTable source, bool CopyFrequencies)
        {
            foreach (var nt in source.GetAllTerms())
            {
                // Resolve the term through the source table first, exactly as before,
                // so the same object reaches Add in both branches.
                var matched = source.GetMatchTermByName(nt);

                if (CopyFrequencies)
                {
                    Add(matched, source.GetAFreq(nt));
                }
                else
                {
                    Add(matched);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Reads one score per term from <paramref name="table"/> and reduces the collected scores to a single value.
        /// </summary>
        /// <param name="terms">The terms to score.</param>
        /// <param name="table">The weight table the scores are read from.</param>
        /// <param name="scoreToUse">
        /// The column that selects which table getter is called for each term.
        /// <c>none</c> (and any column value not listed in the switch) contributes no score.
        /// </param>
        /// <param name="aggregation">How the collected scores are reduced: avg, count, max, min, range (max - min) or sum.</param>
        /// <returns>
        /// The aggregated score. When no score was collected, 0 is returned for every supported aggregation,
        /// matching what <c>sum</c> and <c>count</c> yield for an empty set.
        /// </returns>
        /// <exception cref="NotImplementedException">
        /// <paramref name="scoreToUse"/> is words, normalizedSemanticDistance, semanticDistance, termLemma or termName
        /// and <paramref name="terms"/> contains at least one term.
        /// </exception>
        /// <exception cref="dataException"><paramref name="aggregation"/> is not one of the supported operations.</exception>
        public static double GetScoreAggregate(this IEnumerable <IWeightTableTerm> terms, IWeightTable table, termTableColumns scoreToUse = termTableColumns.tf_idf, dataPointAggregationType aggregation = dataPointAggregationType.sum)
        {
            List <double> output = new List <double>();

            foreach (IWeightTableTerm term in terms)
            {
                switch (scoreToUse)
                {
                    case termTableColumns.cw:
                        output.Add(table.GetWeight(term));
                        break;

                    case termTableColumns.df:
                        output.Add(table.GetBDFreq(term));
                        break;

                    case termTableColumns.freqAbs:
                        output.Add(table.GetAFreq(term));
                        break;

                    case termTableColumns.freqNorm:
                        output.Add(table.GetNFreq(term));
                        break;

                    case termTableColumns.idf:
                        output.Add(table.GetIDF(term));
                        break;

                    case termTableColumns.ncw:
                        output.Add(table.GetNWeight(term));
                        break;

                    case termTableColumns.tf_idf:
                        output.Add(table.GetTF_IDF(term));
                        break;

                    case termTableColumns.none:
                        // Deliberately contributes nothing.
                        break;

                    case termTableColumns.words:
                    case termTableColumns.normalizedSemanticDistance:
                        case termTableColumns.semanticDistance:
                    case termTableColumns.termLemma:
                    case termTableColumns.termName:
                        throw new NotImplementedException();
                }
            }

            // Average/Max/Min throw InvalidOperationException on an empty sequence, which happens
            // for an empty term set or scoreToUse == none; those cases fall back to 0 instead.
            bool hasScores = output.Count > 0;

            switch (aggregation)
            {
                case dataPointAggregationType.avg:
                    return hasScores ? output.Average() : 0;

                case dataPointAggregationType.count:
                    return output.Count;

                case dataPointAggregationType.max:
                    return hasScores ? output.Max() : 0;

                case dataPointAggregationType.min:
                    return hasScores ? output.Min() : 0;

                case dataPointAggregationType.range:
                    return hasScores ? output.Max() - output.Min() : 0;

                case dataPointAggregationType.sum:
                    return output.Sum();

                default:
                    throw new dataException("Operation not supported [" + aggregation.toString() + "]", null, table, "Aggregation operation not supported");
            }
        }