コード例 #1
0
        /// <summary>
        /// Looks the query terms up in the table and aggregates one score column of the matched entries.
        /// </summary>
        /// <param name="table">The table the query terms are matched against.</param>
        /// <param name="queryTerms">Terms to test against the table; the matches returned by <c>GetMatches</c> are used in the calculation.</param>
        /// <param name="scoreToUse">Which column of a matched term is aggregated (only numeric columns are supported).</param>
        /// <param name="aggregation">The aggregation type applied to the collected scores.</param>
        /// <returns>The result of <c>GetScoreAggregate</c> over the matched entries.</returns>
        public static double GetScoreForMatch(this IWeightTable table, IEnumerable<string> queryTerms, termTableColumns scoreToUse = termTableColumns.tf_idf, dataPointAggregationType aggregation = dataPointAggregationType.sum)
        {
            // The match list comes straight from GetMatches; the former placeholder list was
            // overwritten before ever being read, so it is no longer allocated.
            List<IWeightTableTerm> matches = table.GetMatches(queryTerms);

            return matches.GetScoreAggregate(table, scoreToUse, aggregation);
        }
コード例 #2
0
        /// <summary>
        /// Creates a new table, named via <c>makeUniqueName</c> from the document's name, and adds all terms of the document to it.
        /// </summary>
        /// <param name="document">The document whose terms are added.</param>
        /// <returns>The newly created table that received the terms.</returns>
        public IWeightTable Add(IWeightTable document)
        {
            string sourceName = document.name;
            string uniqueName = sourceName.makeUniqueName(documents.ContainsKey, "D4", 10000);

            TWeightTable target = (TWeightTable)AddTable(uniqueName);

            if (!(document is weightTableCompiled))
            {
                // Regular table: each term is added together with its AFreqPoints value.
                foreach (IWeightTableTerm term in document)
                {
                    target.Add(term, term.AFreqPoints);
                }

                return target;
            }

            // Compiled table: the entries of its term list are added one by one.
            weightTableCompiled compiled = (weightTableCompiled)document;

            foreach (weightTableTermCompiled compiledTerm in compiled.GetList())
            {
                target.Add(compiledTerm);
            }

            return target;
        }
コード例 #3
0
        /// <summary>
        /// Gets the cross section of terms matched between this table and <paramref name="secondTable"/>.
        /// </summary>
        /// <param name="secondTable">The second table.</param>
        /// <param name="thisAgainstSecond">
        /// if set to <c>true</c>, the terms of this table are looked up in the second table and the second table's
        /// entries are collected; otherwise the terms of the second table are looked up in this table and this
        /// table's entries are collected.
        /// </param>
        /// <returns>The non-null lookup results, in the order the iterated table yields its terms.</returns>
        public List<IWeightTableTerm> GetCrossSection(IWeightTable secondTable, bool thisAgainstSecond = false)
        {
            List<IWeightTableTerm> matched = new List<IWeightTableTerm>();

            if (thisAgainstSecond)
            {
                foreach (IWeightTableTerm term in this.ToList())
                {
                    // Only the second table's entry is relevant here; the former extra lookup in this
                    // table was overwritten before it was ever read.
                    IWeightTableTerm match = secondTable.GetMatchTerm(term);
                    if (match != null)
                    {
                        matched.Add(match);
                    }
                }
            }
            else
            {
                foreach (IWeightTableTerm term in secondTable.ToList())
                {
                    var match = GetMatchTerm(term);
                    if (match != null)
                    {
                        matched.Add(match);
                    }
                }
            }

            return matched;
        }
コード例 #4
0
        /// <summary>
        /// Returns the table entries matching any form of the given query terms.
        /// </summary>
        /// <param name="table">The table to search.</param>
        /// <param name="queryTerms">The query terms; each contributes the result of its <c>GetAllForms()</c> to the search.</param>
        /// <returns>The result of the string-based <c>GetMatches</c> call for the expanded query.</returns>
        public static List<IWeightTableTerm> GetMatches(this IWeightTable table, IEnumerable<IWeightTableTerm> queryTerms)
        {
            // Collect the forms of every query term via AddUnique before delegating.
            List<string> expandedQuery = new List<string>();

            foreach (IWeightTableTerm qt in queryTerms)
            {
                expandedQuery.AddUnique(qt.GetAllForms());
            }

            return table.GetMatches(expandedQuery);
        }
コード例 #5
0
        /// <summary>
        /// Looks every query string up in the table and collects the entries that were found.
        /// </summary>
        /// <param name="table">The table to search.</param>
        /// <param name="queryTerms">The query terms.</param>
        /// <returns>The non-null results of <c>GetMatchByString</c>, added through <c>AddUnique</c>.</returns>
        public static List<IWeightTableTerm> GetMatches(this IWeightTable table, IEnumerable<string> queryTerms)
        {
            var found = new List<IWeightTableTerm>();

            foreach (string queryTerm in queryTerms)
            {
                var entry = table.GetMatchByString(queryTerm);

                // A null result means the table has no match for this string.
                if (entry != null)
                {
                    collectionExtensions.AddUnique(found, entry);
                }
            }

            return found;
        }
コード例 #6
0
        /// <summary>
        /// Registers the item in <c>documents</c> under its name when it is an <see cref="IWeightTable"/>.
        /// </summary>
        /// <param name="item">The item to add. Anything that is not an <see cref="IWeightTable"/> is ignored.</param>
        public void Add(object item)
        {
            IWeightTable asTable = item as IWeightTable;

            if (asTable != null)
            {
                documents.Add(asTable.name, asTable);
            }

            // IWeightTableTerm items are accepted but nothing is done with them.
        }
コード例 #7
0
        /// <summary>
        /// Adds the term to <c>AggregateDocument</c> and records a vote for the supplied table,
        /// unless the supplied table is <c>AggregateDocument</c> itself.
        /// </summary>
        /// <param name="table">The table.</param>
        /// <param name="term">The term.</param>
        /// <param name="callTableLevelAdd">When <c>true</c>, the term is also added to the supplied table; usually you don't want that.</param>
        public void Add(IWeightTable table, IWeightTableTerm term, bool callTableLevelAdd = false)
        {
            if (callTableLevelAdd)
            {
                table.Add(term);
            }

            // Nothing further happens when the caller passed the aggregate document itself.
            if (table != AggregateDocument)
            {
                var aggregatedTerm = AggregateDocument.Add(term);
                counter.AddVote(table, aggregatedTerm);
            }
        }
コード例 #8
0
        /// <summary>
        /// Adds the terms of an external document to this table.
        /// </summary>
        /// <param name="source">The source document whose terms are added.</param>
        /// <param name="CopyFrequencies">
        /// if set to <c>true</c>, each term is added together with the value <c>source.GetAFreq</c> reports for it;
        /// otherwise the single-argument <c>Add</c> is used.
        /// </param>
        public void AddExternalDocument(IWeightTable source, bool CopyFrequencies)
        {
            // The before/after Count() bookkeeping and the term-string list were computed
            // but never read, so they are no longer produced.
            foreach (var nt in source.GetAllTerms())
            {
                var term = source.GetMatchTermByName(nt);

                if (CopyFrequencies)
                {
                    Add(term, source.GetAFreq(nt));
                }
                else
                {
                    Add(term);
                }
            }
        }
コード例 #9
0
ファイル: textEvaluation.cs プロジェクト: gorangrubic/imbNLP
        /// <summary>
        /// Evaluates the current <c>inputTokens</c> against languages A and B and stores the outcome in
        /// <c>resultMode</c>, <c>ratioA</c>, the score fields and the per-language token lists.
        /// </summary>
        /// <param name="table">
        /// Optional weight table. When not <c>null</c>, each score is the sum of <c>GetTF_IDF(token) + 0.1</c> over the
        /// corresponding token list; when <c>null</c>, each score is the size of the corresponding token list.
        /// </param>
        /// <param name="sortByFrequency">
        /// if set to <c>true</c>, the test tokens are taken from <c>tokenFrequency.getSorted()</c>;
        /// otherwise they are taken from <c>inputTokens</c> in their given order.
        /// </param>
        protected void evaluate(IWeightTable table, bool sortByFrequency = true)
        {
            if (inputTokens.Count == 0)
            {
                ratioA     = 0;
                resultMode = textEvaluationResultEnum.notEnoughInformation;
                return;
            }

            var sort = new List<string>();

            if (sortByFrequency)
            {
                // Register the tokens exactly once. The earlier code repeated this call for every
                // token, which registered the whole range inputTokens.Count times.
                tokenFrequency.AddInstanceRange(inputTokens);

                sort = tokenFrequency.getSorted();
            }
            else
            {
                sort.AddRange(inputTokens);
            }

            // At most testSize tokens are examined.
            int take = Math.Min(sort.Count(), testSize);

            testTokens.AddRange(sort.Take(take));

            foreach (string tkn in testTokens)
            {
                bool testA = basicLanguageTools.testBoolean(languageA, tkn, basicLanguageCheck.spellCheck);
                bool testB = basicLanguageTools.testBoolean(languageB, tkn, basicLanguageCheck.spellCheck);

                bool testNotAB = (!testA) && (!testB);

                // Fallback for tokens neither spell check accepted: consult the parent's caches and,
                // failing that, the lexicon. A token already cached as "not A/B" skips the fallback.
                // (This used to be a while loop that only terminated by hitting that cache on a
                // second pass; a single conditional pass performs the same steps.)
                if (testNotAB && !parent.langNotABTokens.Contains(tkn))
                {
                    testA = parent.langATokens.Contains(tkn);
                    testB = parent.langBTokens.Contains(tkn);

                    if ((!testA) && (!testB))
                    {
                        lexiconResponse lemmas = parent.manager.getLexiconItems(tkn);
                        if (lemmas.type != lexiconResponse.responseType.failedQueries)
                        {
                            // Any lexicon response other than failedQueries counts the token as language A.
                            testA = true;
                            parent.langATokens.AddUnique(tkn);
                        }
                    }

                    testNotAB = (!testA) && (!testB);
                    if (testNotAB)
                    {
                        parent.langNotABTokens.AddUnique(tkn);
                    }
                }

                if (testA)
                {
                    langATokens.Add(tkn);
                }
                if (testB)
                {
                    langBTokens.Add(tkn);
                }

                bool testAB = testA && testB;

                if (testAB)
                {
                    langABTokens.Add(tkn);
                }
                if (testNotAB)
                {
                    langNotABTokens.AddUnique(tkn);
                }
            }

            if (table != null)
            {
                // Weighted scoring: every token contributes its TF-IDF plus a flat 0.1.
                foreach (string tkA in langATokens)
                {
                    double sc = table.GetTF_IDF(tkA);
                    scoreForA += sc + 0.1;
                }

                foreach (string tkB in langBTokens)
                {
                    double sc = table.GetTF_IDF(tkB);
                    scoreForB += sc + 0.1;
                }

                foreach (string tkN in langNotABTokens)
                {
                    double sc = table.GetTF_IDF(tkN);
                    scoreForNotAB += sc + 0.1;
                }
            }
            else
            {
                // Unweighted scoring: plain token counts.
                scoreForA     = langATokens.Count();
                scoreForB     = langBTokens.Count();
                scoreForNotAB = langNotABTokens.Count();
            }

            // A language wins only when its score is strictly greater than both other scores.
            if ((scoreForA > scoreForB) && (scoreForA > scoreForNotAB))
            {
                isLanguageA = true;
                resultMode  = textEvaluationResultEnum.languageA;
            }

            if ((scoreForB > scoreForA) && (scoreForB > scoreForNotAB))
            {
                isLanguageB = true;
                resultMode  = textEvaluationResultEnum.languageB;
            }

            if ((!isLanguageA) && (!isLanguageB))
            {
                resultMode = textEvaluationResultEnum.noneOfBoth;
            }
            else
            {
                // NOTE(review): within a single call a tie between A and B sets neither flag above,
                // so this branch is only reachable when isLanguageA/isLanguageB were already set
                // before the call - confirm whether ties were meant to report "uncertain".
                if (scoreForA == scoreForB)
                {
                    resultMode = textEvaluationResultEnum.uncertain;
                }
            }

            // ratioA is the share of score A in the total of the three scores.
            if (scoreForA == 0)
            {
                ratioA = 0;
            }
            else
            {
                double div = (double)(scoreForA + scoreForB + scoreForNotAB);
                if (div == 0)
                {
                    ratioA = 1;
                }
                else
                {
                    ratioA = (double)scoreForA / div;
                }
            }
        }
コード例 #10
0
ファイル: textEvaluation.cs プロジェクト: gorangrubic/imbNLP
 /// <summary>
 /// Stores the given tokens as <c>inputTokens</c> and runs <c>evaluate</c> with the remaining arguments.
 /// </summary>
 /// <param name="tokens">The tokens to evaluate; the list is assigned to <c>inputTokens</c> as is, not copied.</param>
 /// <param name="table">The weight table passed on to <c>evaluate</c>.</param>
 /// <param name="sortByFrequency">Passed on to <c>evaluate</c> unchanged.</param>
 public void evaluateTokens(List <string> tokens, IWeightTable table, bool sortByFrequency = true)
 {
     inputTokens = tokens;
     evaluate(table, sortByFrequency);
 }
コード例 #11
0
        /// <summary>
        /// Collects one score column for every given term from the table and aggregates the collected values.
        /// </summary>
        /// <param name="terms">The terms whose scores are read from <paramref name="table"/>.</param>
        /// <param name="table">The table that supplies the scores.</param>
        /// <param name="scoreToUse">
        /// The column to read. <c>none</c> collects nothing; the non-numeric columns (<c>words</c>,
        /// <c>normalizedSemanticDistance</c>, <c>semanticDistance</c>, <c>termLemma</c>, <c>termName</c>) are not supported.
        /// </param>
        /// <param name="aggregation">The aggregation applied to the collected values.</param>
        /// <returns>
        /// The aggregated value. When no value was collected, <c>avg</c>, <c>max</c>, <c>min</c> and <c>range</c>
        /// return 0, in line with <c>sum</c> and <c>count</c>.
        /// </returns>
        /// <exception cref="NotImplementedException">A term is processed while <paramref name="scoreToUse"/> is one of the unsupported columns.</exception>
        /// <exception cref="dataException">The aggregation type is not supported.</exception>
        public static double GetScoreAggregate(this IEnumerable<IWeightTableTerm> terms, IWeightTable table, termTableColumns scoreToUse = termTableColumns.tf_idf, dataPointAggregationType aggregation = dataPointAggregationType.sum)
        {
            List<double> output = new List<double>();

            foreach (IWeightTableTerm term in terms)
            {
                switch (scoreToUse)
                {
                    case termTableColumns.cw:
                        output.Add(table.GetWeight(term));
                        break;

                    case termTableColumns.df:
                        output.Add(table.GetBDFreq(term));
                        break;

                    case termTableColumns.freqAbs:
                        output.Add(table.GetAFreq(term));
                        break;

                    case termTableColumns.freqNorm:
                        output.Add(table.GetNFreq(term));
                        break;

                    case termTableColumns.idf:
                        output.Add(table.GetIDF(term));
                        break;

                    case termTableColumns.ncw:
                        output.Add(table.GetNWeight(term));
                        break;

                    case termTableColumns.none:
                        break;

                    case termTableColumns.words:
                    case termTableColumns.normalizedSemanticDistance:
                    case termTableColumns.semanticDistance:
                    case termTableColumns.termLemma:
                    case termTableColumns.termName:
                        throw new NotImplementedException();

                    case termTableColumns.tf_idf:
                        output.Add(table.GetTF_IDF(term));
                        break;
                }
            }

            // Average/Max/Min throw on an empty sequence; an empty score list (no terms, or the
            // "none" column) yields 0 instead, matching what sum and count return.
            bool isEmpty = output.Count == 0;

            switch (aggregation)
            {
                case dataPointAggregationType.avg:
                    return isEmpty ? 0 : output.Average();

                case dataPointAggregationType.count:
                    return output.Count;

                case dataPointAggregationType.max:
                    return isEmpty ? 0 : output.Max();

                case dataPointAggregationType.min:
                    return isEmpty ? 0 : output.Min();

                case dataPointAggregationType.range:
                    return isEmpty ? 0 : output.Max() - output.Min();

                case dataPointAggregationType.sum:
                    return output.Sum();

                default:
                    throw new dataException("Operation not supported [" + aggregation.toString() + "]", null, table, "Aggregation operation not supported");
            }
        }
コード例 #12
0
 /// <summary>
 /// Increases the value stored for the term/table pair by the term's <c>AFreqPoints</c>.
 /// </summary>
 /// <param name="targetTable">The table the vote is recorded for.</param>
 /// <param name="term">The term the vote is recorded for; its <c>AFreqPoints</c> is the amount added.</param>
 /// <returns>The value read back from the indexer after the update.</returns>
 public int AddVote(IWeightTable targetTable, IWeightTableTerm term)
 {
     this[term, targetTable] += term.AFreqPoints;

     // Read the entry back through the indexer rather than returning a locally computed sum.
     return this[term, targetTable];
 }
コード例 #13
0
 /// <summary>
 /// Initializes the collection with the two tables it refers to.
 /// </summary>
 /// <param name="__first">Stored in <c>first</c>.</param>
 /// <param name="__second">Stored in <c>second</c>.</param>
 public weightTableMatchCollection(IWeightTable __first, IWeightTable __second)
 {
     first  = __first;
     second = __second;
 }
コード例 #14
0
 /// <summary>
 /// Not implemented in this type; the call always throws.
 /// </summary>
 /// <param name="source">Unused.</param>
 /// <param name="CopyFrequencies">Unused.</param>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public void AddExternalDocument(IWeightTable source, bool CopyFrequencies)
 {
     throw new NotImplementedException();
 }
コード例 #15
0
 /// <summary>
 /// Not implemented in this type; the call always throws.
 /// </summary>
 /// <param name="secondTable">Unused.</param>
 /// <param name="thisAgainstSecond">Unused.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public List <IWeightTableTerm> GetCrossSection(IWeightTable secondTable, bool thisAgainstSecond = false)
 {
     throw new NotImplementedException();
 }