C# (CSharp) Summarization.Utils Configuration示例

编程语言: C# (CSharp)

命名空间/包名称: Summarization.Utils

类/类型: Configuration

hotexamples.com的示例: 4

C# (CSharp) Summarization.Utils Configuration - 已找到4个示例。这些是从开源项目中提取的最受好评的Summarization.Utils.Configuration现实C# (CSharp)示例。您可以评价示例，以帮助我们提高示例质量。

示例#1

显示文件

文件： TFIDF.cs 项目： BhargavUppalapu/DotNetNLPTools

        /// <summary>
        /// Builds the stem vocabulary of a document and, as a by-product, the stemmed form of
        /// every sentence.
        /// </summary>
        /// <remarks>
        /// Each word is stripped of non-alphanumeric characters, dropped if it is a stop word,
        /// and then stemmed with <c>config.Stemmer</c>. A token whose stemming fails is reported
        /// on the console and skipped so that one bad token does not abort the whole document.
        /// </remarks>
        /// <param name="doc">Document whose <c>sentences</c> are scanned.</param>
        /// <param name="stemmedDocs">Receives one list of stems per sentence, in sentence order.</param>
        /// <param name="config">Supplies the <c>Stemmer</c> applied to every token.</param>
        /// <param name="vocabularyThreshold">
        /// Minimum number of occurrences (across all sentences) a stem needs to enter the vocabulary.
        /// </param>
        /// <returns>The stems that occur at least <paramref name="vocabularyThreshold"/> times.</returns>
        private static List<string> GetVocabulary(Document doc, out List<List<string>> stemmedDocs, Configuration config, int vocabularyThreshold)
        {
            Dictionary<string, int> wordCountList = new Dictionary<string, int>();
            stemmedDocs = new List<List<string>>();

            foreach (Sentence sentence in doc.sentences)
            {
                List<string> stemmedDoc = new List<string>();

                foreach (wordDetails part in sentence.words)
                {
                    // Strip non-alphanumeric characters.
                    string stripped = Regex.Replace(part.word, "[^a-zA-Z0-9]", "");

                    // Pure punctuation leaves nothing to stem.
                    if (stripped.Length == 0)
                    {
                        continue;
                    }

                    // Invariant casing keeps stop-word matching independent of the current culture.
                    if (StopWords.stopWordsList.Contains(stripped.ToLowerInvariant()))
                    {
                        continue;
                    }

                    string stem;
                    try
                    {
                        stem = config.Stemmer.Stem(stripped);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: report the failure (with its cause) and move on to the next token.
                        Console.WriteLine("There is some error in Stemming: " + ex.Message);
                        continue;
                    }

                    if (string.IsNullOrEmpty(stem))
                    {
                        continue;
                    }

                    // Count every occurrence, including the first one, so the stored value is
                    // the real number of times the stem was seen.
                    int seen;
                    wordCountList.TryGetValue(stem, out seen);
                    wordCountList[stem] = seen + 1;

                    stemmedDoc.Add(stem);
                }

                stemmedDocs.Add(stemmedDoc);
            }

            // Keep only the stems that are frequent enough.
            return wordCountList
                .Where(w => w.Value >= vocabularyThreshold)
                .Select(w => w.Key)
                .ToList();
        }

示例#2

显示文件

文件： ExtractionSummarizer.cs 项目： BhargavUppalapu/DotNetNLPTools

 /// <summary>
 /// Runs the extraction pipeline on a document: applies the TF-IDF transform, then the
 /// description, must-have and good-to-have summary passes, in that order.
 /// </summary>
 /// <param name="doc">Document to summarize; it is handed to every step.</param>
 /// <param name="config">Configuration forwarded to <c>TFIDF.Transform</c>.</param>
 public void Summarize(DataStructures.Document doc, Configuration config)
 {
     // A vocabulary threshold of 0 is passed explicitly instead of relying on Transform's default.
     TFIDF.Transform(doc, config, 0);
     DescriptionSummary(doc);
     MustHaveSummary(doc);
     GoodToHaveSummary(doc);
     // TODO (future work):
     //  - important-words count
     //  - nouns count
     //  - verbs count
 }

示例#3

显示文件

文件： TFIDF.cs 项目： BhargavUppalapu/DotNetNLPTools

        /// <summary>
        /// Computes TF-IDF information for a document: derives the vocabulary, fills the shared
        /// IDF table if it is still empty, and then scores every sentence.
        /// </summary>
        /// <param name="doc">Document to process.</param>
        /// <param name="config">Configuration forwarded to the vocabulary and scoring steps.</param>
        /// <param name="vocabularyThreshold">Threshold forwarded to <c>GetVocabulary</c>.</param>
        public static void Transform(Document doc, Configuration config, int vocabularyThreshold = 3)
        {
            // One pass yields both the vocabulary and the stemmed sentences.
            List<List<string>> stemmedSentences;
            List<string> terms = GetVocabulary(doc, out stemmedSentences, config, vocabularyThreshold);

            // The IDF table is populated only while it is empty; later calls reuse its contents.
            bool idfAlreadyPopulated = _IDF.Count != 0;
            if (!idfAlreadyPopulated)
            {
                double sentenceTotal = stemmedSentences.Count;

                foreach (string term in terms)
                {
                    // Number of stemmed sentences that mention the term at least once.
                    double sentencesWithTerm = stemmedSentences.Count(s => s.Contains(term));

                    // The denominator is offset by one, so it can never be zero.
                    _IDF[term] = Math.Log(sentenceTotal / (1.0 + sentencesWithTerm));
                }
            }

            // Turn every sentence into TF-IDF values using the (possibly reused) IDF table.
            TransformToTFIDFVectors(doc, config, _IDF);
        }

示例#4

显示文件

文件： TFIDF.cs 项目： BhargavUppalapu/DotNetNLPTools

        /// <summary>
        /// Scores every sentence of the document: builds its TF-IDF vector over the supplied
        /// vocabulary, L2-normalizes it, and adds the vector's components to the sentence's
        /// <c>TFIDFScore</c>.
        /// </summary>
        /// <remarks>
        /// Term frequencies are counted on stripped, stemmed tokens, because the vocabulary keys
        /// are stems; comparing raw word text against them would miss every inflected or
        /// punctuated word. The score is added to whatever <c>TFIDFScore</c> already holds.
        /// </remarks>
        /// <param name="doc">Document whose sentences are scored in place.</param>
        /// <param name="config">Supplies the <c>Stemmer</c> used to normalize each word.</param>
        /// <param name="vocabularyIDF">Maps each vocabulary stem to its IDF weight.</param>
        private static void TransformToTFIDFVectors(Document doc, Configuration config, Dictionary<string, double> vocabularyIDF)
        {
            foreach (Sentence sentence in doc.sentences)
            {
                // Counting once per sentence avoids rescanning all words for every vocabulary term.
                Dictionary<string, int> stemCounts = CountSentenceStems(sentence, config);

                double[] weights = new double[vocabularyIDF.Count];
                int slot = 0;
                foreach (KeyValuePair<string, double> entry in vocabularyIDF)
                {
                    // Term frequency = how many times the stem appears in this sentence (0 if absent).
                    int termFrequency;
                    stemCounts.TryGetValue(entry.Key, out termFrequency);
                    weights[slot++] = termFrequency * entry.Value;
                }

                weights = L2Normalization.Normalize(weights);
                foreach (double weight in weights)
                {
                    sentence.TFIDFScore = sentence.TFIDFScore + weight;
                }
            }
        }

        /// <summary>
        /// Counts the stems of one sentence using the same normalization as the vocabulary
        /// builder: strip non-alphanumerics, drop stop words, stem.
        /// </summary>
        private static Dictionary<string, int> CountSentenceStems(Sentence sentence, Configuration config)
        {
            Dictionary<string, int> counts = new Dictionary<string, int>();

            foreach (wordDetails part in sentence.words)
            {
                string stripped = Regex.Replace(part.word, "[^a-zA-Z0-9]", "");
                if (stripped.Length == 0)
                {
                    continue;
                }

                if (StopWords.stopWordsList.Contains(stripped.ToLowerInvariant()))
                {
                    continue;
                }

                string stem;
                try
                {
                    stem = config.Stemmer.Stem(stripped);
                }
                catch (Exception)
                {
                    // Best effort, mirroring GetVocabulary, which already prints a console
                    // message for tokens the stemmer rejects; an unstemmable token simply
                    // contributes nothing to the counts.
                    continue;
                }

                if (string.IsNullOrEmpty(stem))
                {
                    continue;
                }

                int seen;
                counts.TryGetValue(stem, out seen);
                counts[stem] = seen + 1;
            }

            return counts;
        }