示例#1
0
        /// <summary>
        /// Tokenizes each document into word-level segments and collects the results in a new <see cref="TextSource"/>.
        /// </summary>
        /// <remarks>
        /// Tokens are separated on the space character only. Periods, commas and line breaks
        /// ("\r\n" or a lone "\n") are surrounded with spaces first, so each of them becomes
        /// a token of its own.
        /// </remarks>
        /// <param name="documents">The raw document texts; one <see cref="DocumentSource"/> is produced per entry.</param>
        /// <param name="name">The name assigned to the returned source.</param>
        /// <returns>A source holding one tokenized document per input string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="documents"/> is <c>null</c>.</exception>
        public TextSource Process(IEnumerable<string> documents, string name)
        {
            if (documents == null)
            {
                throw new ArgumentNullException("documents");
            }

            TextSource result = new TextSource();

            foreach (var item in documents)
            {
                var doc = new DocumentSource();

                // Treat punctuation marks and line breaks as separate words by padding them with
                // spaces before splitting. "\n" is padded first, which turns every "\r\n" into
                // "\r \n "; the last Replace stitches that back into a single padded "\r\n" token.
                // A "\r" and "\n" that were already separated by a space end up two spaces apart
                // and are therefore left alone. No placeholder text is used, so nothing that
                // appears in the document can be mistaken for one.
                var padded = item.Replace(".", " . ")
                                 .Replace(",", " , ")
                                 .Replace("\n", " \n ")
                                 .Replace("\r \n ", " \r\n ");

                foreach (var segment in padded.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
                {
                    doc.LanguageSegments.Add(segment);
                }

                result.Documents.Add(doc);
            }

            result.Name = name;
            return result;
        }
示例#2
0
        /// <summary>
        /// Accumulates a base-10 log score: log10 of <paramref name="prob_c"/> plus, for every window of
        /// <paramref name="n"/> consecutive segments in <paramref name="testData"/>, log10 of the
        /// probability that <paramref name="trainingDistribution"/> reports for that window.
        /// </summary>
        /// <param name="trainingDistribution">Distribution queried for the probability of each window.</param>
        /// <param name="testData">Document whose language segments are scanned.</param>
        /// <param name="n">Number of segments per window.</param>
        /// <param name="prob_c">Probability whose log10 is the starting value of the sum.</param>
        /// <returns>The accumulated log10 score.</returns>
        public double P_c(CategoryProbabilityDistribution trainingDistribution, DocumentSource testData, int n, double prob_c)
        {
            // Lower bound for a single window's contribution; keeps log10(0) = -infinity out of the sum.
            const double MinLogProbability = -1000;

            double score = Math.Log10(prob_c);
            int lastStart = testData.LanguageSegments.Count - n;

            for (int start = 0; start <= lastStart; start++)
            {
                // Keep the window typed as IEnumerable<string>; that is the static type handed to GetProbability.
                IEnumerable<string> window = testData.LanguageSegments.Skip(start).Take(n).ToArray();
                double logProbability = Math.Log10(trainingDistribution.GetProbability(window));

                score += Math.Max(logProbability, MinLogProbability);
            }

            return score;
        }
        /// <summary>
        /// Builds a <see cref="TextSource"/> in which every document is split into single-character segments.
        /// </summary>
        /// <param name="documents">The raw document texts; one <see cref="DocumentSource"/> is produced per entry.</param>
        /// <param name="name">The name assigned to the returned source.</param>
        /// <returns>A source holding one character-segmented document per input string.</returns>
        public TextSource Process(IEnumerable<string> documents, string name)
        {
            var source = new TextSource();

            foreach (string text in documents)
            {
                var document = new DocumentSource();

                // One segment per char (UTF-16 code unit) of the text.
                // NOTE(review): a character outside the BMP therefore yields two surrogate segments -
                // confirm that this is acceptable for the consumers of these segments.
                for (int index = 0; index < text.Length; index++)
                {
                    document.LanguageSegments.Add(text[index].ToString());
                }

                source.Documents.Add(document);
            }

            source.Name = name;
            return source;
        }