Exemplo n.º 1
0
        /// <summary>
        /// Tokenizes each input document into space-separated segments and collects
        /// the resulting documents into a new <see cref="TextSource"/>.
        /// Periods, commas and line breaks ("\n" or "\r\n") become segments of their own.
        /// </summary>
        /// <param name="documents">Raw document texts; one <see cref="DocumentSource"/> is produced per entry.</param>
        /// <param name="name">Value assigned to the <c>Name</c> of the returned source.</param>
        /// <returns>A text source holding one tokenized document per input string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="documents"/> is null.</exception>
        public TextSource Process(IEnumerable<string> documents, string name)
        {
            if (documents == null)
            {
                throw new ArgumentNullException(nameof(documents));
            }

            TextSource result = new TextSource();

            foreach (var item in documents)
            {
                var doc = new DocumentSource();

                // Surround punctuation with spaces so it is split off as a separate word.
                var sItem = item.Replace(".", " . ")
                                .Replace(",", " , ");

                // Surround every line break with spaces in a single pass. Matching an
                // optional '\r' keeps "\r\n" together as one segment without needing a
                // textual placeholder that could collide with the document's own content.
                sItem = System.Text.RegularExpressions.Regex.Replace(sItem, @"\r?\n", " $0 ");

                // Only the space character separates segments; empty entries created by
                // the padding above are discarded.
                foreach (var segment in sItem.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
                {
                    doc.LanguageSegments.Add(segment);
                }

                result.Documents.Add(doc);
            }

            result.Name = name;
            return result;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Splits every input document into segments and returns them wrapped in a
        /// new <see cref="TextSource"/>. A period, a comma, a lone "\n" and a "\r\n"
        /// pair are each emitted as a segment of their own; all other text is split
        /// on the space character.
        /// </summary>
        /// <param name="documents">Raw document texts; each yields one <see cref="DocumentSource"/>.</param>
        /// <param name="name">Value assigned to the <c>Name</c> of the returned source.</param>
        /// <returns>A text source containing one tokenized document per input string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="documents"/> is null.</exception>
        public TextSource Process(IEnumerable<string> documents, string name)
        {
            if (documents == null)
            {
                throw new ArgumentNullException(nameof(documents));
            }

            TextSource result = new TextSource();

            foreach (var item in documents)
            {
                var doc = new DocumentSource();

                // Build a copy of the text in which every separator token is padded
                // with spaces. Scanning character by character avoids the placeholder
                // substitution that would corrupt documents containing the placeholder.
                var padded = new System.Text.StringBuilder(item.Length * 2);

                for (int i = 0; i < item.Length; i++)
                {
                    char current = item[i];

                    if (current == '\r' && i + 1 < item.Length && item[i + 1] == '\n')
                    {
                        // Keep a Windows line break together as a single segment.
                        padded.Append(" \r\n ");
                        i++;
                    }
                    else if (current == '.' || current == ',' || current == '\n')
                    {
                        padded.Append(' ').Append(current).Append(' ');
                    }
                    else
                    {
                        padded.Append(current);
                    }
                }

                // Only the space character separates segments; empty entries produced
                // by the padding are dropped.
                foreach (var segment in padded.ToString().Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
                {
                    doc.LanguageSegments.Add(segment);
                }

                result.Documents.Add(doc);
            }

            result.Name = name;
            return result;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Computes a base-10 log score for a document: log10 of
        /// <paramref name="prob_c"/> plus the log10 of the probability that
        /// <paramref name="trainingDistribution"/> reports for every n-gram window
        /// of the document's segments.
        /// </summary>
        /// <param name="trainingDistribution">Distribution queried for each n-gram's probability.</param>
        /// <param name="testData">Document whose language segments are scored.</param>
        /// <param name="n">Window size passed to <c>GetNGram</c>.</param>
        /// <param name="prob_c">Starting probability (presumably the category prior; confirm against callers).</param>
        /// <returns>The accumulated log10 score.</returns>
        public double P_c(CategoryProbabilityDistribution trainingDistribution, DocumentSource testData, int n, double prob_c)
        {
            double logScore = Math.Log10(prob_c);
            var segments = testData.LanguageSegments.ToArray();

            // Last index at which a window of n segments can start; when it is
            // negative the loop body never runs.
            int lastStart = segments.Length - n;

            for (int start = 0; start <= lastStart; start++)
            {
                string[] window = segments.GetNGram(start, n);
                logScore += Math.Log10(trainingDistribution.GetProbability(window));
            }

            return logScore;
        }
        /// <summary>
        /// Builds a <see cref="TextSource"/> in which every character of each input
        /// document is stored as its own one-character segment.
        /// </summary>
        /// <param name="documents">Raw document texts; each yields one <see cref="DocumentSource"/>.</param>
        /// <param name="name">Value assigned to the <c>Name</c> of the returned source.</param>
        /// <returns>A text source containing one character-segmented document per input string.</returns>
        public TextSource Process(IEnumerable<string> documents, string name)
        {
            var source = new TextSource();

            foreach (string text in documents)
            {
                var document = new DocumentSource();

                // Walk the string by index; each character becomes a segment.
                for (int position = 0; position < text.Length; position++)
                {
                    document.LanguageSegments.Add(text[position].ToString());
                }

                source.Documents.Add(document);
            }

            source.Name = name;
            return source;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Converts each input document into a <see cref="DocumentSource"/> whose
        /// segments are the document's individual characters, and gathers them in a
        /// new <see cref="TextSource"/>.
        /// </summary>
        /// <param name="documents">Raw document texts, processed in enumeration order.</param>
        /// <param name="name">Value assigned to the <c>Name</c> of the returned source.</param>
        /// <returns>The populated text source.</returns>
        public TextSource Process(IEnumerable<string> documents, string name)
        {
            TextSource collected = new TextSource();

            foreach (string raw in documents)
            {
                DocumentSource entry = new DocumentSource();
                char[] characters = raw.ToCharArray();

                // One segment per character, in original order.
                foreach (char symbol in characters)
                {
                    entry.LanguageSegments.Add(symbol.ToString());
                }

                collected.Documents.Add(entry);
            }

            collected.Name = name;
            return collected;
        }