コード例 #1
0
        /// <summary>
        /// Scores a sentence against the positive and negative word lexicons.
        /// </summary>
        /// <remarks>
        /// Both lexicon files are read from disk on every call. Lexicon entries and
        /// sentence words are compared in lower case (invariant culture), so the
        /// match is case-insensitive. Each occurrence of "not", "nor" or "no" flips
        /// the sign applied to every lexicon word that follows it.
        /// </remarks>
        /// <param name="sentence">Text to score; it is split with <c>TextParser.SplitToWords</c>.</param>
        /// <returns>
        /// The sum of the signed per-word scores divided by the number of lexicon
        /// words found (a value in [-1, 1]), or 0 when no word of the sentence
        /// appears in either lexicon.
        /// </returns>
        public double Compute(string sentence)
        {
            var negationWords = new HashSet<string> { "not", "nor", "no" };

            // Lexicon file -> polarity assigned to every word listed in it (true = positive).
            var textFiles = new Dictionary<string, bool>
            {
                { "Lexicon\\NegativeWords.txt", false },
                { "Lexicon\\PositiveWords.txt", true }
            };

            var words = new Dictionary<string, bool>();
            foreach (var item in textFiles)
            {
                using (StreamReader sr = new StreamReader(item.Key))
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        // Normalise entries the same way sentence words are normalised
                        // below, and ignore blank lines instead of storing an empty key.
                        var entry = line.Trim().ToLowerInvariant();
                        if (entry.Length == 0)
                        {
                            continue;
                        }

                        // A word listed in both files keeps the polarity of the file read last.
                        words[entry] = item.Value;
                    }
                }
            }

            var sentenceWords = TextParser.SplitToWords(sentence);

            double positiveRatio = 0.0;
            double totalSum      = 0.0;

            // NOTE(review): the factor starts at -1, so an un-negated positive word
            // contributes -1 and an un-negated negative word +1. The sign convention
            // is kept unchanged because callers may rely on it - confirm it is intended.
            double negationFactor = -1;

            foreach (var word in sentenceWords)
            {
                var lowerWord = word.ToLowerInvariant();

                if (negationWords.Contains(lowerWord))
                {
                    negationFactor = -negationFactor;
                }

                // Look up the normalised word: using the raw token here would miss
                // capitalised words such as the first word of a sentence.
                bool isPositive;
                if (!words.TryGetValue(lowerWord, out isPositive))
                {
                    continue;
                }

                positiveRatio += negationFactor * (isPositive ? 1 : -1);
                totalSum      += 1;
            }

            if (totalSum > 0)
            {
                return positiveRatio / totalSum;
            }

            return 0;
        }
コード例 #2
0
        /// <summary>
        /// Converts a sentence into a sample of known-word data points and passes it
        /// to the naive Bayes classifier.
        /// </summary>
        /// <remarks>
        /// Words are lower-cased. A word found in <c>_negationWords</c>, or one ending
        /// in "n't", toggles the negation state; while negated, other words are
        /// prefixed with "not_" before being looked up. Words absent from
        /// <c>_wordDictionary</c> are ignored.
        /// </remarks>
        /// <param name="sentence">Text to classify; it is split with <c>TextParser.SplitToWords</c>.</param>
        /// <returns>The value returned by <c>_naiveBayesClassifier.Compute</c> for the built sample.</returns>
        public int Compute(string sentence)
        {
            var sample   = new DataSample();
            var features = new List<DataPoint>();
            var tokens   = TextParser.SplitToWords(sentence).ToArray();
            var negated  = false;

            foreach (var rawToken in tokens)
            {
                var token = rawToken.ToLower();

                // A stand-alone negation word only flips the state; it is never a feature.
                if (_negationWords.Contains(token))
                {
                    negated = !negated;
                    continue;
                }

                if (token.EndsWith("n't"))
                {
                    // Contractions flip the state and are looked up without the "not_" prefix.
                    negated = !negated;
                }
                else if (negated)
                {
                    token = "not_" + token;
                }

                int columnId;
                if (_wordDictionary.TryGetValue(token, out columnId))
                {
                    features.Add(new DataPoint { ColumnId = columnId, Value = 1 });
                }
            }

            sample.DataPoints = features.ToArray();
            return _naiveBayesClassifier.Compute(sample);
        }
コード例 #3
0
        /// <summary>
        /// Builds the word and class dictionaries from a labelled training set and
        /// creates the naive Bayes classifier from the resulting samples.
        /// </summary>
        /// <remarks>
        /// Words are lower-cased and words starting with "@" are skipped. A word found
        /// in <c>_negationWords</c>, or one ending in "n't", toggles the negation state
        /// for the rest of the item; while negated, other words are stored with a
        /// "not_" prefix. The negation state is reset for every training item.
        /// </remarks>
        /// <param name="trainingSet">Pairs of (text, class label).</param>
        /// <param name="count">
        /// Maximum number of items taken from <paramref name="trainingSet"/>. When the
        /// set holds fewer items, only the items actually present are used.
        /// </param>
        public void Train(IEnumerable<Tuple<string, string>> trainingSet, int count)
        {
            _wordDictionary = new Dictionary<string, int>();
            _classes        = new Dictionary<string, int>();

            // Collected in a list rather than a pre-sized array of length count, so a
            // training set shorter than count does not leave null entries at the end
            // of the array handed to the classifier.
            var samples = new List<DataSample>();
            int wordId  = 0;
            int classId = 0;

            foreach (var trainingItem in trainingSet.Take(count))
            {
                var sentence   = trainingItem.Item1;
                var classValue = trainingItem.Item2;

                // Class ids are assigned in order of first appearance.
                int sampleClassId;
                if (!_classes.TryGetValue(classValue, out sampleClassId))
                {
                    sampleClassId = classId;
                    _classes.Add(classValue, sampleClassId);
                    classId++;
                }

                var sampleDataPoints = new List<DataPoint>();
                var sentenceWords    = TextParser.SplitToWords(sentence);
                var isNegated        = false;

                foreach (var rawWord in sentenceWords)
                {
                    var currentWord = rawWord.ToLower();

                    if (currentWord.StartsWith("@"))
                    {
                        continue;
                    }

                    // A stand-alone negation word only flips the state; it is never a feature.
                    if (_negationWords.Contains(currentWord))
                    {
                        isNegated = !isNegated;
                        continue;
                    }

                    if (currentWord.EndsWith("n't"))
                    {
                        // Contractions flip the state and are stored without the "not_" prefix.
                        isNegated = !isNegated;
                    }
                    else if (isNegated)
                    {
                        currentWord = "not_" + currentWord;
                    }

                    // Word ids (column ids) are assigned in order of first appearance.
                    int columnId;
                    if (!_wordDictionary.TryGetValue(currentWord, out columnId))
                    {
                        columnId = wordId;
                        _wordDictionary.Add(currentWord, columnId);
                        wordId++;
                    }

                    sampleDataPoints.Add(new DataPoint { ColumnId = columnId, Value = 1 });
                }

                samples.Add(new DataSample
                {
                    ClassId    = sampleClassId,
                    DataPoints = sampleDataPoints.ToArray()
                });
            }

            // One discrete two-category column per distinct word seen during training.
            _columnsDataTypes = new ColumnDataType[wordId];
            for (var index = 0; index < wordId; index++)
            {
                _columnsDataTypes[index] = new ColumnDataType
                {
                    IsDiscrete = true, NumberOfCategories = 2
                };
            }

            // NOTE(review): the literal 2 is passed through unchanged; presumably it is
            // the number of classes - confirm against the NaiveBayesClassifier
            // constructor, since _classes can hold more than two labels.
            _naiveBayesClassifier = new NaiveBayesClassifier(samples.ToArray(), 2, _columnsDataTypes);
        }