// Double-checks an opinion word against the word that follows it.
// (The original author tagged this routine "80%".)
//
// Calls FillConjunctions() and Lexicon() first, then inspects the word right
// after position `index` in WordsInSentence. OpinionWord is returned when it
// is contained in OpinionLexicon and that following word is contained in
// Conjunctions or in OpinionTargetList. In every other case - including when
// `index` is the last position of WordsInSentence - the result is null.
// The `sentence` parameter is not read by this method.
protected string GetDoublePropagationOpinionWord(string OpinionWord, int index, string sentence)
{
    FillConjunctions();
    Lexicon();

    var followerPosition = index + 1;
    if (followerPosition >= WordsInSentence.Length)
    {
        // There is no following word to look at.
        return null;
    }

    var follower = WordsInSentence[followerPosition];

    // Follower is a conjunction and the word is a known opinion word.
    if (Conjunctions.Contains(follower) && OpinionLexicon.Contains(OpinionWord))
    {
        return OpinionWord;
    }

    // Follower is a known opinion target and the word is a known opinion word.
    var confirmedByTarget = OpinionTargetList.Contains(follower) && OpinionLexicon.Contains(OpinionWord);
    return confirmedByTarget ? OpinionWord : null;
}
/// <summary>
/// Evaluates this expression against the supplied properties.
/// </summary>
/// <param name="properties">Passed unchanged to the <c>Evaluate</c> call of every child element.</param>
/// <returns>
/// <c>true</c> when every element of <c>Conjunctions</c>, <c>Disjunctions</c> and <c>Propositions</c>
/// evaluates to <c>true</c> (a collection that is <c>null</c> counts as satisfied); the outcome is
/// inverted when <c>Negate</c> is <c>true</c>.
/// </returns>
public bool Evaluate(ReadOnlyDictionary<string, string[]> properties)
{
    var invert = Negate;

    // Each group is skipped when null; groups are checked in order and the
    // first failing group stops the evaluation of the remaining ones.
    var satisfied =
        (Conjunctions == null || Conjunctions.All(c => c.Evaluate(properties)))
        && (Disjunctions == null || Disjunctions.All(d => d.Evaluate(properties)))
        && (Propositions == null || Propositions.All(p => p.Evaluate(properties)));

    return invert ^ satisfied;
}
/// <summary>
/// Join conjunctions to surrounding pieces, e.g.:
/// ['Mr. and Mrs.'], ['King of the Hill'], ['Jack and Jill'], ['Velasquez y Garcia'].
/// Afterwards, the first prefix piece is joined to the pieces that follow it,
/// up to the next suffix: ['de la Vega'], ['van Buren'].
/// </summary>
/// <remarks>
/// The <paramref name="pieces"/> list is modified in place while conjunctions are joined,
/// and newly built combinations are added to the <c>Conjunctions</c> / <c>Titles</c> sets.
/// </remarks>
/// <param name="pieces">Name piece strings after splitting on spaces.</param>
/// <param name="additionalPartsCount">
/// Added to the number of pieces when deciding whether there are enough parts (at least 3) to join at all.
/// </param>
/// <returns>New array with the pieces next to conjunctions merged into one piece with spaces in it.</returns>
internal static string[] joinOnConjunctions(List<string> pieces, int additionalPartsCount = 0)
{
    var length = pieces.Count() + additionalPartsCount;

    // don't join on conjunctions if there are only 2 parts
    if (length < 3)
    {
        return pieces.ToArray();
    }

    // Loop through the conjunction pieces backwards, starting at the end of the list.
    // Join conjunctions to the pieces on either side of them.
    foreach (var conj in pieces.Where(IsConjunction).Reverse())
    {
        if (conj.Length == 1 && pieces.Count(IsRootname) < 4)
        {
            // if there are only 3 total parts (minus known titles, suffixes and prefixes)
            // and this conjunction is a single letter, prefer treating it as an initial
            // rather than a conjunction.
            // http://code.google.com/p/python-nameparser/issues/detail?id=11
            continue;
        }

        var index = pieces.IndexOf(conj);
        if (index == -1)
        {
            // The piece was already merged into another one by an earlier iteration.
            continue;
        }

        if (index >= pieces.Count - 1)
        {
            // A conjunction in last position has nothing after it to join with.
            continue;
        }

        string newPiece;
        if (index == 0)
        {
            // This is the first piece and it's a conjunction: glue it to the next piece.
            // When the next piece is a title the combination must be recognised as a title,
            // otherwise as a conjunction.
            var nxt = pieces[index + 1];
            var cons = IsTitle(nxt) ? Titles : Conjunctions;
            newPiece = string.Join(" ", pieces.Take(2));
            cons.Add(newPiece);
            pieces[index] = newPiece;
            pieces.RemoveAt(index + 1);
            continue;
        }

        if (IsConjunction(pieces[index - 1]))
        {
            // if the piece in front of this one is a conjunction too,
            // add newPiece (this conjunction and the following piece)
            // to the conjunctions constant so that it is recognized
            // as a conjunction in the next loop.
            // e.g. for ["Lord","of","the Universe"], put "the Universe"
            // into the conjunctions constant.
            newPiece = string.Join(" ", pieces.Skip(index).Take(2));
            Conjunctions.Add(newPiece);
            pieces[index] = newPiece;
            pieces.RemoveAt(index + 1);
            continue;
        }

        // Regular case: merge previous piece, conjunction and next piece.
        newPiece = string.Join(" ", pieces.Skip(index - 1).Take(3));
        if (IsTitle(pieces[index - 1]))
        {
            // if the second name is a title, assume the first one is too and add the
            // two titles with the conjunction between them to the titles constant
            // so the combo we just created gets parsed as a title.
            // e.g. "Mr. and Mrs." becomes a title.
            Titles.Add(newPiece);
        }

        pieces[index - 1] = newPiece;
        // Two removals at the same index drop the conjunction and the piece after it.
        pieces.RemoveAt(index);
        pieces.RemoveAt(index);
    }

    // join prefixes to following lastnames: ['de la Vega'], ['van Buren']
    var prefixes = pieces.Where(IsPrefix).ToArray();
    if (prefixes.Length > 0)
    {
        var i = pieces.IndexOf(prefixes[0]);

        // join everything after the prefix until the next suffix
        var nextSuffix = pieces.Skip(i).Where(IsSuffix).ToArray();
        if (nextSuffix.Length > 0)
        {
            // Search from the prefix onwards: the suffix was found in pieces.Skip(i),
            // so an equal string located before the prefix must not be picked up.
            var j = pieces.IndexOf(nextSuffix[0], i);
            var newPiece = string.Join(" ", pieces.Skip(i).Take(j - i));
            pieces = pieces
                .Take(i)
                .Concat(new[] { newPiece })
                .Concat(pieces.Skip(j))
                .ToList();
        }
        else
        {
            var newPiece = string.Join(" ", pieces.Skip(i));
            pieces = pieces.Take(i).ToList();
            pieces.Add(newPiece);
        }
    }

    return pieces.ToArray();
}
/// <summary>
/// Tells whether a name piece should be treated as a conjunction.
/// </summary>
/// <param name="piece">The name piece to test.</param>
/// <returns>
/// <c>true</c> when the piece, lower-cased and with periods removed, is contained in
/// <c>Conjunctions</c> and <c>IsAnInitial</c> does not report the original piece as an initial.
/// </returns>
private static bool IsConjunction(string piece)
{
    // NOTE(review): ToLower() follows the current culture; confirm that is intended
    // for a lookup against the Conjunctions set.
    var normalized = piece.ToLower().Replace(".", string.Empty);

    if (!Conjunctions.Contains(normalized))
    {
        return false;
    }

    return !IsAnInitial(piece);
}
// Find an opinion word target (the original author tagged this routine "90%").
//
// The opinion word is always re-read from WordsInSentence[index]; the value
// passed in through the OpinionWord parameter is overwritten and never used.
// When that word is non-null and occurs in `sentence`, the lower-cased word at
// index + 1 is stored in Target and returned if none of the word lists
// (opinion lexicon, conjunctions, comparatives, future words, adverbs,
// increasers, decreasers, verbs, pronouns, negations) contains it.
// Otherwise, when the word at index + 1 is a conjunction, the word at
// index + 2 is stored in Target and tested the same way.
// Returns null when no target is found, including when the sentence ends
// before a candidate position.
protected string GetOpinionWordTarget(string OpinionWord, int index, string sentence)
{
    FillConjunctions();
    FillAdverbs();
    FillComparatives();
    FillDecreasers();
    FillFutureWords();
    FillIncreasers();
    FillVerbs();
    FillPronouns();
    FillNegations();

    OpinionWord = WordsInSentence[index];
    if (OpinionWord == null || !sentence.Contains(OpinionWord))
    {
        return null;
    }

    int lastIndex = WordsInSentence.GetUpperBound(0);
    if (index + 1 > lastIndex)
    {
        // The opinion word is the last word of the sentence.
        return null;
    }

    Target = WordsInSentence[index + 1];
    Target = Target.ToLower();
    if (IsEligibleTarget(Target))
    {
        return Target;
    }

    // NOTE(review): this conjunction test uses the word as written, while Target
    // above is lower-cased - confirm whether the casing difference is intended.
    if (!Conjunctions.Contains(WordsInSentence[index + 1]))
    {
        return null;
    }

    if (index + 2 > lastIndex)
    {
        // A conjunction closes the sentence: there is no word behind it to read.
        // Without this guard the index + 2 access throws IndexOutOfRangeException.
        return null;
    }

    Target = WordsInSentence[index + 2];
    return IsEligibleTarget(Target) ? Target : null;
}

// True when none of the word lists consulted by GetOpinionWordTarget contains the candidate.
private bool IsEligibleTarget(string candidate)
{
    return !OpinionLexicon.Contains(candidate)
        && !Conjunctions.Contains(candidate)
        && !Comparatives.Contains(candidate)
        && !FutureWords.Contains(candidate)
        && !Adverbs.Contains(candidate)
        && !Increasers.Contains(candidate)
        && !Decreasers.Contains(candidate)
        && !Verbs.Contains(candidate)
        && !Pronouns.Contains(candidate)
        && !Negations.Contains(candidate);
}
// Conjunction based extraction process (the original author tagged this routine "90%").
//
// Rebuilds ConjunctionList from scratch. For every entry of Opinions the
// sentences are obtained through SentencesSeparator (using PositionOfReview),
// and every sentence is split into words through EachWordInSentence (using
// SentenceIndex). A word is considered when its lower-cased form matches the
// OpWord of an entry in FilteredSeed and the word right after it is in
// Conjunctions. The word returned by GetConjunctionBaseOpinionWord is then
// added to ConjunctionList if PositiveWords or NegativeWords contains it.
// Returns ConjunctionList.
protected List<OpinionWord> ExtractConjunctionBasedOpinionWords()
{
    FillPositiveWords();
    FillNegativeWords();
    PositionOfReview = 0;
    SentenceIndex = 0;
    ConjunctionList = new List<OpinionWord>();

    foreach (var opinion in Opinions)
    {
        SentenceIndex = 0;
        SentencesInReview = SentencesSeparator(SentencesInReview, PositionOfReview);

        foreach (var sentence in SentencesInReview)
        {
            WordsInSentence = EachWordInSentence(WordsInSentence, SentencesInReview, SentenceIndex);

            var position = -1;
            foreach (var word in WordsInSentence)
            {
                position++;
                var lowered = word.ToLower();

                // Snapshot built with the orientation that is current before this word is
                // analysed; its Orientation is reused below for words already in the lexicon.
                var current = new OpinionWord(lowered, WordOrientation)
                {
                    OpWord = lowered,
                    Orientation = WordOrientation
                };

                // Check that the word is included in the filtered seed list. The predicate
                // has to compare each seed entry: comparing the word with itself would let
                // every word through as soon as the list is non-empty.
                if (!FilteredSeed.Exists(seed => seed.OpWord == lowered))
                {
                    continue;
                }

                // The word must be followed by a conjunction.
                if (position + 1 >= WordsInSentence.Length)
                {
                    continue;
                }

                if (!Conjunctions.Contains(WordsInSentence[position + 1]))
                {
                    continue;
                }

                string candidate = GetConjunctionBaseOpinionWord(lowered, position, sentence);
                if (!PositiveWords.Contains(candidate) && !NegativeWords.Contains(candidate))
                {
                    continue;
                }

                WordOrientation = GetOpinionWordOrientation(candidate, position, sentence);
                if (!OpinionLexicon.Contains(candidate))
                {
                    ConjunctionList.Add(new OpinionWord(candidate, WordOrientation)
                    {
                        OpWord = candidate,
                        Orientation = WordOrientation
                    });
                }
                else
                {
                    ConjunctionList.Add(new OpinionWord(candidate, current.Orientation));
                }
            }

            // Advance once per sentence so the next call to EachWordInSentence
            // tokenises the next sentence instead of the first one again.
            SentenceIndex++;
        }

        Array.Clear(WordsInSentence, 0, WordsInSentence.Length);

        // Advance once per review so SentencesSeparator moves on to the next review.
        PositionOfReview++;
    }

    return ConjunctionList;
}
// Orientation => true when positive, false when negative
// (the original author tagged this routine "80%").
//
// Updates and returns the WordOrientation member. The rules below are applied
// in order and a later match overrides an earlier one; when no rule matches,
// WordOrientation keeps the value it had before the call. Rules that look
// further back in WordsInSentence only run when `position` leaves enough
// preceding words. The `sentence` parameter is not read by this method.
protected bool GetOpinionWordOrientation(string word, int position, string sentence)
{
    FillPositiveWords();
    FillNegativeWords();
    FillAdverbs();
    FillComparatives();
    FillConjunctions();
    FillDecreasers();
    FillFutureWords();
    FillIncreasers();
    FillVerbs();
    FillNegations();

    // {pos}
    if (PositiveWords.Contains(word))
    {
        WordOrientation = true;
    }

    // {neg}
    if (NegativeWords.Contains(word))
    {
        WordOrientation = false;
    }

    if (position == 0)
    {
        // First word of the sentence: no context to inspect.
        return WordOrientation;
    }

    var oneBack = WordsInSentence[position - 1];

    // {pos} {pos}
    if (PositiveWords.Contains(oneBack))
    {
        WordOrientation = true;
    }

    // {neg} {pos}
    if (NegativeWords.Contains(oneBack))
    {
        WordOrientation = false;
    }

    if (position < 2)
    {
        return WordOrientation;
    }

    var twoBack = WordsInSentence[position - 2];

    // {fut} {verb} {pos}
    if (FutureWords.Contains(twoBack)
        && Verbs.Contains(oneBack)
        && PositiveWords.Contains(WordsInSentence[position]))
    {
        WordOrientation = true;
    }

    // {pos} {neg}
    if (PositiveWords.Contains(oneBack)
        && NegativeWords.Contains(WordsInSentence[position]))
    {
        WordOrientation = false;
    }

    // {decr} {comp} {pos}
    if (Decreasers.Contains(twoBack)
        && Comparatives.Contains(oneBack)
        && PositiveWords.Contains(WordsInSentence[position]))
    {
        WordOrientation = true;
    }

    if (position < 3)
    {
        return WordOrientation;
    }

    var threeBack = WordsInSentence[position - 3];

    // {pos} {conj} {nego} {cpos}
    if (PositiveWords.Contains(threeBack)
        && Conjunctions.Contains(twoBack)
        && Negations.Contains(oneBack)
        && PositiveWords.Contains(WordsInSentence[position]))
    {
        WordOrientation = true;
    }

    // {neg} {conj} {incr} {cneg}
    if (NegativeWords.Contains(threeBack)
        && Conjunctions.Contains(twoBack)
        && Increasers.Contains(oneBack)
        && NegativeWords.Contains(WordsInSentence[position]))
    {
        WordOrientation = false;
    }

    return WordOrientation;
}