/**
         * <summary> Fills in the missing morphological parses of a sentence. The sentence is analyzed with
         * morphologicalAnalyzer.RobustMorphologicalAnalysis, the resulting parse lists are passed to
         * longestRootFirstDisambiguation.Disambiguate, and the parse returned for each position is assigned (through
         * SetParseAutomatically) to every word whose parse is still null. Words that already have a parse are left
         * untouched.</summary>
         * <param name="sentence">The sentence to be disambiguated automatically.</param>
         */
        protected override void AutoDisambiguateMultipleRootWords(AnnotatedSentence.AnnotatedSentence sentence)
        {
            FsmParseList[] candidateParseLists = morphologicalAnalyzer.RobustMorphologicalAnalysis(sentence);
            List<FsmParse> selectedParses = longestRootFirstDisambiguation.Disambiguate(candidateParseLists);

            for (int position = 0; position < sentence.WordCount(); position++)
            {
                AnnotatedWord candidate = (AnnotatedWord)sentence.GetWord(position);
                if (candidate.GetParse() != null)
                {
                    // Already disambiguated; keep the existing parse.
                    continue;
                }

                // Assumes the disambiguator returns one parse per word, in sentence order.
                SetParseAutomatically(selectedParses[position], candidate);
            }
        }
コード例 #2
0
        /**
         * <summary> Tags MONEY named entities. Every word that has a morphological parse and whose Turkish-lowercased
         * name is accepted by Word.IsMoney is tagged "MONEY". The tag is then propagated backwards over the directly
         * preceding words, for as long as each of them has a parse and is either named "amerikan" or carries a REAL,
         * CARDINAL or NUMBER morphological tag.</summary>
         * <param name="sentence">The sentence for which MONEY named entities checked.</param>
         */
        protected override void AutoDetectMoney(AnnotatedSentence.AnnotatedSentence sentence)
        {
            for (var index = 0; index < sentence.WordCount(); index++)
            {
                var current = (AnnotatedWord)sentence.GetWord(index);
                var lowered = current.GetName().ToLower(new CultureInfo("tr"));
                if (current.GetParse() == null || !Word.IsMoney(lowered))
                {
                    continue;
                }

                current.SetNamedEntityType("MONEY");

                // Walk left from the money word and stop at the first word that is not part of the amount.
                for (var back = index - 1; back >= 0; back--)
                {
                    var preceding      = (AnnotatedWord)sentence.GetWord(back);
                    var precedingParse = preceding.GetParse();
                    if (precedingParse == null)
                    {
                        break;
                    }

                    var belongsToAmount = preceding.GetName().Equals("amerikan")
                                          || precedingParse.ContainsTag(MorphologicalTag.REAL)
                                          || precedingParse.ContainsTag(MorphologicalTag.CARDINAL)
                                          || precedingParse.ContainsTag(MorphologicalTag.NUMBER);
                    if (!belongsToAmount)
                    {
                        break;
                    }

                    preceding.SetNamedEntityType("MONEY");
                }
            }
        }
コード例 #3
0
 /**
  * <summary> Tags TIME named entities. Every word that has a morphological parse and whose Turkish-lowercased name
  * is accepted by Word.IsTime is tagged "TIME". If the word directly before it has a parse carrying the CARDINAL
  * tag, that word is tagged "TIME" as well. A preceding word without a parse is skipped.</summary>
  * <param name="sentence">The sentence for which TIME named entities checked.</param>
  */
 protected override void AutoDetectTime(AnnotatedSentence.AnnotatedSentence sentence)
 {
     for (var i = 0; i < sentence.WordCount(); i++)
     {
         var word          = (AnnotatedWord)sentence.GetWord(i);
         var wordLowercase = word.GetName().ToLower(new CultureInfo("tr"));
         if (word.GetParse() == null || !Word.IsTime(wordLowercase))
         {
             continue;
         }

         word.SetNamedEntityType("TIME");
         if (i == 0)
         {
             // No preceding word to extend the tag to.
             continue;
         }

         var previous      = (AnnotatedWord)sentence.GetWord(i - 1);
         var previousParse = previous.GetParse();

         // The preceding word may have no parse (this loop itself has to skip such words), so guard the
         // tag lookup instead of dereferencing a null parse.
         if (previousParse != null && previousParse.ContainsTag(MorphologicalTag.CARDINAL))
         {
             previous.SetNamedEntityType("TIME");
         }
     }
 }
コード例 #4
0
        /**
         * <summary> Wraps an annotated word in a leaf parse node that hangs under a newly created parent node. The
         * parent's symbol is "ADVP" when GetUniversalDependency() equals "ADVMOD", "ADJP" when it equals "ACL", and
         * otherwise the tree pos reported by the word's parse. Parentheses in the word's string form are written as
         * the bracket tokens "-LRB-" and "-RRB-" in the leaf node.</summary>
         * <param name="annotatedWord">The word represented by this pair.</param>
         * <param name="no">Number stored for this pair (presumably the word's position; confirm against callers).</param>
         */
        public WordNodePair(AnnotatedWord annotatedWord, int no)
        {
            // Must be assigned before GetUniversalDependency() is called below.
            _annotatedWord = annotatedWord;
            ParseNodeDrawable parent;

            if (GetUniversalDependency().Equals("ADVMOD"))
            {
                parent = new ParseNodeDrawable(new Symbol("ADVP"));
            }
            else if (GetUniversalDependency().Equals("ACL"))
            {
                parent = new ParseNodeDrawable(new Symbol("ADJP"));
            }
            else
            {
                parent = new ParseNodeDrawable(new Symbol(annotatedWord.GetParse().GetTreePos()));
            }

            // string.Replace matches its argument literally (it is not a regular expression), so "\\(" only
            // matches a backslash followed by '('. The escaped forms are still replaced first to keep the
            // previous output for such input; the bare parentheses are then replaced as intended.
            string leafText = annotatedWord.ToString()
                              .Replace("\\(", "-LRB-").Replace("\\)", "-RRB-")
                              .Replace("(", "-LRB-").Replace(")", "-RRB-");

            _node = new ParseNodeDrawable(parent, leafText, true, 0);
            parent.AddChild(_node);
            _no             = no;
            _doneForConnect = false;
            _doneForHead    = false;
        }
        /**
         * <summary> Collects the candidate synsets for the word at the given position. The list starts with the synsets
         * constructed for the word itself and is extended, in this order, with the idiom synsets of the three-word
         * windows (two previous + current; previous + current + next; current + two next) and of the two-word windows
         * (previous + current; current + next). A window is only used when every neighbour in it exists and has a
         * parse.</summary>
         * <param name="wordNet">The wordnet used to construct the synsets.</param>
         * <param name="fsm">The morphological analyzer passed on to the wordnet lookups.</param>
         * <param name="sentence">The sentence containing the word.</param>
         * <param name="index">Position of the word in the sentence.</param>
         * <returns>The candidate synsets, in the order described above.</returns>
         */
        protected List<SynSet> GetCandidateSynSets(WordNet.WordNet wordNet, FsmMorphologicalAnalyzer fsm,
                                                   AnnotatedSentence.AnnotatedSentence sentence, int index)
        {
            var current = (AnnotatedWord)sentence.GetWord(index);

            // Neighbours that fall outside the sentence stay null.
            var twoPrevious = index > 1 ? (AnnotatedWord)sentence.GetWord(index - 2) : null;
            var previous    = index > 0 ? (AnnotatedWord)sentence.GetWord(index - 1) : null;
            var next        = index != sentence.WordCount() - 1 ? (AnnotatedWord)sentence.GetWord(index + 1) : null;
            var twoNext     = index < sentence.WordCount() - 2 ? (AnnotatedWord)sentence.GetWord(index + 2) : null;

            List<SynSet> candidates = wordNet.ConstructSynSets(current.GetParse().GetWord().GetName(),
                                                               current.GetParse(), current.GetMetamorphicParse(), fsm);

            var twoPreviousUsable = twoPrevious?.GetParse() != null;
            var previousUsable    = previous?.GetParse() != null;
            var nextUsable        = next?.GetParse() != null;
            var twoNextUsable     = twoNext?.GetParse() != null;

            // Three-word windows.
            if (twoPreviousUsable && previousUsable)
            {
                candidates.AddRange(wordNet.ConstructIdiomSynSets(
                                        twoPrevious.GetParse(), previous.GetParse(), current.GetParse(),
                                        twoPrevious.GetMetamorphicParse(), previous.GetMetamorphicParse(),
                                        current.GetMetamorphicParse(), fsm));
            }

            if (previousUsable && nextUsable)
            {
                candidates.AddRange(wordNet.ConstructIdiomSynSets(
                                        previous.GetParse(), current.GetParse(), next.GetParse(),
                                        previous.GetMetamorphicParse(), current.GetMetamorphicParse(),
                                        next.GetMetamorphicParse(), fsm));
            }

            if (nextUsable && twoNextUsable)
            {
                candidates.AddRange(wordNet.ConstructIdiomSynSets(
                                        current.GetParse(), next.GetParse(), twoNext.GetParse(),
                                        current.GetMetamorphicParse(), next.GetMetamorphicParse(),
                                        twoNext.GetMetamorphicParse(), fsm));
            }

            // Two-word windows.
            if (previousUsable)
            {
                candidates.AddRange(wordNet.ConstructIdiomSynSets(
                                        previous.GetParse(), current.GetParse(),
                                        previous.GetMetamorphicParse(), current.GetMetamorphicParse(), fsm));
            }

            if (nextUsable)
            {
                candidates.AddRange(wordNet.ConstructIdiomSynSets(
                                        current.GetParse(), next.GetParse(),
                                        current.GetMetamorphicParse(), next.GetMetamorphicParse(), fsm));
            }

            return candidates;
        }
        /**
         * <summary> Assigns a sense to every word whose sense is unambiguous. For each word that has a parse but no
         * semantic label yet, the method tries, in this order,
         * 1. the three-word windows: two previous words + current; previous + current + next; current + next two words,
         * 2. the two-word windows: previous + current; current + next,
         * 3. the current word on its own,
         * and labels the word with the first lookup in _turkishWordNet that yields exactly one synset.
         * Neighbouring words are determined anew for every position, so a word at the end of the sentence is never
         * combined with a neighbour left over from an earlier position.</summary>
         * <param name="sentence">The sentence for which word sense disambiguation will be determined automatically.</param>
         * <returns>Always true.</returns>
         */
        protected override bool AutoLabelSingleSemantics(AnnotatedSentence.AnnotatedSentence sentence)
        {
            for (var i = 0; i < sentence.WordCount(); i++)
            {
                var current = (AnnotatedWord)sentence.GetWord(i);

                // Scoped to the iteration on purpose: if these were kept across iterations, the last word would
                // still see itself as "next" and the last two positions would see a stale "twoNext".
                AnnotatedWord twoPrevious = i > 1 ? (AnnotatedWord)sentence.GetWord(i - 2) : null;
                AnnotatedWord previous    = i > 0 ? (AnnotatedWord)sentence.GetWord(i - 1) : null;
                AnnotatedWord next        = i < sentence.WordCount() - 1 ? (AnnotatedWord)sentence.GetWord(i + 1) : null;
                AnnotatedWord twoNext     = i < sentence.WordCount() - 2 ? (AnnotatedWord)sentence.GetWord(i + 2) : null;

                if (current.GetSemantic() != null || current.GetParse() == null)
                {
                    // Already labelled, or nothing to look up without a parse.
                    continue;
                }

                if (twoPrevious?.GetParse() != null && previous?.GetParse() != null)
                {
                    var idioms = _turkishWordNet.ConstructIdiomSynSets(twoPrevious.GetParse(),
                                                                       previous.GetParse(), current.GetParse(), twoPrevious.GetMetamorphicParse(),
                                                                       previous.GetMetamorphicParse(), current.GetMetamorphicParse(), _fsm);
                    if (idioms.Count == 1)
                    {
                        current.SetSemantic(idioms[0].GetId());
                        continue;
                    }
                }

                if (previous?.GetParse() != null && next?.GetParse() != null)
                {
                    var idioms = _turkishWordNet.ConstructIdiomSynSets(previous.GetParse(),
                                                                       current.GetParse(), next.GetParse(), previous.GetMetamorphicParse(),
                                                                       current.GetMetamorphicParse(), next.GetMetamorphicParse(), _fsm);
                    if (idioms.Count == 1)
                    {
                        current.SetSemantic(idioms[0].GetId());
                        continue;
                    }
                }

                if (next?.GetParse() != null && twoNext?.GetParse() != null)
                {
                    var idioms = _turkishWordNet.ConstructIdiomSynSets(current.GetParse(),
                                                                       next.GetParse(), twoNext.GetParse(), current.GetMetamorphicParse(),
                                                                       next.GetMetamorphicParse(), twoNext.GetMetamorphicParse(), _fsm);
                    if (idioms.Count == 1)
                    {
                        current.SetSemantic(idioms[0].GetId());
                        continue;
                    }
                }

                if (previous?.GetParse() != null)
                {
                    var idioms = _turkishWordNet.ConstructIdiomSynSets(previous.GetParse(),
                                                                       current.GetParse(), previous.GetMetamorphicParse(), current.GetMetamorphicParse(), _fsm);
                    if (idioms.Count == 1)
                    {
                        current.SetSemantic(idioms[0].GetId());
                        continue;
                    }
                }

                if (next?.GetParse() != null)
                {
                    var idioms = _turkishWordNet.ConstructIdiomSynSets(current.GetParse(),
                                                                       next.GetParse(), current.GetMetamorphicParse(), next.GetMetamorphicParse(), _fsm);
                    if (idioms.Count == 1)
                    {
                        current.SetSemantic(idioms[0].GetId());
                        continue;
                    }
                }

                // No unambiguous multiword expression matched; fall back to the word on its own.
                var meanings = _turkishWordNet.ConstructSynSets(current.GetParse().GetWord().GetName(),
                                                                current.GetParse(), current.GetMetamorphicParse(), _fsm);
                if (meanings.Count == 1)
                {
                    current.SetSemantic(meanings[0].GetId());
                }
            }

            return true;
        }
コード例 #7
0
 /**
  * <summary> Returns the tree pos of the wrapped annotated word, as reported by the word's parse.</summary>
  * <returns>The result of GetTreePos() on the parse of the wrapped word.</returns>
  */
 public string GetTreePos()
 {
     var parse = _annotatedWord.GetParse();
     return parse.GetTreePos();
 }
コード例 #8
0
        /**
         * <summary> Chooses the relation label for a dependent/head pair from the phrase labels of the two sides and
         * from morphological and semantic information of the two words. When either condition flag is set the result
         * is "PUNCT" regardless of the other arguments. A word without a morphological parse is treated as matching
         * none of the parse-based tests, so the branch's fallback label is returned instead of failing.</summary>
         * <param name="dependent">Phrase label of the dependent side (e.g. "ADVP", "NP", "PP").</param>
         * <param name="head">Phrase label of the head side (e.g. "NP", "VP").</param>
         * <param name="condition1">First flag that forces the "PUNCT" label.</param>
         * <param name="condition2">Second flag that forces the "PUNCT" label.</param>
         * <param name="dependentWord">The dependent word.</param>
         * <param name="headWord">The head word.</param>
         * <returns>The relation label; "DEP" when no rule applies.</returns>
         */
        private string FindData(string dependent, string head, bool condition1, bool condition2, AnnotatedWord dependentWord, AnnotatedWord headWord)
        {
            if (condition1 || condition2)
            {
                return "PUNCT";
            }

            // Looked up once and null-checked at every use: the PP branch already had to cope with a missing
            // parse, and the other branches now do the same instead of dereferencing null.
            var dependentParse = dependentWord?.GetParse();
            var headParse      = headWord?.GetParse();

            switch (dependent)
            {
            case "ADVP":
                if (dependentParse != null && dependentParse.GetRootPos().Equals("VERB"))
                {
                    return "ADVCL";
                }
                if (dependentParse != null && dependentParse.GetRootPos().Equals("NOUN"))
                {
                    return "NMOD";
                }
                return "ADVMOD";

            case "ADJP":
                if (head == "NP")
                {
                    if (dependentParse != null && dependentParse.GetRootPos().Equals("VERB"))
                    {
                        return "ACL";
                    }
                    return "AMOD";
                }
                return "ADVMOD";

            case "PP":
                if (head == "NP")
                {
                    return "CASE";
                }
                if (dependentParse != null && dependentParse.GetRootPos().Equals("NOUN"))
                {
                    return "NMOD";
                }
                return "ADVMOD";

            case "DP":
                return "DET";

            case "NP":
                if (head == "NP")
                {
                    if (dependentParse != null && headParse != null &&
                        dependentParse.ContainsTag(MorphologicalTag.PROPERNOUN) &&
                        headParse.ContainsTag(MorphologicalTag.PROPERNOUN))
                    {
                        return "FLAT";
                    }
                    if (dependentWord.GetSemantic() != null && headWord.GetSemantic() != null && dependentWord.GetSemantic().Equals(headWord.GetSemantic()))
                    {
                        return "COMPOUND";
                    }
                    return "NMOD";
                }
                if (head == "VP")
                {
                    if (dependentWord.GetSemantic() != null && headWord.GetSemantic() != null && dependentWord.GetSemantic().Equals(headWord.GetSemantic()))
                    {
                        return "COMPOUND";
                    }
                    if (dependentParse != null &&
                        (dependentParse.ContainsTag(MorphologicalTag.NOMINATIVE) || dependentParse.ContainsTag(MorphologicalTag.ACCUSATIVE)))
                    {
                        return "OBJ";
                    }
                    return "OBL";
                }
                return "NMOD";

            case "S":
                return head == "VP" ? "CCOMP" : "DEP";

            case "NUM":
                return "NUMMOD";

            case "INTJ":
                return "DISCOURSE";

            case "NEG":
                return "NEG";

            case "CONJP":
                return "CC";

            default:
                return "DEP";
            }
        }