コード例 #1
0
 /**
  * <summary> Creates a pair that wraps an existing parse node. The annotated word is constructed from the layer data
  * of the node, and the connect flag is cleared.</summary>
  * <param name="parseNodeDrawable">Parse node to wrap; its layer data is passed to the AnnotatedWord constructor.</param>
  * <param name="no">Number stored for this pair.</param>
  */
 public WordNodePair(ParseNodeDrawable parseNodeDrawable, int no)
 {
     var layerData = parseNodeDrawable.GetLayerData();

     _no             = no;
     _node           = parseNodeDrawable;
     _doneForConnect = false;
     _annotatedWord  = new AnnotatedWord(layerData);
 }
コード例 #2
0
        /**
         * <summary> Updates an AnnotatedWord: marks the supplied entity as modified and saves the change.
         * Returns BadRequest when the model state is invalid or when the id does not match the Id of the entity,
         * NotFound when saving raises a concurrency exception and AnnotatedWordExists reports no entity for the id,
         * and status 204 (NoContent) on success. A concurrency exception for an entity that does exist is rethrown.</summary>
         * <param name="id">Identifier the request is addressed to.</param>
         * <param name="annotatedWord">Entity carrying the new values.</param>
         */
        public async Task <IHttpActionResult> PutAnnotatedWord(int id, AnnotatedWord annotatedWord)
        {
            if (!ModelState.IsValid)
            {
                return BadRequest(ModelState);
            }

            if (id != annotatedWord.Id)
            {
                return BadRequest();
            }

            var entry = db.Entry(annotatedWord);
            entry.State = EntityState.Modified;

            try
            {
                await db.SaveChangesAsync();
            }
            catch (DbUpdateConcurrencyException)
            {
                // Only a missing entity is translated into a 404; any other conflict is propagated unchanged.
                if (AnnotatedWordExists(id))
                {
                    throw;
                }

                return NotFound();
            }

            return StatusCode(HttpStatusCode.NoContent);
        }
コード例 #3
0
        /**
         * <summary> Labels every sentence of the "new-sentences" corpus with MostFrequentSentenceAutoSemantic and compares
         * the semantic of each word with the word at the same position of the same sentence in "old-sentences".
         * The test expects 549 words in total, 277 of which carry a non-null semantic equal to the one in
         * "old-sentences".</summary>
         */
        public void TestAccuracy()
        {
            var autoSemantic   = new MostFrequentSentenceAutoSemantic(wordNet, fsm);
            var labelledCorpus = new AnnotatedCorpus("../../../new-sentences");
            var expectedCorpus = new AnnotatedCorpus("../../../old-sentences");
            var matched        = 0;
            var total          = 0;

            for (var sentenceIndex = 0; sentenceIndex < labelledCorpus.SentenceCount(); sentenceIndex++)
            {
                var labelled = (AnnotatedSentence.AnnotatedSentence)labelledCorpus.GetSentence(sentenceIndex);
                autoSemantic.AutoSemantic(labelled);
                var expected = (AnnotatedSentence.AnnotatedSentence)expectedCorpus.GetSentence(sentenceIndex);

                for (var wordIndex = 0; wordIndex < labelled.WordCount(); wordIndex++)
                {
                    total++;
                    var labelledWord = (AnnotatedWord)labelled.GetWord(wordIndex);
                    var expectedWord = (AnnotatedWord)expected.GetWord(wordIndex);
                    var semantic     = labelledWord.GetSemantic();

                    // A word counts as matched only when a semantic was assigned and it equals the expected one.
                    if (semantic != null && semantic.Equals(expectedWord.GetSemantic()))
                    {
                        matched++;
                    }
                }
            }

            Assert.AreEqual(549, total);
            Assert.AreEqual(277, matched);
        }
コード例 #4
0
        /**
         * <summary> Looks up a single AnnotatedWord by its key. Returns Ok with the entity when it is found and
         * NotFound otherwise.</summary>
         * <param name="id">Key of the entity to fetch.</param>
         */
        public async Task <IHttpActionResult> GetAnnotatedWord(int id)
        {
            var found = await db.AnnotatedWords.FindAsync(id);

            if (found != null)
            {
                return Ok(found);
            }

            return NotFound();
        }
コード例 #5
0
        /**
         * <summary> Adds a new AnnotatedWord and saves it. Returns BadRequest when the model state is invalid; otherwise
         * returns a CreatedAtRoute result for the "DefaultApi" route carrying the Id of the entity and the entity itself.</summary>
         * <param name="annotatedWord">Entity to insert.</param>
         */
        public async Task <IHttpActionResult> PostAnnotatedWord(AnnotatedWord annotatedWord)
        {
            if (!ModelState.IsValid)
            {
                return BadRequest(ModelState);
            }

            db.AnnotatedWords.Add(annotatedWord);
            await db.SaveChangesAsync();

            // The route values are built after saving so that the Id read here is the one present after the save.
            var routeValues = new { id = annotatedWord.Id };
            return CreatedAtRoute("DefaultApi", routeValues, annotatedWord);
        }
        /**
         * <summary> Runs the robust morphological analysis of the morphological analyzer on the sentence, lets
         * longestRootFirstDisambiguation choose one parse per word, and assigns the chosen parse to every word that
         * does not have a parse yet. Words that already have a parse are left untouched.</summary>
         * <param name="sentence">The sentence to be disambiguated automatically.</param>
         */
        protected override void AutoDisambiguateMultipleRootWords(AnnotatedSentence.AnnotatedSentence sentence)
        {
            var candidateParses = morphologicalAnalyzer.RobustMorphologicalAnalysis(sentence);
            var chosenParses    = longestRootFirstDisambiguation.Disambiguate(candidateParses);

            for (var index = 0; index < sentence.WordCount(); index++)
            {
                var annotated = (AnnotatedWord)sentence.GetWord(index);
                if (annotated.GetParse() != null)
                {
                    // Already disambiguated; keep the existing parse.
                    continue;
                }

                SetParseAutomatically(chosenParses[index], annotated);
            }
        }
コード例 #7
0
        /**
         * <summary> Deletes the AnnotatedWord with the given key. Returns NotFound when no such entity is found;
         * otherwise removes it, saves the change and returns Ok with the removed entity.</summary>
         * <param name="id">Key of the entity to delete.</param>
         */
        public async Task <IHttpActionResult> DeleteAnnotatedWord(int id)
        {
            var target = await db.AnnotatedWords.FindAsync(id);

            if (target != null)
            {
                db.AnnotatedWords.Remove(target);
                await db.SaveChangesAsync();

                return Ok(target);
            }

            return NotFound();
        }
コード例 #8
0
        /**
         * <summary> Builds an annotated sentence from the nodes that NodeDrawableCollector collects from the root with
         * the IsEnglishLeafNode condition. Each collected node becomes one word whose layer string is
         * "{language=name}{posTag=parentName}", where name is the data name of the node and parentName is the
         * data name of its parent.</summary>
         * <param name="language">Layer name written in front of the name of each node.</param>
         * <returns>The sentence holding one AnnotatedWord per collected node, in collection order.</returns>
         */
        public AnnotatedSentence.AnnotatedSentence GenerateAnnotatedSentence(string language)
        {
            var result    = new AnnotatedSentence.AnnotatedSentence("");
            var collector = new NodeDrawableCollector((ParseNodeDrawable)root, new IsEnglishLeafNode());

            foreach (var leaf in collector.Collect())
            {
                var languageLayer = "{" + language + "=" + leaf.GetData().GetName() + "}";
                var posTagLayer   = "{posTag=" + leaf.GetParent().GetData().GetName() + "}";

                result.AddWord(new AnnotatedWord(languageLayer + posTagLayer));
            }

            return result;
        }
        /**
         * <summary> Scans the list from the end and marks the last word that is not punctuation as the root by
         * setting its universal dependency to (0, "ROOT"). Nothing is changed when every word is punctuation or the
         * list is empty.</summary>
         * <param name="wordNodePairList">Word/node pairs in order.</param>
         */
        private void SetRoot(List <WordNodePair> wordNodePairList)
        {
            for (var index = wordNodePairList.Count - 1; index >= 0; index--)
            {
                var candidate = wordNodePairList[index].GetWord();
                if (candidate.IsPunctuation())
                {
                    continue;
                }

                // First non-punctuation word seen from the end: this is the root, no need to look further.
                candidate.SetUniversalDependency(0, "ROOT");
                return;
            }
        }
コード例 #10
0
 /**
  * <summary> The method checks for TIME entities using Word.IsTime on the lowercased (Turkish culture) word. Only
  * words that have a morphological parse are considered. When a word is a TIME expression, the word directly
  * before it is also tagged TIME if its parse contains the CARDINAL tag. A previous word without a parse is
  * skipped instead of being dereferenced.</summary>
  * <param name="sentence">The sentence for which TIME named entities checked.</param>
  */
 protected override void AutoDetectTime(AnnotatedSentence.AnnotatedSentence sentence)
 {
     for (var i = 0; i < sentence.WordCount(); i++)
     {
         var word          = (AnnotatedWord)sentence.GetWord(i);
         var wordLowercase = word.GetName().ToLower(new CultureInfo("tr"));
         if (word.GetParse() == null || !Word.IsTime(wordLowercase))
         {
             continue;
         }

         word.SetNamedEntityType("TIME");
         if (i == 0)
         {
             // No preceding word to inspect.
             continue;
         }

         var previous      = (AnnotatedWord)sentence.GetWord(i - 1);
         var previousParse = previous.GetParse();

         // The previous word may not be morphologically analysed yet; guard against a null parse
         // the same way AutoDetectMoney does.
         if (previousParse != null && previousParse.ContainsTag(MorphologicalTag.CARDINAL))
         {
             previous.SetNamedEntityType("TIME");
         }
     }
 }
コード例 #11
0
        /**
         * <summary> Creates a pair for an annotated word by building a new leaf node and a parent node for it.
         * The symbol of the parent is "ADVP" when the universal dependency is "ADVMOD", "ADJP" when it is "ACL", and
         * otherwise the tree pos of the parse of the word. The leaf data is the string form of the word with
         * parentheses replaced by -LRB- and -RRB-.</summary>
         * <param name="annotatedWord">Word for which the nodes are created.</param>
         * <param name="no">Number stored for this pair.</param>
         */
        public WordNodePair(AnnotatedWord annotatedWord, int no)
        {
            _annotatedWord = annotatedWord;

            var dependency = GetUniversalDependency();
            ParseNodeDrawable parent;
            if (dependency.Equals("ADVMOD"))
            {
                parent = new ParseNodeDrawable(new Symbol("ADVP"));
            }
            else if (dependency.Equals("ACL"))
            {
                parent = new ParseNodeDrawable(new Symbol("ADJP"));
            }
            else
            {
                parent = new ParseNodeDrawable(new Symbol(annotatedWord.GetParse().GetTreePos()));
            }

            // string.Replace matches its argument literally (it is not a regular expression), so the
            // parentheses must be given unescaped; "\\(" would only match a backslash followed by "(".
            var leafData = annotatedWord.ToString().Replace("(", "-LRB-").Replace(")", "-RRB-");

            _node = new ParseNodeDrawable(parent, leafData, true, 0);
            parent.AddChild(_node);
            _no             = no;
            _doneForConnect = false;
            _doneForHead    = false;
        }
コード例 #12
0
        /**
         * <summary> Builds an AnnotatedWord from the Turkish word at the given index and copies over every layer that
         * exists: morphological parse (INFLECTIONAL_GROUP), metamorphic parse (META_MORPHEME), semantic (SEMANTICS),
         * named entity type (NER), argument (PROPBANK) and shallow parse (SHALLOW_PARSE). The NER value is read with
         * GetLayerData and the argument with GetArgument, neither of which takes the word index.</summary>
         * <param name="wordIndex">Index of the word inside this layer information.</param>
         * <returns>The newly created annotated word.</returns>
         */
        public AnnotatedWord ToAnnotatedWord(int wordIndex)
        {
            var result = new AnnotatedWord(GetTurkishWordAt(wordIndex));

            // Morphology layers.
            if (LayerExists(ViewLayerType.INFLECTIONAL_GROUP))
            {
                var parse = GetMorphologicalParseAt(wordIndex);
                result.SetParse(parse.ToString());
            }
            if (LayerExists(ViewLayerType.META_MORPHEME))
            {
                var metamorphicParse = GetMetamorphicParseAt(wordIndex);
                result.SetMetamorphicParse(metamorphicParse.ToString());
            }

            // Semantic and named entity layers.
            if (LayerExists(ViewLayerType.SEMANTICS))
            {
                result.SetSemantic(GetSemanticAt(wordIndex));
            }
            if (LayerExists(ViewLayerType.NER))
            {
                result.SetNamedEntityType(GetLayerData(ViewLayerType.NER));
            }

            // Argument and shallow parse layers.
            if (LayerExists(ViewLayerType.PROPBANK))
            {
                var argument = GetArgument();
                result.SetArgument(argument.ToString());
            }
            if (LayerExists(ViewLayerType.SHALLOW_PARSE))
            {
                result.SetShallowParse(GetShallowParseAt(wordIndex));
            }

            return result;
        }
コード例 #13
0
        /**
         * <summary> The method checks for MONEY entities using Word.IsMoney on the lowercased (Turkish culture) word.
         * Only words that have a morphological parse are considered. When a word is a MONEY expression, the method
         * walks backwards and also tags the preceding words as MONEY for as long as each of them has a parse and is
         * either named "amerikan" or carries one of the REAL, CARDINAL or NUMBER tags.</summary>
         * <param name="sentence">The sentence for which MONEY named entities checked.</param>
         */
        protected override void AutoDetectMoney(AnnotatedSentence.AnnotatedSentence sentence)
        {
            for (var i = 0; i < sentence.WordCount(); i++)
            {
                var word          = (AnnotatedWord)sentence.GetWord(i);
                var wordLowercase = word.GetName().ToLower(new CultureInfo("tr"));
                if (word.GetParse() == null || !Word.IsMoney(wordLowercase))
                {
                    continue;
                }

                word.SetNamedEntityType("MONEY");

                // Extend the MONEY span to the left until a word that is not part of the amount is met.
                for (var j = i - 1; j >= 0; j--)
                {
                    var previous      = (AnnotatedWord)sentence.GetWord(j);
                    var previousParse = previous.GetParse();
                    if (previousParse == null)
                    {
                        break;
                    }

                    var belongsToAmount = previous.GetName().Equals("amerikan")
                                          || previousParse.ContainsTag(MorphologicalTag.REAL)
                                          || previousParse.ContainsTag(MorphologicalTag.CARDINAL)
                                          || previousParse.ContainsTag(MorphologicalTag.NUMBER);
                    if (!belongsToAmount)
                    {
                        break;
                    }

                    previous.SetNamedEntityType("MONEY");
                }
            }
        }
コード例 #14
0
        /**
         * <summary> Maps a dependent/head phrase label pair to a dependency relation label. Returns "PUNCT" when either
         * condition flag is set. Otherwise the label is chosen from the dependent phrase type, refined by the head
         * phrase type and by the parse and semantic of the words. A word whose morphological parse is null is treated
         * as not matching any parse-based test, so the fallback label of the branch is returned instead of
         * throwing.</summary>
         * <param name="dependent">Phrase label of the dependent (for example "ADVP", "NP", "PP").</param>
         * <param name="head">Phrase label of the head.</param>
         * <param name="condition1">First flag forcing the "PUNCT" label.</param>
         * <param name="condition2">Second flag forcing the "PUNCT" label.</param>
         * <param name="dependentWord">Dependent word; its parse and semantic are inspected.</param>
         * <param name="headWord">Head word; its parse and semantic are inspected in the NP branches.</param>
         * <returns>The dependency relation label; "DEP" when no rule applies.</returns>
         */
        private string FindData(string dependent, string head, bool condition1, bool condition2, AnnotatedWord dependentWord, AnnotatedWord headWord)
        {
            if (condition1 || condition2)
            {
                return "PUNCT";
            }

            switch (dependent)
            {
                case "ADVP":
                {
                    var parse = dependentWord.GetParse();
                    if (parse != null)
                    {
                        if (parse.GetRootPos().Equals("VERB"))
                        {
                            return "ADVCL";
                        }
                        if (parse.GetRootPos().Equals("NOUN"))
                        {
                            return "NMOD";
                        }
                    }
                    return "ADVMOD";
                }

                case "ADJP":
                {
                    if (head != "NP")
                    {
                        return "ADVMOD";
                    }
                    var parse = dependentWord.GetParse();
                    if (parse != null && parse.GetRootPos().Equals("VERB"))
                    {
                        return "ACL";
                    }
                    return "AMOD";
                }

                case "PP":
                {
                    if (head == "NP")
                    {
                        return "CASE";
                    }
                    var parse = dependentWord.GetParse();
                    if (parse != null && parse.GetRootPos().Equals("NOUN"))
                    {
                        return "NMOD";
                    }
                    return "ADVMOD";
                }

                case "DP":
                    return "DET";

                case "NP":
                {
                    if (head == "NP")
                    {
                        var dependentParse = dependentWord.GetParse();
                        if (dependentParse != null && dependentParse.ContainsTag(MorphologicalTag.PROPERNOUN))
                        {
                            // The head parse is only consulted when the dependent is a proper noun.
                            var headParse = headWord.GetParse();
                            if (headParse != null && headParse.ContainsTag(MorphologicalTag.PROPERNOUN))
                            {
                                return "FLAT";
                            }
                        }
                        if (dependentWord.GetSemantic() != null && headWord.GetSemantic() != null && dependentWord.GetSemantic().Equals(headWord.GetSemantic()))
                        {
                            return "COMPOUND";
                        }
                        return "NMOD";
                    }

                    if (head == "VP")
                    {
                        if (dependentWord.GetSemantic() != null && headWord.GetSemantic() != null && dependentWord.GetSemantic().Equals(headWord.GetSemantic()))
                        {
                            return "COMPOUND";
                        }
                        var dependentParse = dependentWord.GetParse();
                        if (dependentParse != null &&
                            (dependentParse.ContainsTag(MorphologicalTag.NOMINATIVE) || dependentParse.ContainsTag(MorphologicalTag.ACCUSATIVE)))
                        {
                            return "OBJ";
                        }
                        return "OBL";
                    }

                    return "NMOD";
                }

                case "S":
                    return head == "VP" ? "CCOMP" : "DEP";

                case "NUM":
                    return "NUMMOD";

                case "INTJ":
                    return "DISCOURSE";

                case "NEG":
                    return "NEG";

                case "CONJP":
                    return "CC";

                default:
                    return "DEP";
            }
        }
コード例 #15
0
 /**
  * <summary> Creates a package that carries a sentence and a filler flag; the annotated word is left null.</summary>
  * <param name="a">Sentence stored in the package.</param>
  * <param name="f">Filler flag stored in the package.</param>
  */
 public PackagedNextWord(AnnotatedSentence a, bool f)
 {
     annotatedWord     = null;
     annotatedSentence = a;
     filler            = f;
 }
 /**
  * <summary> Copies a disambiguated parse onto a word: the parse of the word is set from the transition list of
  * the given parse and its metamorphic parse from the with-list of the given parse.</summary>
  * <param name="disambiguatedParse">Parse selected for the word.</param>
  * <param name="word">Word that receives the parse and the metamorphic parse.</param>
  */
 private void SetParseAutomatically(FsmParse disambiguatedParse, AnnotatedWord word)
 {
     var transitionList = disambiguatedParse.TransitionList();
     word.SetParse(transitionList);

     var withList = disambiguatedParse.WithList();
     word.SetMetamorphicParse(withList);
 }
コード例 #17
0
 /**
  * <summary> Creates a package that carries an annotated word and a filler flag.</summary>
  * <param name="a">Annotated word stored in the package.</param>
  * <param name="f">Filler flag stored in the package.</param>
  */
 public PackagedAnnotatedWord(AnnotatedWord a, bool f)
 {
     annotatedWord = a;
     filler        = f;
 }
        /**
         * <summary> Collects the candidate synsets for the word at the given index. The result starts with the synsets
         * constructed for the word itself and is extended with the idiom synsets constructed for every three-word
         * window (two previous + current, previous + current + next, current + two next) and two-word window
         * (previous + current, current + next) whose neighbouring words have a morphological parse. When the word
         * itself has no morphological parse, an empty list is returned.</summary>
         * <param name="wordNet">Wordnet used to construct the synsets.</param>
         * <param name="fsm">Morphological analyzer passed on to the wordnet.</param>
         * <param name="sentence">Sentence containing the word.</param>
         * <param name="index">Index of the word in the sentence.</param>
         * <returns>The candidate synsets, possibly empty.</returns>
         */
        protected List <SynSet> GetCandidateSynSets(WordNet.WordNet wordNet, FsmMorphologicalAnalyzer fsm,
                                                    AnnotatedSentence.AnnotatedSentence sentence, int index)
        {
            var current      = (AnnotatedWord)sentence.GetWord(index);
            var currentParse = current.GetParse();
            if (currentParse == null)
            {
                // Nothing can be looked up for a word that is not morphologically analysed.
                return new List <SynSet>();
            }

            var lastIndex   = sentence.WordCount() - 1;
            var twoPrevious = index > 1 ? (AnnotatedWord)sentence.GetWord(index - 2) : null;
            var previous    = index > 0 ? (AnnotatedWord)sentence.GetWord(index - 1) : null;
            var next        = index < lastIndex ? (AnnotatedWord)sentence.GetWord(index + 1) : null;
            var twoNext     = index < lastIndex - 1 ? (AnnotatedWord)sentence.GetWord(index + 2) : null;

            var twoPreviousParse = twoPrevious?.GetParse();
            var previousParse    = previous?.GetParse();
            var nextParse        = next?.GetParse();
            var twoNextParse     = twoNext?.GetParse();
            var currentMeta      = current.GetMetamorphicParse();

            List <SynSet> synSets = wordNet.ConstructSynSets(currentParse.GetWord().GetName(),
                                                             currentParse, currentMeta, fsm);

            // Three-word windows.
            if (twoPreviousParse != null && previousParse != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(twoPreviousParse, previousParse, currentParse,
                                                               twoPrevious.GetMetamorphicParse(), previous.GetMetamorphicParse(), currentMeta,
                                                               fsm));
            }

            if (previousParse != null && nextParse != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(previousParse, currentParse, nextParse,
                                                               previous.GetMetamorphicParse(), currentMeta, next.GetMetamorphicParse(),
                                                               fsm));
            }

            if (nextParse != null && twoNextParse != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(currentParse, nextParse, twoNextParse,
                                                               currentMeta, next.GetMetamorphicParse(), twoNext.GetMetamorphicParse(),
                                                               fsm));
            }

            // Two-word windows.
            if (previousParse != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(previousParse, currentParse,
                                                               previous.GetMetamorphicParse(), currentMeta, fsm));
            }

            if (nextParse != null)
            {
                synSets.AddRange(wordNet.ConstructIdiomSynSets(currentParse, nextParse,
                                                               currentMeta, next.GetMetamorphicParse(), fsm));
            }

            return synSets;
        }
        /**
         * <summary> For every word that has a morphological parse but no semantic yet, the method checks, in this order,
         * 1. the previous two words and the current word; the previous, current and next word; the current and the next
         * two words for a three word multiword expression that occurs in the Turkish wordnet,
         * 2. the previous word and current word; current word and the next word for a two word multiword expression that
         * occurs in the Turkish wordnet,
         * 3. the current word alone.
         * The first lookup that yields exactly one synset sets the semantic of the word. Neighbouring words are
         * determined afresh for each word, so words at the end of the sentence are never combined with a stale
         * neighbour from an earlier position.</summary>
         * <param name="sentence">The sentence for which word sense disambiguation will be determined automatically.</param>
         * <returns>Always true.</returns>
         */
        protected override bool AutoLabelSingleSemantics(AnnotatedSentence.AnnotatedSentence sentence)
        {
            for (var i = 0; i < sentence.WordCount(); i++)
            {
                var current = (AnnotatedWord)sentence.GetWord(i);
                if (current.GetSemantic() != null || current.GetParse() == null)
                {
                    continue;
                }

                // Neighbours are scoped to the iteration: a neighbour that does not exist for this position is
                // null rather than a leftover from the previous position.
                var lastIndex   = sentence.WordCount() - 1;
                var twoPrevious = i > 1 ? (AnnotatedWord)sentence.GetWord(i - 2) : null;
                var previous    = i > 0 ? (AnnotatedWord)sentence.GetWord(i - 1) : null;
                var next        = i < lastIndex ? (AnnotatedWord)sentence.GetWord(i + 1) : null;
                var twoNext     = i < lastIndex - 1 ? (AnnotatedWord)sentence.GetWord(i + 2) : null;

                var hasTwoPrevious = twoPrevious != null && twoPrevious.GetParse() != null;
                var hasPrevious    = previous != null && previous.GetParse() != null;
                var hasNext        = next != null && next.GetParse() != null;
                var hasTwoNext     = twoNext != null && twoNext.GetParse() != null;

                if (hasTwoPrevious && hasPrevious)
                {
                    var idioms = _turkishWordNet.ConstructIdiomSynSets(twoPrevious.GetParse(),
                                                                       previous.GetParse(), current.GetParse(), twoPrevious.GetMetamorphicParse(),
                                                                       previous.GetMetamorphicParse(), current.GetMetamorphicParse(), _fsm);
                    if (idioms.Count == 1)
                    {
                        current.SetSemantic(idioms[0].GetId());
                        continue;
                    }
                }

                if (hasPrevious && hasNext)
                {
                    var idioms = _turkishWordNet.ConstructIdiomSynSets(previous.GetParse(),
                                                                       current.GetParse(), next.GetParse(), previous.GetMetamorphicParse(),
                                                                       current.GetMetamorphicParse(), next.GetMetamorphicParse(), _fsm);
                    if (idioms.Count == 1)
                    {
                        current.SetSemantic(idioms[0].GetId());
                        continue;
                    }
                }

                if (hasNext && hasTwoNext)
                {
                    var idioms = _turkishWordNet.ConstructIdiomSynSets(current.GetParse(),
                                                                       next.GetParse(), twoNext.GetParse(), current.GetMetamorphicParse(),
                                                                       next.GetMetamorphicParse(), twoNext.GetMetamorphicParse(), _fsm);
                    if (idioms.Count == 1)
                    {
                        current.SetSemantic(idioms[0].GetId());
                        continue;
                    }
                }

                if (hasPrevious)
                {
                    var idioms = _turkishWordNet.ConstructIdiomSynSets(previous.GetParse(),
                                                                       current.GetParse(), previous.GetMetamorphicParse(), current.GetMetamorphicParse(), _fsm);
                    if (idioms.Count == 1)
                    {
                        current.SetSemantic(idioms[0].GetId());
                        continue;
                    }
                }

                if (hasNext)
                {
                    var idioms = _turkishWordNet.ConstructIdiomSynSets(current.GetParse(),
                                                                       next.GetParse(), current.GetMetamorphicParse(), next.GetMetamorphicParse(), _fsm);
                    if (idioms.Count == 1)
                    {
                        current.SetSemantic(idioms[0].GetId());
                        continue;
                    }
                }

                // No multiword expression matched; fall back to the senses of the word itself.
                var meanings = _turkishWordNet.ConstructSynSets(current.GetParse().GetWord().GetName(),
                                                                current.GetParse(), current.GetMetamorphicParse(), _fsm);
                if (meanings.Count == 1)
                {
                    current.SetSemantic(meanings[0].GetId());
                }
            }

            return true;
        }