예제 #1
0
        /// <summary>
        /// Parses <paramref name="sentence"/> with the shared treebank parser.
        /// </summary>
        /// <param name="sentence">Text handed to the parser's <c>DoParse</c>.</param>
        /// <returns>The parse produced by <c>DoParse</c>.</returns>
        public static OpenNLP.Tools.Parser.Parse ParseSentence(string sentence)
        {
            // The parser is created on first use from mModelPath and then reused.
            var activeParser = mParser;
            if (activeParser == null)
            {
                activeParser = new OpenNLP.Tools.Parser.EnglishTreebankParser(mModelPath, true, false);
                mParser      = activeParser;
            }

            return activeParser.DoParse(sentence);
        }
예제 #2
0
        /// <summary>
        /// Parses <paramref name="sentence"/>, creating the cached parser on first use.
        /// </summary>
        /// <param name="sentence">Text handed to the parser's <c>DoParse</c>.</param>
        /// <returns>The parse produced by <c>DoParse</c>.</returns>
        private Parse ParseSentence(string sentence)
        {
            // Keep the existing parser when there is one; otherwise build it from _modelPath.
            _parser = _parser ?? new EnglishTreebankParser(_modelPath, true, false);

            var parsed = _parser.DoParse(sentence);
            return parsed;
        }
예제 #3
0
        /// <summary>
        /// Parses <paramref name="sentence"/> and returns the textual form produced by the parse's <c>Show()</c>.
        /// </summary>
        /// <param name="sentence">Text handed to the parser's <c>DoParse</c>.</param>
        /// <returns>The value returned by <c>Show()</c> on the resulting parse.</returns>
        /// <remarks>
        /// The model directory is the <c>Models</c> folder next to the running executable.
        /// A new parser is built on every call.
        /// NOTE(review): parser construction is presumably expensive; consider caching it — confirm against callers.
        /// </remarks>
        private string MySentenceStructure(string sentence)
        {
            string modelPath;

            // Process is IDisposable; release it as soon as the executable path has been read.
            using (var currentProcess = Process.GetCurrentProcess())
            {
                modelPath = Path.GetDirectoryName(currentProcess.MainModule.FileName) + @"\Models\";
            }

            var parser = new EnglishTreebankParser(modelPath);
            var parse  = parser.DoParse(sentence);
            var result = parse.Show();

            return(result);
        }
예제 #4
0
        /// <summary>
        /// Parses the text in <c>txtInput</c> and renders the resulting tree in <c>lithiumControl</c>.
        /// </summary>
        /// <remarks>
        /// Does nothing when the input box is empty. While working, the input box and the parse
        /// button are disabled and the wait cursor is shown; they are restored even when parsing
        /// or drawing throws, and the exception then propagates to the caller.
        /// </remarks>
        private void ShowParse()
        {
            if (txtInput.Text.Length == 0)
            {
                return;
            }

            //prepare the UI
            txtInput.Enabled = false;
            btnParse.Enabled = false;
            this.Cursor      = Cursors.WaitCursor;

            try
            {
                lithiumControl.NewDiagram();

                //do the parsing
                if (mParser == null)
                {
                    mParser = new EnglishTreebankParser(mModelPath, true, false);
                }
                mParse = mParser.DoParse(txtInput.Text);

                // When the result is the top node, display its first child as the root instead.
                if (mParse.Type == MaximumEntropyParser.TopNode)
                {
                    mParse = mParse.GetChildren()[0];
                }

                //display the parse result
                ShapeBase root = this.lithiumControl.Root;

                root.Text    = mParse.Type;
                root.Visible = true;

                AddChildNodes(root, mParse.GetChildren());
                root.Expand();

                this.lithiumControl.DrawTree();
            }
            finally
            {
                //restore the UI, also on failure, so the form never stays locked
                this.Cursor      = Cursors.Default;
                txtInput.Enabled = true;
                btnParse.Enabled = true;
            }
        }
        /*public bool IsMatch(string sentence, PhrasalVerb phrasalVerb)
         * {
         *  var tokens = tokenizer.Tokenize(sentence);
         *  var pv = MatchingPhrasalVerbs(sentence, new List<PhrasalVerb>() {phrasalVerb});
         *  return pv.Any();
         * }*/

        /*public List<PhrasalVerb> MatchingPhrasalVerbs(string sentence, List<PhrasalVerb> phrasalVerbs)
         * {
         *  // tokenize sentence
         *  var tokens = tokenizer.Tokenize(sentence);
         *  var taggedWords = tagger.Tag(tokens)/*.Where(t => Regex.IsMatch(t, "[A-Z]+")).ToList()#1#;
         *  // create parse tree
         *  var parse = parser.DoParse(tokens);
         *  // retrieve dependencies
         *  var dependencies = ComputeDependencies(parse).ToList();
         *
         *  // compute matching phrasal verbs
         *  var matchingPhrasalVerbs = new List<PhrasalVerb>();
         *  foreach (var phrasalVerb in phrasalVerbs)
         *  {
         *      // get relevant dependencies found
         *      var parts = phrasalVerb.Name.Split(' ');
         *      var root = parts.First();
         *      // find dependencies for this root
         *      var relevantDepedencies = dependencies
         *          .Where(
         *              d =>
         *                  ((string.Equals(root, lemmatizer.Lemmatize(d.Gov().GetWord()),
         *                      StringComparison.InvariantCultureIgnoreCase) && d.Gov().Index() < d.Dep().Index())
         ||
         ||                  (string.Equals(root, lemmatizer.Lemmatize(d.Dep().GetWord()),
         ||                      StringComparison.InvariantCultureIgnoreCase) && d.Dep().Index() < d.Gov().Index()))
         ||                 && (!phrasalVerb.Inseparable || Math.Abs(d.Dep().Index() - d.Gov().Index()) == 1)
         ||                     // for non separable verbs
         ||                 && (!phrasalVerb.SeparableMandatory || Math.Abs(d.Dep().Index() - d.Gov().Index()) > 1)
         ||         // for separable mandatory verbs
         ||         //&& d.Gov().Index() >= 1 && IsVerb(taggedWords[d.Gov().Index() - 1])
         ||         )
         ||         .ToList();
         ||
         ||     // We take only the 2nd part
         ||     // For phrasal verbs with several particles, that's a good approximation for now
         ||     // (we could check that all the particles are also linked)
         ||     if (relevantDepedencies.Any() && parts.Count() > 1)
         ||     {
         ||         var particle1 = parts[1];
         ||         var prtDependencies = relevantDepedencies.Where(d => d.Reln().GetShortName() == "prt").ToList();
         ||         if (prtDependencies.Any())
         ||         {
         ||             // if root has a prt dependency, don't look at other relations
         ||             if (prtDependencies
         ||                 .Any(d => string.Equals(particle1, d.Dep().GetWord(),StringComparison.InvariantCultureIgnoreCase)
         || string.Equals(particle1, d.Gov().GetWord(), StringComparison.InvariantCultureIgnoreCase)))
         ||             {
         ||                 matchingPhrasalVerbs.Add(phrasalVerb);
         ||             }
         ||         }
         ||         else
         ||         {
         ||             // otherwise, look at all the other relations
         ||             var relevantRelationships = relevantDepedencies
         ||                 .Where(d => string.Equals(particle1, d.Dep().GetWord(), StringComparison.InvariantCultureIgnoreCase)
         || string.Equals(particle1, d.Gov().GetWord(), StringComparison.InvariantCultureIgnoreCase))
         ||                 .ToList();
         ||             if (relevantRelationships.Any())
         ||             {
         ||                 matchingPhrasalVerbs.Add(phrasalVerb);
         ||             }
         ||         }
         ||     }
         || }
         || return matchingPhrasalVerbs;
         ||}
         ||
         ||private IEnumerable<TypedDependency> ComputeDependencies(Parse parse)
         ||{
         || // Extract dependencies from lexical tree
         || var tlp = new PennTreebankLanguagePack();
         || var gsf = tlp.GrammaticalStructureFactory();
         || var tree = new ParseTree(parse);
         || try
         || {
         ||     var gs = gsf.NewGrammaticalStructure(tree);
         ||     return gs.TypedDependencies();
         || }
         || catch (Exception)
         || {
         ||     Console.WriteLine("Exception when computing deps for {0}", parse);
         ||     return new List<TypedDependency>();
         || }
         ||}*/

        /// <summary>
        /// Returns the entries of <paramref name="phrasalVerbs"/> that are detected in <paramref name="sentence"/>.
        /// </summary>
        /// <param name="sentence">Sentence to tokenize, parse and inspect.</param>
        /// <param name="phrasalVerbs">Candidate phrasal verbs; each <c>Name</c> is split on spaces into root + particles.</param>
        /// <returns>
        /// The matching phrasal verbs, in input order. Each input entry is added at most once.
        /// </returns>
        /// <remarks>
        /// A phrasal verb with at least two words matches when all of the following hold:
        /// <list type="bullet">
        /// <item>a dependency links its (lemmatized) root to its first particle, with the root appearing first;
        /// "prt" relations are preferred, any relation is accepted when the root has no "prt" relation;</item>
        /// <item>the <c>Inseparable</c> / <c>SeparableMandatory</c> constraints, when set, agree with the distance between the two words;</item>
        /// <item><c>AreWordSeparatedInSentence</c> returns false for that dependency;</item>
        /// <item>the remaining particles (3rd word onwards) are exactly the tokens that follow the later of the two linked words.</item>
        /// </list>
        /// Single-word names never match.
        /// </remarks>
        public List <PhrasalVerb> MatchingPhrasalVerbs(string sentence, List <PhrasalVerb> phrasalVerbs)
        {
            // tokenize sentence
            var tokens = tokenizer.Tokenize(sentence);
            // create parse tree
            var parse = parser.DoParse(tokens);
            // retrieve dependencies
            var dependencies = ComputeDependencies(parse).ToList();

            var matchingPhrasalVerbs = new List <PhrasalVerb>();

            foreach (var phrasalVerb in phrasalVerbs)
            {
                // get relevant dependencies found
                var parts = phrasalVerb.Name.Split(' ').ToList();
                var root  = parts.First();
                // find dependencies for this root
                var rootRelatedDependencies = dependencies
                                              .Where(d => // the (lemmatized) token must be equal to the gov/dep of the dependency
                                                     ((string.Equals(root, lemmatizer.Lemmatize(d.Gov().GetWord()), StringComparison.InvariantCultureIgnoreCase) &&
                                                       d.Gov().Index() < d.Dep().Index()) ||
                                                      (string.Equals(root, lemmatizer.Lemmatize(d.Dep().GetWord()), StringComparison.InvariantCultureIgnoreCase) &&
                                                       d.Dep().Index() < d.Gov().Index()))
                                                     // if the phrasal verb is inseparable, no word must be between the root and the particle
                                                     && (!phrasalVerb.Inseparable.HasValue || (!phrasalVerb.Inseparable.Value || Math.Abs(d.Dep().Index() - d.Gov().Index()) == 1))
                                                     // if the phrasal verb is mandatory separable, at least one word must be between the root and the particle
                                                     && (!phrasalVerb.SeparableMandatory.HasValue || (!phrasalVerb.SeparableMandatory.Value || Math.Abs(d.Dep().Index() - d.Gov().Index()) > 1))
                                                     )
                                              .ToList();

                // Only the first particle is checked through the dependencies;
                // the other particles are checked positionally further down.
                if (!rootRelatedDependencies.Any() || parts.Count < 2)
                {
                    continue;
                }

                var particle1            = parts[1];
                var relevantDependencies = rootRelatedDependencies.Where(d => d.Reln().GetShortName() == "prt").ToList();
                if (!relevantDependencies.Any())
                {
                    // if no "prt" relation, take all relations whatsoever.
                    relevantDependencies = rootRelatedDependencies;
                }

                // if one of relevant dependencies have the particle as gov/dep, it's good!
                var rootParticle1Dependency = relevantDependencies
                                              .FirstOrDefault(d => string.Equals(particle1, d.Dep().GetWord(), StringComparison.InvariantCultureIgnoreCase) ||
                                                              string.Equals(particle1, d.Gov().GetWord(), StringComparison.InvariantCultureIgnoreCase));
                if (rootParticle1Dependency == null || AreWordSeparatedInSentence(rootParticle1Dependency, dependencies))
                {
                    continue;
                }

                var remainingParts = parts.Skip(2).ToList();
                // NOTE(review): the "- 1" presumably converts a 1-based dependency index into a 0-based token index — confirm.
                var lastTokenIndex = Math.Max(rootParticle1Dependency.Gov().Index(), rootParticle1Dependency.Dep().Index()) - 1;

                // The remaining particles must be exactly the tokens that directly follow the
                // root/particle pair. Take() caps the comparison at the number of remaining parts,
                // so a sentence that ends too early yields a shorter sequence and fails the check,
                // while a phrasal verb without extra particles compares two empty sequences and passes.
                // Checking once (instead of once per trailing token) adds each verb at most once and
                // also accepts a match that ends on the very last token of the sentence.
                var followingTokens = tokens.Skip(lastTokenIndex + 1).Take(remainingParts.Count);
                if (followingTokens.SequenceEqual(remainingParts, StringComparer.InvariantCultureIgnoreCase))
                {
                    matchingPhrasalVerbs.Add(phrasalVerb);
                }
            }

            return(matchingPhrasalVerbs);
        }
예제 #6
0
        /// <summary>
        /// Parses <paramref name="submitStatement"/> and converts the first child of the parse into a syntagm.
        /// </summary>
        /// <param name="submitStatement">Text handed to the parser's <c>DoParse</c>.</param>
        /// <returns>The result of <c>GetSyntagmFromParse</c> for that child and a new <c>LHippocampus</c>.</returns>
        public LSyntagm GetSyntagmFromText(string submitStatement)
        {
            var   children   = _parser.DoParse(submitStatement).GetChildren();
            Parse firstChild = children[0];

            return GetSyntagmFromParse(firstChild, new LHippocampus());
        }
예제 #7
0
 /// <summary>
 /// Parses a tokenized tweet and returns the first child of the resulting parse.
 /// </summary>
 /// <param name="sentence">Tokens whose <c>text</c> values are passed to the parser, in order.</param>
 /// <returns>The first element of <c>GetChildren()</c> on the parse returned by <c>DoParse</c>.</returns>
 public Parse parseTweet(List <Token> sentence)
 {
     // Project each token onto its text.
     string[] words = (from token in sentence
                       select token.text).ToArray();

     var fullParse = nlpParser.DoParse(words);
     return fullParse.GetChildren()[0];
 }
예제 #8
0
 /// <summary>
 /// Parses <paramref name="sentence"/> with this instance's parser.
 /// </summary>
 /// <param name="sentence">Text handed to the parser's <c>DoParse</c>.</param>
 /// <returns>The parse produced by <c>DoParse</c>.</returns>
 public Parse ParseSentence(string sentence)
 {
     var parsed = parser.DoParse(sentence);
     return parsed;
 }