public static PrepositionalPhrase Parse(object value)
        {
            string item = value.ToString(); //unbox

            if (string.IsNullOrEmpty(item))
            {
                throw new ArgumentException("value is null or zero length string");
            }

            Dialect     c  = Dialect.LooseyGoosey;
            ParserUtils pu = new ParserUtils(c);

            if (item.ContainsCheck("~"))
            {
                string[] parts = Splitters.SplitOnPrepositions(item);
                List <PrepositionalPhrase> chain = pu.ProcessPrepositionalPhrases(parts);
                if (chain.Count != 1)
                {
                    throw new TpParseException("Tried to parse a single chain, but value contains " +
                                               chain.Count);
                }
                return(chain[0]);
            }
            throw new TpParseException("Cannot parse this to a prepositional phrase: " + value);
        }
Beispiel #2
0
        /// <summary>
        /// Parses simple headed phrases fine. Parses some headed phrases with PP modifiers, but
        /// not if the PP is in maximal form.
        /// </summary>
        /// <param name="value"></param>
        /// <returns></returns>
        public HeadedPhrase HeadedPhraseParser(string value)
        {
            if (String.IsNullOrEmpty(value))
            {
                throw new ArgumentException("Impossible to parse a null or zero length string.");
            }
            //#if DEBUG
            //            string copyValue = String.Copy(value);
            //#endif
            if (memoize)
            {
                if (headedPhraseParserMemo.ContainsKey(value))
                {
                    return(headedPhraseParserMemo[value]);
                }
            }


            foreach (string particle in new[] { "pi", "la", "e", "li" })
            {
                if (value.StartsOrContainsOrEnds(particle))
                {
                    throw new TpSyntaxException("Headed phrases have no particles. This one has " + particle + " ref: " + value + " (Did we forget the li between the subject and verb?)");
                }
            }

            PrepositionalPhrase[] pp = null;
            if (value.ContainsCheck("~"))
            {
                string[] headAndPreps = Splitters.SplitOnPrepositions(value);
                value = headAndPreps[0];
                pp    = ProcessPrepositionalPhrases(ArrayExtensions.Tail(headAndPreps)).ToArray();
            }
            //No Pi!
            TokenParserUtils pu = new TokenParserUtils();

            Word[] words = pu.ValidWords(value);


            if (words.Length == 0)
            {
                throw new TpParseException("Failed to parse: " + value);
            }
            //Word head = words[0];
            Word[] tail = words;//ArrayExtensions.Tail(words);

            //EXTRACT MORPHOLOGICAL STRUCTURE OF MODIFIERS HERE.

            var mergedTail = TurnThisWordsIntoWordsWithTaggedWords(tail);


            HeadedPhrase phrase = new HeadedPhrase(mergedTail[0],
                                                   new WordSet(ArrayExtensions.Tail(mergedTail.ToArray())),
                                                   pp);

            //#if DEBUG
            //            if (copyValue != value)
            //            {
            //                throw new TpParseException("Invariant violation: " + copyValue +" --- "+ value);
            //            }
            //#endif
            //            if (memoize)
            //            {
            //                headedPhraseParserMemo[value] = phrase;
            //            }
            return(phrase);
        }
Beispiel #3
0
        // jan li jo e soweli e kili e wawa lon anpa tawa anpa
        //     li jo e soweli e kili e wawa lon anpa tawa anpa
        public TpPredicate ProcessPredicates(string liPart)
        {
            if (String.IsNullOrWhiteSpace(liPart))
            {
                throw new TpParseException("Missing argument, cannot continue");
            }
            if (liPart == "li")
            {
                throw new TpParseException("Cannot do anything with just li");
            }
            TokenParserUtils pu = new TokenParserUtils();
            Particle         verbPhraseParticle;
            ComplexChain     directObjectChain = null;
            VerbPhrase       verbPhrase        = null;

            PrepositionalPhrase[] prepositionalChain = null;
            ComplexChain          nominalPredicate   = null;
            PiPredicate           piPredicate        = null;

            //Transitive Path.
            if (liPart.Split(new[] { ' ', '\t' }).Contains("e"))
            {
                string[] eParts = Splitters.SplitOnE(liPart);

                string[] verbPhraseParts = pu.WordsPunctuationAndCompounds(eParts[0]); //Could contain particles.

                if (!Token.CheckIsParticle(verbPhraseParts[0]))
                {
                    throw new TpSyntaxException("uh-oh not a particle: " + verbPhraseParts[0] + " from " + liPart);
                }
                verbPhraseParticle = new Particle(verbPhraseParts[0]);

                //Only process preps in normalized sentences
                string[] partsWithPreps = null;

                if (verbPhraseParts.Length > 1)
                {
                    if (verbPhraseParts.Any(x => x == "pi"))
                    {
                        //nominal predicate
                        nominalPredicate =
                            new ComplexChain(Particles.en,
                                             new[] {
                            ProcessPiChain(String.Join(" ", ArrayExtensions.Tail(verbPhraseParts)))
                        });
                    }
                    else
                    {
                        verbPhrase = VerbPhraseParser(ArrayExtensions.Tail(verbPhraseParts));
                    }
                }

                string verbsMaybePrepositions = eParts[eParts.Length - 1];


                if (verbsMaybePrepositions.ContainsCheck("~"))
                {
                    partsWithPreps = Splitters.SplitOnPrepositions(verbsMaybePrepositions);
                    if (partsWithPreps.Length == 1)
                    {
                        //This is the last e phrase or 1st prep.
                        if (partsWithPreps[0].ContainsCheck("~"))
                        {
                            //That is a prep phrase (is this possible?)
                        }
                        else
                        {
                            eParts[eParts.Length - 1] = partsWithPreps[0];
                            //No prep phrases.
                        }
                    }
                }

                string[] directObjects = ArrayExtensions.Tail(eParts);

                //List<HeadedPhrase> doNPs = new List<HeadedPhrase>();
                List <Chain> doPiChains = new List <Chain>();

                //Fancy foot work for when we have e ... ~... & that's all.
                string[] toUse;
                if (partsWithPreps != null)
                {
                    toUse = partsWithPreps.Where(x => x.StartCheck("e ")).ToArray();
                    directObjects[directObjects.Length - 1] = toUse[0];
                    toUse = directObjects;
                }
                else
                {
                    toUse = directObjects;
                }

                foreach (string directObject in toUse)
                {
                    if (directObject.Length <= 2)
                    {
                        throw new TpParseException("This is a degenerate e phrase, i.e. it is only e or e space. Missing a ni, e.g. e ni: possibly. ref: " + liPart);
                    }
                    string eFree  = directObject.Substring(2);
                    Chain  phrase = ProcessPiChain(eFree);
                    doPiChains.Add(phrase);
                }
                directObjectChain = new ComplexChain(Particles.e, doPiChains.ToArray());

                if (partsWithPreps != null)
                {
                    prepositionalChain = ProcessPrepositionalPhrases(partsWithPreps).ToArray();
                }
            }
            else
            {
                //Intransitives & Predictates

                string[] ppParts = Splitters.SplitOnPrepositions(liPart);

                if (ppParts.Length == 0) //Excect at least "li verb" or "li noun"
                {
                    throw new TpParseException("Whoa, got " + ppParts.Length + " parts for " + liPart);
                }

                if (Punctuation.ContainsPunctuation(ppParts[0]))
                {
                    throw new TpParseException("This has punctuation, may fail to parse : " + ppParts[0]);
                }
                string[] verbPhraseParts = pu.WordsPunctuationAndCompounds(ppParts[0]);

                if (!Token.CheckIsParticle(verbPhraseParts[0]))
                {
                    throw new TpSyntaxException("uh-oh not a particle: " + verbPhraseParts[0] + " from " + liPart);
                }
                verbPhraseParticle = new Particle(verbPhraseParts[0]);


                if (verbPhraseParts.Length > 1)
                {
                    //0:li 1:xxx 2:np...
                    if (verbPhraseParts[1].ContainsCheck("XXXXZiXXXX"))
                    {
                        //Make it go away. Confuses other parsers and will be picked up by container object.
                        verbPhraseParts = ArrayExtensions.Tail(verbPhraseParts);

                        //piPredicate
                        ComplexChain phrase = new ComplexChain(Particles.en,
                                                               new[] {
                            ProcessPiChain(String.Join(" ", ArrayExtensions.Tail(verbPhraseParts)))
                        });

                        piPredicate = new PiPredicate(Particles.pi, phrase);
                    }
                    else if (verbPhraseParts.Any(x => x == "pi"))
                    {
                        //nominal predicate
                        nominalPredicate = new ComplexChain(Particles.en,
                                                            new[] {
                            ProcessPiChain(String.Join(" ", ArrayExtensions.Tail(verbPhraseParts)))
                        }
                                                            );
                    }
                    else
                    {
                        verbPhrase = VerbPhraseParser(ArrayExtensions.Tail(verbPhraseParts));
                    }
                }


                string[] prepositions = ArrayExtensions.Tail(ppParts);

                if (prepositions.Length != 0)
                {
                    List <PrepositionalPhrase> pChains = new List <PrepositionalPhrase>();
                    foreach (string pp in prepositions)
                    {
                        string[] phraseParts = pu.WordsPunctuationAndCompounds(pp);//Could contain particles.
                        string   preposition = phraseParts[0];
                        string[] tail        = ArrayExtensions.Tail(phraseParts);

                        if (tail.Length == 0)
                        {
                            //uh oh. This is an intransitive verb, like "ni li lon"
                            //HACK: Oh, this is so ugly (still sort of ugly)
                            verbPhrase = new VerbPhrase(new Word(preposition.Replace("~", "")));
                            //or a noun phrase.

                            continue;
                        }

                        PrepositionalPhrase foundPrepositionalPhrase = new PrepositionalPhrase(new Word(preposition), ProcessEnPiChain(String.Join(" ", tail)));
                        pChains.Add(foundPrepositionalPhrase);
                    }
                    if (pChains.Count > 0)
                    {
                        prepositionalChain = pChains.ToArray();
                    }
                    else
                    {
                        //We changed our mind about a phrase being a prep phrase. Turned out to be verb phrase or predicate.
                    }
                }
            }
            if (piPredicate != null)
            {
                return(new TpPredicate(verbPhraseParticle, piPredicate, prepositionalChain));
            }
            if (nominalPredicate == null)
            {
                return(new TpPredicate(verbPhraseParticle, verbPhrase, directObjectChain, prepositionalChain));
            }

            return(new TpPredicate(verbPhraseParticle, nominalPredicate, directObjectChain, prepositionalChain));
        }
Beispiel #4
0
        //This should only operate on normalized sentences.
        public Sentence ParsedSentenceFactory(string sentence, string original)
        {
            diagnostics = new SentenceDiagnostics(original, sentence);

            if (String.IsNullOrWhiteSpace(sentence))
            {
                return(new Sentence(new NullOrSymbols(original), diagnostics));
                //  throw new TpParseException("Do not give me a null sentence. Can't tell if null sentence is from input or got lost in translation");
            }

            //This may have already been done by the normalizer, but if not, no problem.
            if (sentence.Contains(" li pi "))
            {
                sentence = sentence.Replace(" li pi ", " li XXXXZiXXXX ");
            }
            ParserUtils.ThrowOnDoubleParticles(sentence, dialect);



            if (sentence.StartCheck(" "))
            {
                throw new TpParseException("Do not give me a sentence that leads with whitespace, I do not want to do defensive Trim() all day. (Call at least NormalizeExplict)");
            }

            if (sentence.StartCheck("///"))
            {
                Comment c = new Comment(sentence);
                return(new Sentence(c, diagnostics));
            }


            if (sentence.EndCheck(" li") || sentence.EndCheck(" li."))
            {
                throw new TpParseException("Something went wrong, sentence ends with li: " + original);
            }
            //Normalization is really expensive. We must stop calling it twice.
            //sentence = Normalizer.NormalizeText(sentence, config); //Any way to avoid calling this twice?

            //HACK: This is necessary (otherwise we have to deal with optional quotes starting, ending words)
            //But we'd rather do this on a sentence level in Discourse.
            bool startsQuotedSpeech;
            bool endsQuotedSpeech;

            if (sentence.StartCheck("«"))
            {
                startsQuotedSpeech = true;
                sentence           = sentence.Replace("«", " ").Trim();
            }
            if (sentence.EndCheck("»", "».", "»!") || sentence.EndCheck("»:", "»?"))
            {
                endsQuotedSpeech = true;
                sentence         = sentence.Replace("»", " ").Trim();
            }

            //TODO: do something with quoted speech. Big problem #1 it spans multiple sentences


            if (sentence.EndCheck(" "))
            {
                throw new TpParseException("Normalizer failed to trim: " + original);
            }

            //Get the final punctuation out or it will mess up parsing later.
            string      possiblePunctuation = sentence[sentence.Length - 1].ToString();
            Punctuation punctuation;

            if (Punctuation.TryParse(possiblePunctuation, out punctuation))
            {
                sentence = sentence.Substring(0, sentence.Length - 1);
            }


            //Square bracket sentence contains all others
            //[S]
            //F la [S]
            //S la [S]
            //F la S la [S]
            //Maximal.. maybe later
            //F la S la F la S  => (F la S ) la (F la [S])
            //F la S la S la F la S la S
            //[{F la S} la {S} la {F la S}] la <S>

            //Just dealing with la fragments

            Sentence        headSentence  = null;
            List <Sentence> preconditions = new List <Sentence>();

            string[] laParts = Splitters.SplitOnLa(sentence);

            //Degenerate sentences.
            if (laParts[laParts.Length - 1] == "la")
            {
                //We have a vocative sentence...
                Fragment fragment         = new Fragment(ProcessEnPiChain(laParts[0]));
                Sentence fragmentSentence = new Sentence(fragment, punctuation, diagnostics);
                return(fragmentSentence);
            }

            if (laParts.Length > 1)
            {
                int             i               = 0;
                List <Fragment> laFragments     = new List <Fragment>();
                Sentence        currentSentence = null;
                foreach (string subSentence in laParts.Reverse())
                {
                    i++;
                    if (i == 1)
                    {
                        //Head sentence.
                        // subSentence.StartCheck("la ") ? subSentence.Substring(3) : subSentence
                        string laLessString = subSentence.RemoveLeadingWholeWord("la");
                        headSentence = ProcessSimpleSentence(laLessString, punctuation, original);
                        continue; //Not dealing with "kin la!"
                    }

                    //Fragments & preconditions
                    const string liFinder = @"\bli\b";
                    Match        m        = Regex.Match(subSentence, liFinder);
                    if (m.Success)
                    {
                        //This is a sentence
                        //Maybe should recurse.
                        string laLessString = subSentence.RemoveLeadingWholeWord("la");

                        currentSentence = ProcessSimpleSentence(laLessString, null, original);
                        preconditions.Add(currentSentence);
                    }
                    else
                    {
                        string   laLessString = subSentence.RemoveLeadingWholeWord("la");
                        Fragment fragment;
                        if (laLessString.StartCheck("~"))
                        {
                            string[] parts = Splitters.SplitOnPrepositions(laLessString);
                            fragment = new Fragment(ProcessPrepositionalPhrases(parts).ToArray());
                        }
                        else
                        {
                            fragment = new Fragment(ProcessEnPiChain(laLessString));
                        }

                        if (currentSentence == null)
                        {
                            if (headSentence == null)
                            {
                                throw new TpParseException(
                                          "Sentence appears to be headed by a fragment. Shouldn't deal with those here.: " + original);
                            }
                            headSentence.LaFragment.Add(fragment);
                        }
                        else
                        {
                            laFragments.Add(fragment);
                        }
                    }
                }
            }
            else
            {
                //No la at all.
                //Simple Sentence
                return(ProcessSimpleSentence(sentence, punctuation, original));
            }
            if (headSentence == null)
            {
                throw new TpParseException("This is not a sentence, should deal with it with it's own parser: " + original);
            }
            if (preconditions.Count == 0)
            {
                return(headSentence);
            }
            Sentence s = new Sentence(diagnostics, preconditions.ToArray(), headSentence);

            return(s);
        }