Ejemplo n.º 1
0
        //Maximal phrase: (subject)
        //jan pona anu ike lon tomo ...li pona tawa mi.
        //kili suwi en namako lon tomo ...li pona tawa mi.
        //kili suwi en namako anu loje lon tomo .... li pona tawa mi.
        //kili suwi namako anu loje
        /// <summary>
        /// Creates a headed phrase from a head word plus optional modifiers, prepositional phrases,
        /// joined modifiers and alternative modifiers.
        /// </summary>
        /// <param name="head">The head word of the phrase.</param>
        /// <param name="modifiers">Optional modifiers. When the set contains kin or ala it is rebuilt through
        /// <c>ParserUtils.TurnThisWordsIntoWordsWithTaggedWords</c> before validation.</param>
        /// <param name="prepositionalPhrases">Optional prepositional phrases, stored as given.</param>
        /// <param name="joinedModifiers">Optional joined modifiers, stored as given.</param>
        /// <param name="alternativeModifiers">Optional alternative modifiers, stored as given.</param>
        public HeadedPhrase(Word head, WordSet modifiers = null, PrepositionalPhrase[] prepositionalPhrases = null, WordSet joinedModifiers = null, WordSet alternativeModifiers = null)
        {
            // A guard that rejected mi/sina/ona as heads (in favour of ComplexPronoun) used to sit here; it is disabled.

            bool containsKinOrAla = modifiers != null
                                    && (modifiers.Contains(Words.kin) || modifiers.Contains(Words.ala));

            if (containsKinOrAla)
            {
                // Presumably this merges kin/ala into the word they tag -- confirm against ParserUtils.
                ParserUtils tagger = new ParserUtils(Dialect.LooseyGoosey);
                modifiers = new WordSet(tagger.TurnThisWordsIntoWordsWithTaggedWords(modifiers.ToArray()));
            }

            // Validation sees the (possibly rebuilt) modifier set, not the caller's original one.
            ValidateConstruction(head, modifiers);

            this.head = head;
            this.modifiers = modifiers;
            this.prepositionalPhrases = prepositionalPhrases;
            this.joinedModifiers = joinedModifiers;
            this.alternativeModifiers = alternativeModifiers;
        }
Ejemplo n.º 2
0
        //This should only operate on normalized sentences.
        /// <summary>
        /// Parses a single sentence into a <see cref="Sentence"/>: handles comment lines, strips
        /// guillemet quote marks and terminal punctuation, then splits on "la" to separate the head
        /// sentence from its leading fragments and precondition sentences.
        /// </summary>
        /// <param name="sentence">Sentence text to parse. It must not begin or end with whitespace.</param>
        /// <param name="original">The text as originally supplied; recorded in diagnostics and quoted in error messages.</param>
        /// <returns>
        /// A <c>NullOrSymbols</c> sentence when the input is null/whitespace or contains nothing but quote marks;
        /// a <c>Comment</c> sentence when the input starts with "///"; a fragment sentence when the last
        /// "la" part is a bare "la"; otherwise the parsed head sentence, wrapped together with its
        /// preconditions when there are any.
        /// </returns>
        /// <exception cref="TpParseException">
        /// Thrown when the text starts or ends with whitespace, ends with "li", or no head sentence could be built.
        /// </exception>
        public Sentence ParsedSentenceFactory(string sentence, string original)
        {
            diagnostics = new SentenceDiagnostics(original, sentence);

            if (String.IsNullOrWhiteSpace(sentence))
            {
                // Deliberately not an exception: an empty input is reported as NullOrSymbols.
                return new Sentence(new NullOrSymbols(original), diagnostics);
            }

            //This may have already been done by the normalizer, but if not, no problem.
            //(Replace leaves the string untouched when the pattern is absent.)
            sentence = sentence.Replace(" li pi ", " li XXXXZiXXXX ");
            ParserUtils.ThrowOnDoubleParticles(sentence, dialect);

            if (sentence.StartCheck(" "))
            {
                throw new TpParseException("Do not give me a sentence that leads with whitespace, I do not want to do defensive Trim() all day. (Call at least NormalizeExplict)");
            }

            if (sentence.StartCheck("///"))
            {
                return new Sentence(new Comment(sentence), diagnostics);
            }

            if (sentence.EndCheck(" li") || sentence.EndCheck(" li."))
            {
                throw new TpParseException("Something went wrong, sentence ends with li: " + original);
            }

            //HACK: This is necessary (otherwise we have to deal with optional quotes starting, ending words)
            //But we'd rather do this on a sentence level in Discourse.
            //TODO: do something with quoted speech. Big problem #1 it spans multiple sentences
            bool startsQuotedSpeech;
            bool endsQuotedSpeech;

            if (sentence.StartCheck("«"))
            {
                startsQuotedSpeech = true;
                sentence = sentence.Replace("«", " ").Trim();
            }
            if (sentence.EndCheck("»", "».", "»!") || sentence.EndCheck("»:", "»?"))
            {
                endsQuotedSpeech = true;
                sentence = sentence.Replace("»", " ").Trim();
            }

            if (sentence.Length == 0)
            {
                // The input held nothing but quote marks (e.g. "«" or "«»"). Indexing the last
                // character below would throw IndexOutOfRangeException, so report it the same way
                // as an empty input instead.
                return new Sentence(new NullOrSymbols(original), diagnostics);
            }

            if (sentence.EndCheck(" "))
            {
                throw new TpParseException("Normalizer failed to trim: " + original);
            }

            //Get the final punctuation out or it will mess up parsing later.
            string lastCharacter = sentence[sentence.Length - 1].ToString();
            Punctuation punctuation;
            if (Punctuation.TryParse(lastCharacter, out punctuation))
            {
                sentence = sentence.Substring(0, sentence.Length - 1);
            }

            //Square bracket sentence contains all others
            //[S]
            //F la [S]
            //S la [S]
            //F la S la [S]
            //Maximal.. maybe later
            //F la S la F la S  => (F la S ) la (F la [S])
            //F la S la S la F la S la S
            //[{F la S} la {S} la {F la S}] la <S>

            //Just dealing with la fragments
            string[] laParts = Splitters.SplitOnLa(sentence);
            int lastIndex = laParts.Length - 1;

            //Degenerate sentences.
            if (laParts[lastIndex] == "la")
            {
                //We have a vocative sentence...
                Fragment vocative = new Fragment(ProcessEnPiChain(laParts[0]));
                return new Sentence(vocative, punctuation, diagnostics);
            }

            if (laParts.Length <= 1)
            {
                //No la at all: simple sentence.
                return ProcessSimpleSentence(sentence, punctuation, original);
            }

            const string liFinder = @"\bli\b";

            Sentence headSentence = null;
            Sentence currentSentence = null;
            List<Sentence> preconditions = new List<Sentence>();
            List<Fragment> laFragments = new List<Fragment>();

            // Walk the parts right-to-left: the last part is the head sentence, everything before
            // it is either a precondition sentence (contains "li") or a fragment.
            for (int index = lastIndex; index >= 0; index--)
            {
                string subSentence = laParts[index];
                string laLessString = subSentence.RemoveLeadingWholeWord("la");

                if (index == lastIndex)
                {
                    //Head sentence. Not dealing with "kin la!"
                    headSentence = ProcessSimpleSentence(laLessString, punctuation, original);
                    continue;
                }

                if (Regex.IsMatch(subSentence, liFinder))
                {
                    //This is a sentence
                    //Maybe should recurse.
                    currentSentence = ProcessSimpleSentence(laLessString, null, original);
                    preconditions.Add(currentSentence);
                    continue;
                }

                Fragment fragment;
                if (laLessString.StartCheck("~"))
                {
                    string[] prepositionParts = Splitters.SplitOnPrepositions(laLessString);
                    fragment = new Fragment(ProcessPrepositionalPhrases(prepositionParts).ToArray());
                }
                else
                {
                    fragment = new Fragment(ProcessEnPiChain(laLessString));
                }

                if (currentSentence != null)
                {
                    // NOTE(review): laFragments is never read after this loop, so a fragment that
                    // precedes a precondition sentence is dropped from the result. Confirm whether
                    // it should be attached to currentSentence instead.
                    laFragments.Add(fragment);
                    continue;
                }

                if (headSentence == null)
                {
                    throw new TpParseException(
                              "Sentence appears to be headed by a fragment. Shouldn't deal with those here.: " + original);
                }
                headSentence.LaFragment.Add(fragment);
            }

            if (headSentence == null)
            {
                throw new TpParseException("This is not a sentence, should deal with it with it's own parser: " + original);
            }

            if (preconditions.Count == 0)
            {
                return headSentence;
            }

            // Preconditions are in right-to-left order because of the reverse walk above.
            return new Sentence(diagnostics, preconditions.ToArray(), headSentence);
        }