示例#1
0
        //This should only operate on normalized sentences.
        public Sentence ParsedSentenceFactory(string sentence, string original)
        {
            diagnostics = new SentenceDiagnostics(original, sentence);

            if (String.IsNullOrWhiteSpace(sentence))
            {
                return(new Sentence(new NullOrSymbols(original), diagnostics));
                //  throw new TpParseException("Do not give me a null sentence. Can't tell if null sentence is from input or got lost in translation");
            }

            //This may have already been done by the normalizer, but if not, no problem.
            if (sentence.Contains(" li pi "))
            {
                sentence = sentence.Replace(" li pi ", " li XXXXZiXXXX ");
            }
            ParserUtils.ThrowOnDoubleParticles(sentence, dialect);



            if (sentence.StartCheck(" "))
            {
                throw new TpParseException("Do not give me a sentence that leads with whitespace, I do not want to do defensive Trim() all day. (Call at least NormalizeExplict)");
            }

            if (sentence.StartCheck("///"))
            {
                Comment c = new Comment(sentence);
                return(new Sentence(c, diagnostics));
            }


            if (sentence.EndCheck(" li") || sentence.EndCheck(" li."))
            {
                throw new TpParseException("Something went wrong, sentence ends with li: " + original);
            }
            //Normalization is really expensive. We must stop calling it twice.
            //sentence = Normalizer.NormalizeText(sentence, config); //Any way to avoid calling this twice?

            //HACK: This is necessary (otherwise we have to deal with optional quotes starting, ending words)
            //But we'd rather do this on a sentence level in Discourse.
            bool startsQuotedSpeech;
            bool endsQuotedSpeech;

            if (sentence.StartCheck("«"))
            {
                startsQuotedSpeech = true;
                sentence           = sentence.Replace("«", " ").Trim();
            }
            if (sentence.EndCheck("»", "».", "»!") || sentence.EndCheck("»:", "»?"))
            {
                endsQuotedSpeech = true;
                sentence         = sentence.Replace("»", " ").Trim();
            }

            //TODO: do something with quoted speech. Big problem #1 it spans multiple sentences


            if (sentence.EndCheck(" "))
            {
                throw new TpParseException("Normalizer failed to trim: " + original);
            }

            //Get the final punctuation out or it will mess up parsing later.
            string      possiblePunctuation = sentence[sentence.Length - 1].ToString();
            Punctuation punctuation;

            if (Punctuation.TryParse(possiblePunctuation, out punctuation))
            {
                sentence = sentence.Substring(0, sentence.Length - 1);
            }


            //Square bracket sentence contains all others
            //[S]
            //F la [S]
            //S la [S]
            //F la S la [S]
            //Maximal.. maybe later
            //F la S la F la S  => (F la S ) la (F la [S])
            //F la S la S la F la S la S
            //[{F la S} la {S} la {F la S}] la <S>

            //Just dealing with la fragments

            Sentence        headSentence  = null;
            List <Sentence> preconditions = new List <Sentence>();

            string[] laParts = Splitters.SplitOnLa(sentence);

            //Degenerate sentences.
            if (laParts[laParts.Length - 1] == "la")
            {
                //We have a vocative sentence...
                Fragment fragment         = new Fragment(ProcessEnPiChain(laParts[0]));
                Sentence fragmentSentence = new Sentence(fragment, punctuation, diagnostics);
                return(fragmentSentence);
            }

            if (laParts.Length > 1)
            {
                int             i               = 0;
                List <Fragment> laFragments     = new List <Fragment>();
                Sentence        currentSentence = null;
                foreach (string subSentence in laParts.Reverse())
                {
                    i++;
                    if (i == 1)
                    {
                        //Head sentence.
                        // subSentence.StartCheck("la ") ? subSentence.Substring(3) : subSentence
                        string laLessString = subSentence.RemoveLeadingWholeWord("la");
                        headSentence = ProcessSimpleSentence(laLessString, punctuation, original);
                        continue; //Not dealing with "kin la!"
                    }

                    //Fragments & preconditions
                    const string liFinder = @"\bli\b";
                    Match        m        = Regex.Match(subSentence, liFinder);
                    if (m.Success)
                    {
                        //This is a sentence
                        //Maybe should recurse.
                        string laLessString = subSentence.RemoveLeadingWholeWord("la");

                        currentSentence = ProcessSimpleSentence(laLessString, null, original);
                        preconditions.Add(currentSentence);
                    }
                    else
                    {
                        string   laLessString = subSentence.RemoveLeadingWholeWord("la");
                        Fragment fragment;
                        if (laLessString.StartCheck("~"))
                        {
                            string[] parts = Splitters.SplitOnPrepositions(laLessString);
                            fragment = new Fragment(ProcessPrepositionalPhrases(parts).ToArray());
                        }
                        else
                        {
                            fragment = new Fragment(ProcessEnPiChain(laLessString));
                        }

                        if (currentSentence == null)
                        {
                            if (headSentence == null)
                            {
                                throw new TpParseException(
                                          "Sentence appears to be headed by a fragment. Shouldn't deal with those here.: " + original);
                            }
                            headSentence.LaFragment.Add(fragment);
                        }
                        else
                        {
                            laFragments.Add(fragment);
                        }
                    }
                }
            }
            else
            {
                //No la at all.
                //Simple Sentence
                return(ProcessSimpleSentence(sentence, punctuation, original));
            }
            if (headSentence == null)
            {
                throw new TpParseException("This is not a sentence, should deal with it with it's own parser: " + original);
            }
            if (preconditions.Count == 0)
            {
                return(headSentence);
            }
            Sentence s = new Sentence(diagnostics, preconditions.ToArray(), headSentence);

            return(s);
        }
示例#2
0
        public Sentence ProcessSimpleSentence(string sentence, Punctuation punctuation, string original)
        {
            //Think this is causing a bug.
            ////HACK: Still need a better way to deal with quotes.
            //if (sentence.EndCheck("»") || sentence.EndCheck("«"))
            //{
            //    sentence = sentence.Substring(0, sentence.Length - 1);
            //}



            //Comment? Get out of here!
            if (sentence.StartCheck("///"))
            {
                Comment c = new Comment(sentence);
                return(new Sentence(c, diagnostics));
            }

            //Simple exclamation! Get out of here!
            if (Exclamation.IsExclamation(sentence))
            {
                return(new Sentence(new Exclamation(new HeadedPhrase(new Word(sentence))), punctuation, new SentenceDiagnostics(original, sentence)));
            }

            List <Vocative> headVocatives = null;

            //jan Mato o, ale li pona. Head vocative!
            //kin la o moku. //not a vocative (hopefully dealt with elsewhere)
            //jan Mato o moku! //Head vocative, & imperative, with 2nd o discarded
            //jan Mato o o moku! //Head vocative, & imperative, with 2nd o discarded


            if (sentence.ContainsCheck(" o o "))//Explicit vocative & imperative
            {
                //Okay, we know exactly when the head vocatives end.
                headVocatives = new List <Vocative>();
                string justHeadVocatives = sentence.Substring(0, sentence.IndexOf(" o o ", StringComparison.Ordinal));

                //Process head vocatives.
                ProcessHeadVocatives(Splitters.SplitOnO(justHeadVocatives), headVocatives, allAreVocatives: true);
                //BUG: Add the dummy! (And it still doesn't work!)
                sentence = "jan Sanwan o " + sentence.Substring(sentence.IndexOf(" o o ", StringComparison.Ordinal) + 5);
            }


            //Starts with o, then we have imperative & no head vocatives.
            bool endsOrStartsWithO = sentence.StartCheck("o ") && sentence.EndCheck(" o");

            if (!endsOrStartsWithO)
            {
                //jan So o! (We already deal with degenerate vocative sentences elsewhere)
                //jan So o sina li nasa.
                //jan So o nasa!
                //jan So o mi mute o nasa.  <-- This is the problem.

                //These could be vocatives or imperatives.
                if (sentence.ContainsCheck(" o ", " o,", ",o ") && sentence.ContainsCheck(" li "))
                {
                    headVocatives = new List <Vocative>();

                    ProcessHeadVocatives(Splitters.SplitOnO(sentence), headVocatives, allAreVocatives: false);

                    //int firstLi = sentence.IndexOf(" li ");
                    int lastO = sentence.LastIndexOf(" o ", StringComparison.Ordinal);
                    if (lastO < 0)
                    {
                        lastO = sentence.LastIndexOf(" o,", StringComparison.Ordinal);
                    }

                    sentence = sentence.Substring(lastO + 2);
                }
            }

            //Process tag conjunctions and tag questions
            Particle    conjunction = null;
            TagQuestion tagQuestion = null;

            if (sentence.StartCheck("taso "))
            {
                conjunction = Particles.taso;
                sentence    = sentence.Substring(5);
            }
            else if (sentence.StartCheck("anu "))
            {
                conjunction = Particles.anu;
                sentence    = sentence.Substring(4);
            }
            else if (sentence.StartCheck("en "))
            {
                //Well, either parse it or throw. Otherwise, this gets skipped.
                //is this legal?
                conjunction = Particles.en;
                sentence    = sentence.Substring(3);
            }
            else if (sentence.StartCheck("ante ")) //never seen it.
            {
                conjunction = Particles.ante;
                sentence    = sentence.Substring(5);
            }

            //Should already have ? stripped off
            if (sentence.EndsWith(" anu seme"))
            {
                tagQuestion = new TagQuestion();
                sentence    = sentence.Substring(0, sentence.LastIndexOf(" anu seme", StringComparison.Ordinal));
            }


            if (sentence.EndCheck(" li"))
            {
                throw new TpParseException("Something went wrong-- sentenc ends with li. " + sentence);
            }
            if (sentence.StartsOrContainsOrEnds("la"))
            {
                throw new TpParseException("If it contains a la, anywhere, it isn't a simple sentence. " + sentence);
            }

            bool isHortative  = false;
            bool isImperative = false;

            if (sentence.StartCheck("o ") && sentence.ContainsCheck(" li "))
            {
                //o mi mute li moku
                isHortative = true;
                sentence    = sentence.RemoveLeadingWholeWord("o");
            }
            if (sentence.StartCheck("o ") && !sentence.ContainsCheck(" li "))
            {
                //o pana e pan
                isImperative = true;
                //sentence = sentence.RemoveLeadingWholeWord("o");
            }
            // someting o ==> vocative

            string[] liParts = Splitters.SplitOnLiOrO(sentence);

            if (liParts.Length == 1 && Exclamation.IsExclamation(liParts[0]))
            {
                //HACK: Duplicate code. & it only deals with a single final puncution mark.
                string possiblePunctuation = sentence[sentence.Length - 1].ToString();
                if (Punctuation.TryParse(possiblePunctuation, out punctuation))
                {
                    sentence = sentence.Substring(0, sentence.Length - 1);
                }

                //The whole thing is o! (or pakala! or the like)
                //pona a! a a a! ike a!
                TokenParserUtils tpu = new TokenParserUtils();

                Word[]       tokes         = tpu.ValidWords(sentence);
                HeadedPhrase parts         = new HeadedPhrase(tokes[0], new WordSet(ArrayExtensions.Tail(tokes)));
                bool         modifiersAreA = true;

                foreach (Word w in parts.Modifiers)
                {
                    if (w == "a")
                    {
                        continue;           //peculiar to exclamations & repeats.
                    }
                    if (w == "kin")
                    {
                        continue;             //modifies just about anything
                    }
                    modifiersAreA = false;
                }

                if (modifiersAreA)
                {
                    Exclamation exclamation = new Exclamation(parts);
                    Sentence    s           = new Sentence(exclamation, punctuation, diagnostics);
                    return(s);
                }
            }


            //Degenerate sentences.
            if (liParts[liParts.Length - 1].Trim(new char[] { ',', '«', '»', '!', ' ' }) == "o")
            {
                //We have a vocative sentence...
                Vocative vocative = new Vocative(ProcessEnPiChain(liParts[0]));
                Sentence s        = new Sentence(vocative, punctuation, diagnostics);
                return(s);
            }

            string subjects = liParts[0].Trim();

            ComplexChain subjectChain = null;
            int          startAt      = 1; //slot 0 is normally a subject

            if (subjects.Contains("«"))
            {
                int foo = 3;
            }
            if (subjects.StartCheck("o ") ||
                subjects.StartCheck("«o "))
            {
                //This is a verb phrase with implicit subjects!
                startAt = 0;
            }
            else
            {
                subjectChain = ProcessEnPiChain(subjects);
            }

            PredicateList verbPhrases = new PredicateList();

            for (int i = startAt; i < liParts.Length; i++)
            {
                string predicate = liParts[i].Trim();

                verbPhrases.Add(ProcessPredicates(predicate));
            }

            //Head or complete sentence.

            Sentence parsedSentence = new Sentence(subjectChain, verbPhrases, diagnostics, new SentenceOptionalParts
            {
                Conjunction = conjunction,
                //Etc
                Punctuation   = punctuation,
                IsHortative   = isHortative,
                TagQuestion   = tagQuestion,
                HeadVocatives = headVocatives != null ? headVocatives.ToArray() : null
            });

            return(parsedSentence);
        }