//Maximal phrase: (subject)
//jan pona anu ike lon tomo ...li pona tawa mi.
//kili suwi en namako lon tomo ...li pona tawa mi.
//kili suwi en namako anu loje lon tomo .... li pona tawa mi.
//kili suwi namako anu loje
//
//Builds a headed phrase from a head word plus optional modifiers, prepositional
//phrases, joined modifiers and alternative modifiers. Every optional argument
//defaults to null and is stored as given, except modifiers, which may be
//re-tagged first (see below).
public HeadedPhrase(Word head, WordSet modifiers = null, PrepositionalPhrase[] prepositionalPhrases = null, WordSet joinedModifiers = null, WordSet alternativeModifiers = null)
{
    //A guard that rejected mi/sina/ona as a head (pointing callers at ComplexPronoun)
    //used to sit here; it is currently disabled.

    if (modifiers != null)
    {
        bool containsKinOrAla = modifiers.Contains(Words.kin) || modifiers.Contains(Words.ala);
        if (containsKinOrAla)
        {
            //Modifier lists holding kin or ala are run through the tagged-word merger
            //and rebuilt as a fresh WordSet before validation.
            ParserUtils tagger = new ParserUtils(Dialect.LooseyGoosey);
            modifiers = new WordSet(tagger.TurnThisWordsIntoWordsWithTaggedWords(modifiers.ToArray()));
        }
    }

    //Validation sees the (possibly re-tagged) modifiers and runs before any field is set.
    ValidateConstruction(head, modifiers);

    this.head = head;
    this.modifiers = modifiers;
    this.prepositionalPhrases = prepositionalPhrases;
    this.joinedModifiers = joinedModifiers;
    this.alternativeModifiers = alternativeModifiers;
}
//This should only operate on normalized sentences.
//
//Turns one sentence string into a Sentence.
//  sentence: the (already normalized) text to parse.
//  original: the text as first received; used for diagnostics and error messages.
//Returns:
//  - a Sentence wrapping NullOrSymbols when the input is blank, or when nothing is
//    left once quotation marks are stripped;
//  - a Sentence wrapping a Comment when the text starts with "///";
//  - a Sentence wrapping a Fragment when the last la-part is a bare "la";
//  - the result of ProcessSimpleSentence when there is no la split;
//  - otherwise the head sentence, combined with its precondition sentences if any.
//Throws TpParseException when the text leads or trails with whitespace, ends with
//"li", or no head sentence could be produced. ParserUtils.ThrowOnDoubleParticles is
//also called and may raise on its own.
//Side effect: overwrites the diagnostics field.
public Sentence ParsedSentenceFactory(string sentence, string original)
{
    diagnostics = new SentenceDiagnostics(original, sentence);

    if (String.IsNullOrWhiteSpace(sentence))
    {
        //Deliberately not an exception: we can't tell if a null sentence is from input
        //or got lost in translation.
        return new Sentence(new NullOrSymbols(original), diagnostics);
    }

    //This may have already been done by the normalizer, but if not, no problem.
    if (sentence.Contains(" li pi "))
    {
        sentence = sentence.Replace(" li pi ", " li XXXXZiXXXX ");
    }

    ParserUtils.ThrowOnDoubleParticles(sentence, dialect);

    if (sentence.StartCheck(" "))
    {
        throw new TpParseException("Do not give me a sentence that leads with whitespace, I do not want to do defensive Trim() all day. (Call at least NormalizeExplict)");
    }

    if (sentence.StartCheck("///"))
    {
        return new Sentence(new Comment(sentence), diagnostics);
    }

    if (sentence.EndCheck(" li") || sentence.EndCheck(" li."))
    {
        throw new TpParseException("Something went wrong, sentence ends with li: " + original);
    }

    //Normalization is really expensive. We must stop calling it twice.
    //sentence = Normalizer.NormalizeText(sentence, config); //Any way to avoid calling this twice?

    //HACK: This is necessary (otherwise we have to deal with optional quotes starting, ending words)
    //But we'd rather do this on a sentence level in Discourse.
    //Note that Replace removes every occurrence of the mark, not only the leading/trailing one.
    if (sentence.StartCheck("«"))
    {
        sentence = sentence.Replace("«", " ").Trim();
    }
    if (sentence.EndCheck("»", "».", "»!") || sentence.EndCheck("»:", "»?"))
    {
        sentence = sentence.Replace("»", " ").Trim();
    }
    //TODO: do something with quoted speech. Big problem #1 it spans multiple sentences

    //A sentence made only of quotation marks (e.g. "«»") is empty by now. Indexing its
    //last character below would throw IndexOutOfRangeException, so treat it the same
    //way as blank input.
    if (sentence.Length == 0)
    {
        return new Sentence(new NullOrSymbols(original), diagnostics);
    }

    if (sentence.EndCheck(" "))
    {
        throw new TpParseException("Normalizer failed to trim: " + original);
    }

    //Get the final punctuation out or it will mess up parsing later.
    string possiblePunctuation = sentence[sentence.Length - 1].ToString();
    Punctuation punctuation;
    if (Punctuation.TryParse(possiblePunctuation, out punctuation))
    {
        sentence = sentence.Substring(0, sentence.Length - 1);
    }

    //Square bracket sentence contains all others
    //[S]
    //F la [S]
    //S la [S]
    //F la S la [S]
    //Maximal.. maybe later
    //F la S la F la S  => (F la S ) la (F la [S])
    //F la S la S la F la S la S
    //[{F la S} la {S} la {F la S}] la <S>

    //Just dealing with la fragments
    string[] laParts = Splitters.SplitOnLa(sentence);

    //Degenerate sentences.
    if (laParts[laParts.Length - 1] == "la")
    {
        //We have a vocative sentence...
        Fragment vocative = new Fragment(ProcessEnPiChain(laParts[0]));
        return new Sentence(vocative, punctuation, diagnostics);
    }

    if (laParts.Length <= 1)
    {
        //No la at all.
        //Simple Sentence
        return ProcessSimpleSentence(sentence, punctuation, original);
    }

    //The last la-part is the head sentence; it alone receives the punctuation.
    //Not dealing with "kin la!"
    string headText = laParts[laParts.Length - 1].RemoveLeadingWholeWord("la");
    Sentence headSentence = ProcessSimpleSentence(headText, punctuation, original);

    const string liFinder = @"\bli\b";
    List<Sentence> preconditions = new List<Sentence>();
    List<Fragment> laFragments = new List<Fragment>();
    Sentence currentSentence = null;

    //Walk the remaining parts right-to-left, i.e. starting next to the head sentence.
    for (int index = laParts.Length - 2; index >= 0; index--)
    {
        string laLessString = laParts[index].RemoveLeadingWholeWord("la");

        if (Regex.IsMatch(laParts[index], liFinder))
        {
            //Contains a whole-word "li": this is a sentence.
            //Maybe should recurse.
            currentSentence = ProcessSimpleSentence(laLessString, null, original);
            preconditions.Add(currentSentence);
            continue;
        }

        //Fragments
        Fragment fragment;
        if (laLessString.StartCheck("~"))
        {
            string[] parts = Splitters.SplitOnPrepositions(laLessString);
            fragment = new Fragment(ProcessPrepositionalPhrases(parts).ToArray());
        }
        else
        {
            fragment = new Fragment(ProcessEnPiChain(laLessString));
        }

        if (currentSentence == null)
        {
            //No precondition sentence seen yet, so the fragment belongs to the head.
            if (headSentence == null)
            {
                throw new TpParseException(
                    "Sentence appears to be headed by a fragment. Shouldn't deal with those here.: " + original);
            }
            headSentence.LaFragment.Add(fragment);
        }
        else
        {
            //NOTE(review): laFragments is never read in this method, so fragments that
            //come after a precondition sentence are currently discarded. Presumably
            //they should be attached to currentSentence - confirm before changing.
            laFragments.Add(fragment);
        }
    }

    if (headSentence == null)
    {
        throw new TpParseException("This is not a sentence, should deal with it with it's own parser: " + original);
    }

    if (preconditions.Count == 0)
    {
        return headSentence;
    }

    return new Sentence(diagnostics, preconditions.ToArray(), headSentence);
}