/// <summary>
/// Creates a degenerate sentence that holds a single fragment instead of
/// subjects and predicates.
/// </summary>
/// <param name="fragment">The fragment that makes up the whole sentence.</param>
/// <param name="punctuation">Punctuation stored with the sentence.</param>
/// <param name="diagnostics">Diagnostics stored with the sentence.</param>
public Sentence(Fragment fragment, Punctuation punctuation, SentenceDiagnostics diagnostics)
{
    // Always start with an empty la-fragment list.
    LaFragment = new List<Fragment>();

    this.diagnostics = diagnostics;
    this.punctuation = punctuation;
    degenerateFragment = fragment;
}
//Suggest that vocatives don't chain. o jan o meli o soweli o => o! jan o! meli o! soweli o!
/// <summary>
/// Creates a degenerate sentence that holds a single vocative instead of
/// subjects and predicates.
/// </summary>
/// <param name="vocative">The vocative that makes up the whole sentence.</param>
/// <param name="punctuation">Punctuation stored with the sentence.</param>
/// <param name="diagnostics">Diagnostics stored with the sentence.</param>
public Sentence(Vocative vocative, Punctuation punctuation, SentenceDiagnostics diagnostics)
{
    // Always start with an empty la-fragment list.
    LaFragment = new List<Fragment>();

    this.diagnostics = diagnostics;
    this.punctuation = punctuation;
    degenerateVocative = vocative;
}
/// <summary>
/// Creates a degenerate sentence that holds a single exclamation instead of
/// subjects and predicates.
/// </summary>
/// <param name="exclamation">The exclamation that makes up the whole sentence.</param>
/// <param name="punctuation">Punctuation stored with the sentence.</param>
/// <param name="diagnostics">Diagnostics stored with the sentence.</param>
public Sentence(Exclamation exclamation, Punctuation punctuation, SentenceDiagnostics diagnostics)
{
    // Always start with an empty la-fragment list.
    LaFragment = new List<Fragment>();

    this.diagnostics = diagnostics;
    this.punctuation = punctuation;
    degenerateExclamation = exclamation;
}
//Simple Sentences
/// <summary>
/// Creates a simple sentence from a subject chain and a predicate list.
/// </summary>
/// <param name="subjects">Subject chain (only (*), o, en).</param>
/// <param name="predicates">Predicate list (only li, pi, en).</param>
/// <param name="diagnostics">Diagnostics stored with the sentence.</param>
/// <param name="parts">
/// Optional extras (punctuation, conjunction, tag question, head vocatives,
/// hortative flag). When null none of those members are assigned.
/// </param>
public Sentence(ComplexChain subjects, PredicateList predicates, SentenceDiagnostics diagnostics, SentenceOptionalParts parts = null)
{
    LaFragment = new List<Fragment>();

    this.subjects = subjects;
    this.predicates = predicates;
    this.diagnostics = diagnostics;

    if (parts == null)
    {
        // Nothing optional was supplied; leave the remaining members untouched.
        return;
    }

    punctuation = parts.Punctuation;
    tagConjunction = parts.Conjunction;
    tagQuestion = parts.TagQuestion;
    headVocatives = parts.HeadVocatives;
    isHortative = parts.IsHortative;
}
// jan li jo e soweli e kili e wawa lon anpa tawa anpa
// li jo e soweli e kili e wawa lon anpa tawa anpa
/// <summary>
/// Parses one predicate string (a leading particle followed by a verb phrase or
/// nominal predicate, optional "e" direct objects and optional prepositional
/// phrases) into a <see cref="TpPredicate"/>.
/// </summary>
/// <param name="liPart">The predicate text, e.g. "li jo e soweli e kili".</param>
/// <returns>
/// A predicate built around a pi-predicate, a nominal predicate, or a verb
/// phrase, depending on which of those was recognised.
/// </returns>
/// <exception cref="TpParseException">
/// The input is null/blank, is exactly "li", yields no parts when split on
/// prepositions, contains punctuation in its first part (intransitive path),
/// or contains an "e" phrase of two characters or fewer.
/// </exception>
/// <exception cref="TpSyntaxException">
/// The first token is not a particle according to Token.CheckIsParticle.
/// </exception>
public TpPredicate ProcessPredicates(string liPart)
{
    if (String.IsNullOrWhiteSpace(liPart))
    {
        throw new TpParseException("Missing argument, cannot continue");
    }
    if (liPart == "li")
    {
        throw new TpParseException("Cannot do anything with just li");
    }
    TokenParserUtils pu = new TokenParserUtils();
    Particle verbPhraseParticle;
    ComplexChain directObjectChain = null;
    VerbPhrase verbPhrase = null;
    PrepositionalPhrase[] prepositionalChain = null;
    ComplexChain nominalPredicate = null;
    PiPredicate piPredicate = null;

    //Transitive Path: taken when a space/tab delimited token is exactly "e".
    if (liPart.Split(new[] { ' ', '\t' }).Contains("e"))
    {
        string[] eParts = Splitters.SplitOnE(liPart);
        //Slot 0 is everything before the first "e": the particle plus the verb phrase.
        string[] verbPhraseParts = pu.WordsPunctuationAndCompounds(eParts[0]); //Could contain particles.
        if (!Token.CheckIsParticle(verbPhraseParts[0]))
        {
            throw new TpSyntaxException("uh-oh not a particle: " + verbPhraseParts[0] + " from " + liPart);
        }
        verbPhraseParticle = new Particle(verbPhraseParts[0]);

        //Only process preps in normalized sentences
        string[] partsWithPreps = null;
        if (verbPhraseParts.Length > 1)
        {
            if (verbPhraseParts.Any(x => x == "pi"))
            {
                //nominal predicate: everything after the particle is parsed as one pi chain.
                nominalPredicate = new ComplexChain(Particles.en, new[] { ProcessPiChain(String.Join(" ", ArrayExtensions.Tail(verbPhraseParts))) });
            }
            else
            {
                verbPhrase = VerbPhraseParser(ArrayExtensions.Tail(verbPhraseParts));
            }
        }

        //Only the last "e" part is inspected for prepositions.
        //NOTE(review): "~" looks like the normalizer's preposition marker -- confirm.
        string verbsMaybePrepositions = eParts[eParts.Length - 1];
        if (verbsMaybePrepositions.ContainsCheck("~"))
        {
            partsWithPreps = Splitters.SplitOnPrepositions(verbsMaybePrepositions);
            if (partsWithPreps.Length == 1)
            {
                //This is the last e phrase or 1st prep.
                if (partsWithPreps[0].ContainsCheck("~"))
                {
                    //That is a prep phrase (is this possible?)
                }
                else
                {
                    eParts[eParts.Length - 1] = partsWithPreps[0]; //No prep phrases.
                }
            }
        }

        string[] directObjects = ArrayExtensions.Tail(eParts);
        //List<HeadedPhrase> doNPs = new List<HeadedPhrase>();
        List <Chain> doPiChains = new List <Chain>();

        //Fancy foot work for when we have e ... ~... & that's all.
        //When the last part was split on prepositions, its "e ..." piece replaces the
        //last direct object so the prepositional text is not parsed as part of the object.
        string[] toUse;
        if (partsWithPreps != null)
        {
            toUse = partsWithPreps.Where(x => x.StartCheck("e ")).ToArray();
            directObjects[directObjects.Length - 1] = toUse[0];
            toUse = directObjects;
        }
        else
        {
            toUse = directObjects;
        }
        foreach (string directObject in toUse)
        {
            if (directObject.Length <= 2)
            {
                throw new TpParseException("This is a degenerate e phrase, i.e. it is only e or e space. Missing a ni, e.g. e ni: possibly. ref: " + liPart);
            }
            //Skip the first two characters (the leading "e ").
            string eFree = directObject.Substring(2);
            Chain phrase = ProcessPiChain(eFree);
            doPiChains.Add(phrase);
        }
        directObjectChain = new ComplexChain(Particles.e, doPiChains.ToArray());
        if (partsWithPreps != null)
        {
            prepositionalChain = ProcessPrepositionalPhrases(partsWithPreps).ToArray();
        }
    }
    else
    {
        //Intransitives & Predicates
        string[] ppParts = Splitters.SplitOnPrepositions(liPart);
        if (ppParts.Length == 0) //Expect at least "li verb" or "li noun"
        {
            throw new TpParseException("Whoa, got " + ppParts.Length + " parts for " + liPart);
        }
        if (Punctuation.ContainsPunctuation(ppParts[0]))
        {
            throw new TpParseException("This has punctuation, may fail to parse : " + ppParts[0]);
        }
        string[] verbPhraseParts = pu.WordsPunctuationAndCompounds(ppParts[0]);
        if (!Token.CheckIsParticle(verbPhraseParts[0]))
        {
            throw new TpSyntaxException("uh-oh not a particle: " + verbPhraseParts[0] + " from " + liPart);
        }
        verbPhraseParticle = new Particle(verbPhraseParts[0]);
        if (verbPhraseParts.Length > 1)
        {
            //0:li 1:xxx 2:np...
            if (verbPhraseParts[1].ContainsCheck("XXXXZiXXXX"))
            {
                //Make it go away. Confuses other parsers and will be picked up by container object.
                //First Tail drops the particle; the Tail below drops the XXXXZiXXXX placeholder.
                verbPhraseParts = ArrayExtensions.Tail(verbPhraseParts);
                //piPredicate
                ComplexChain phrase = new ComplexChain(Particles.en, new[] { ProcessPiChain(String.Join(" ", ArrayExtensions.Tail(verbPhraseParts))) });
                piPredicate = new PiPredicate(Particles.pi, phrase);
            }
            else if (verbPhraseParts.Any(x => x == "pi"))
            {
                //nominal predicate
                nominalPredicate = new ComplexChain(Particles.en, new[] { ProcessPiChain(String.Join(" ", ArrayExtensions.Tail(verbPhraseParts))) } );
            }
            else
            {
                verbPhrase = VerbPhraseParser(ArrayExtensions.Tail(verbPhraseParts));
            }
        }
        //Everything after slot 0 is a candidate prepositional phrase.
        string[] prepositions = ArrayExtensions.Tail(ppParts);
        if (prepositions.Length != 0)
        {
            List <PrepositionalPhrase> pChains = new List <PrepositionalPhrase>();
            foreach (string pp in prepositions)
            {
                string[] phraseParts = pu.WordsPunctuationAndCompounds(pp);//Could contain particles.
                string preposition = phraseParts[0];
                string[] tail = ArrayExtensions.Tail(phraseParts);
                if (tail.Length == 0)
                {
                    //uh oh. This is an intransitive verb, like "ni li lon"
                    //HACK: Oh, this is so ugly (still sort of ugly)
                    //A bare preposition with no object is re-read as the verb; this
                    //overwrites any verbPhrase assigned above.
                    verbPhrase = new VerbPhrase(new Word(preposition.Replace("~", "")));
                    //or a noun phrase.
                    continue;
                }
                PrepositionalPhrase foundPrepositionalPhrase = new PrepositionalPhrase(new Word(preposition), ProcessEnPiChain(String.Join(" ", tail)));
                pChains.Add(foundPrepositionalPhrase);
            }
            if (pChains.Count > 0)
            {
                prepositionalChain = pChains.ToArray();
            }
            else
            {
                //We changed our mind about a phrase being a prep phrase. Turned out to be verb phrase or predicate.
            }
        }
    }
    //Pick the TpPredicate shape: pi-predicate wins, then nominal predicate, else verb phrase.
    if (piPredicate != null)
    {
        return(new TpPredicate(verbPhraseParticle, piPredicate, prepositionalChain));
    }
    if (nominalPredicate == null)
    {
        return(new TpPredicate(verbPhraseParticle, verbPhrase, directObjectChain, prepositionalChain));
    }
    return(new TpPredicate(verbPhraseParticle, nominalPredicate, directObjectChain, prepositionalChain));
}
//This should only operate on normalized sentences.
/// <summary>
/// Parses one normalized sentence into a <see cref="Sentence"/>. Handles
/// blank input, comments, quote-mark stripping, final punctuation and the
/// "la" structure (fragments and precondition sentences in front of a head
/// sentence). Also replaces the <c>diagnostics</c> member with a fresh
/// instance built from the two arguments.
/// </summary>
/// <param name="sentence">The normalized sentence text. Must not start or end with a space.</param>
/// <param name="original">The text as it was before normalization; used in diagnostics and error messages.</param>
/// <returns>
/// A degenerate sentence (blank/symbols, comment, or trailing-la fragment), a
/// simple sentence, or a compound sentence made of preconditions plus a head sentence.
/// </returns>
/// <exception cref="TpParseException">
/// The sentence starts or ends with whitespace, ends with "li", or no head
/// sentence could be determined.
/// </exception>
public Sentence ParsedSentenceFactory(string sentence, string original)
{
    diagnostics = new SentenceDiagnostics(original, sentence);

    if (String.IsNullOrWhiteSpace(sentence))
    {
        return(new Sentence(new NullOrSymbols(original), diagnostics));
        // throw new TpParseException("Do not give me a null sentence. Can't tell if null sentence is from input or got lost in translation");
    }

    //This may have already been done by the normalizer, but if not, no problem.
    if (sentence.Contains(" li pi "))
    {
        sentence = sentence.Replace(" li pi ", " li XXXXZiXXXX ");
    }

    ParserUtils.ThrowOnDoubleParticles(sentence, dialect);

    if (sentence.StartCheck(" "))
    {
        throw new TpParseException("Do not give me a sentence that leads with whitespace, I do not want to do defensive Trim() all day. (Call at least NormalizeExplict)");
    }

    if (sentence.StartCheck("///"))
    {
        Comment c = new Comment(sentence);
        return(new Sentence(c, diagnostics));
    }

    if (sentence.EndCheck(" li") || sentence.EndCheck(" li."))
    {
        throw new TpParseException("Something went wrong, sentence ends with li: " + original);
    }

    //Normalization is really expensive. We must stop calling it twice.
    //sentence = Normalizer.NormalizeText(sentence, config); //Any way to avoid calling this twice?

    //HACK: This is necessary (otherwise we have to deal with optional quotes starting, ending words)
    //But we'd rather do this on a sentence level in Discourse.
    //The quote marks are simply removed; nothing records that they were present.
    if (sentence.StartCheck("«"))
    {
        sentence = sentence.Replace("«", " ").Trim();
    }
    if (sentence.EndCheck("»", "».", "»!") || sentence.EndCheck("»:", "»?"))
    {
        sentence = sentence.Replace("»", " ").Trim();
    }
    //TODO: do something with quoted speech. Big problem #1 it spans multiple sentences

    //A sentence that was nothing but quote marks is empty now. Treat it like the
    //blank-input case above instead of indexing into an empty string below.
    if (sentence.Length == 0)
    {
        return(new Sentence(new NullOrSymbols(original), diagnostics));
    }

    if (sentence.EndCheck(" "))
    {
        throw new TpParseException("Normalizer failed to trim: " + original);
    }

    //Get the final punctuation out or it will mess up parsing later.
    string possiblePunctuation = sentence[sentence.Length - 1].ToString();
    Punctuation punctuation;
    if (Punctuation.TryParse(possiblePunctuation, out punctuation))
    {
        sentence = sentence.Substring(0, sentence.Length - 1);
    }

    //Square bracket sentence contains all others
    //[S]
    //F la [S]
    //S la [S]
    //F la S la [S]
    //Maximal.. maybe later
    //F la S la F la S => (F la S ) la (F la [S])
    //F la S la S la F la S la S
    //[{F la S} la {S} la {F la S}] la <S>

    //Just dealing with la fragments
    Sentence headSentence = null;
    List<Sentence> preconditions = new List<Sentence>();

    string[] laParts = Splitters.SplitOnLa(sentence);

    //Degenerate sentences.
    if (laParts[laParts.Length - 1] == "la")
    {
        //The text ends in a bare "la": the whole thing is a fragment sentence.
        Fragment fragment = new Fragment(ProcessEnPiChain(laParts[0]));
        Sentence fragmentSentence = new Sentence(fragment, punctuation, diagnostics);
        return(fragmentSentence);
    }

    if (laParts.Length <= 1)
    {
        //No la at all.
        //Simple Sentence
        return(ProcessSimpleSentence(sentence, punctuation, original));
    }

    //Walk the la-parts right to left: the last part is the head sentence, the
    //parts before it are preconditions (they contain "li") or fragments.
    //NOTE(review): because of the right-to-left walk, preconditions and fragments
    //are stored in reverse textual order -- confirm the renderer expects that.
    int i = 0;
    Sentence currentSentence = null; //Most recently parsed precondition, if any.
    foreach (string subSentence in laParts.Reverse())
    {
        i++;
        if (i == 1)
        {
            //Head sentence.
            // subSentence.StartCheck("la ") ? subSentence.Substring(3) : subSentence
            string laLessString = subSentence.RemoveLeadingWholeWord("la");
            headSentence = ProcessSimpleSentence(laLessString, punctuation, original);
            continue; //Not dealing with "kin la!"
        }

        //Fragments & preconditions
        const string liFinder = @"\bli\b";
        Match m = Regex.Match(subSentence, liFinder);
        if (m.Success)
        {
            //This is a sentence
            //Maybe should recurse.
            string laLessString = subSentence.RemoveLeadingWholeWord("la");
            currentSentence = ProcessSimpleSentence(laLessString, null, original);
            preconditions.Add(currentSentence);
        }
        else
        {
            string laLessString = subSentence.RemoveLeadingWholeWord("la");
            Fragment fragment;
            if (laLessString.StartCheck("~"))
            {
                string[] parts = Splitters.SplitOnPrepositions(laLessString);
                fragment = new Fragment(ProcessPrepositionalPhrases(parts).ToArray());
            }
            else
            {
                fragment = new Fragment(ProcessEnPiChain(laLessString));
            }

            if (currentSentence == null)
            {
                if (headSentence == null)
                {
                    throw new TpParseException(
                        "Sentence appears to be headed by a fragment. Shouldn't deal with those here.: " + original);
                }
                headSentence.LaFragment.Add(fragment);
            }
            else
            {
                //"(F la S) la ...": the fragment belongs to the precondition that follows it
                //in the text, which is the one parsed just before it in this reversed walk.
                //Previously these fragments went into a list that was never read and were lost.
                currentSentence.LaFragment.Add(fragment);
            }
        }
    }

    if (headSentence == null)
    {
        throw new TpParseException("This is not a sentence, should deal with it with it's own parser: " + original);
    }
    if (preconditions.Count == 0)
    {
        return(headSentence);
    }
    Sentence s = new Sentence(diagnostics, preconditions.ToArray(), headSentence);
    return(s);
}
/// <summary>
/// Parses a sentence that contains no "la" into a <see cref="Sentence"/>:
/// a comment, an exclamation, a vocative, or subjects plus predicates with
/// optional head vocatives, leading conjunction and "anu seme" tag question.
/// </summary>
/// <param name="sentence">Normalized sentence text with the final punctuation already removed.</param>
/// <param name="punctuation">Punctuation to store on the result; may be null.</param>
/// <param name="original">The pre-normalization text, used for diagnostics.</param>
/// <returns>The parsed sentence.</returns>
/// <exception cref="TpParseException">
/// The sentence ends with "li" or contains "la".
/// </exception>
public Sentence ProcessSimpleSentence(string sentence, Punctuation punctuation, string original)
{
    //Think this is causing a bug.
    ////HACK: Still need a better way to deal with quotes.
    //if (sentence.EndCheck("»") || sentence.EndCheck("«"))
    //{
    //    sentence = sentence.Substring(0, sentence.Length - 1);
    //}

    //Comment? Get out of here!
    if (sentence.StartCheck("///"))
    {
        Comment c = new Comment(sentence);
        return(new Sentence(c, diagnostics));
    }

    //Simple exclamation! Get out of here!
    if (Exclamation.IsExclamation(sentence))
    {
        return(new Sentence(new Exclamation(new HeadedPhrase(new Word(sentence))), punctuation, new SentenceDiagnostics(original, sentence)));
    }

    List<Vocative> headVocatives = null;
    //jan Mato o, ale li pona. Head vocative!
    //kin la o moku. //not a vocative (hopefully dealt with elsewhere)
    //jan Mato o moku! //Head vocative, & imperative, with 2nd o discarded
    //jan Mato o o moku! //Head vocative, & imperative, with 2nd o discarded
    if (sentence.ContainsCheck(" o o "))//Explicit vocative & imperative
    {
        //Okay, we know exactly when the head vocatives end.
        headVocatives = new List<Vocative>();
        string justHeadVocatives = sentence.Substring(0, sentence.IndexOf(" o o ", StringComparison.Ordinal));

        //Process head vocatives.
        ProcessHeadVocatives(Splitters.SplitOnO(justHeadVocatives), headVocatives, allAreVocatives: true);
        //BUG: Add the dummy! (And it still doesn't work!)
        sentence = "jan Sanwan o " + sentence.Substring(sentence.IndexOf(" o o ", StringComparison.Ordinal) + 5);
    }

    //Starts with o, then we have imperative & no head vocatives.
    //NOTE(review): the original local was called endsOrStartsWithO but the test is
    //starts-AND-ends; the logic is left as it was -- confirm which one is intended.
    bool startsAndEndsWithO = sentence.StartCheck("o ") && sentence.EndCheck(" o");
    if (!startsAndEndsWithO)
    {
        //jan So o! (We already deal with degenerate vocative sentences elsewhere)
        //jan So o sina li nasa.
        //jan So o nasa!
        //jan So o mi mute o nasa. <-- This is the problem.
        //These could be vocatives or imperatives.
        if (sentence.ContainsCheck(" o ", " o,", ",o ") && sentence.ContainsCheck(" li "))
        {
            //NOTE(review): this replaces any list built by the " o o " branch above.
            headVocatives = new List<Vocative>();
            ProcessHeadVocatives(Splitters.SplitOnO(sentence), headVocatives, allAreVocatives: false);

            //int firstLi = sentence.IndexOf(" li ");
            //Look for the vocative "o" in each of the three spellings tested above.
            int lastO = sentence.LastIndexOf(" o ", StringComparison.Ordinal);
            if (lastO < 0)
            {
                lastO = sentence.LastIndexOf(" o,", StringComparison.Ordinal);
            }
            if (lastO < 0)
            {
                lastO = sentence.LastIndexOf(",o ", StringComparison.Ordinal);
            }

            //Only cut when a marker was really found. With lastO == -1 the old code
            //did Substring(1) and silently dropped the first character.
            if (lastO >= 0)
            {
                sentence = sentence.Substring(lastO + 2);
            }
        }
    }

    //Process tag conjunctions and tag questions
    Particle conjunction = null;
    TagQuestion tagQuestion = null;
    if (sentence.StartCheck("taso "))
    {
        conjunction = Particles.taso;
        sentence = sentence.Substring(5);
    }
    else if (sentence.StartCheck("anu "))
    {
        conjunction = Particles.anu;
        sentence = sentence.Substring(4);
    }
    else if (sentence.StartCheck("en "))
    {
        //Well, either parse it or throw. Otherwise, this gets skipped.
        //is this legal?
        conjunction = Particles.en;
        sentence = sentence.Substring(3);
    }
    else if (sentence.StartCheck("ante ")) //never seen it.
    {
        conjunction = Particles.ante;
        sentence = sentence.Substring(5);
    }

    //Should already have ? stripped off
    //Ordinal on both calls so the test and the cut agree.
    if (sentence.EndsWith(" anu seme", StringComparison.Ordinal))
    {
        tagQuestion = new TagQuestion();
        sentence = sentence.Substring(0, sentence.LastIndexOf(" anu seme", StringComparison.Ordinal));
    }

    if (sentence.EndCheck(" li"))
    {
        throw new TpParseException("Something went wrong-- sentenc ends with li. " + sentence);
    }
    if (sentence.StartsOrContainsOrEnds("la"))
    {
        throw new TpParseException("If it contains a la, anywhere, it isn't a simple sentence. " + sentence);
    }

    bool isHortative = false;
    if (sentence.StartCheck("o ") && sentence.ContainsCheck(" li "))
    {
        //o mi mute li moku
        isHortative = true;
        sentence = sentence.RemoveLeadingWholeWord("o");
    }
    //Imperatives (o pana e pan) keep their leading "o"; it is picked up below
    //when the subject slot turns out to start with "o ".

    // someting o ==> vocative
    string[] liParts = Splitters.SplitOnLiOrO(sentence);

    if (liParts.Length == 1 && Exclamation.IsExclamation(liParts[0]))
    {
        //HACK: Duplicate code. & it only deals with a single final puncution mark.
        //Parse into a temporary: an out argument is always overwritten, so passing
        //the parameter directly would discard the caller's punctuation on failure.
        string possiblePunctuation = sentence[sentence.Length - 1].ToString();
        Punctuation trailingPunctuation;
        if (Punctuation.TryParse(possiblePunctuation, out trailingPunctuation))
        {
            punctuation = trailingPunctuation;
            sentence = sentence.Substring(0, sentence.Length - 1);
        }

        //The whole thing is o! (or pakala! or the like)
        //pona a! a a a! ike a!
        TokenParserUtils tpu = new TokenParserUtils();

        Word[] tokes = tpu.ValidWords(sentence);
        HeadedPhrase parts = new HeadedPhrase(tokes[0], new WordSet(ArrayExtensions.Tail(tokes)));
        bool modifiersAreA = true;

        foreach (Word w in parts.Modifiers)
        {
            if (w == "a")
            {
                continue; //peculiar to exclamations & repeats.
            }
            if (w == "kin")
            {
                continue; //modifies just about anything
            }
            modifiersAreA = false;
        }

        if (modifiersAreA)
        {
            Exclamation exclamation = new Exclamation(parts);
            Sentence s = new Sentence(exclamation, punctuation, diagnostics);
            return(s);
        }
    }

    //Degenerate sentences.
    if (liParts[liParts.Length - 1].Trim(new char[] { ',', '«', '»', '!', ' ' }) == "o")
    {
        //We have a vocative sentence...
        Vocative vocative = new Vocative(ProcessEnPiChain(liParts[0]));
        Sentence s = new Sentence(vocative, punctuation, diagnostics);
        return(s);
    }

    string subjects = liParts[0].Trim();

    ComplexChain subjectChain = null;
    int startAt = 1; //slot 0 is normally a subject

    if (subjects.StartCheck("o ") || subjects.StartCheck("«o "))
    {
        //This is a verb phrase with implicit subjects!
        startAt = 0;
    }
    else
    {
        subjectChain = ProcessEnPiChain(subjects);
    }

    PredicateList verbPhrases = new PredicateList();

    for (int i = startAt; i < liParts.Length; i++)
    {
        string predicate = liParts[i].Trim();
        verbPhrases.Add(ProcessPredicates(predicate));
    }

    //Head or complete sentence.
    Sentence parsedSentence = new Sentence(subjectChain, verbPhrases, diagnostics, new SentenceOptionalParts
    {
        Conjunction = conjunction,
        //Etc
        Punctuation = punctuation,
        IsHortative = isHortative,
        TagQuestion = tagQuestion,
        HeadVocatives = headVocatives != null ? headVocatives.ToArray() : null
    });
    return(parsedSentence);
}
/// <summary>
/// Renders the sentence as text. A comment sentence is returned through its own
/// formatter. Otherwise the tokens are space-joined, punctuation is appended,
/// and (for every format except "bs") the result is passed through Denormalize.
/// </summary>
/// <param name="format">Format code; null is treated as "g". "bs" skips denormalization and "html" gets an extra span clean-up.</param>
/// <param name="formatProvider">Passed through to the nested ToString / ToTokenList calls.</param>
/// <returns>The formatted sentence.</returns>
/// <exception cref="TpSyntaxException">The last token to render is "li".</exception>
public string ToString(string format, IFormatProvider formatProvider)
{
    if (format == null)
    {
        format = "g";
    }

    if (degenerateComment != null)
    {
        //We don't do anything fancy.
        //(Maybe suppress?)
        return(degenerateComment.ToString(format, formatProvider));
    }

    List<string> sb = new List<string>();
    string spaceJoined;
    if (preconditions != null)
    {
        //Compound sentence: all preconditions, then "la", then the conclusion.
        foreach (Sentence precondition in preconditions)
        {
            sb.AddRange(precondition.ToTokenList(format, formatProvider));
        }
        sb.Add(Particles.la.ToString(format, formatProvider));
        sb.AddRange(conclusion.ToTokenList(format, formatProvider));

        if (sb[sb.Count - 1] == "li")
        {
            throw new TpSyntaxException("Something went wrong, sentence ends in li");
        }
        spaceJoined = sb.SpaceJoin(format);

        //Correct punctuation: "?" when seme is present, overridden by the
        //conclusion's own punctuation, with "." as the fallback.
        BasicTypes.Punctuation normalizedPunctuation = null;
        if (Contains(Words.seme))
        {
            normalizedPunctuation = new Punctuation("?");
        }
        if (conclusion.punctuation != null)
        {
            normalizedPunctuation = conclusion.punctuation;
        }
        //Default
        if (normalizedPunctuation == null)
        {
            normalizedPunctuation = new Punctuation(".");
        }
        spaceJoined = spaceJoined + normalizedPunctuation.ToString();
    }
    else
    {
        //Simple sentence
        sb = ToTokenList(format, formatProvider);

        if (sb[sb.Count - 1] == "li")
        {
            throw new TpSyntaxException("Something went wrong, sentence ends in li");
        }

        spaceJoined = sb.SpaceJoin(format);

        //Correct punctuation: "?" when seme is present, overridden by the
        //sentence's own punctuation. No default here.
        BasicTypes.Punctuation normalizedPunctuation = null;
        if (Contains(Words.seme))
        {
            normalizedPunctuation = new Punctuation("?");
        }
        if (punctuation != null)
        {
            normalizedPunctuation = punctuation;
        }
        //Default (depends on if we have a parent?)
        //if (normalizedPunctuation == null)
        //{
        //    normalizedPunctuation = new Punctuation(".");
        //}

        //Append exactly once. normalizedPunctuation already equals punctuation when
        //that is set, so the former second append only ever doubled the mark.
        if (normalizedPunctuation != null)
        {
            spaceJoined = spaceJoined + normalizedPunctuation.ToString();
        }
    }

    //This happens in x la x la x. sentences.
    //Kept as a safety net for a mark that is already at the end of the token text.
    if (spaceJoined.EndCheck("..") || spaceJoined.EndCheck("??") || spaceJoined.EndCheck("::") || spaceJoined.EndCheck("!!"))
    {
        spaceJoined = spaceJoined.Substring(0, spaceJoined.Length - 1);
    }

    if (format != "bs")
    {
        string result = Denormalize(spaceJoined, format, formatProvider);
        //Pull commas back against the preceding word.
        while (result.ContainsCheck(" , "))
        {
            result = result.Replace(" , ", ", ");
        }
        spaceJoined = result;
    }

    if (format == "html")
    {
        //Same comma clean-up for a comma that sits at the start of a prep span.
        if (spaceJoined.ContainsCheck(" <span class=\"prep\">,"))
        {
            spaceJoined = spaceJoined.Replace(" <span class=\"prep\">,", "<span class=\"prep\">,");
        }
    }
    return(spaceJoined);
}