/// <summary>
/// Serializes a trace record, followed by all of its child records, as one XML element
/// on <c>m_xmlWriter</c>.
/// </summary>
/// <remarks>
/// Each handled trace type opens an element named after the record's runtime type
/// (stratum and template records additionally get an "In" or "Out" suffix). The closing
/// <c>WriteEndElement</c> at the bottom is unconditional: for a trace type that is not
/// listed in the switch no start element is written here, but the children and the end
/// element still are.
/// </remarks>
/// <param name="trace">The trace record to write.</param>
/// <param name="printTraceInputs">
/// When <c>true</c>, phonological and morphological rule records also emit their
/// "Input" element before the "Output" element.
/// </param>
protected virtual void Write(Trace trace, bool printTraceInputs)
{
    switch (trace.Type)
    {
        case Trace.TraceType.WORD_ANALYSIS:
        {
            var wordAnalysis = trace as WordAnalysisTrace;
            m_xmlWriter.WriteStartElement(wordAnalysis.GetType().Name);
            m_xmlWriter.WriteElementString("InputWord", wordAnalysis.InputWord);
            break;
        }

        case Trace.TraceType.STRATUM_ANALYSIS:
        {
            var stratumAnalysis = trace as StratumAnalysisTrace;
            string elementName = stratumAnalysis.GetType().Name;
            if (stratumAnalysis.IsInput)
            {
                elementName += "In";
            }
            else
            {
                elementName += "Out";
            }
            m_xmlWriter.WriteStartElement(elementName);
            Write("Stratum", stratumAnalysis.Stratum);
            if (stratumAnalysis.IsInput)
            {
                Write("Input", stratumAnalysis.Analysis);
            }
            else
            {
                Write("Output", stratumAnalysis.Analysis);
            }
            break;
        }

        case Trace.TraceType.STRATUM_SYNTHESIS:
        {
            var stratumSynthesis = trace as StratumSynthesisTrace;
            string elementName = stratumSynthesis.GetType().Name;
            if (stratumSynthesis.IsInput)
            {
                elementName += "In";
            }
            else
            {
                elementName += "Out";
            }
            m_xmlWriter.WriteStartElement(elementName);
            Write("Stratum", stratumSynthesis.Stratum);
            if (stratumSynthesis.IsInput)
            {
                Write("Input", stratumSynthesis.Synthesis);
            }
            else
            {
                Write("Output", stratumSynthesis.Synthesis);
            }
            break;
        }

        case Trace.TraceType.LEX_LOOKUP:
        {
            var lexLookup = trace as LexLookupTrace;
            m_xmlWriter.WriteStartElement(lexLookup.GetType().Name);
            m_xmlWriter.WriteElementString("Stratum", lexLookup.Stratum.Description);
            // the looked-up shape is rendered through the stratum's own character definition table
            m_xmlWriter.WriteElementString(
                "Shape",
                lexLookup.Stratum.CharacterDefinitionTable.ToRegexString(lexLookup.Shape, ModeType.ANALYSIS, true));
            break;
        }

        case Trace.TraceType.WORD_SYNTHESIS:
        {
            var wordSynthesis = trace as WordSynthesisTrace;
            m_xmlWriter.WriteStartElement(wordSynthesis.GetType().Name);
            Write("RootAllomorph", wordSynthesis.RootAllomorph);

            m_xmlWriter.WriteStartElement("MorphologicalRules");
            foreach (MorphologicalRule morphRule in wordSynthesis.MorphologicalRules)
            {
                Write("MorphologicalRule", morphRule);
            }
            m_xmlWriter.WriteEndElement(); // closes MorphologicalRules

            m_xmlWriter.WriteElementString("RealizationalFeatures", wordSynthesis.RealizationalFeatures.ToString());
            break;
        }

        case Trace.TraceType.PHONOLOGICAL_RULE_ANALYSIS:
        {
            var phonAnalysis = trace as PhonologicalRuleAnalysisTrace;
            m_xmlWriter.WriteStartElement(phonAnalysis.GetType().Name);
            Write("PhonologicalRule", phonAnalysis.Rule);
            if (printTraceInputs)
            {
                Write("Input", phonAnalysis.Input);
            }
            Write("Output", phonAnalysis.Output);
            break;
        }

        case Trace.TraceType.PHONOLOGICAL_RULE_SYNTHESIS:
        {
            var phonSynthesis = trace as PhonologicalRuleSynthesisTrace;
            m_xmlWriter.WriteStartElement(phonSynthesis.GetType().Name);
            Write("PhonologicalRule", phonSynthesis.Rule);
            if (printTraceInputs)
            {
                Write("Input", phonSynthesis.Input);
            }
            Write("Output", phonSynthesis.Output);
            break;
        }

        case Trace.TraceType.PHONOLOGICAL_RULE_SYNTHESIS_REQUIREDPOS:
        {
            var requiredPos = trace as PhonologicalRuleSynthesisRequiredPOSTrace;
            m_xmlWriter.WriteStartElement(requiredPos.GetType().Name);
            Write("PhonologicalRuleStemPOS", requiredPos.PartOfSpeech);

            m_xmlWriter.WriteStartElement("PhonologicalRuleRequiredPOSes");
            foreach (var pos in requiredPos.RequiredPOSs)
            {
                Write("PhonologicalRuleRequiredPOS", pos);
            }
            m_xmlWriter.WriteEndElement(); // closes PhonologicalRuleRequiredPOSes
            break;
        }

        case Trace.TraceType.PHONOLOGICAL_RULE_SYNTHESIS_MPRFEATURES:
        {
            var mprTrace = trace as PhonologicalRuleSynthesisMPRFeaturesTrace;
            m_xmlWriter.WriteStartElement(mprTrace.GetType().Name);

            // anything other than EXCLUDED is reported as "required"
            bool isExcluded = mprTrace.MPRFeatureType
                == PhonologicalRuleSynthesisMPRFeaturesTrace.PhonologicalRuleSynthesisMPRFeaturesTraceType.EXCLUDED;
            m_xmlWriter.WriteAttributeString("type", isExcluded ? "excluded" : "required");

            m_xmlWriter.WriteStartElement("PhonologicalRuleMPRFeatures");
            foreach (var feature in mprTrace.MPRFeatures)
            {
                Write("PhonologicalRuleMPRFeature", feature);
            }
            m_xmlWriter.WriteEndElement(); // closes PhonologicalRuleMPRFeatures

            // NOTE(review): the element name below is emitted exactly as spelled ("Featrues");
            // it is left untouched because readers of this XML may depend on it.
            m_xmlWriter.WriteStartElement("PhonologicalRuleConstrainingMPRFeatrues");
            foreach (var feature in mprTrace.ConstrainingMPRFeatures)
            {
                Write("PhonologicalRuleMPRFeature", feature);
            }
            m_xmlWriter.WriteEndElement(); // closes the constraining-features element
            break;
        }

        case Trace.TraceType.TEMPLATE_ANALYSIS:
        {
            var templateAnalysis = trace as TemplateAnalysisTrace;
            string elementName = templateAnalysis.GetType().Name;
            if (templateAnalysis.IsInput)
            {
                elementName += "In";
            }
            else
            {
                elementName += "Out";
            }
            m_xmlWriter.WriteStartElement(elementName);
            Write("AffixTemplate", templateAnalysis.Template);
            if (templateAnalysis.IsInput)
            {
                Write("Input", templateAnalysis.Analysis);
            }
            else
            {
                Write("Output", templateAnalysis.Analysis);
            }
            break;
        }

        case Trace.TraceType.TEMPLATE_SYNTHESIS:
        {
            var templateSynthesis = trace as TemplateSynthesisTrace;
            string elementName = templateSynthesis.GetType().Name;
            if (templateSynthesis.IsInput)
            {
                elementName += "In";
            }
            else
            {
                elementName += "Out";
            }
            m_xmlWriter.WriteStartElement(elementName);
            Write("AffixTemplate", templateSynthesis.Template);
            if (templateSynthesis.IsInput)
            {
                Write("Input", templateSynthesis.Synthesis);
            }
            else
            {
                Write("Output", templateSynthesis.Synthesis);
            }
            break;
        }

        case Trace.TraceType.MORPHOLOGICAL_RULE_ANALYSIS:
        {
            var morphAnalysis = trace as MorphologicalRuleAnalysisTrace;
            m_xmlWriter.WriteStartElement(morphAnalysis.GetType().Name);
            Write("MorphologicalRule", morphAnalysis.Rule);
            // the allomorph element is optional and only written when one is recorded
            if (morphAnalysis.RuleAllomorph != null)
            {
                Write("RuleAllomorph", morphAnalysis.RuleAllomorph);
            }
            if (printTraceInputs)
            {
                Write("Input", morphAnalysis.Input);
            }
            Write("Output", morphAnalysis.Output);
            break;
        }

        case Trace.TraceType.MORPHOLOGICAL_RULE_SYNTHESIS:
        {
            var morphSynthesis = trace as MorphologicalRuleSynthesisTrace;
            m_xmlWriter.WriteStartElement(morphSynthesis.GetType().Name);
            Write("MorphologicalRule", morphSynthesis.Rule);
            // the allomorph element is optional and only written when one is recorded
            if (morphSynthesis.RuleAllomorph != null)
            {
                Write("RuleAllomorph", morphSynthesis.RuleAllomorph);
            }
            if (printTraceInputs)
            {
                Write("Input", morphSynthesis.Input);
            }
            Write("Output", morphSynthesis.Output);
            break;
        }

        case Trace.TraceType.BLOCKING:
        {
            var blocking = trace as BlockingTrace;
            m_xmlWriter.WriteStartElement(blocking.GetType().Name);
            Write("BlockingEntry", blocking.BlockingEntry);
            break;
        }

        case Trace.TraceType.REPORT_SUCCESS:
        {
            var success = trace as ReportSuccessTrace;
            m_xmlWriter.WriteStartElement(success.GetType().Name);
            Write("Result", success.Output);
            break;
        }
    }

    // nested records become nested elements inside the one opened above
    foreach (Trace nested in trace.Children)
    {
        Write(nested, printTraceInputs);
    }

    m_xmlWriter.WriteEndElement();
}
/// <summary>
/// Morphs a single word: unapplies the rules of every stratum (last to first) to collect
/// candidate syntheses, applies the strata again to each candidate, and returns the valid
/// syntheses whose shape matches the original word.
/// </summary>
/// <param name="word">The word.</param>
/// <param name="prev">The previous word. Not read by this method.</param>
/// <param name="next">The next word. Not read by this method.</param>
/// <param name="trace">Receives the root of the trace tree created for <paramref name="word"/>.</param>
/// <param name="selectTraceMorphs">Handed unchanged to the <c>Unapply</c> call of each stratum.</param>
/// <returns>All valid word synthesis records, without duplicates.</returns>
/// <exception cref="MorphException">
/// Thrown with <c>INVALID_SHAPE</c> when the surface character definition table returns
/// <c>null</c> for the word.
/// </exception>
ICollection<WordSynthesis> MorphAndLookupToken(string word, string prev, string next, out WordAnalysisTrace trace, string[] selectTraceMorphs)
{
    // turn the orthographic word into a phonetic shape; null means it has invalid segments
    PhoneticShape surfaceShape = SurfaceStratum.CharacterDefinitionTable.ToPhoneticShape(word, ModeType.ANALYSIS);
    if (surfaceShape == null)
    {
        string message = string.Format(HCStrings.kstidInvalidWord, word, SurfaceStratum.CharacterDefinitionTable.ID);
        MorphException invalidShape = new MorphException(MorphException.MorphErrorType.INVALID_SHAPE, this, message);
        invalidShape.Data["shape"] = word;
        invalidShape.Data["charDefTable"] = SurfaceStratum.CharacterDefinitionTable.ID;
        throw invalidShape;
    }

    // root of the trace tree
    trace = new WordAnalysisTrace(word, surfaceShape.Clone());

    Set<WordSynthesis> lexCandidates = new Set<WordSynthesis>();
    Set<WordAnalysis> pendingAnalyses = new Set<WordAnalysis>();
    Set<WordAnalysis> producedAnalyses = new Set<WordAnalysis>();
    pendingAnalyses.Add(new WordAnalysis(surfaceShape, SurfaceStratum, trace));

    // ---- analysis: walk the strata from the last one down to the first ----
    for (int level = m_strata.Count - 1; level >= 0; level--)
    {
        producedAnalyses.Clear();
        foreach (WordAnalysis pending in pendingAnalyses)
        {
            if (m_traceStrataAnalysis)
            {
                // record what goes into this stratum
                StratumAnalysisTrace inputRecord = new StratumAnalysisTrace(m_strata[level], true, pending.Clone());
                pending.CurrentTrace.AddChild(inputRecord);
            }

            foreach (WordAnalysis produced in m_strata[level].Unapply(pending, lexCandidates, selectTraceMorphs))
            {
                // hand the analysis over to the preceding stratum, if there is one
                if (level > 0)
                {
                    produced.Stratum = m_strata[level - 1];
                }

                if (m_traceStrataAnalysis)
                {
                    // record what came out of this stratum
                    produced.CurrentTrace.AddChild(new StratumAnalysisTrace(m_strata[level], false, produced.Clone()));
                }

                producedAnalyses.Add(produced);
            }
        }

        // this stratum's output is the next stratum's input
        pendingAnalyses.Clear();
        pendingAnalyses.AddMany(producedAnalyses);
    }

    // ---- synthesis: run every candidate forward through the strata ----
    Set<WordSynthesis> allValidSyntheses = new Set<WordSynthesis>();
    foreach (WordSynthesis candidate in lexCandidates)
    {
        Set<WordSynthesis> pendingSyntheses = new Set<WordSynthesis>();
        Set<WordSynthesis> producedSyntheses = new Set<WordSynthesis>();

        for (int level = 0; level < m_strata.Count; level++)
        {
            // the candidate enters the pipeline at the stratum its root belongs to
            if (m_strata[level] == candidate.Root.Stratum)
            {
                pendingSyntheses.Add(candidate);
            }

            producedSyntheses.Clear();
            foreach (WordSynthesis pending in pendingSyntheses)
            {
                if (m_traceStrataSynthesis)
                {
                    // record what goes into this stratum
                    StratumSynthesisTrace inputRecord = new StratumSynthesisTrace(m_strata[level], true, pending.Clone());
                    pending.CurrentTrace.AddChild(inputRecord);
                }

                foreach (WordSynthesis produced in m_strata[level].Apply(pending))
                {
                    // hand the synthesis over to the following stratum, if there is one
                    if (level + 1 < m_strata.Count)
                    {
                        produced.Stratum = m_strata[level + 1];
                    }

                    if (m_traceStrataSynthesis)
                    {
                        // record what came out of this stratum
                        produced.CurrentTrace.AddChild(new StratumSynthesisTrace(m_strata[level], false, produced.Clone()));
                    }

                    producedSyntheses.Add(produced);
                }
            }

            pendingSyntheses.Clear();
            pendingSyntheses.AddMany(producedSyntheses);
        }

        // whatever the last stratum produced is kept if it is valid
        foreach (WordSynthesis finished in producedSyntheses)
        {
            if (finished.IsValid)
            {
                allValidSyntheses.Add(finished);
            }
        }
    }

    // ---- filtering: sort, then keep matching, non-duplicate syntheses ----
    Set<WordSynthesis> results = new Set<WordSynthesis>();

    // List.Sort relies on WordSynthesis' own ordering
    List<WordSynthesis> ordered = new List<WordSynthesis>(allValidSyntheses);
    ordered.Sort();

    WordSynthesis prevValidSynthesis = null;
    foreach (WordSynthesis current in ordered)
    {
        // every synthesis becomes the "previous" one for the next iteration,
        // whether or not it ends up in the results
        WordSynthesis predecessor = prevValidSynthesis;
        prevValidSynthesis = current;

        // skip a synthesis that is disjunctive with the one sorted directly before it
        if (predecessor != null && !AreAllomorphsNondisjunctive(current, predecessor))
        {
            continue;
        }

        // the synthesized shape has to reproduce the original input word
        if (!SurfaceStratum.CharacterDefinitionTable.IsMatch(word, current.Shape))
        {
            continue;
        }

        if (m_traceSuccess)
        {
            current.CurrentTrace.AddChild(new ReportSuccessTrace(current));
        }

        // leave out anything that Duplicates() an already accepted result
        bool isNew = true;
        foreach (WordSynthesis accepted in results)
        {
            if (current.Duplicates(accepted))
            {
                isNew = false;
                break;
            }
        }

        if (isNew)
        {
            results.Add(current);
        }
    }

    return results;
}
/// <summary>
/// Morphs a single word: every stratum is unapplied (last to first) to gather candidate
/// syntheses, each candidate is then run forward through the strata, and the valid
/// syntheses whose shape matches the original word are returned.
/// </summary>
/// <param name="word">The word.</param>
/// <param name="prev">The previous word. Not read by this method.</param>
/// <param name="next">The next word. Not read by this method.</param>
/// <param name="trace">Receives the root of the trace tree created for <paramref name="word"/>.</param>
/// <returns>All valid word synthesis records, without duplicates.</returns>
/// <exception cref="MorphException">
/// Thrown with <c>INVALID_SHAPE</c> when the surface character definition table returns
/// <c>null</c> for the word.
/// </exception>
ICollection<WordSynthesis> MorphAndLookupToken(string word, string prev, string next, out WordAnalysisTrace trace)
{
    // phonetic shape of the word; the char def table yields null for invalid segments
    PhoneticShape wordShape = SurfaceStratum.CharacterDefinitionTable.ToPhoneticShape(word, ModeType.ANALYSIS);
    if (wordShape == null)
    {
        string errorText = string.Format(HCStrings.kstidInvalidWord, word, SurfaceStratum.CharacterDefinitionTable.ID);
        MorphException shapeError = new MorphException(MorphException.MorphErrorType.INVALID_SHAPE, this, errorText);
        shapeError.Data["shape"] = word;
        shapeError.Data["charDefTable"] = SurfaceStratum.CharacterDefinitionTable.ID;
        throw shapeError;
    }

    // the trace tree starts with a word-analysis record
    trace = new WordAnalysisTrace(word, wordShape.Clone());

    Set<WordSynthesis> rootCandidates = new Set<WordSynthesis>();
    Set<WordAnalysis> analysesIn = new Set<WordAnalysis>();
    Set<WordAnalysis> analysesOut = new Set<WordAnalysis>();
    analysesIn.Add(new WordAnalysis(wordShape, SurfaceStratum, trace));

    // Phase 1: unapply the strata, from the last one back to the first
    for (int s = m_strata.Count - 1; s >= 0; s--)
    {
        analysesOut.Clear();
        foreach (WordAnalysis source in analysesIn)
        {
            if (m_traceStrataAnalysis)
            {
                // trace record for the input of this stratum
                StratumAnalysisTrace inTrace = new StratumAnalysisTrace(m_strata[s], true, source.Clone());
                source.CurrentTrace.AddChild(inTrace);
            }

            foreach (WordAnalysis result in m_strata[s].Unapply(source, rootCandidates))
            {
                // every stratum but the first passes its output on to the preceding one
                if (s > 0)
                {
                    result.Stratum = m_strata[s - 1];
                }

                if (m_traceStrataAnalysis)
                {
                    // trace record for the output of this stratum
                    result.CurrentTrace.AddChild(new StratumAnalysisTrace(m_strata[s], false, result.Clone()));
                }

                analysesOut.Add(result);
            }
        }

        // feed this round's output into the next round
        analysesIn.Clear();
        analysesIn.AddMany(analysesOut);
    }

    // Phase 2: apply the strata to each candidate entry
    Set<WordSynthesis> allValidSyntheses = new Set<WordSynthesis>();
    foreach (WordSynthesis candidate in rootCandidates)
    {
        Set<WordSynthesis> synthesesIn = new Set<WordSynthesis>();
        Set<WordSynthesis> synthesesOut = new Set<WordSynthesis>();

        for (int s = 0; s < m_strata.Count; s++)
        {
            // application begins at the stratum of the candidate's root
            if (m_strata[s] == candidate.Root.Stratum)
            {
                synthesesIn.Add(candidate);
            }

            synthesesOut.Clear();
            foreach (WordSynthesis source in synthesesIn)
            {
                if (m_traceStrataSynthesis)
                {
                    // trace record for the input of this stratum
                    StratumSynthesisTrace inTrace = new StratumSynthesisTrace(m_strata[s], true, source.Clone());
                    source.CurrentTrace.AddChild(inTrace);
                }

                foreach (WordSynthesis result in m_strata[s].Apply(source))
                {
                    // every stratum but the last passes its output on to the following one
                    if (s + 1 < m_strata.Count)
                    {
                        result.Stratum = m_strata[s + 1];
                    }

                    if (m_traceStrataSynthesis)
                    {
                        // trace record for the output of this stratum
                        result.CurrentTrace.AddChild(new StratumSynthesisTrace(m_strata[s], false, result.Clone()));
                    }

                    synthesesOut.Add(result);
                }
            }

            synthesesIn.Clear();
            synthesesIn.AddMany(synthesesOut);
        }

        // collect the valid outputs of the final stratum
        foreach (WordSynthesis done in synthesesOut)
        {
            if (done.IsValid)
            {
                allValidSyntheses.Add(done);
            }
        }
    }

    // Phase 3: sort, then keep matching, non-duplicate syntheses
    Set<WordSynthesis> results = new Set<WordSynthesis>();

    // List.Sort relies on WordSynthesis' own ordering
    List<WordSynthesis> ranked = new List<WordSynthesis>(allValidSyntheses);
    ranked.Sort();

    WordSynthesis prevValidSynthesis = null;
    foreach (WordSynthesis current in ranked)
    {
        // remember the neighbour before moving the marker forward; the marker advances
        // for every synthesis, accepted or not
        WordSynthesis predecessor = prevValidSynthesis;
        prevValidSynthesis = current;

        // a synthesis with the same morphemes as the one sorted directly before it is dropped
        if (predecessor != null && current.Morphs.SameMorphemes(predecessor.Morphs))
        {
            continue;
        }

        // the phonetic shape must match the original input word
        if (!SurfaceStratum.CharacterDefinitionTable.IsMatch(word, current.Shape))
        {
            continue;
        }

        if (m_traceSuccess)
        {
            current.CurrentTrace.AddChild(new ReportSuccessTrace(current));
        }

        // skip anything that Duplicates() a result that was already accepted
        bool alreadyPresent = false;
        foreach (WordSynthesis accepted in results)
        {
            if (current.Duplicates(accepted))
            {
                alreadyPresent = true;
                break;
            }
        }

        if (!alreadyPresent)
        {
            results.Add(current);
        }
    }

    return results;
}