/// <summary>
/// Creates a <see cref="WordAnalysis"/> for the given shape and stratum. The part-of-speech set,
/// the morphological rule list, the unapplied-rule table and the feature values are all
/// newly created objects.
/// </summary>
/// <param name="shape">The phonetic shape; the reference is stored as-is.</param>
/// <param name="stratum">The stratum; the reference is stored as-is.</param>
internal WordAnalysis(PhoneticShape shape, Stratum stratum)
{
    // values handed in by the caller
    m_shape = shape;
    m_stratum = stratum;

    // remaining state starts out freshly constructed
    m_pos = new HCObjectSet<PartOfSpeech>();
    m_rzFeatures = new FeatureValues();
    m_mrules = new List<MorphologicalRule>();
    m_mrulesUnapplied = new Dictionary<MorphologicalRule, int>();
}
/// <summary>
/// Copy constructor. The shape, the non-head analysis (when present) and the feature values
/// are obtained through <c>Clone()</c>; the part-of-speech set, rule list and unapplied-rule
/// table are new collections constructed from the source's collections; the root allomorph,
/// current trace and stratum references are shared with the source.
/// </summary>
/// <param name="wa">The word analysis to copy.</param>
public WordAnalysis(WordAnalysis wa)
{
    // cloned members
    m_shape = wa.m_shape.Clone();
    if (wa.m_nonHead != null)
    {
        m_nonHead = wa.m_nonHead.Clone();
    }
    m_rzFeatures = wa.m_rzFeatures.Clone();

    // new collections built from the source's collections
    m_pos = new HCObjectSet<PartOfSpeech>(wa.m_pos);
    m_mrules = new List<MorphologicalRule>(wa.m_mrules);
    m_mrulesUnapplied = new Dictionary<MorphologicalRule, int>(wa.m_mrulesUnapplied);

    // plain reference copies
    m_rootAllomorph = wa.m_rootAllomorph;
    m_curTrace = wa.m_curTrace;
    m_stratum = wa.m_stratum;
}
/// <summary>
/// Walks the input once in the given direction and unapplies the RHS at every match
/// reported by <c>FindNextMatchRHS</c>.
/// </summary>
/// <param name="input">The input phonetic shape.</param>
/// <param name="dir">The direction in which the input is traversed.</param>
/// <returns><c>true</c> if at least one match was found and unapplied, otherwise <c>false</c>.</returns>
bool UnapplyIterative(PhoneticShape input, Direction dir)
{
    bool changed = false;
    Match found;
    PhoneticShapeNode start = input.GetFirst(dir);

    while (FindNextMatchRHS(start, dir, out found))
    {
        IList<PhoneticShapeNode> matched = found.EntireMatch;
        UnapplyRHS(dir, matched, found.VariableValues);
        changed = true;

        // continue searching from the node that follows the last matched node;
        // this is read only after the unapplication has been performed
        start = matched[matched.Count - 1].GetNext(dir);
    }

    return changed;
}
/// <summary>
/// Unapplies this subrule to the specified input phonetic shape.
/// </summary>
/// <param name="input">The input phonetic shape.</param>
public void Unapply(PhoneticShape input)
{
    if (Type == ChangeType.NARROW)
    {
        // Deletion rules are self-opaquing, so it is unclear how many segments could have
        // been deleted during synthesis; the rule is therefore unapplied repeatedly. To keep
        // this from running forever, the number of passes is capped using the morpher's
        // DelReapplications setting, and the loop also stops as soon as a pass finds nothing.
        for (int pass = 0; pass <= m_rule.Morpher.DelReapplications; pass++)
        {
            if (!UnapplyNarrow(input))
            {
                break;
            }
        }
        return;
    }

    // Right-to-left iterative rules are unapplied starting from the right. Everything else
    // starts from the left: simultaneous subrules could go either way, and left-to-right
    // is chosen arbitrarily.
    Direction dir = m_rule.m_multApplication == MultAppOrder.RL_ITERATIVE
        ? Direction.RIGHT
        : Direction.LEFT;

    if (!IsSelfOpaquing)
    {
        UnapplyIterative(input, dir);
        return;
    }

    // only simultaneous subrules can be self-opaquing; keep unapplying until a pass
    // no longer makes a change
    bool changed;
    do
    {
        changed = UnapplyIterative(input, dir);
    }
    while (changed);
}
/// <summary>
/// Builds a new phonetic shape by unapplying the transform to every partition of the match.
/// The result starts with a left margin and ends with a right margin.
/// </summary>
/// <param name="match">The match.</param>
/// <returns>The newly built phonetic shape.</returns>
PhoneticShape UnapplyRHS(Match match)
{
    PhoneticShape result = new PhoneticShape();
    result.Add(new Margin(Direction.LEFT));

    // visit the LHS partitions in order, letting the transform write each one to the result
    int partition = 0;
    while (partition < m_transform.PartitionCount)
    {
        m_transform.Unapply(match, partition, result);
        partition++;
    }

    result.Add(new Margin(Direction.RIGHT));
    return result;
}
/// <summary>
/// Determines whether the specified word matches the specified phonetic shape. The word is
/// first passed through <c>StripUnusedChars</c>, and the stripped result is what gets
/// handed to the base implementation.
/// </summary>
/// <param name="word">The word.</param>
/// <param name="shape">The phonetic shape.</param>
/// <returns>
/// <c>false</c> if <c>StripUnusedChars</c> reports failure; otherwise the result of the
/// base <c>IsMatch</c> on the stripped word.
/// </returns>
public override bool IsMatch(string word, PhoneticShape shape)
{
    string stripped;
    // short-circuit: the base match is only attempted when stripping succeeded
    return StripUnusedChars(word, out stripped) && base.IsMatch(stripped, shape);
}
/// <summary>
/// Converts the specified string to a phonetic shape, wrapped in a left and a right margin.
/// At each position the longest substring for which <c>GetPhoneticShapeNode</c> yields a
/// node is consumed first.
/// </summary>
/// <param name="str">The string.</param>
/// <param name="mode">The mode.</param>
/// <returns>
/// The phonetic shape, or <c>null</c> if adding a node to the shape raises an
/// <see cref="InvalidOperationException"/>.
/// </returns>
/// <exception cref="MissingPhoneticShapeException">
/// No substring starting at the current position produces a node. The exception is created
/// with the display-format regex string of the shape built so far and the failing position.
/// </exception>
public PhoneticShape ToPhoneticShape(string str, ModeType mode)
{
    PhoneticShape result = new PhoneticShape();
    result.Add(new Margin(Direction.LEFT));

    int pos = 0;
    while (pos < str.Length)
    {
        // number of characters consumed at this position; stays 0 if nothing matched
        int consumed = 0;

        // try the longest remaining substring first, then progressively shorter ones
        for (int len = str.Length - pos; len > 0; len--)
        {
            PhoneticShapeNode node = GetPhoneticShapeNode(str.Substring(pos, len), mode);
            if (node == null)
            {
                continue;
            }

            try
            {
                result.Add(node);
            }
            catch (InvalidOperationException)
            {
                return null;
            }

            consumed = len;
            break;
        }

        if (consumed == 0)
        {
            // NOTE(review): the text is rendered with ModeType.ANALYSIS regardless of the
            // mode argument - confirm this is intended.
            string sPhonemesFoundSoFar = ToRegexString(result, ModeType.ANALYSIS, true);
            throw new MissingPhoneticShapeException(sPhonemesFoundSoFar, pos);
        }

        pos += consumed;
    }

    result.Add(new Margin(Direction.RIGHT));
    return result;
}
/// <summary>
/// Creates a <see cref="LexLookupTrace"/> that records the given stratum and shape.
/// </summary>
/// <param name="stratum">The stratum; the reference is stored as-is.</param>
/// <param name="shape">The phonetic shape; the reference is stored as-is.</param>
internal LexLookupTrace(Stratum stratum, PhoneticShape shape)
{
    m_shape = shape;
    m_stratum = stratum;
}
/// <summary>
/// Walks the input in the given direction. For every LHS match, the first subrule whose
/// environment matches has its RHS applied; the remaining subrules are not tried for
/// that match.
/// </summary>
/// <param name="input">The input phonetic shape.</param>
/// <param name="dir">The direction in which the input is traversed.</param>
/// <param name="subrules">The candidate subrules, tried in list order.</param>
void ApplyIterative(PhoneticShape input, Direction dir, List<Subrule> subrules)
{
    PhoneticShapeNode start = input.GetFirst(dir);
    Match found;

    while (FindNextMatchLHS(start, dir, out found))
    {
        IList<PhoneticShapeNode> matched = found.EntireMatch;
        VariableValues vars = found.VariableValues;
        bool applied = false;

        foreach (Subrule candidate in subrules)
        {
            // an empty LHS is checked against the first matched node only,
            // a non-empty LHS against the whole match
            bool envMatches;
            if (m_lhs.Count == 0)
            {
                envMatches = candidate.MatchEnvEmpty(matched[0], dir, ModeType.SYNTHESIS, vars);
            }
            else
            {
                envMatches = candidate.MatchEnvNonempty(matched, dir, ModeType.SYNTHESIS, vars);
            }

            if (!envMatches)
            {
                continue;
            }

            candidate.ApplyRHS(dir, matched, vars);
            applied = true;
            break;
        }

        // after an application, resume past the end of the match; otherwise move on by
        // one node from the start of the match (the node is read after any application)
        PhoneticShapeNode anchor = applied ? matched[matched.Count - 1] : matched[0];
        start = anchor.GetNext(dir);
    }
}
/// <summary>
/// Generates a string representation of the specified phonetic shape. Each segment
/// contributes the string representation of its first matching segment definition (nothing
/// if there is none); boundaries contribute their definition's string representation only
/// when requested; all other node types contribute nothing.
/// </summary>
/// <param name="shape">The phonetic shape.</param>
/// <param name="mode">The mode used to look up matching segment definitions.</param>
/// <param name="includeBdry">if <c>true</c> boundary markers will be included in the
/// string representation.</param>
/// <returns>The string representation.</returns>
public string ToString(PhoneticShape shape, ModeType mode, bool includeBdry)
{
    StringBuilder text = new StringBuilder();

    foreach (PhoneticShapeNode node in shape)
    {
        var kind = node.Type;

        if (kind == PhoneticShapeNode.NodeType.SEGMENT)
        {
            Segment segment = node as Segment;
            IList<SegmentDefinition> defs = GetMatchingSegmentDefinitions(segment, mode);
            if (defs.Count > 0)
            {
                // only the first matching definition is used
                text.Append(defs[0].StrRep);
            }
        }
        else if (kind == PhoneticShapeNode.NodeType.BOUNDARY && includeBdry)
        {
            Boundary boundary = node as Boundary;
            text.Append(boundary.BoundaryDefinition.StrRep);
        }
    }

    return text.ToString();
}
/// <summary>
/// Converts the specified phonetic shape to a regular expression string. Regular expressions
/// formatted for display purposes are NOT guaranteed to compile.
/// </summary>
/// <remarks>
/// Segments with no matching segment definition produce no output. A segment with several
/// matching definitions is wrapped in <c>[...]</c> (display) or <c>(...)</c> with <c>|</c>
/// between alternatives (compilation). Any string representation longer than one character
/// is parenthesized. Optional segments and all boundaries are followed by <c>?</c>. Margins
/// are emitted as <c>^</c> / <c>$</c> only in compilation format.
/// </remarks>
/// <param name="shape">The phonetic shape.</param>
/// <param name="mode">The mode used to look up matching segment definitions.</param>
/// <param name="displayFormat">if <c>true</c> the result will be formatted for display, otherwise
/// it will be formatted for compilation (string representations are regex-escaped).</param>
/// <returns>The regular expression string.</returns>
public string ToRegexString(PhoneticShape shape, ModeType mode, bool displayFormat)
{
    StringBuilder regex = new StringBuilder();

    foreach (PhoneticShapeNode node in shape)
    {
        switch (node.Type)
        {
            case PhoneticShapeNode.NodeType.SEGMENT:
            {
                Segment segment = node as Segment;
                IList<SegmentDefinition> defs = GetMatchingSegmentDefinitions(segment, mode);
                if (defs.Count == 0)
                {
                    break;
                }

                bool grouped = defs.Count > 1;
                if (grouped)
                {
                    regex.Append(displayFormat ? "[" : "(");
                }

                for (int k = 0; k < defs.Count; k++)
                {
                    string rep = defs[k].StrRep;
                    bool multiChar = rep.Length > 1;

                    if (multiChar)
                    {
                        regex.Append("(");
                    }
                    regex.Append(displayFormat ? rep : Regex.Escape(rep));
                    if (multiChar)
                    {
                        regex.Append(")");
                    }

                    // alternation separators are only written in compilation format
                    if (!displayFormat && k < defs.Count - 1)
                    {
                        regex.Append("|");
                    }
                }

                if (grouped)
                {
                    regex.Append(displayFormat ? "]" : ")");
                }
                if (segment.IsOptional)
                {
                    regex.Append("?");
                }
                break;
            }

            case PhoneticShapeNode.NodeType.BOUNDARY:
            {
                Boundary boundary = node as Boundary;
                string rep = boundary.BoundaryDefinition.StrRep;
                bool multiChar = rep.Length > 1;

                if (multiChar)
                {
                    regex.Append("(");
                }
                regex.Append(displayFormat ? rep : Regex.Escape(rep));
                if (multiChar)
                {
                    regex.Append(")");
                }

                // boundaries are always emitted as optional
                regex.Append("?");
                break;
            }

            case PhoneticShapeNode.NodeType.MARGIN:
            {
                if (!displayFormat)
                {
                    Margin margin = node as Margin;
                    regex.Append(margin.MarginType == Direction.LEFT ? "^" : "$");
                }
                break;
            }
        }
    }

    return regex.ToString();
}
/// <summary>
/// Converts the specified string to a phonetic shape, wrapped in a left and a right margin.
/// At each position the longest substring for which <c>GetPhoneticShapeNode</c> yields a
/// node is consumed first.
/// </summary>
/// <param name="str">The string.</param>
/// <param name="mode">The mode.</param>
/// <returns>
/// The phonetic shape, or <c>null</c> if no substring at some position produces a node or
/// if adding a node to the shape raises an <see cref="InvalidOperationException"/>.
/// </returns>
public PhoneticShape ToPhoneticShape(string str, ModeType mode)
{
    PhoneticShape result = new PhoneticShape();
    result.Add(new Margin(Direction.LEFT));

    int pos = 0;
    while (pos < str.Length)
    {
        // search from the longest remaining substring down to a single character
        PhoneticShapeNode node = null;
        int len = str.Length - pos;
        for (; len > 0; len--)
        {
            node = GetPhoneticShapeNode(str.Substring(pos, len), mode);
            if (node != null)
            {
                break;
            }
        }

        if (node == null)
        {
            // nothing starting at this position could be converted
            return null;
        }

        try
        {
            result.Add(node);
        }
        catch (InvalidOperationException)
        {
            return null;
        }

        pos += len;
    }

    result.Add(new Margin(Direction.RIGHT));
    return result;
}
/// <summary>
/// Creates segments from an LHS partition pattern and appends them to the output.
/// Simple contexts each yield one segment via <c>UnapplyDeletion</c>; nested patterns are
/// expanded recursively; boundary contexts are skipped.
/// </summary>
/// <param name="lhs">The LHS partition pattern.</param>
/// <param name="output">The shape that receives the new segments.</param>
/// <param name="optional">The value assigned to <c>IsOptional</c> on segments created
/// directly from this pattern's simple contexts.</param>
/// <param name="instantiatedVars">The instantiated variables.</param>
void Untruncate(PhoneticPattern lhs, PhoneticShape output, bool optional, VariableValues instantiatedVars)
{
    foreach (PhoneticPatternNode patternNode in lhs)
    {
        switch (patternNode.Type)
        {
            case PhoneticPatternNode.NodeType.SIMP_CTXT:
            {
                SimpleContext context = patternNode as SimpleContext;
                Segment segment = context.UnapplyDeletion(instantiatedVars);
                segment.IsOptional = optional;
                output.Add(segment);
                break;
            }

            case PhoneticPatternNode.NodeType.PATTERN:
            {
                NestedPhoneticPattern nested = patternNode as NestedPhoneticPattern;

                // expand the nested pattern as many times as it may occur at most; the
                // repetitions beyond the minimum number of occurrences are marked optional
                int repetition = 0;
                while (repetition < nested.MaxOccur)
                {
                    Untruncate(nested.Pattern, output, repetition >= nested.MinOccur, instantiatedVars);
                    repetition++;
                }
                break;
            }

            case PhoneticPatternNode.NodeType.BDRY_CTXT:
                // boundaries produce no output
                break;
        }
    }
}
/// <summary>
/// Unapplies this transform to the specified partition in the specified match, appending
/// the resulting nodes to the output.
/// </summary>
/// <remarks>
/// If the match has no nodes for the partition, the partition is untruncated from the
/// corresponding LHS pattern. Otherwise each matched segment is copied (and, when a
/// modify-from context is registered for the partition, that context is unapplied to the
/// copy) and each matched boundary is cloned; other node types are not copied.
/// </remarks>
/// <param name="match">The match.</param>
/// <param name="partition">The partition.</param>
/// <param name="output">The output.</param>
public void Unapply(Match match, int partition, PhoneticShape output)
{
    IList<PhoneticShapeNode> matched = match.GetPartition(partition);

    if (matched == null || matched.Count == 0)
    {
        // nothing was matched for this partition: rebuild it from the LHS pattern
        Untruncate(m_lhs[partition], output, false, match.VariableValues);
        return;
    }

    SimpleContext modifyFrom;
    if (!m_modifyFromCtxts.TryGetValue(partition, out modifyFrom))
    {
        modifyFrom = null;
    }

    foreach (PhoneticShapeNode node in matched)
    {
        switch (node.Type)
        {
            case PhoneticShapeNode.NodeType.SEGMENT:
            {
                Segment copy = new Segment(node as Segment);
                if (modifyFrom != null)
                {
                    // a modify-from context exists for this partition, so unapply it
                    modifyFrom.Unapply(copy, match.VariableValues);
                }
                output.Add(copy);
                break;
            }

            case PhoneticShapeNode.NodeType.BOUNDARY:
            {
                output.Add(node.Clone());
                break;
            }
        }
    }
}
/// <summary>
/// Creates an <see cref="InsertSegments"/> instance that holds the given phonetic shape.
/// </summary>
/// <param name="pshape">The phonetic shape; the reference is stored as-is.</param>
public InsertSegments(PhoneticShape pshape)
{
    m_pshape = pshape;
}
/// <summary>
/// Performs one unapplication pass of a deletion subrule. All RHS matches are collected
/// first and only then modified, because deletion subrules are always treated like
/// simultaneous subrules during unapplication.
/// </summary>
/// <param name="input">The input phonetic shape.</param>
/// <returns><c>true</c> if at least one match was found, otherwise <c>false</c>.</returns>
bool UnapplyNarrow(PhoneticShape input)
{
    // phase 1: collect every match, advancing one node past the start of each match
    List<Match> found = new List<Match>();
    Match current;
    PhoneticShapeNode start = input.First;
    while (FindNextMatchRHS(start, Direction.RIGHT, out current))
    {
        found.Add(current);
        start = current.EntireMatch[0].Next;
    }

    // phase 2: restore the deleted segments for each collected match
    foreach (Match m in found)
    {
        IList<PhoneticShapeNode> matched = m.EntireMatch;
        PhoneticShapeNode anchor = matched[matched.Count - 1];

        foreach (PhoneticPatternNode lhsNode in m_rule.m_lhs)
        {
            if (lhsNode.Type == PhoneticPatternNode.NodeType.SIMP_CTXT)
            {
                SimpleContext context = lhsNode as SimpleContext;
                Segment restored = context.UnapplyDeletion(m.VariableValues);

                // the undeleted segment is marked optional
                restored.IsOptional = true;

                // insert relative to the current anchor, then chain the next one off it
                anchor.Insert(restored, Direction.RIGHT);
                anchor = restored;
            }
        }

        if (m_analysisTarget.Count > 0)
        {
            // with a non-empty analysis target, the matched nodes themselves become optional
            foreach (PhoneticShapeNode matchedNode in m.EntireMatch)
            {
                matchedNode.IsOptional = true;
            }
        }
    }

    return found.Count > 0;
}
/// <summary>
/// Applies each subrule to the input in two phases: first every LHS match whose
/// environment satisfies the subrule is collected, then the subrule's RHS is applied
/// to all collected matches.
/// </summary>
/// <param name="input">The input phonetic shape.</param>
/// <param name="subrules">The subrules, processed in list order.</param>
void ApplySimultaneous(PhoneticShape input, List<Subrule> subrules)
{
    foreach (Subrule subrule in subrules)
    {
        // phase 1: gather the matches accepted by this subrule's environment
        List<Match> accepted = new List<Match>();
        PhoneticShapeNode start = input.First;
        Match candidate;

        while (FindNextMatchLHS(start, Direction.RIGHT, out candidate))
        {
            IList<PhoneticShapeNode> matched = candidate.EntireMatch;
            VariableValues vars = candidate.VariableValues;

            // an empty LHS is checked against the first matched node only,
            // a non-empty LHS against the whole match
            bool envMatches;
            if (m_lhs.Count == 0)
            {
                envMatches = subrule.MatchEnvEmpty(matched[0], Direction.RIGHT, ModeType.SYNTHESIS, vars);
            }
            else
            {
                envMatches = subrule.MatchEnvNonempty(matched, Direction.RIGHT, ModeType.SYNTHESIS, vars);
            }

            if (envMatches)
            {
                accepted.Add(candidate);
                // resume past the end of the accepted match
                start = matched[matched.Count - 1].Next;
            }
            else
            {
                // rejected: move on by one node from the start of the match
                start = matched[0].Next;
            }
        }

        // phase 2: apply the changes
        foreach (Match m in accepted)
        {
            subrule.ApplyRHS(Direction.RIGHT, m.EntireMatch, m.VariableValues);
        }
    }
}
/// <summary>
/// Determines whether the specified word matches the specified phonetic shape. The shape
/// is converted to a compilation-format regular expression in synthesis mode, and the word
/// is tested against it ignoring case, using the invariant culture.
/// </summary>
/// <param name="word">The word.</param>
/// <param name="shape">The phonetic shape.</param>
/// <returns>
/// <c>true</c> if the word matches the shape, otherwise <c>false</c>.
/// </returns>
public virtual bool IsMatch(string word, PhoneticShape shape)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;
    return Regex.IsMatch(word, ToRegexString(shape, ModeType.SYNTHESIS, false), options);
}
/// <summary>
/// Creates a <see cref="WordAnalysisTrace"/> that records the given input word and input shape.
/// </summary>
/// <param name="inputWord">The input word.</param>
/// <param name="inputShape">The input shape; the reference is stored as-is.</param>
internal WordAnalysisTrace(string inputWord, PhoneticShape inputShape)
{
    m_inputShape = inputShape;
    m_inputWord = inputWord;
}
/// <summary>
/// Walks the input once in the given direction and reorders the segments of every match
/// reported by <c>FindNextMatch</c> for the given pattern and mode.
/// </summary>
/// <param name="input">The input phonetic shape.</param>
/// <param name="dir">The direction in which the input is traversed.</param>
/// <param name="ptemp">The pattern to search for.</param>
/// <param name="mode">The mode.</param>
/// <returns><c>true</c> if at least one match was reordered, otherwise <c>false</c>.</returns>
bool ProcessIterative(PhoneticShape input, Direction dir, PhoneticPattern ptemp, ModeType mode)
{
    bool changed = false;
    Match found;
    PhoneticShapeNode start = input.GetFirst(dir);

    while (FindNextMatch(start, dir, ptemp, mode, out found))
    {
        Reorder(dir, found);
        changed = true;

        // the match is read after reordering; resume from the node following its last entry
        IList<PhoneticShapeNode> matched = found.EntireMatch;
        start = matched[matched.Count - 1].GetNext(dir);
    }

    return changed;
}
/// <summary>
/// Unapplies the transform to every partition of the match, splitting the output between
/// two new shapes: partitions with an index below <c>m_firstNonHeadPartition</c> go to the
/// head shape, the rest to the non-head shape. Both shapes start with a left margin and
/// end with a right margin.
/// </summary>
/// <param name="match">The match.</param>
/// <param name="headShape">Receives the head shape.</param>
/// <param name="nonHeadShape">Receives the non-head shape.</param>
void UnapplyRHS(Match match, out PhoneticShape headShape, out PhoneticShape nonHeadShape)
{
    headShape = new PhoneticShape();
    headShape.Add(new Margin(Direction.LEFT));

    nonHeadShape = new PhoneticShape();
    nonHeadShape.Add(new Margin(Direction.LEFT));

    // visit the LHS partitions in order, routing each one to the shape it belongs to
    int partition = 0;
    while (partition < m_transform.PartitionCount)
    {
        if (partition < m_firstNonHeadPartition)
        {
            m_transform.Unapply(match, partition, headShape);
        }
        else
        {
            m_transform.Unapply(match, partition, nonHeadShape);
        }
        partition++;
    }

    headShape.Add(new Margin(Direction.RIGHT));
    nonHeadShape.Add(new Margin(Direction.RIGHT));
}
/// <summary>
/// Creates a <see cref="RootAllomorph"/>. The id, description and morpher are forwarded to
/// the base constructor; the shape is kept by this class.
/// </summary>
/// <param name="id">The id.</param>
/// <param name="desc">The description.</param>
/// <param name="morpher">The morpher.</param>
/// <param name="shape">The phonetic shape; the reference is stored as-is.</param>
public RootAllomorph(string id, string desc, Morpher morpher, PhoneticShape shape)
    : base(id, desc, morpher)
{
    m_shape = shape;
}