示例#1
0
		/// <summary>
		/// Creates a word analysis for the given phonetic shape and stratum. The
		/// part-of-speech set, the morphological rule list, the unapplied-rule table
		/// and the feature values are all freshly constructed.
		/// </summary>
		/// <param name="shape">The phonetic shape; stored as given, without cloning.</param>
		/// <param name="stratum">The stratum recorded on this analysis.</param>
		internal WordAnalysis(PhoneticShape shape, Stratum stratum)
		{
			// values supplied by the caller
			m_stratum = stratum;
			m_shape = shape;

			// per-instance containers
			m_rzFeatures = new FeatureValues();
			m_mrulesUnapplied = new Dictionary<MorphologicalRule, int>();
			m_mrules = new List<MorphologicalRule>();
			m_pos = new HCObjectSet<PartOfSpeech>();
		}
示例#2
0
		/// <summary>
		/// Copy constructor. The shape, the non-head analysis (when present) and the
		/// feature values are cloned; the part-of-speech set, rule list and
		/// unapplied-rule table are copied into new collections; the root allomorph,
		/// current trace and stratum are shared with the source by reference.
		/// </summary>
		/// <param name="wa">The word analysis to copy.</param>
		public WordAnalysis(WordAnalysis wa)
		{
			// cloned members
			m_shape = wa.m_shape.Clone();
			if (wa.m_nonHead != null)
			{
				m_nonHead = wa.m_nonHead.Clone();
			}
			m_rzFeatures = wa.m_rzFeatures.Clone();

			// new collections holding the same elements
			m_pos = new HCObjectSet<PartOfSpeech>(wa.m_pos);
			m_mrules = new List<MorphologicalRule>(wa.m_mrules);
			m_mrulesUnapplied = new Dictionary<MorphologicalRule, int>(wa.m_mrulesUnapplied);

			// members shared with the source analysis
			m_rootAllomorph = wa.m_rootAllomorph;
			m_curTrace = wa.m_curTrace;
			m_stratum = wa.m_stratum;
		}
			/// <summary>
			/// Walks the input in the given direction, unapplying this subrule to every
			/// RHS match that is found.
			/// </summary>
			/// <param name="input">The input phonetic shape.</param>
			/// <param name="dir">The direction in which matches are searched.</param>
			/// <returns>
			/// 	<c>true</c> if at least one match was unapplied, otherwise <c>false</c>.
			/// </returns>
			bool UnapplyIterative(PhoneticShape input, Direction dir)
			{
				int unappliedCount = 0;
				Match curMatch;
				PhoneticShapeNode searchStart = input.GetFirst(dir);
				while (FindNextMatchRHS(searchStart, dir, out curMatch))
				{
					IList<PhoneticShapeNode> matched = curMatch.EntireMatch;
					UnapplyRHS(dir, matched, curMatch.VariableValues);
					unappliedCount++;

					// resume the search at the node following the end of this match
					PhoneticShapeNode lastMatched = matched[matched.Count - 1];
					searchStart = lastMatched.GetNext(dir);
				}

				return unappliedCount > 0;
			}
			/// <summary>
			/// Unapplies this subrule to the specified input phonetic shape. Narrowing
			/// (deletion) subrules are unapplied repeatedly up to a configured limit; all
			/// other subrules are unapplied iteratively in a direction derived from the
			/// rule's multiple-application order.
			/// </summary>
			/// <param name="input">The input phonetic shape.</param>
			public void Unapply(PhoneticShape input)
			{
				if (Type != ChangeType.NARROW)
				{
					// Only right-to-left iterative rules use Direction.RIGHT. Left-to-right
					// iterative and simultaneous rules (for which the choice is arbitrary),
					// as well as any other order, use Direction.LEFT.
					Direction dir = m_rule.m_multApplication == MultAppOrder.RL_ITERATIVE
						? Direction.RIGHT
						: Direction.LEFT;

					// only simultaneous subrules can be self-opaquing
					if (!IsSelfOpaquing)
					{
						UnapplyIterative(input, dir);
						return;
					}

					// keep unapplying until a pass no longer changes anything
					while (UnapplyIterative(input, dir))
					{
					}
					return;
				}

				// Deletion rules are self-opaquing, so it is unclear how many segments were
				// deleted during synthesis; the rule is therefore unapplied several times.
				// Since that could go on forever, the number of passes is capped using the
				// morpher's DelReapplications setting.
				for (int i = 0; i <= m_rule.Morpher.DelReapplications && UnapplyNarrow(input); i++)
				{
				}
			}
			/// <summary>
			/// Builds a new phonetic shape by unapplying the transform to every partition
			/// of the specified match, in partition order, between a left and a right margin.
			/// </summary>
			/// <param name="match">The match.</param>
			/// <returns>The newly built phonetic shape.</returns>
			PhoneticShape UnapplyRHS(Match match)
			{
				PhoneticShape result = new PhoneticShape();
				result.Add(new Margin(Direction.LEFT));

				// each partition appends its unapplied material to the result
				int partition = 0;
				while (partition < m_transform.PartitionCount)
				{
					m_transform.Unapply(match, partition, result);
					partition++;
				}

				result.Add(new Margin(Direction.RIGHT));
				return result;
			}
		/// <summary>
		/// Determines whether the specified word matches the specified phonetic shape.
		/// The word is first passed through <c>StripUnusedChars</c>; if that fails the
		/// word does not match, otherwise the stripped word is handed to the base
		/// implementation.
		/// </summary>
		/// <param name="word">The word.</param>
		/// <param name="shape">The phonetic shape.</param>
		/// <returns>
		/// 	<c>true</c> if the word matches the shape, otherwise <c>false</c>.
		/// </returns>
		public override bool IsMatch(string word, PhoneticShape shape)
		{
			string stripped;
			return StripUnusedChars(word, out stripped) && base.IsMatch(stripped, shape);
		}
		/// <summary>
		/// Converts the specified string to a phonetic shape. At each position it tries the
		/// longest remaining substring first and shortens it until a phonetic shape node is found.
		/// </summary>
		/// <param name="str">The string.</param>
		/// <param name="mode">The mode.</param>
		/// <returns>
		/// The phonetic shape, enclosed in a left and a right margin, or <c>null</c> if adding a
		/// matched node to the shape raises an <see cref="InvalidOperationException"/>.
		/// </returns>
		/// <exception cref="MissingPhoneticShapeException">
		/// No substring starting at the current position yields a phonetic shape node. The
		/// exception is created with the display-format string of the nodes matched so far and
		/// the position in <paramref name="str"/> at which matching failed.
		/// </exception>
		public PhoneticShape ToPhoneticShape(string str, ModeType mode)
		{
			PhoneticShape ps = new PhoneticShape();
			ps.Add(new Margin(Direction.LEFT));

			int i = 0;
			while (i < str.Length)
			{
				bool match = false;
				// longest candidate first, shrinking one character at a time
				for (int j = str.Length - i; j > 0; j--)
				{
					PhoneticShapeNode node = GetPhoneticShapeNode(str.Substring(i, j), mode);
					if (node == null)
						continue;

					try
					{
						ps.Add(node);
					}
					catch (InvalidOperationException)
					{
						return null;
					}
					i += j;
					match = true;
					break;
				}

				if (!match)
				{
					// report what was recognized up to the failure point
					string sPhonemesFoundSoFar = ToRegexString(ps, ModeType.ANALYSIS, true);
					throw new MissingPhoneticShapeException(sPhonemesFoundSoFar, i);
				}
			}
			ps.Add(new Margin(Direction.RIGHT));

			return ps;
		}
示例#8
0
		/// <summary>
		/// Creates a lexical lookup trace that records the given stratum and shape.
		/// </summary>
		/// <param name="stratum">The stratum stored on the trace.</param>
		/// <param name="shape">The phonetic shape stored on the trace.</param>
		internal LexLookupTrace(Stratum stratum, PhoneticShape shape)
		{
			this.m_shape = shape;
			this.m_stratum = stratum;
		}
		/// <summary>
		/// Walks the input in the given direction and, for each LHS match, applies the
		/// first subrule whose environment matches.
		/// </summary>
		/// <param name="input">The input phonetic shape.</param>
		/// <param name="dir">The direction in which matches are searched.</param>
		/// <param name="subrules">The candidate subrules, tried in list order.</param>
		void ApplyIterative(PhoneticShape input, Direction dir, List<Subrule> subrules)
		{
			PhoneticShapeNode searchStart = input.GetFirst(dir);
			Match lhsMatch;
			while (FindNextMatchLHS(searchStart, dir, out lhsMatch))
			{
				IList<PhoneticShapeNode> matchedNodes = lhsMatch.EntireMatch;
				VariableValues vars = lhsMatch.VariableValues;

				bool applied = false;
				foreach (Subrule sr in subrules)
				{
					// an empty LHS is checked against the first matched node only
					bool envMatches;
					if (m_lhs.Count == 0)
						envMatches = sr.MatchEnvEmpty(matchedNodes[0], dir, ModeType.SYNTHESIS, vars);
					else
						envMatches = sr.MatchEnvNonempty(matchedNodes, dir, ModeType.SYNTHESIS, vars);

					if (!envMatches)
						continue;

					sr.ApplyRHS(dir, matchedNodes, vars);
					applied = true;
					break;
				}

				// after an application continue past the whole match, otherwise
				// continue just past its first node
				int resumeIndex = applied ? matchedNodes.Count - 1 : 0;
				searchStart = matchedNodes[resumeIndex].GetNext(dir);
			}
		}
		/// <summary>
		/// Generates a string representation of the specified phonetic shape. Each segment
		/// contributes the string of its first matching segment definition (nothing if there
		/// is none); nodes that are neither segments nor boundaries contribute nothing.
		/// </summary>
		/// <param name="shape">The phonetic shape.</param>
		/// <param name="mode">The mode.</param>
		/// <param name="includeBdry">if <c>true</c> boundary markers will be included in the
		/// string representation.</param>
		/// <returns>The string representation.</returns>
		public string ToString(PhoneticShape shape, ModeType mode, bool includeBdry)
		{
			StringBuilder text = new StringBuilder();
			foreach (PhoneticShapeNode node in shape)
			{
				PhoneticShapeNode.NodeType type = node.Type;
				if (type == PhoneticShapeNode.NodeType.SEGMENT)
				{
					Segment seg = node as Segment;
					IList<SegmentDefinition> defs = GetMatchingSegmentDefinitions(seg, mode);
					if (defs.Count > 0)
						text.Append(defs[0].StrRep);
				}
				else if (includeBdry && type == PhoneticShapeNode.NodeType.BOUNDARY)
				{
					Boundary bdry = node as Boundary;
					text.Append(bdry.BoundaryDefinition.StrRep);
				}
			}
			return text.ToString();
		}
		/// <summary>
		/// Converts the specified phonetic shape to a regular expression string. Regular expressions
		/// formatted for display purposes are NOT guaranteed to compile.
		/// </summary>
		/// <remarks>
		/// Segments with several matching definitions are wrapped in <c>[...]</c> for display or in
		/// a <c>(...|...)</c> alternation for compilation; multi-character strings are parenthesized;
		/// optional segments and all boundaries are followed by <c>?</c>; margins become <c>^</c> and
		/// <c>$</c> in compilation format only.
		/// </remarks>
		/// <param name="shape">The phonetic shape.</param>
		/// <param name="mode">The mode.</param>
		/// <param name="displayFormat">if <c>true</c> the result will be formatted for display, otherwise
		/// it will be formatted for compilation.</param>
		/// <returns>The regular expression string.</returns>
		public string ToRegexString(PhoneticShape shape, ModeType mode, bool displayFormat)
		{
			StringBuilder regex = new StringBuilder();
			foreach (PhoneticShapeNode node in shape)
			{
				PhoneticShapeNode.NodeType type = node.Type;
				if (type == PhoneticShapeNode.NodeType.SEGMENT)
				{
					Segment seg = node as Segment;
					IList<SegmentDefinition> defs = GetMatchingSegmentDefinitions(seg, mode);
					int defCount = defs.Count;
					if (defCount == 0)
						continue;

					bool severalDefs = defCount > 1;
					if (severalDefs)
						regex.Append(displayFormat ? "[" : "(");

					for (int i = 0; i < defCount; i++)
					{
						string rep = defs[i].StrRep;
						bool needsGroup = rep.Length > 1;

						if (needsGroup)
							regex.Append("(");
						regex.Append(displayFormat ? rep : Regex.Escape(rep));
						if (needsGroup)
							regex.Append(")");

						// alternatives are only separated in compilation format
						if (!displayFormat && i < defCount - 1)
							regex.Append("|");
					}

					if (severalDefs)
						regex.Append(displayFormat ? "]" : ")");

					if (seg.IsOptional)
						regex.Append("?");
				}
				else if (type == PhoneticShapeNode.NodeType.BOUNDARY)
				{
					Boundary bdry = node as Boundary;
					bool needsGroup = bdry.BoundaryDefinition.StrRep.Length > 1;

					if (needsGroup)
						regex.Append("(");
					if (displayFormat)
						regex.Append(bdry.BoundaryDefinition.StrRep);
					else
						regex.Append(Regex.Escape(bdry.BoundaryDefinition.StrRep));
					if (needsGroup)
						regex.Append(")");

					// boundaries are always emitted as optional
					regex.Append("?");
				}
				else if (type == PhoneticShapeNode.NodeType.MARGIN)
				{
					if (displayFormat)
						continue;

					Margin margin = node as Margin;
					regex.Append(margin.MarginType == Direction.LEFT ? "^" : "$");
				}
			}
			return regex.ToString();
		}
		/// <summary>
		/// Converts the specified string to a phonetic shape. At each position the longest
		/// remaining substring is tried first and shortened until a phonetic shape node is found.
		/// </summary>
		/// <param name="str">The string.</param>
		/// <param name="mode">The mode.</param>
		/// <returns>
		/// The phonetic shape, enclosed in a left and a right margin; <c>null</c> if at some
		/// position no substring yields a node, or if adding a node to the shape raises an
		/// <see cref="InvalidOperationException"/>.
		/// </returns>
		public PhoneticShape ToPhoneticShape(string str, ModeType mode)
		{
			PhoneticShape result = new PhoneticShape();
			result.Add(new Margin(Direction.LEFT));

			int pos = 0;
			while (pos < str.Length)
			{
				// shrink the candidate substring until it maps to a node
				PhoneticShapeNode found = null;
				int len = str.Length - pos;
				while (len > 0)
				{
					found = GetPhoneticShapeNode(str.Substring(pos, len), mode);
					if (found != null)
						break;
					len--;
				}

				if (found == null)
					return null;

				try
				{
					result.Add(found);
				}
				catch (InvalidOperationException)
				{
					return null;
				}
				pos += len;
			}

			result.Add(new Margin(Direction.RIGHT));
			return result;
		}
示例#13
0
		/// <summary>
		/// Creates segments from an LHS partition pattern and appends them to the output.
		/// Nested patterns are expanded recursively; boundary contexts are skipped.
		/// </summary>
		/// <param name="lhs">The LHS partition pattern.</param>
		/// <param name="output">The shape that receives the new segments.</param>
		/// <param name="optional">The value assigned to <c>IsOptional</c> on each segment created
		/// directly from this pattern.</param>
		/// <param name="instantiatedVars">The variable values passed to <c>UnapplyDeletion</c>.</param>
		void Untruncate(PhoneticPattern lhs, PhoneticShape output, bool optional, VariableValues instantiatedVars)
		{
			foreach (PhoneticPatternNode patNode in lhs)
			{
				PhoneticPatternNode.NodeType type = patNode.Type;
				if (type == PhoneticPatternNode.NodeType.SIMP_CTXT)
				{
					SimpleContext simpleCtxt = patNode as SimpleContext;
					Segment restored = simpleCtxt.UnapplyDeletion(instantiatedVars);
					restored.IsOptional = optional;
					output.Add(restored);
				}
				else if (type == PhoneticPatternNode.NodeType.PATTERN)
				{
					NestedPhoneticPattern nested = patNode as NestedPhoneticPattern;
					// expand the nested pattern the maximum number of times it can occur;
					// repetitions beyond the minimum number of occurrences are marked optional
					int occurrence = 0;
					while (occurrence < nested.MaxOccur)
					{
						Untruncate(nested.Pattern, output, occurrence >= nested.MinOccur, instantiatedVars);
						occurrence++;
					}
				}
				// boundary contexts (and any other node type) contribute nothing
			}
		}
示例#14
0
		/// <summary>
		/// Unapplies this transform to the specified partition in the specified match,
		/// appending the result to the output. A partition with no matched nodes is
		/// untruncated from the corresponding LHS pattern instead.
		/// </summary>
		/// <param name="match">The match.</param>
		/// <param name="partition">The partition.</param>
		/// <param name="output">The output.</param>
		public void Unapply(Match match, int partition, PhoneticShape output)
		{
			IList<PhoneticShapeNode> matchedNodes = match.GetPartition(partition);
			if (matchedNodes == null || matchedNodes.Count == 0)
			{
				// nothing was matched for this partition: untruncate it
				Untruncate(m_lhs[partition], output, false, match.VariableValues);
				return;
			}

			// look up the modify-from context for this partition, if there is one
			SimpleContext modifyFrom;
			bool hasModifyFrom = m_modifyFromCtxts.TryGetValue(partition, out modifyFrom)
				&& modifyFrom != null;

			foreach (PhoneticShapeNode matched in matchedNodes)
			{
				PhoneticShapeNode.NodeType type = matched.Type;
				if (type == PhoneticShapeNode.NodeType.SEGMENT)
				{
					// work on a copy of the matched segment
					Segment copy = new Segment(matched as Segment);
					if (hasModifyFrom)
						modifyFrom.Unapply(copy, match.VariableValues);
					output.Add(copy);
				}
				else if (type == PhoneticShapeNode.NodeType.BOUNDARY)
				{
					output.Add(matched.Clone());
				}
			}
		}
示例#15
0
		/// <summary>
		/// Creates an <see cref="InsertSegments"/> instance that holds the given phonetic shape.
		/// </summary>
		/// <param name="pshape">The phonetic shape; stored as given, without cloning.</param>
		public InsertSegments(PhoneticShape pshape)
		{
			this.m_pshape = pshape;
		}
			/// <summary>
			/// Unapplies a narrowing (deletion) subrule: all RHS matches are collected first,
			/// then after each match an optional segment is inserted for every simple context
			/// in the rule's LHS.
			/// </summary>
			/// <param name="input">The input phonetic shape.</param>
			/// <returns>
			/// 	<c>true</c> if at least one match was found, otherwise <c>false</c>.
			/// </returns>
			bool UnapplyNarrow(PhoneticShape input)
			{
				// deletion subrules are always treated like simultaneous subrules during
				// unapplication, so every match is gathered before the shape is modified
				List<Match> found = new List<Match>();
				Match next;
				for (PhoneticShapeNode start = input.First;
					FindNextMatchRHS(start, Direction.RIGHT, out next);
					start = next.EntireMatch[0].Next)
				{
					found.Add(next);
				}

				for (int k = 0; k < found.Count; k++)
				{
					Match m = found[k];

					// new segments are chained after the last node of the match
					PhoneticShapeNode insertAfter = m.EntireMatch[m.EntireMatch.Count - 1];
					foreach (PhoneticPatternNode lhsNode in m_rule.m_lhs)
					{
						if (lhsNode.Type == PhoneticPatternNode.NodeType.SIMP_CTXT)
						{
							SimpleContext simpleCtxt = lhsNode as SimpleContext;
							Segment restored = simpleCtxt.UnapplyDeletion(m.VariableValues);
							// the undeleted segment is only optionally present
							restored.IsOptional = true;
							insertAfter.Insert(restored, Direction.RIGHT);
							insertAfter = restored;
						}
					}

					// with a non-empty analysis target the matched nodes become optional too
					if (m_analysisTarget.Count > 0)
					{
						foreach (PhoneticShapeNode matchedNode in m.EntireMatch)
						{
							matchedNode.IsOptional = true;
						}
					}
				}

				return found.Count > 0;
			}
		/// <summary>
		/// Applies each subrule in two phases: first all LHS matches whose environment
		/// satisfies the subrule are collected, then the subrule's RHS is applied to each
		/// collected match.
		/// </summary>
		/// <param name="input">The input phonetic shape.</param>
		/// <param name="subrules">The subrules, processed in list order.</param>
		void ApplySimultaneous(PhoneticShape input, List<Subrule> subrules)
		{
			foreach (Subrule sr in subrules)
			{
				// phase 1: gather every match accepted by this subrule's environment
				List<Match> accepted = new List<Match>();
				Match candidate;
				PhoneticShapeNode searchStart = input.First;
				while (FindNextMatchLHS(searchStart, Direction.RIGHT, out candidate))
				{
					IList<PhoneticShapeNode> span = candidate.EntireMatch;
					VariableValues vars = candidate.VariableValues;

					// an empty LHS is checked against the first matched node only
					bool envMatches;
					if (m_lhs.Count == 0)
						envMatches = sr.MatchEnvEmpty(span[0], Direction.RIGHT, ModeType.SYNTHESIS, vars);
					else
						envMatches = sr.MatchEnvNonempty(span, Direction.RIGHT, ModeType.SYNTHESIS, vars);

					if (!envMatches)
					{
						// rejected: retry starting just past the first matched node
						searchStart = span[0].Next;
						continue;
					}

					accepted.Add(candidate);
					searchStart = span[span.Count - 1].Next;
				}

				// phase 2: apply the changes
				for (int k = 0; k < accepted.Count; k++)
				{
					Match m = accepted[k];
					sr.ApplyRHS(Direction.RIGHT, m.EntireMatch, m.VariableValues);
				}
			}
		}
		/// <summary>
		/// Determines whether the specified word matches the specified phonetic shape. The
		/// shape is converted to a compilable regular expression in synthesis mode and the
		/// word is tested against it, ignoring case and using the invariant culture.
		/// </summary>
		/// <param name="word">The word.</param>
		/// <param name="shape">The phonetic shape.</param>
		/// <returns>
		/// 	<c>true</c> if the word matches the shape, otherwise <c>false</c>.
		/// </returns>
		public virtual bool IsMatch(string word, PhoneticShape shape)
		{
			const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;
			string shapeRegex = ToRegexString(shape, ModeType.SYNTHESIS, false);
			bool isMatch = Regex.IsMatch(word, shapeRegex, options);
			return isMatch;
		}
示例#19
0
		/// <summary>
		/// Initializes a new instance of the <see cref="WordAnalysisTrace"/> class.
		/// </summary>
		/// <param name="inputWord">The input word; stored on the trace.</param>
		/// <param name="inputShape">The input shape; stored as given, without cloning.</param>
		internal WordAnalysisTrace(string inputWord, PhoneticShape inputShape)
		{
			m_inputWord = inputWord;
			m_inputShape = inputShape;
		}
示例#20
0
		/// <summary>
		/// Walks the input in the given direction and reorders every match of the
		/// specified pattern.
		/// </summary>
		/// <param name="input">The input phonetic shape.</param>
		/// <param name="dir">The direction in which matches are searched.</param>
		/// <param name="ptemp">The pattern passed to the match search.</param>
		/// <param name="mode">The mode.</param>
		/// <returns>
		/// 	<c>true</c> if at least one match was reordered, otherwise <c>false</c>.
		/// </returns>
		bool ProcessIterative(PhoneticShape input, Direction dir, PhoneticPattern ptemp, ModeType mode)
		{
			int reorderedCount = 0;
			Match curMatch;
			PhoneticShapeNode searchStart = input.GetFirst(dir);
			while (FindNextMatch(searchStart, dir, ptemp, mode, out curMatch))
			{
				Reorder(dir, curMatch);
				reorderedCount++;

				// the matched nodes are read after reordering; the search resumes past the last one
				IList<PhoneticShapeNode> matched = curMatch.EntireMatch;
				PhoneticShapeNode lastMatched = matched[matched.Count - 1];
				searchStart = lastMatched.GetNext(dir);
			}

			return reorderedCount > 0;
		}
示例#21
0
			/// <summary>
			/// Unapplies the transform to every partition of the specified match, sending
			/// partitions below <c>m_firstNonHeadPartition</c> to the head shape and the
			/// remaining partitions to the non-head shape. Both shapes are enclosed in a
			/// left and a right margin.
			/// </summary>
			/// <param name="match">The match.</param>
			/// <param name="headShape">Receives the newly built head shape.</param>
			/// <param name="nonHeadShape">Receives the newly built non-head shape.</param>
			void UnapplyRHS(Match match, out PhoneticShape headShape, out PhoneticShape nonHeadShape)
			{
				headShape = new PhoneticShape();
				headShape.Add(new Margin(Direction.LEFT));
				nonHeadShape = new PhoneticShape();
				nonHeadShape.Add(new Margin(Direction.LEFT));

				// route each partition's unapplied material to the shape it belongs to
				int partition = 0;
				while (partition < m_transform.PartitionCount)
				{
					if (partition < m_firstNonHeadPartition)
						m_transform.Unapply(match, partition, headShape);
					else
						m_transform.Unapply(match, partition, nonHeadShape);
					partition++;
				}

				headShape.Add(new Margin(Direction.RIGHT));
				nonHeadShape.Add(new Margin(Direction.RIGHT));
			}
示例#22
0
			/// <summary>
			/// Creates a root allomorph. The id, description and morpher are forwarded to the
			/// base constructor; the phonetic shape is stored on this instance.
			/// </summary>
			/// <param name="id">The id.</param>
			/// <param name="desc">The description.</param>
			/// <param name="morpher">The morpher.</param>
			/// <param name="shape">The phonetic shape; stored as given, without cloning.</param>
			public RootAllomorph(string id, string desc, Morpher morpher, PhoneticShape shape)
				: base(id, desc, morpher)
			{
				this.m_shape = shape;
			}