/// <summary>
/// Builds a measure token from the named groups of a regular-expression match.
/// </summary>
/// <param name="s">Text handed to the <c>MeasureToken</c> constructor as its first
/// argument (presumably the matched surface text; confirm against the caller).</param>
/// <param name="groups">Match groups; the groups "unit", "sep", "sdec", "gdec",
/// "sign" and "frac" are read by name.</param>
/// <returns>A <c>MeasureToken</c> built from the group values and the unit found for
/// the "unit" text in <c>_Culture</c>.</returns>
protected override Core.Tokenization.Token CreateToken(string s, System.Text.RegularExpressions.GroupCollection groups)
{
    string unit = groups["unit"].Value;
    string sep = groups["sep"].Value;

    // The decimal part is taken from "sdec" and falls back to "gdec" when "sdec" is empty.
    string decimalPart = groups["sdec"].Value;
    if (String.IsNullOrEmpty(decimalPart))
    {
        decimalPart = groups["gdec"].Value;
    }

    // Only the first character of the separator text is kept; '\0' means "no separator".
    char separator = (sep.Length > 0) ? sep[0] : '\0';

    Core.Tokenization.Unit u = Core.Tokenization.PhysicalUnit.Find(unit, _Culture);

    return new Core.Tokenization.MeasureToken(
        s,
        groups["sign"].Value,
        decimalPart,
        groups["frac"].Value,
        u,
        unit,
        separator,
        _Culture.NumberFormat);
}
// NOTE: parsing does not yet support canonical number FSTs, since the culture
// is not inspected for primary/secondary separators. The only source for
// determining whether primary or alternate separators are used is the FST
// output. This, however, will always be the alternate separator indicator for
// canonical recognizers.

/// <summary>
/// Builds a measure token from a surface string and its parallel FST output string.
/// </summary>
/// <remarks>
/// The first 'U' in <paramref name="output"/> is used as the split position: everything
/// before it is parsed as the number, any white space in <paramref name="surface"/> at
/// that position is skipped, and the rest of <paramref name="surface"/> is the unit text.
/// </remarks>
/// <param name="surface">The text to parse; expected to be non-null and the same
/// length as <paramref name="output"/> (checked by a debug assertion only).</param>
/// <param name="output">The FST output aligned character-by-character with
/// <paramref name="surface"/>.</param>
/// <returns>A <c>MeasureToken</c> combining the parsed number, the unit found for the
/// unit text, and the first white-space character between them ('\0' if none).</returns>
/// <exception cref="FormatException"><paramref name="output"/> contains no 'U', or
/// starts with one (leaving no numeric part).</exception>
private Core.Tokenization.MeasureToken Parse(string surface, string output)
{
    System.Diagnostics.Debug.Assert(surface != null && output != null && surface.Length == output.Length);

    int sep = output.IndexOf('U');
    if (sep <= 0)
    {
        // FormatException derives from Exception, so existing catch (Exception) handlers
        // still apply.
        throw new FormatException("Invalid measurement format");
    }

    string numericSurface = surface.Substring(0, sep);
    string numericOutput = output.Substring(0, sep);

    // Skip white space between the number and the unit, remembering the first
    // white-space character as the unit separator.
    char unitSeparator = '\0';
    while (sep < surface.Length && Char.IsWhiteSpace(surface[sep]))
    {
        if (unitSeparator == '\0')
        {
            unitSeparator = surface[sep];
        }
        ++sep;
    }

    string unitPart = surface.Substring(sep);

    NumberToken nt = NumberFSTRecognizer.ParseNumber(numericSurface, numericOutput);
    Core.Tokenization.Unit u = Core.Tokenization.PhysicalUnit.Find(unitPart, _FSTRecognizer.Culture);

    return new Core.Tokenization.MeasureToken(surface, nt, u, unitPart, unitSeparator);
}