/// <summary>
/// Searches the question text for known timezone names (and abbreviations, for
/// timezones whose <c>TokenizeOnAbbreviation</c> flag is set) and adds a
/// <c>TimezoneToken</c> to the question for every occurrence found.
/// </summary>
/// <param name="question">
/// The question whose <c>QuestionText</c> is searched and which receives the tokens.
/// </param>
public void TokenizeTheQuestion(Question question)
{
    // Build a single alternation group "(name1|abbr1|name2|...)" covering every timezone.
    var timezoneRegex = new StringBuilder("(");
    foreach (var timezone in Timezones.GetAllTimezones())
    {
        string[] candidates = timezone.TokenizeOnAbbreviation
            ? new[] { timezone.Name, timezone.Abbreviation }
            : new[] { timezone.Name };

        foreach (var candidate in candidates)
        {
            // An empty alternative would match the empty string and turn every
            // delimiter in the question into a bogus token, so skip it.
            if (string.IsNullOrEmpty(candidate))
            {
                continue;
            }

            // Anything beyond the opening "(" means an alternative was already written.
            if (timezoneRegex.Length > 1)
            {
                timezoneRegex.Append("|");
            }

            // Names are literal text, not patterns: escape them so characters such as
            // '+', '(' or '.' cannot change the meaning of the expression.
            timezoneRegex.Append(Regex.Escape(candidate));
        }
    }

    // Nothing to look for; an empty group "()" would match at every delimiter.
    if (timezoneRegex.Length == 1)
    {
        return;
    }

    timezoneRegex.Append(")");

    // A timezone must be delimited by start/whitespace on the left and by
    // whitespace, a comma or end of text on the right.
    var pattern = @"(^|\s)" + timezoneRegex + @"(\s|$|,)";
    var matches = Regex.Matches(question.QuestionText, pattern, RegexOptions.IgnoreCase);

    foreach (Match match in matches)
    {
        // Group 0 is the whole match, so the token value and index include the
        // surrounding delimiter characters that were matched.
        Group group = match.Groups[0];
        question.AddToken(new TimezoneToken(group.Value, group.Index));
    }
}
/// <summary>
/// Looks for the prepositions "in" or "at" (optionally followed by "the") in the
/// question text and treats the text that follows each one as a candidate city
/// name, adding a <c>CityToken</c> for every non-blank candidate.
/// </summary>
/// <param name="question">
/// The question whose <c>QuestionText</c> is searched and which receives the tokens.
/// </param>
public void TokenizeTheQuestion(Question question)
{
    var prepositions = Regex.Matches(question.QuestionText, @"(^|\s)(in|at)\s(the\s)?", RegexOptions.IgnoreCase);

    foreach (Match preposition in prepositions)
    {
        var text = question.QuestionText;

        // The candidate starts right after the matched preposition.
        var startPosition = preposition.Index + preposition.Length;
        var remainder = text.Substring(startPosition);

        // By default the candidate runs to the end of the question text.
        var candidateLength = remainder.Length;

        // Words or symbols that end the candidate early; positions are relative to the remainder.
        var terminators = Regex.Matches(remainder, @"(^|\s)(when|what('?s?)|\d|(on|in|at|a|right|now|if|then|\?)(\s|$))", RegexOptions.IgnoreCase);
        if (terminators.Count > 0)
        {
            Group terminator = GetEarlierOccurrenceOfGroup(terminators);
            candidateLength = terminator.Index;
        }

        var cityName = text.Substring(startPosition, candidateLength);

        // Only candidates that contain something other than spaces become tokens.
        if (cityName.Replace(" ", "").Length > 0)
        {
            question.AddToken(new CityToken(cityName, startPosition));
        }
    }
}
/// <summary>
/// Adds a <c>LiteralDateOrTimeToken</c> to the question for each standalone
/// occurrence of the word "date" or "time" (case-insensitive) in its text.
/// </summary>
/// <param name="question">
/// The question whose <c>QuestionText</c> is searched and which receives the tokens.
/// </param>
public void TokenizeTheQuestion(Question question)
{
    // The word must be preceded by start-of-text or whitespace and followed by
    // whitespace or end-of-text.
    var occurrences = Regex.Matches(question.QuestionText, @"(^|\s)(date|time)(\s|$)", RegexOptions.IgnoreCase);

    foreach (Match occurrence in occurrences)
    {
        // The match itself is group 0, so its value and index include any
        // surrounding whitespace that was matched.
        question.AddToken(new LiteralDateOrTimeToken(occurrence.Value, occurrence.Index));
    }
}
/// <summary>
/// Searches the question text for references to daylight saving time, either
/// spelled out ("daylight saving", "day light savings time", ...) or abbreviated
/// ("dst", "dst time"), and adds a <c>DaylightSavingsToken</c> for each match.
/// </summary>
/// <param name="question">
/// The question whose <c>QuestionText</c> is searched and which receives the tokens.
/// </param>
public void TokenizeTheQuestion(Question question)
{
    var daylightSavingsTimeRegex = @"(day(\s)?light)\ssaving('s|s)?\s(time('s|s)?)?";
    var dstRegex = @"dst(\stime)?\s";

    // Both alternatives are wrapped in one group so that the leading (^|\s)
    // boundary applies to each of them. Without that group the alternation splits
    // the whole pattern and "dst" is matched anywhere, even inside another word.
    var pattern = string.Format(@"(^|\s)(({0})|({1}))", daylightSavingsTimeRegex, dstRegex);
    var matches = Regex.Matches(question.QuestionText, pattern, RegexOptions.IgnoreCase);

    foreach (Match match in matches)
    {
        // Group 0 is the whole match, so the token value and index include the
        // leading whitespace (if any) for both alternatives.
        Group group = match.Groups[0];
        var token = new DaylightSavingsToken(group.Value, group.Index);
        question.AddToken(token);
    }
}