/// <summary>
/// Registers a culture entry built from the culture of <paramref name="rules"/>,
/// the given base URI and the rules themselves.
/// </summary>
/// <param name="baseUri">Base URI stored alongside the rules.</param>
/// <param name="rules">Rules to register; their <c>Culture</c> identifies the entry.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="baseUri"/> or <paramref name="rules"/> is <c>null</c>.
/// </exception>
public void Add(Uri baseUri, QuoteCollectorRules rules)
{
    // checking preconditions (reported separately so the caller learns which argument was null)
    if (baseUri == null)
    {
        throw new ArgumentNullException("baseUri");
    }

    if (rules == null)
    {
        throw new ArgumentNullException("rules");
    }

    _dataCollection.Add(new CultureData(rules.Culture, baseUri, rules));
}
/// <summary>
/// Registers <paramref name="rules"/> under a base URI derived by formatting
/// <c>WikiquoteBaseUrl</c> with the two-letter ISO language name of the rules' culture.
/// </summary>
/// <param name="rules">Rules to register.</param>
/// <exception cref="ArgumentNullException"><paramref name="rules"/> is <c>null</c>.</exception>
public void Add(QuoteCollectorRules rules)
{
    // checking preconditions
    if (rules == null)
    {
        throw new ArgumentNullException("rules");
    }

    string languageCode = rules.Culture.TwoLetterISOLanguageName;
    Uri baseUri = new Uri(String.Format(WikiquoteBaseUrl, languageCode));

    // delegate to the overload that takes an explicit base URI
    Add(baseUri, rules);
}
/// <summary>
/// Builds the list of topic choices found in <paramref name="contents"/> by applying
/// the disambiguation template regex of <paramref name="rules"/>.
/// </summary>
/// <param name="contents">Page text to scan.</param>
/// <param name="rules">Rules supplying <c>DisambiguationTemplateRegex</c>.</param>
/// <returns>
/// A deferred LINQ query yielding one <see cref="TopicChoice"/> per match that has a
/// topic id not starting with <c>WikipediaLinkPrefix</c>.
/// </returns>
private static IEnumerable<TopicChoice> GetTopicChoices(string contents, QuoteCollectorRules rules)
{
    var returnValue =
        from Match match in rules.DisambiguationTemplateRegex.Matches(contents)
        // matches without a topic id cannot be turned into a choice
        where match.Groups[TopicIdRegexGroup].Success
        let topicId = HttpUtility.HtmlDecode(match.Groups[TopicIdRegexGroup].Value)
        // the name falls back to the id when the name group did not participate
        let topicName = match.Groups[TopicNameRegexGroup].Success
            ? HttpUtility.HtmlDecode(match.Groups[TopicNameRegexGroup].Value)
            : topicId
        let topicDescription = match.Groups[TopicDescriptionRegexGroup].Success
            ? HttpUtility.HtmlDecode(ReplaceMiscSequences(match.Groups[TopicDescriptionRegexGroup].Value))
            : null
        // the prefix is a machine identifier, so compare ordinally rather than by current culture
        where !topicId.StartsWith(WikipediaLinkPrefix, StringComparison.Ordinal)
        select new TopicChoice(topicId, topicName, topicDescription);

    return returnValue;
}
/// <summary>
/// Extracts quotes from the given page contents using the rules mapped to
/// <paramref name="culture"/>.
/// </summary>
/// <param name="topic">Topic name, passed on to <see cref="TopicAmbiguousException"/>.</param>
/// <param name="culture">Culture used to look up the rules and to build the result collection.</param>
/// <param name="contents">Raw page contents to parse.</param>
/// <param name="oldQuotes">Previously known quotes, forwarded to <c>GetQuote</c>.</param>
/// <returns>The quotes matched by the rules' quote regex, as a selectable quote collection.</returns>
/// <exception cref="TopicAmbiguousException">
/// The contents contain one of the rules' disambiguation template identifiers.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The rules mapped to <paramref name="culture"/> are missing or are not <see cref="QuoteCollectorRules"/>.
/// </exception>
private SelectableQuoteCollection ParsePageContentsAndExtractQuotes(string topic, CultureInfo culture, string contents, SelectableQuoteCollection oldQuotes)
{
    QuoteCollectorRules rules = _cultureMapper[culture].Rules as QuoteCollectorRules;
    if (rules == null)
    {
        // fail with a descriptive error instead of a NullReferenceException further down
        throw new InvalidOperationException("The rules mapped to the requested culture are missing or are not QuoteCollectorRules.");
    }

    // a disambiguation page is reported to the caller together with the available choices
    var disambiguationIdentifiers = rules.DisambiguationTemplateIdentifiers;
    if (disambiguationIdentifiers != null && rules.DisambiguationTemplateRegex != null)
    {
        for (int i = 0; i < disambiguationIdentifiers.Length; i++)
        {
            if (contents.Contains(disambiguationIdentifiers[i]))
            {
                throw new TopicAmbiguousException(topic, GetTopicChoices(contents, rules));
            }
        }
    }

    // removing sections that do not contain quotes (only if required, i.e. quotes have to be determined heuristically)
    if (rules.WikiSectionsToSkipRegex != null)
    {
        contents = rules.WikiSectionsToSkipRegex.Replace(contents, String.Empty);
    }

    // replacing xhtml tags by their contents
    contents = ReplaceXhtmlTags(contents);

    // various string replacements
    contents = HttpUtility.HtmlDecode(ReplaceMiscSequences(contents));

    // extracting quotes
    MatchCollection quoteMatches = rules.QuoteRegex.Matches(contents);
    var newlineRegex = rules.NewlineRegex;

    var quotes =
        from Match match in quoteMatches
        where match.Groups[QuoteRegexGroup].Success
        let quoteHelper = match.Groups[QuoteRegexGroup].Value
        let quote = (newlineRegex != null) ? newlineRegex.Replace(quoteHelper, "\n") : quoteHelper
        let additionalGroup = match.Groups[AdditionalInformationRegexGroup]
        // the newline regex is optional, so it is guarded here exactly as for the quote text
        let additionalInformation = !additionalGroup.Success
            ? String.Empty
            : ((newlineRegex != null) ? newlineRegex.Replace(additionalGroup.Value, " ") : additionalGroup.Value)
        select GetQuote(oldQuotes, quote.TrimEnd(), additionalInformation.TrimEnd());

    return quotes.ToSelectableQuoteCollection(culture);
}
/// <summary>
/// Creates an entry that stores a culture together with its base URI and rules.
/// </summary>
/// <param name="cultureInfo">Culture stored in the entry.</param>
/// <param name="uri">Base URI stored in the entry.</param>
/// <param name="rules">Rules stored in the entry.</param>
public CultureData(CultureInfo cultureInfo, Uri uri, QuoteCollectorRules rules)
{
    // arguments are stored as given; no validation happens here
    _rules = rules;
    _baseUri = uri;
    _cultureInfo = cultureInfo;
}