/// <summary>
/// Parses an admonition paragraph starting at the reader's current line and adds it to
/// <paramref name="container"/>.
/// </summary>
/// <param name="container">Receives the resulting <c>Admonition</c>.</param>
/// <param name="reader">Line reader positioned on the admonition's first line.</param>
/// <param name="predicate">Not used by this parser.</param>
/// <param name="buffer">Collects the admonition text lines before they are turned into a paragraph.</param>
/// <param name="attributes">Attributes applied to the admonition; set to <c>null</c> afterwards.</param>
/// <exception cref="ArgumentException">The current line does not match the admonition pattern.</exception>
public override void InternalParse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    var admonitionMatch = PatternMatcher.Admonition.Match(reader.Line);
    if (admonitionMatch.Success == false)
    {
        throw new ArgumentException("not an admonition");
    }

    // The first line contributes only its "text" group; following lines are taken whole
    // until the input ends or a line matching BlankCharacters is reached.
    buffer.Add(admonitionMatch.Groups["text"].Value);
    for (reader.ReadLine();
         reader.Line != null && !PatternMatcher.BlankCharacters.IsMatch(reader.Line);
         reader.ReadLine())
    {
        buffer.Add(reader.Line);
    }

    var style = admonitionMatch.Groups["style"].Value.ToEnum<AdmonitionStyle>();
    var admonition = new Admonition(style);
    admonition.Attributes.Add(attributes);
    ProcessParagraph(admonition, ref buffer);
    container.Add(admonition);

    // The pending attributes now belong to the admonition.
    attributes = null;
    reader.ReadLine();
}
/// <summary>
/// Parses a block attribute line and merges what it declares into <paramref name="attributes"/>,
/// then advances the reader by one line.
/// </summary>
/// <param name="container">Not used by this parser.</param>
/// <param name="reader">Line reader positioned on the attribute line.</param>
/// <param name="predicate">Not used by this parser.</param>
/// <param name="buffer">Not used by this parser.</param>
/// <param name="attributes">Pending attribute list; created when <c>null</c> and then extended.</param>
/// <exception cref="ArgumentException">The current line does not match the element attribute pattern.</exception>
public override void InternalParse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    var attributeMatch = PatternMatcher.ElementAttribute.Match(reader.Line);
    if (!attributeMatch.Success)
    {
        throw new ArgumentException("not a block attribute");
    }

    var rawAttributes = attributeMatch.Groups["attributes"].Value.Trim();
    attributes = attributes ?? new AttributeList();

    // No comma: the whole value is a single attribute.
    if (rawAttributes.IndexOf(",", StringComparison.Ordinal) < 0)
    {
        if (rawAttributes == "float")
        {
            attributes.IsFloating = true;
        }
        else if (rawAttributes == "discrete")
        {
            attributes.IsDiscrete = true;
        }
        else
        {
            attributes.Add(ParseElementAttributesWithPosition(rawAttributes, 0));
        }

        reader.ReadLine();
        return;
    }

    var inputs = SplitOnCharacterOutsideQuotes(rawAttributes);

    // quote/verse lists are stored as plain attributes rather than parsed by position.
    var firstInput = inputs[0];
    if (firstInput == "quote" || firstInput == "verse")
    {
        foreach (var input in inputs)
        {
            attributes.Add(new Attribute(input, true));
        }

        reader.ReadLine();
        return;
    }

    // Every other list: parse each entry together with its position and fold the results
    // into the pending list.
    attributes = inputs
        .Select(ParseElementAttributesWithPosition)
        .Aggregate(attributes, (first, second) => first.Add(second));
    reader.ReadLine();
}
/// <summary>
/// Reads every line from <paramref name="reader"/> and collects the distinct lowercased tokens
/// and their stems, leaving out anything <c>stopwords.Exists</c> reports.
/// </summary>
/// <param name="reader">Source of lines; read until <c>EndOfFile()</c> returns true.</param>
/// <returns>The set of accepted tokens and stems.</returns>
public HashSet<string> Process(IDocumentReader reader)
{
    var vocabulary = new HashSet<string>();

    while (!reader.EndOfFile())
    {
        var loweredLine = reader.ReadLine().ToLower();
        if (loweredLine == string.Empty)
        {
            continue;
        }

        // line -> sentences -> tokens
        foreach (var sentence in textProcessor.GetSentences(loweredLine))
        {
            foreach (var token in textProcessor.Tokenize(sentence))
            {
                // the word as written
                var keepToken = !vocabulary.Contains(token) && !stopwords.Exists(token);
                if (keepToken)
                {
                    vocabulary.Add(token);
                }

                // the stemmed form of the word
                var stem = textProcessor.Stem(token);
                var keepStem = !vocabulary.Contains(stem) && !stopwords.Exists(stem);
                if (keepStem)
                {
                    vocabulary.Add(stem);
                }
            }
        }
    }

    return vocabulary;
}
/// <summary>
/// Parses a block title line and adds the resulting <c>Title</c> to the pending attribute list,
/// then advances the reader by one line.
/// </summary>
/// <param name="container">Not used by this parser.</param>
/// <param name="reader">Line reader positioned on the title line.</param>
/// <param name="predicate">Not used by this parser.</param>
/// <param name="buffer">Not used by this parser.</param>
/// <param name="attributes">Pending attribute list; created when <c>null</c>.</param>
/// <exception cref="ArgumentException">The current line does not match the title pattern.</exception>
public override void InternalParse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    var match = PatternMatcher.Title.Match(reader.Line);
    if (!match.Success)
    {
        throw new ArgumentException("not a block title");
    }

    var title = new Title(match.Groups["title"].Value);
    if (attributes != null)
    {
        attributes.Add(title);
    }
    else
    {
        // The statement terminator belongs directly after the collection initializer.
        attributes = new AttributeList { title };
    }

    reader.ReadLine();
}
} // NOTE(review): unmatched within this excerpt - presumably closes the enclosing type; confirm.
/// <summary>
/// Parses an anchor line and adds the resulting <c>Anchor</c> to the pending attribute list,
/// then advances the reader by one line.
/// </summary>
/// <param name="container">Not used by this parser.</param>
/// <param name="reader">Line reader positioned on the anchor line.</param>
/// <param name="predicate">Not used by this parser.</param>
/// <param name="buffer">Not used by this parser.</param>
/// <param name="attributes">Pending attribute list; created when <c>null</c>.</param>
/// <exception cref="ArgumentException">The current line does not match the anchor pattern.</exception>
public void Parse(
    Container container,
    IDocumentReader reader,
    Func<string, bool> predicate,
    ref List<string> buffer,
    ref AttributeList attributes)
{
    var anchorMatch = PatternMatcher.Anchor.Match(reader.Line);
    if (!anchorMatch.Success)
    {
        throw new ArgumentException("not an anchor");
    }

    // An empty "reference" group is passed on as null rather than as an empty string.
    var referenceText = anchorMatch.Groups["reference"].Value;
    string reference = null;
    if (!string.IsNullOrEmpty(referenceText))
    {
        reference = referenceText;
    }

    var anchor = new Anchor(anchorMatch.Groups["id"].Value, reference);

    if (attributes == null)
    {
        attributes = new AttributeList { anchor };
    }
    else
    {
        attributes.Add(anchor);
    }

    reader.ReadLine();
}
/// <summary>
/// Parses an include line into an <c>Include</c> element, applies the recognised
/// name=value attributes to it and adds it to <paramref name="container"/>.
/// </summary>
/// <param name="container">Receives the resulting <c>Include</c>.</param>
/// <param name="reader">Line reader positioned on the include line.</param>
/// <param name="predicate">Not used by this parser.</param>
/// <param name="buffer">Not used by this parser.</param>
/// <param name="attributes">Set to <c>null</c> once the include has been added.</param>
/// <exception cref="ArgumentException">The current line does not match the include pattern.</exception>
/// <exception cref="NotImplementedException">A name=value attribute with an unrecognised name is present.</exception>
public override void InternalParse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    var includeMatch = PatternMatcher.Include.Match(reader.Line);
    if (!includeMatch.Success)
    {
        throw new ArgumentException("not an include");
    }

    var include = new Include(includeMatch.Groups["path"].Value);
    var attributesValue = includeMatch.Groups["attributes"].Value;

    if (!string.IsNullOrEmpty(attributesValue))
    {
        foreach (var attributeValue in SplitOnCharacterOutsideQuotes(attributesValue))
        {
            var pair = PatternMatcher.AttributeNameValue.Match(attributeValue);
            if (!pair.Success)
            {
                // Entries that are not name=value pairs are ignored.
                continue;
            }

            var name = pair.Groups["name"].Value.ToLowerInvariant();
            var value = pair.Groups["value"].Value;

            if (name == "leveloffset")
            {
                // A non-numeric value leaves LevelOffset untouched.
                int offset;
                if (int.TryParse(value, out offset))
                {
                    include.LevelOffset = offset;
                }
            }
            else if (name == "lines")
            {
                include.Lines = value;
            }
            else if (name == "tag" || name == "tags")
            {
                include.Tags = value;
            }
            else if (name == "indent")
            {
                // A non-numeric value leaves Indent untouched.
                int indent;
                if (int.TryParse(value, out indent))
                {
                    include.Indent = indent;
                }
            }
            else
            {
                throw new NotImplementedException("TODO: add attribute to include attribute list");
            }
        }
    }

    container.Add(include);
    attributes = null;
    reader.ReadLine();
}
/// <summary>
/// Turns the reader's current line into a <c>Comment</c>, adds it to
/// <paramref name="container"/> and advances the reader by one line.
/// </summary>
/// <param name="container">Receives the resulting <c>Comment</c>.</param>
/// <param name="reader">Line reader positioned on the comment line.</param>
/// <param name="predicate">Not used by this parser.</param>
/// <param name="buffer">Not used by this parser.</param>
/// <param name="attributes">Not used by this parser.</param>
public override void InternalParse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    // The first two characters of the line are dropped; the remainder is the comment text.
    var commentText = reader.Line.Substring(2);
    container.Add(new Comment(commentText));
    reader.ReadLine();
}
/// <summary>
/// Parses the document title and, when another line follows it, the authors line.
/// </summary>
/// <param name="container">Must be a <c>Document</c>; it is cast without a check.</param>
/// <param name="reader">Line reader positioned on the document title.</param>
/// <param name="predicate">Not used by this parser.</param>
/// <param name="buffer">Not used by this parser.</param>
/// <param name="attributes">Passed by reference to <c>ParseDocumentTitle</c>.</param>
public void Parse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    var document = (Document)container;

    ParseDocumentTitle(document, reader, ref attributes);
    reader.ReadLine();

    // Authors are only parsed when there is a line after the title.
    if (reader.Line != null)
    {
        ParseAuthors(document, reader.Line);
        reader.ReadLine();
    }
}
/// <summary>
/// Parses a <typeparamref name="TElement"/> text block and adds it to <paramref name="container"/>.
/// When the current line matches the element's delimiter, lines are collected up to the next
/// delimiter line; otherwise lines are collected up to the next line matching BlankCharacters.
/// </summary>
/// <param name="container">Receives the resulting element.</param>
/// <param name="reader">Line reader positioned on the block's first line.</param>
/// <param name="predicate">Not used by this parser.</param>
/// <param name="buffer">Collects the block's lines; replaced with a fresh list afterwards.</param>
/// <param name="attributes">Attributes applied to the element; set to <c>null</c> afterwards.</param>
public override void InternalParse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    var delimiter = PatternMatcher.GetDelimiterRegexFor<TElement>();

    if (!delimiter.IsMatch(reader.Line))
    {
        // Undelimited form: the current line is already content.
        buffer.Add(reader.Line);
        for (reader.ReadLine();
             reader.Line != null && !PatternMatcher.BlankCharacters.IsMatch(reader.Line);
             reader.ReadLine())
        {
            buffer.Add(reader.Line);
        }
    }
    else
    {
        // Delimited form: whatever is buffered is processed as a paragraph first,
        // using a null attribute list rather than the pending attributes.
        AttributeList noAttributes = null;
        ProcessParagraph(container, ref buffer, ref noAttributes);
        for (reader.ReadLine();
             reader.Line != null && !delimiter.IsMatch(reader.Line);
             reader.ReadLine())
        {
            buffer.Add(reader.Line);
        }
    }

    var element = new TElement();
    element.Text = string.Join(Environment.NewLine, buffer);
    element.Attributes.Add(attributes);
    container.Add(element);

    attributes = null;
    buffer = new List<string>(8);
    reader.ReadLine();
}
/// <summary>
/// Reads every line from <paramref name="reader"/> and returns the distinct lowercased lines.
/// </summary>
/// <param name="reader">Source of lines; read until <c>EndOfFile()</c> returns true.</param>
/// <returns>The set of lowercased lines.</returns>
public HashSet<string> Process(IDocumentReader reader)
{
    var res = new HashSet<string>();

    while (!reader.EndOfFile())
    {
        var word = reader.ReadLine();
        if (word == null)
        {
            // NOTE(review): a null line is treated as end of input instead of throwing a
            // NullReferenceException - confirm against the IDocumentReader contract.
            break;
        }

        // HashSet.Add ignores duplicates, so no Contains guard is needed. The previous
        // guard also tested the raw-cased word while the lowercased form was stored.
        res.Add(word.ToLower());
    }

    return res;
}
/// <summary>
/// Parses a <typeparamref name="TElement"/> container block and adds it to
/// <paramref name="container"/>. A block opened by its delimiter is parsed up to that
/// delimiter; otherwise it is parsed up to BlankCharacters.
/// </summary>
/// <param name="container">Receives the resulting element.</param>
/// <param name="reader">Line reader positioned on the block's first line.</param>
/// <param name="predicate">Not used by this parser.</param>
/// <param name="buffer">Buffered lines, processed as a paragraph before the block content is parsed.</param>
/// <param name="attributes">Attributes applied to the element; set to <c>null</c> afterwards.</param>
public override void Parse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    var delimiterRegex = PatternMatcher.GetDelimiterRegexFor<TElement>();

    var element = new TElement();
    element.Attributes.Add(attributes);

    if (!delimiterRegex.IsMatch(reader.Line))
    {
        // Undelimited: buffered lines become a paragraph inside the new element.
        ProcessParagraph(element, ref buffer);
        Parse(element, reader, PatternMatcher.BlankCharacters);
    }
    else
    {
        // Delimited: buffered lines become a paragraph in the outer container, and the
        // opening delimiter line is skipped.
        ProcessParagraph(container, ref buffer);
        reader.ReadLine();
        Parse(element, reader, delimiterRegex);
    }

    container.Add(element);
    attributes = null;
    reader.ReadLine();
}
// TODO: based on html output, a section title should define a section block element into which
// all subsequent elements should be added, until the next section title is hit.
/// <summary>
/// Parses a section title line into a <c>SectionTitle</c> and adds it to
/// <paramref name="container"/>, then advances the reader by one line.
/// </summary>
/// <param name="container">Receives the resulting <c>SectionTitle</c>.</param>
/// <param name="reader">Line reader positioned on the section title line.</param>
/// <param name="predicate">Not used by this parser.</param>
/// <param name="buffer">Not used by this parser.</param>
/// <param name="attributes">Attributes applied to the section title; set to <c>null</c> afterwards.</param>
/// <exception cref="ArgumentException">The current line does not match the section title pattern.</exception>
public override void InternalParse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    var match = PatternMatcher.SectionTitle.Match(reader.Line);
    if (!match.Success)
    {
        throw new ArgumentException("not a section title");
    }

    var title = match.Groups["title"].Value;
    var inlineElements = ProcessInlineElements(title);

    // The level is the length of the matched "level" group.
    var level = match.Groups["level"].Value.Length;

    var sectionTitle = new SectionTitle(inlineElements, level);
    sectionTitle.Attributes.Add(attributes);
    container.Add(sectionTitle);

    attributes = null;
    reader.ReadLine();
}
/// <summary>
/// Drives parsing from the reader's current line: each line goes to the first entry of
/// <c>Parsers</c> whose <c>IsMatch</c> accepts it, or into <paramref name="buffer"/> when none does.
/// </summary>
/// <param name="container">Container that parsed elements are added to.</param>
/// <param name="reader">Line reader; parsing continues until its <c>Line</c> is <c>null</c>.</param>
/// <param name="predicate">Optional stop condition; when it accepts the current line the buffer is
/// processed as a paragraph and the method returns without consuming that line.</param>
/// <param name="buffer">Lines not yet claimed by any parser.</param>
/// <param name="attributes">Pending attributes passed through to the parsers.</param>
protected void DescendingParse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    while (reader.Line != null)
    {
        if (predicate != null && predicate(reader.Line))
        {
            ProcessParagraph(container, ref buffer, ref attributes);
            return;
        }

        // Find the first parser that accepts the current line.
        var parserIndex = 0;
        while (parserIndex < Parsers.Length && !Parsers[parserIndex].IsMatch(reader, container, attributes))
        {
            parserIndex++;
        }

        if (parserIndex < Parsers.Length)
        {
            // The reader is not advanced here after a parser has handled the line.
            Parsers[parserIndex].Parse(container, reader, predicate, ref buffer, ref attributes);
            continue;
        }

        // No parser matched: a blank line processes the buffer, any other line is buffered.
        if (PatternMatcher.BlankCharacters.IsMatch(reader.Line))
        {
            ProcessBuffer(container, ref buffer, ref attributes);
        }
        else
        {
            buffer.Add(reader.Line);
        }

        reader.ReadLine();
    }

    // End of input: process whatever is still buffered.
    ProcessBuffer(container, ref buffer, ref attributes);
}
/// <summary>
/// Parses the current line as an attribute entry and stores it, then advances the reader by one line.
/// </summary>
/// <param name="container">Target container. For a <c>Document</c> with a <c>Count</c> of zero the entry
/// goes into the document's <c>Attributes</c>; otherwise it is added to the container itself.</param>
/// <param name="reader">Line reader positioned on the attribute entry line.</param>
/// <param name="predicate">Not used by this parser.</param>
/// <param name="buffer">Not used by this parser.</param>
/// <param name="attributes">Not used by this parser.</param>
public override void InternalParse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    var attributeEntry = ParseAttributeEntry(reader.Line);
    var document = container as Document;

    if (document == null)
    {
        container.Add(attributeEntry);
    }
    else if (document.Count == 0)
    {
        document.Attributes.Add(attributeEntry);
    }
    else
    {
        document.Add(attributeEntry);
    }

    reader.ReadLine();
}
/// <summary>
/// Parses a check list item, including continuation content attached to it, and appends it to the
/// trailing <c>UnorderedList</c> of <paramref name="container"/> or to a newly created list.
/// </summary>
/// <param name="container">Receives the item, via an existing or a new <c>UnorderedList</c>.</param>
/// <param name="reader">Line reader positioned on the check list item line.</param>
/// <param name="predicate">Optional stop condition supplied by the caller; a line it accepts ends the item.</param>
/// <param name="buffer">Collects the item's text lines.</param>
/// <param name="attributes">Attributes applied to the item; <c>null</c> on return.</param>
/// <exception cref="ArgumentException">The current line does not match the check list item pattern.</exception>
public override void InternalParse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    var match = PatternMatcher.CheckListItem.Match(reader.Line);
    if (!match.Success)
    {
        throw new ArgumentException("not a check list item");
    }

    var level = match.Groups["level"].Value;
    var isChecked = !string.IsNullOrWhiteSpace(match.Groups["checked"].Value);
    var text = match.Groups["text"].Value;

    var listItem = new CheckListItem(level.Length, isChecked);
    listItem.Attributes.Add(attributes);

    buffer.Add(text);
    reader.ReadLine();
    attributes = null;

    // The item ends at a blank line, the next (check) list item, or a line the caller's predicate
    // accepts. Continuation lines must NOT end the loop: the condition used to exclude them,
    // which made the continuation branch below unreachable.
    while (reader.Line != null &&
           !PatternMatcher.BlankCharacters.IsMatch(reader.Line) &&
           !PatternMatcher.CheckListItem.IsMatch(reader.Line) &&
           !PatternMatcher.ListItem.IsMatch(reader.Line) &&
           (predicate == null || !predicate(reader.Line)))
    {
        if (PatternMatcher.ListItemContinuation.IsMatch(reader.Line))
        {
            // Process the text gathered so far, skip the continuation marker and parse the
            // attached content into the item.
            ProcessBuffer(listItem, ref buffer, ref attributes);
            reader.ReadLine();
            DescendingParse(
                listItem,
                reader,
                line => PatternMatcher.BlankCharacters.IsMatch(line) ||
                        PatternMatcher.ListItem.IsMatch(line) ||
                        PatternMatcher.CheckListItem.IsMatch(line),
                ref buffer,
                ref attributes);
        }
        else
        {
            buffer.Add(reader.Line);
            reader.ReadLine();
        }
    }

    ProcessBuffer(listItem, ref buffer, ref attributes);

    // Join the container's last element when it is a non-empty UnorderedList whose first item
    // has the same level; otherwise start a new list.
    var unorderedList = container.Count > 0
        ? container[container.Count - 1] as UnorderedList
        : null;

    if (unorderedList != null &&
        unorderedList.Items.Count > 0 &&
        unorderedList.Items[0].Level == listItem.Level)
    {
        unorderedList.Items.Add(listItem);
    }
    else
    {
        container.Add(new UnorderedList { Items = { listItem } });
    }

    attributes = null;
}
/// <summary>
/// Parses a labeled list item, including continuation content attached to it, and appends it to the
/// trailing <c>LabeledList</c> of <paramref name="container"/> or to a newly created list.
/// </summary>
/// <param name="container">Receives the item, via an existing or a new <c>LabeledList</c>.</param>
/// <param name="reader">Line reader positioned on the labeled list item line.</param>
/// <param name="predicate">Optional stop condition supplied by the caller; a line it accepts ends the item.</param>
/// <param name="buffer">Collects the item's text lines.</param>
/// <param name="attributes">Attributes applied to the item; <c>null</c> on return.</param>
/// <exception cref="ArgumentException">The current line does not match the labeled list item pattern.</exception>
public override void InternalParse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    var itemMatch = PatternMatcher.LabeledListItem.Match(reader.Line);
    if (!itemMatch.Success)
    {
        throw new ArgumentException("not a labeled list item");
    }

    var label = itemMatch.Groups["label"].Value;

    // levels start at 0: a non-empty "level" group has 2 subtracted from its length
    var markerLength = itemMatch.Groups["level"].Value.Length;
    var level = markerLength > 0 ? markerLength - 2 : markerLength;

    var labeledListItem = new LabeledListItem(label, level);
    labeledListItem.Attributes.Add(attributes);
    attributes = null;

    // Labeled lists are lenient with whitespace: the content may start on a later line,
    // so blank lines directly after a text-less label are skipped.
    var text = itemMatch.Groups["text"].Value;
    if (string.IsNullOrWhiteSpace(text))
    {
        reader.ReadLine();
        while (reader.Line != null && PatternMatcher.BlankCharacters.IsMatch(reader.Line))
        {
            reader.ReadLine();
        }
    }
    else
    {
        buffer.Add(text);
        reader.ReadLine();
    }

    // Stop condition for content attached through a continuation line.
    Func<string, bool> endsAttachedContent = line =>
        PatternMatcher.BlankCharacters.IsMatch(line) ||
        PatternMatcher.LabeledListItem.IsMatch(line);

    while (true)
    {
        if (reader.Line == null ||
            PatternMatcher.BlankCharacters.IsMatch(reader.Line) ||
            PatternMatcher.LabeledListItem.IsMatch(reader.Line) ||
            (predicate != null && predicate(reader.Line)))
        {
            break;
        }

        if (!PatternMatcher.ListItemContinuation.IsMatch(reader.Line))
        {
            buffer.Add(reader.Line);
            reader.ReadLine();
            continue;
        }

        // Continuation: process the gathered text, skip the marker line and parse the
        // attached content into the item.
        ProcessBuffer(labeledListItem, ref buffer, ref attributes);
        reader.ReadLine();
        DescendingParse(labeledListItem, reader, endsAttachedContent, ref buffer, ref attributes);
    }

    ProcessBuffer(labeledListItem, ref buffer, ref attributes);

    // Join the container's last element when it is a non-empty LabeledList whose first item
    // has the same level; otherwise start a new list.
    LabeledList currentList = null;
    if (container.Count > 0)
    {
        currentList = container[container.Count - 1] as LabeledList;
    }

    var joinsCurrentList = currentList != null &&
                           currentList.Items.Count > 0 &&
                           currentList.Items[0].Level == labeledListItem.Level;
    if (joinsCurrentList)
    {
        currentList.Items.Add(labeledListItem);
    }
    else
    {
        container.Add(new LabeledList { Items = { labeledListItem } });
    }

    attributes = null;
}
/// <summary>
/// Advances the reader by one line and then passes all arguments on to <c>DescendingParse</c>.
/// </summary>
/// <param name="container">Forwarded to <c>DescendingParse</c>.</param>
/// <param name="reader">Line reader; one line is read before parsing starts.</param>
/// <param name="predicate">Forwarded to <c>DescendingParse</c>.</param>
/// <param name="buffer">Forwarded by reference to <c>DescendingParse</c>.</param>
/// <param name="attributes">Forwarded by reference to <c>DescendingParse</c>.</param>
public override void Parse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer, ref AttributeList attributes) { reader.ReadLine(); DescendingParse(container, reader, predicate, ref buffer, ref attributes); }
/// <summary>
/// Parses a media line into an <c>Image</c>, <c>Video</c> or <c>Audio</c> element, applies its
/// attributes and adds it to <paramref name="container"/>.
/// </summary>
/// <param name="container">Receives the resulting media element.</param>
/// <param name="reader">Line reader positioned on the media line.</param>
/// <param name="predicate">Not used by this parser.</param>
/// <param name="buffer">Not used by this parser.</param>
/// <param name="attributes">Attributes applied to the media element; set to <c>null</c> afterwards.</param>
/// <exception cref="ArgumentException">The line does not match the media pattern, or the media type is not
/// image, video or audio.</exception>
/// <exception cref="NotImplementedException">A name=value attribute with an unrecognised name is present.</exception>
public override void InternalParse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    var mediaMatch = PatternMatcher.Media.Match(reader.Line);
    if (!mediaMatch.Success)
    {
        throw new ArgumentException("not a media");
    }

    var path = mediaMatch.Groups["path"].Value;
    var mediaType = mediaMatch.Groups["media"].Value.ToLowerInvariant();

    Media media;
    if (mediaType == "image")
    {
        media = new Image(path);
    }
    else if (mediaType == "video")
    {
        media = new Video(path);
    }
    else if (mediaType == "audio")
    {
        media = new Audio(path);
    }
    else
    {
        throw new ArgumentException("unrecognized media type");
    }

    media.Attributes.Add(attributes);

    int? width = null;
    int? height = null;

    var attributesValue = mediaMatch.Groups["attributes"].Value;
    if (!string.IsNullOrEmpty(attributesValue))
    {
        var attributeValues = SplitOnCharacterOutsideQuotes(attributesValue);
        for (var position = 0; position < attributeValues.Length; position++)
        {
            var attributeValue = attributeValues[position];

            // Position 0 is always the alternate text.
            if (position == 0)
            {
                media.AlternateText = attributeValue;
                continue;
            }

            // Positions 1 and 2 are width and height when they are integers; when they are not,
            // they fall through and are treated like any other entry.
            int dimension;
            if (position == 1 && int.TryParse(attributeValue, out dimension))
            {
                width = dimension;
                continue;
            }

            if (position == 2 && int.TryParse(attributeValue, out dimension))
            {
                height = dimension;
                continue;
            }

            var namedAttribute = PatternMatcher.AttributeNameValue.Match(attributeValue);
            if (!namedAttribute.Success)
            {
                continue;
            }

            var value = namedAttribute.Groups["value"].Value;
            switch (namedAttribute.Groups["name"].Value.ToLowerInvariant())
            {
                case "link":
                    media.Link = value;
                    break;
                case "title":
                    media.Title = value;
                    break;
                case "float":
                    media.Float = value;
                    break;
                case "align":
                    media.Align = value;
                    break;
                case "role":
                    media.Role = value;
                    break;
                default:
                    throw new NotImplementedException("TODO: add attribute to media attribute list");
            }
        }
    }

    // Dimensions are applied only when both were supplied.
    if (width.HasValue && height.HasValue)
    {
        media.SetWidthAndHeight(width.Value, height.Value);
    }

    container.Add(media);
    attributes = null;
    reader.ReadLine();
}
/// <summary>
/// Parses an ordered list item, including its number and numbering style and any continuation
/// content, and appends it to the trailing <c>OrderedList</c> of <paramref name="container"/> or to a
/// newly created list.
/// </summary>
/// <param name="container">Receives the item, via an existing or a new <c>OrderedList</c>.</param>
/// <param name="reader">Line reader positioned on the ordered list item line.</param>
/// <param name="predicate">Optional stop condition supplied by the caller; a line it accepts ends the item.</param>
/// <param name="buffer">Collects the item's text lines.</param>
/// <param name="attributes">Attributes applied to the item; <c>null</c> on return.</param>
/// <exception cref="ArgumentException">The current line does not match the ordered list item pattern.</exception>
public override void InternalParse(Container container, IDocumentReader reader, Func<string, bool> predicate, ref List<string> buffer, ref AttributeList attributes)
{
    var itemMatch = PatternMatcher.OrderedListItem.Match(reader.Line);
    if (!itemMatch.Success)
    {
        throw new ArgumentException("not an ordered list item");
    }

    var orderedListItem = new OrderedListItem(itemMatch.Groups["level"].Value.Length);
    orderedListItem.Attributes.Add(attributes);

    var number = itemMatch.Groups["number"].Value;
    var upperAlpha = itemMatch.Groups["upperalpha"].Value;
    var lowerAlpha = itemMatch.Groups["loweralpha"].Value;
    var upperRoman = itemMatch.Groups["upperRoman"].Value;
    var lowerRoman = itemMatch.Groups["lowerRoman"].Value;

    // The first non-empty group decides the number. A plain number leaves Numbering untouched;
    // when every group is empty neither property is assigned.
    if (number.Length > 0)
    {
        orderedListItem.Number = int.Parse(number);
    }
    else if (upperAlpha.Length > 0)
    {
        orderedListItem.Numbering = NumberStyle.UpperAlpha;
        orderedListItem.Number = Array.IndexOf(Patterns.UpperAlphabet, upperAlpha) + 1;
    }
    else if (lowerAlpha.Length > 0)
    {
        orderedListItem.Numbering = NumberStyle.LowerAlpha;
        orderedListItem.Number = Array.IndexOf(Patterns.LowerAlphabet, lowerAlpha) + 1;
    }
    else if (upperRoman.Length > 0)
    {
        orderedListItem.Numbering = NumberStyle.UpperRoman;
        orderedListItem.Number = RomanNumerals.ToInt(upperRoman);
    }
    else if (lowerRoman.Length > 0)
    {
        orderedListItem.Numbering = NumberStyle.LowerRoman;
        orderedListItem.Number = RomanNumerals.ToInt(lowerRoman);
    }

    buffer.Add(itemMatch.Groups["text"].Value);
    reader.ReadLine();
    attributes = null;

    // Stop condition for content attached through a continuation line.
    Func<string, bool> endsAttachedContent = line =>
        PatternMatcher.BlankCharacters.IsMatch(line) ||
        PatternMatcher.OrderedListItem.IsMatch(line);

    while (true)
    {
        if (reader.Line == null ||
            PatternMatcher.BlankCharacters.IsMatch(reader.Line) ||
            PatternMatcher.OrderedListItem.IsMatch(reader.Line) ||
            (predicate != null && predicate(reader.Line)))
        {
            break;
        }

        if (!PatternMatcher.ListItemContinuation.IsMatch(reader.Line))
        {
            buffer.Add(reader.Line);
            reader.ReadLine();
            continue;
        }

        // Continuation: process the gathered text, skip the marker line and parse the
        // attached content into the item.
        ProcessBuffer(orderedListItem, ref buffer, ref attributes);
        reader.ReadLine();
        DescendingParse(orderedListItem, reader, endsAttachedContent, ref buffer, ref attributes);
    }

    ProcessBuffer(orderedListItem, ref buffer, ref attributes);

    // Join the container's last element when it is a non-empty OrderedList whose first item
    // has the same level; otherwise start a new list.
    OrderedList currentList = null;
    if (container.Count > 0)
    {
        currentList = container[container.Count - 1] as OrderedList;
    }

    var joinsCurrentList = currentList != null &&
                           currentList.Items.Count > 0 &&
                           currentList.Items[0].Level == orderedListItem.Level;
    if (joinsCurrentList)
    {
        currentList.Items.Add(orderedListItem);
    }
    else
    {
        container.Add(new OrderedList { Items = { orderedListItem } });
    }

    attributes = null;
}