Example #1
0
        /// <summary>
        /// Parses an admonition that starts on the reader's current line and adds it to <paramref name="container"/>.
        /// </summary>
        /// <param name="container">Receives the new <c>Admonition</c>.</param>
        /// <param name="reader">Reader whose current line must match <c>PatternMatcher.Admonition</c>.</param>
        /// <param name="predicate">Not used by this parser.</param>
        /// <param name="buffer">Collects the admonition's text lines before they are passed to <c>ProcessParagraph</c>.</param>
        /// <param name="attributes">Attributes added to the admonition; set to <c>null</c> afterwards.</param>
        /// <exception cref="ArgumentException">The current line does not match the admonition pattern.</exception>
        public override void InternalParse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                                           ref AttributeList attributes)
        {
            var admonitionMatch = PatternMatcher.Admonition.Match(reader.Line);

            if (!admonitionMatch.Success)
            {
                throw new ArgumentException("not an admonition");
            }

            // The first text line comes from the matched line itself; every following line is
            // taken verbatim until the input ends or a line matches PatternMatcher.BlankCharacters.
            buffer.Add(admonitionMatch.Groups["text"].Value);
            for (reader.ReadLine();
                 reader.Line != null && !PatternMatcher.BlankCharacters.IsMatch(reader.Line);
                 reader.ReadLine())
            {
                buffer.Add(reader.Line);
            }

            var style      = admonitionMatch.Groups["style"].Value.ToEnum <AdmonitionStyle>();
            var admonition = new Admonition(style);

            admonition.Attributes.Add(attributes);
            ProcessParagraph(admonition, ref buffer);
            container.Add(admonition);

            // The attributes now belong to the admonition.
            attributes = null;

            // Move past the line that ended the loop above.
            reader.ReadLine();
        }
Example #2
0
        /// <summary>
        /// Parses a block attribute line and merges what it describes into <paramref name="attributes"/>.
        /// </summary>
        /// <param name="container">Not used by this parser.</param>
        /// <param name="reader">Reader whose current line must match <c>PatternMatcher.ElementAttribute</c>.</param>
        /// <param name="predicate">Not used by this parser.</param>
        /// <param name="buffer">Not used by this parser.</param>
        /// <param name="attributes">The list to extend; a new list is created when it is <c>null</c>.</param>
        /// <exception cref="ArgumentException">The current line does not match the element attribute pattern.</exception>
        public override void InternalParse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                                           ref AttributeList attributes)
        {
            var attributeMatch = PatternMatcher.ElementAttribute.Match(reader.Line);

            if (!attributeMatch.Success)
            {
                throw new ArgumentException("not a block attribute");
            }

            var rawAttributes = attributeMatch.Groups["attributes"].Value.Trim();

            if (attributes == null)
            {
                attributes = new AttributeList();
            }

            // No comma: the whole value is a single entry.
            if (!rawAttributes.Contains(","))
            {
                if (rawAttributes == "float")
                {
                    attributes.IsFloating = true;
                }
                else if (rawAttributes == "discrete")
                {
                    attributes.IsDiscrete = true;
                }
                else
                {
                    attributes.Add(ParseElementAttributesWithPosition(rawAttributes, 0));
                }

                reader.ReadLine();
                return;
            }

            var parts = SplitOnCharacterOutsideQuotes(rawAttributes);

            if (parts[0] == "quote" || parts[0] == "verse")
            {
                // For quote and verse every part, including the first, is stored as an Attribute
                // constructed with a 'true' second argument.
                foreach (var part in parts)
                {
                    attributes.Add(new Attribute(part, true));
                }
            }
            else
            {
                // Each part is parsed together with its zero-based position and folded into the list.
                for (var position = 0; position < parts.Length; position++)
                {
                    attributes = attributes.Add(ParseElementAttributesWithPosition(parts[position], position));
                }
            }

            reader.ReadLine();
        }
Example #3
0
        /// <summary>
        /// Reads <paramref name="reader"/> to the end and collects every token, and the stem of every token,
        /// that <c>stopwords.Exists</c> does not report.
        /// </summary>
        /// <param name="reader">Source of the lines to process; each line is lower-cased before use.</param>
        /// <returns>The set of distinct collected terms.</returns>
        public HashSet <string> Process(IDocumentReader reader)
        {
            var terms = new HashSet <string>();

            while (!reader.EndOfFile())
            {
                var lowered = reader.ReadLine().ToLower();
                if (lowered == string.Empty)
                {
                    continue;
                }

                // line -> sentences -> tokens
                foreach (var sentence in textProcessor.GetSentences(lowered))
                {
                    foreach (var token in textProcessor.Tokenize(sentence))
                    {
                        // the word itself
                        if (!(terms.Contains(token) || stopwords.Exists(token)))
                        {
                            terms.Add(token);
                        }

                        // the stemmed form of the word
                        var stem = textProcessor.Stem(token);
                        if (!(terms.Contains(stem) || stopwords.Exists(stem)))
                        {
                            terms.Add(stem);
                        }
                    }
                }
            }

            return terms;
        }
Example #4
0
        /// <summary>
        /// Parses a block title line and stores the resulting <c>Title</c> in <paramref name="attributes"/>.
        /// </summary>
        /// <param name="container">Not used by this parser.</param>
        /// <param name="reader">Reader whose current line must match <c>PatternMatcher.Title</c>.</param>
        /// <param name="predicate">Not used by this parser.</param>
        /// <param name="buffer">Not used by this parser.</param>
        /// <param name="attributes">
        /// The list that receives the title; when it is <c>null</c> a new list containing only the title is assigned.
        /// </param>
        /// <exception cref="ArgumentException">The current line does not match the title pattern.</exception>
        public override void InternalParse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                                           ref AttributeList attributes)
        {
            var match = PatternMatcher.Title.Match(reader.Line);

            if (!match.Success)
            {
                throw new ArgumentException("not a block title");
            }

            var title = new Title(match.Groups["title"].Value);

            if (attributes != null)
            {
                attributes.Add(title);
            }
            else
            {
                // The terminating semicolon belongs to this assignment, inside the else block.
                attributes = new AttributeList {
                    title
                };
            }

            reader.ReadLine();
        }
    }
Example #5
0
        /// <summary>
        /// Parses an anchor line and stores the resulting <c>Anchor</c> in <paramref name="attributes"/>.
        /// </summary>
        /// <param name="container">Not used by this parser.</param>
        /// <param name="reader">Reader whose current line must match <c>PatternMatcher.Anchor</c>.</param>
        /// <param name="predicate">Not used by this parser.</param>
        /// <param name="buffer">Not used by this parser.</param>
        /// <param name="attributes">
        /// The list that receives the anchor; when it is <c>null</c> a new list containing only the anchor is assigned.
        /// </param>
        /// <exception cref="ArgumentException">The current line does not match the anchor pattern.</exception>
        public void Parse(
            Container container,
            IDocumentReader reader,
            Func <string, bool> predicate,
            ref List <string> buffer,
            ref AttributeList attributes)
        {
            var anchorMatch = PatternMatcher.Anchor.Match(reader.Line);

            if (!anchorMatch.Success)
            {
                throw new ArgumentException("not an anchor");
            }

            // An empty "reference" group is passed on as null rather than as an empty string.
            var referenceText = anchorMatch.Groups["reference"].Value;
            var anchor        = new Anchor(
                anchorMatch.Groups["id"].Value,
                string.IsNullOrEmpty(referenceText) ? null : referenceText);

            if (attributes == null)
            {
                attributes = new AttributeList {
                    anchor
                };
            }
            else
            {
                attributes.Add(anchor);
            }

            reader.ReadLine();
        }
Example #6
0
        /// <summary>
        /// Parses an include line, applies its recognised name/value attributes, and adds the resulting
        /// <c>Include</c> to <paramref name="container"/>.
        /// </summary>
        /// <param name="container">Receives the new <c>Include</c>.</param>
        /// <param name="reader">Reader whose current line must match <c>PatternMatcher.Include</c>.</param>
        /// <param name="predicate">Not used by this parser.</param>
        /// <param name="buffer">Not used by this parser.</param>
        /// <param name="attributes">Set to <c>null</c> once the include has been added.</param>
        /// <exception cref="ArgumentException">The current line does not match the include pattern.</exception>
        /// <exception cref="NotImplementedException">A name/value attribute has a name this parser does not handle.</exception>
        public override void InternalParse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                                           ref AttributeList attributes)
        {
            var includeMatch = PatternMatcher.Include.Match(reader.Line);

            if (!includeMatch.Success)
            {
                throw new ArgumentException("not an include");
            }

            var include       = new Include(includeMatch.Groups["path"].Value);
            var rawAttributes = includeMatch.Groups["attributes"].Value;

            if (!string.IsNullOrEmpty(rawAttributes))
            {
                foreach (var candidate in SplitOnCharacterOutsideQuotes(rawAttributes))
                {
                    var pair = PatternMatcher.AttributeNameValue.Match(candidate);
                    if (!pair.Success)
                    {
                        // Entries that are not name/value pairs are skipped.
                        continue;
                    }

                    var name  = pair.Groups["name"].Value.ToLowerInvariant();
                    var value = pair.Groups["value"].Value;

                    if (name == "leveloffset")
                    {
                        // A value that is not an integer leaves LevelOffset untouched.
                        if (int.TryParse(value, out var offset))
                        {
                            include.LevelOffset = offset;
                        }
                    }
                    else if (name == "lines")
                    {
                        include.Lines = value;
                    }
                    else if (name == "tag" || name == "tags")
                    {
                        include.Tags = value;
                    }
                    else if (name == "indent")
                    {
                        // A value that is not an integer leaves Indent untouched.
                        if (int.TryParse(value, out var indent))
                        {
                            include.Indent = indent;
                        }
                    }
                    else
                    {
                        throw new NotImplementedException("TODO: add attribute to include attribute list");
                    }
                }
            }

            container.Add(include);
            attributes = null;

            reader.ReadLine();
        }
        /// <summary>
        /// Adds a <c>Comment</c> built from the reader's current line, without its first two characters,
        /// to <paramref name="container"/> and advances the reader.
        /// </summary>
        /// <param name="container">Receives the new <c>Comment</c>.</param>
        /// <param name="reader">Reader positioned on the comment line.</param>
        /// <param name="predicate">Not used by this parser.</param>
        /// <param name="buffer">Not used by this parser.</param>
        /// <param name="attributes">Not used by this parser.</param>
        public override void InternalParse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                                           ref AttributeList attributes)
        {
            // Drop the two leading characters of the line (presumably the comment marker; confirm against the matcher).
            var commentText = reader.Line.Substring(2);

            container.Add(new Comment(commentText));
            reader.ReadLine();
        }
        /// <summary>
        /// Parses the document title and, when another line follows it, passes that line to <c>ParseAuthors</c>.
        /// </summary>
        /// <param name="container">Must be a <c>Document</c>; it is cast without a type check.</param>
        /// <param name="reader">Reader positioned on the document title line.</param>
        /// <param name="predicate">Not used by this parser.</param>
        /// <param name="buffer">Not used by this parser.</param>
        /// <param name="attributes">Forwarded to <c>ParseDocumentTitle</c>.</param>
        public void Parse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                          ref AttributeList attributes)
        {
            var document = (Document)container;

            ParseDocumentTitle(document, reader, ref attributes);
            reader.ReadLine();

            // Only look for authors when there is a line after the title.
            if (reader.Line != null)
            {
                ParseAuthors(document, reader.Line);
                reader.ReadLine();
            }
        }
Example #9
0
        /// <summary>
        /// Reads the text of a <typeparamref name="TElement"/>, either up to its closing delimiter or up to the
        /// next line matching <c>PatternMatcher.BlankCharacters</c>, and adds the element to <paramref name="container"/>.
        /// </summary>
        /// <param name="container">Receives the new element.</param>
        /// <param name="reader">Reader positioned on the element's first line.</param>
        /// <param name="predicate">Not used by this parser.</param>
        /// <param name="buffer">Collects the element's lines; replaced by a fresh list before returning.</param>
        /// <param name="attributes">Attributes added to the element; set to <c>null</c> afterwards.</param>
        public override void InternalParse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                                           ref AttributeList attributes)
        {
            var delimiter = PatternMatcher.GetDelimiterRegexFor <TElement>();

            if (delimiter.IsMatch(reader.Line))
            {
                // Hand the current buffer to ProcessParagraph with a null attribute list; the pending
                // attributes are kept for the element created below.
                AttributeList noAttributes = null;
                ProcessParagraph(container, ref buffer, ref noAttributes);

                // Delimited form: take every line up to the next delimiter line.
                for (reader.ReadLine();
                     reader.Line != null && !delimiter.IsMatch(reader.Line);
                     reader.ReadLine())
                {
                    buffer.Add(reader.Line);
                }
            }
            else
            {
                // Undelimited form: the current line is content, followed by every line up to a blank match.
                buffer.Add(reader.Line);
                for (reader.ReadLine();
                     reader.Line != null && !PatternMatcher.BlankCharacters.IsMatch(reader.Line);
                     reader.ReadLine())
                {
                    buffer.Add(reader.Line);
                }
            }

            var element = new TElement();
            element.Text = string.Join(Environment.NewLine, buffer);
            element.Attributes.Add(attributes);

            container.Add(element);

            attributes = null;
            buffer     = new List <string>(8);

            // Move past the line that ended the loop above.
            reader.ReadLine();
        }
Example #10
0
        /// <summary>
        /// Reads <paramref name="reader"/> to the end and returns the distinct lines in lower case.
        /// </summary>
        /// <param name="reader">Source of the lines; one entry is expected per line.</param>
        /// <returns>A set containing the lower-cased form of every line read.</returns>
        public HashSet <string> Process(IDocumentReader reader)
        {
            var res = new HashSet <string>();

            while (!reader.EndOfFile())
            {
                // HashSet<T>.Add ignores values that are already present, so no Contains check is needed.
                // The previous check tested the raw line while the lower-cased form is what gets stored,
                // so it could never filter anything Add would not already reject.
                res.Add(reader.ReadLine().ToLower());
            }

            return res;
        }
Example #11
0
        /// <summary>
        /// Creates a <typeparamref name="TElement"/>, fills it by parsing either up to its closing delimiter or up to
        /// a line matching <c>PatternMatcher.BlankCharacters</c>, and adds it to <paramref name="container"/>.
        /// </summary>
        /// <param name="container">Receives the new element.</param>
        /// <param name="reader">Reader positioned on the element's first line.</param>
        /// <param name="predicate">Not used by this parser.</param>
        /// <param name="buffer">Pending lines, handed to <c>ProcessParagraph</c> before the element body is parsed.</param>
        /// <param name="attributes">Attributes added to the element; set to <c>null</c> afterwards.</param>
        public override void Parse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                                   ref AttributeList attributes)
        {
            var delimiter = PatternMatcher.GetDelimiterRegexFor <TElement>();
            var element   = new TElement();

            element.Attributes.Add(attributes);

            if (!delimiter.IsMatch(reader.Line))
            {
                // Undelimited form: the buffered lines go into the element, which is then parsed
                // from the current line using the blank pattern as the terminator.
                ProcessParagraph(element, ref buffer);
                Parse(element, reader, PatternMatcher.BlankCharacters);
            }
            else
            {
                // Delimited form: the buffered lines go to the container, the opening delimiter is
                // skipped, and the element is parsed using the delimiter as the terminator.
                ProcessParagraph(container, ref buffer);
                reader.ReadLine();
                Parse(element, reader, delimiter);
            }

            container.Add(element);
            attributes = null;

            reader.ReadLine();
        }
Example #12
0
        // TODO: based on html output, a section title should define a section block element into which all proceeding elements should be added, until the next section Title is hit
        /// <summary>
        /// Parses a section title line and adds the resulting <c>SectionTitle</c> to <paramref name="container"/>.
        /// </summary>
        /// <param name="container">Receives the new <c>SectionTitle</c>.</param>
        /// <param name="reader">Reader whose current line must match <c>PatternMatcher.SectionTitle</c>.</param>
        /// <param name="predicate">Not used by this parser.</param>
        /// <param name="buffer">Not used by this parser.</param>
        /// <param name="attributes">Attributes added to the section title; set to <c>null</c> afterwards.</param>
        /// <exception cref="ArgumentException">The current line does not match the section title pattern.</exception>
        public override void InternalParse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                                           ref AttributeList attributes)
        {
            var titleMatch = PatternMatcher.SectionTitle.Match(reader.Line);

            if (!titleMatch.Success)
            {
                throw new ArgumentException("not a section title");
            }

            // The title text is run through ProcessInlineElements; the level is the length of the "level" group.
            var sectionTitle = new SectionTitle(
                ProcessInlineElements(titleMatch.Groups["title"].Value),
                titleMatch.Groups["level"].Value.Length);

            sectionTitle.Attributes.Add(attributes);
            container.Add(sectionTitle);
            attributes = null;

            reader.ReadLine();
        }
        /// <summary>
        /// Consumes lines from <paramref name="reader"/>, giving each line to the first entry of <c>Parsers</c>
        /// that matches it, until the input ends or <paramref name="predicate"/> accepts the current line.
        /// </summary>
        /// <param name="container">The container that parsed content is added to.</param>
        /// <param name="reader">Source of the lines.</param>
        /// <param name="predicate">Optional stop condition evaluated against the current line; may be <c>null</c>.</param>
        /// <param name="buffer">Lines that no parser claimed and that have not been processed yet.</param>
        /// <param name="attributes">Pending attributes, forwarded to the parsers and to the buffer processing.</param>
        protected void DescendingParse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                                       ref AttributeList attributes)
        {
            while (reader.Line != null)
            {
                // Stop condition met: process what is buffered as a paragraph and leave the line unread.
                if (predicate != null && predicate(reader.Line))
                {
                    ProcessParagraph(container, ref buffer, ref attributes);
                    return;
                }

                // Locate the first parser that claims the current line.
                var parserIndex = 0;
                while (parserIndex < Parsers.Length && !Parsers[parserIndex].IsMatch(reader, container, attributes))
                {
                    parserIndex++;
                }

                if (parserIndex < Parsers.Length)
                {
                    // The reader is not advanced here; the parser is handed the reader itself.
                    Parsers[parserIndex].Parse(container, reader, predicate, ref buffer, ref attributes);
                    continue;
                }

                // Nobody claimed the line: a blank match processes the buffer, anything else is buffered.
                if (PatternMatcher.BlankCharacters.IsMatch(reader.Line))
                {
                    ProcessBuffer(container, ref buffer, ref attributes);
                }
                else
                {
                    buffer.Add(reader.Line);
                }

                reader.ReadLine();
            }

            // End of input: process whatever is still buffered.
            ProcessBuffer(container, ref buffer, ref attributes);
        }
        /// <summary>
        /// Parses an attribute entry from the current line and stores it on the document or the container.
        /// </summary>
        /// <param name="container">
        /// Target of the entry. When it is a <c>Document</c> whose <c>Count</c> is zero the entry is added to the
        /// document's attributes; otherwise it is added to the container itself.
        /// </param>
        /// <param name="reader">Reader positioned on the attribute entry line.</param>
        /// <param name="predicate">Not used by this parser.</param>
        /// <param name="buffer">Not used by this parser.</param>
        /// <param name="attributes">Not used by this parser.</param>
        public override void InternalParse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                                           ref AttributeList attributes)
        {
            var entry    = ParseAttributeEntry(reader.Line);
            var document = container as Document;

            if (document == null)
            {
                container.Add(entry);
            }
            else if (document.Count == 0)
            {
                // Nothing has been added to the document yet, so the entry goes to its attributes.
                document.Attributes.Add(entry);
            }
            else
            {
                document.Add(entry);
            }

            reader.ReadLine();
        }
Example #15
0
        /// <summary>
        /// Parses a check list item, including continuation content attached to it, and appends it to the
        /// trailing <c>UnorderedList</c> of <paramref name="container"/> or to a new list.
        /// </summary>
        /// <param name="container">
        /// Receives the item. The item joins the container's last element when that element is an
        /// <c>UnorderedList</c> whose first item has the same level; otherwise a new list is added.
        /// </param>
        /// <param name="reader">Reader whose current line must match <c>PatternMatcher.CheckListItem</c>.</param>
        /// <param name="predicate">Optional stop condition; the item ends at the first line it accepts.</param>
        /// <param name="buffer">Collects the item's text lines before they are passed to <c>ProcessBuffer</c>.</param>
        /// <param name="attributes">Attributes added to the item; <c>null</c> when the method returns.</param>
        /// <exception cref="ArgumentException">The current line does not match the check list item pattern.</exception>
        public override void InternalParse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                                           ref AttributeList attributes)
        {
            var match = PatternMatcher.CheckListItem.Match(reader.Line);

            if (!match.Success)
            {
                throw new ArgumentException("not a check list item");
            }

            var level     = match.Groups["level"].Value;
            var isChecked = !string.IsNullOrWhiteSpace(match.Groups["checked"].Value);
            var text      = match.Groups["text"].Value;

            var listItem = new CheckListItem(level.Length, isChecked);

            listItem.Attributes.Add(attributes);

            buffer.Add(text);
            reader.ReadLine();

            attributes = null;

            // A continuation line must be allowed into the loop body, otherwise the continuation
            // branch below can never run. This mirrors the ordered and labeled list item parsers.
            while (reader.Line != null &&
                   !PatternMatcher.BlankCharacters.IsMatch(reader.Line) &&
                   !PatternMatcher.CheckListItem.IsMatch(reader.Line) &&
                   !PatternMatcher.ListItem.IsMatch(reader.Line) &&
                   (predicate == null || !predicate(reader.Line)))
            {
                if (PatternMatcher.ListItemContinuation.IsMatch(reader.Line))
                {
                    // Process the text gathered so far, skip the continuation marker, then parse the
                    // attached content into the item until a blank line or the next list item.
                    ProcessBuffer(listItem, ref buffer, ref attributes);
                    reader.ReadLine();
                    DescendingParse(
                        listItem,
                        reader,
                        line => PatternMatcher.BlankCharacters.IsMatch(line) ||
                        PatternMatcher.ListItem.IsMatch(line) ||
                        PatternMatcher.CheckListItem.IsMatch(line),
                        ref buffer,
                        ref attributes);
                }
                else
                {
                    buffer.Add(reader.Line);
                    reader.ReadLine();
                }
            }

            ProcessBuffer(listItem, ref buffer, ref attributes);

            // Join the trailing list when it is an unordered list at the same level, else start a new one.
            var trailingList = container.Count > 0
                ? container[container.Count - 1] as UnorderedList
                : null;

            if (trailingList != null && trailingList.Items.Count > 0 && trailingList.Items[0].Level == listItem.Level)
            {
                trailingList.Items.Add(listItem);
            }
            else
            {
                container.Add(new UnorderedList {
                    Items = { listItem }
                });
            }

            // The ref-passing calls above may have assigned attributes again; clear it before returning.
            attributes = null;
        }
Example #16
0
        /// <summary>
        /// Parses a labeled list item, including continuation content attached to it, and appends it to the
        /// trailing <c>LabeledList</c> of <paramref name="container"/> or to a new list.
        /// </summary>
        /// <param name="container">
        /// Receives the item. The item joins the container's last element when that element is a
        /// <c>LabeledList</c> whose first item has the same level; otherwise a new list is added.
        /// </param>
        /// <param name="reader">Reader whose current line must match <c>PatternMatcher.LabeledListItem</c>.</param>
        /// <param name="predicate">Optional stop condition; the item ends at the first line it accepts.</param>
        /// <param name="buffer">Collects the item's text lines before they are passed to <c>ProcessBuffer</c>.</param>
        /// <param name="attributes">Attributes added to the item; <c>null</c> when the method returns.</param>
        /// <exception cref="ArgumentException">The current line does not match the labeled list item pattern.</exception>
        public override void InternalParse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                                           ref AttributeList attributes)
        {
            var itemMatch = PatternMatcher.LabeledListItem.Match(reader.Line);

            if (!itemMatch.Success)
            {
                throw new ArgumentException("not a labeled list item");
            }

            // levels start at 0
            var levelLength = itemMatch.Groups["level"].Value.Length;
            var level       = levelLength > 0 ? levelLength - 2 : levelLength;

            var item = new LabeledListItem(itemMatch.Groups["label"].Value, level);

            item.Attributes.Add(attributes);
            attributes = null;

            // labeled lists are lenient with whitespace so can have whitespace after the label
            // and before any content.
            var inlineText    = itemMatch.Groups["text"].Value;
            var hasInlineText = !string.IsNullOrWhiteSpace(inlineText);

            if (hasInlineText)
            {
                buffer.Add(inlineText);
            }

            reader.ReadLine();

            if (!hasInlineText)
            {
                // No text on the label line: skip the blank lines that precede the content.
                while (reader.Line != null && PatternMatcher.BlankCharacters.IsMatch(reader.Line))
                {
                    reader.ReadLine();
                }
            }

            while (reader.Line != null &&
                   !PatternMatcher.BlankCharacters.IsMatch(reader.Line) &&
                   !PatternMatcher.LabeledListItem.IsMatch(reader.Line) &&
                   (predicate == null || !predicate(reader.Line)))
            {
                if (!PatternMatcher.ListItemContinuation.IsMatch(reader.Line))
                {
                    buffer.Add(reader.Line);
                    reader.ReadLine();
                    continue;
                }

                // Continuation: process the text gathered so far, skip the marker line, then parse the
                // attached content into the item until a blank line or the next labeled item.
                ProcessBuffer(item, ref buffer, ref attributes);
                reader.ReadLine();
                DescendingParse(
                    item,
                    reader,
                    line => PatternMatcher.BlankCharacters.IsMatch(line) ||
                    PatternMatcher.LabeledListItem.IsMatch(line),
                    ref buffer,
                    ref attributes);
            }

            ProcessBuffer(item, ref buffer, ref attributes);

            // Join the trailing list when it is a labeled list at the same level, else start a new one.
            var trailingList = container.Count > 0
                ? container[container.Count - 1] as LabeledList
                : null;

            if (trailingList != null && trailingList.Items.Count > 0 && trailingList.Items[0].Level == item.Level)
            {
                trailingList.Items.Add(item);
            }
            else
            {
                container.Add(new LabeledList {
                    Items = { item }
                });
            }

            attributes = null;
        }
Example #17
0
 /// <summary>
 /// Advances <paramref name="reader"/> by one line and then runs <c>DescendingParse</c> with the same arguments.
 /// </summary>
 /// <param name="container">Forwarded to <c>DescendingParse</c>.</param>
 /// <param name="reader">Reader whose current line is skipped before parsing continues.</param>
 /// <param name="predicate">Forwarded to <c>DescendingParse</c>.</param>
 /// <param name="buffer">Forwarded to <c>DescendingParse</c>.</param>
 /// <param name="attributes">Forwarded to <c>DescendingParse</c>.</param>
 public override void Parse(
     Container container,
     IDocumentReader reader,
     Func <string, bool> predicate,
     ref List <string> buffer,
     ref AttributeList attributes)
 {
     // The current line is not parsed here; parsing starts with the line after it.
     reader.ReadLine();

     DescendingParse(container, reader, predicate, ref buffer, ref attributes);
 }
Example #18
0
        /// <summary>
        /// Parses a media line into an <c>Image</c>, <c>Video</c> or <c>Audio</c>, applies its attributes,
        /// and adds it to <paramref name="container"/>.
        /// </summary>
        /// <param name="container">Receives the new media element.</param>
        /// <param name="reader">Reader whose current line must match <c>PatternMatcher.Media</c>.</param>
        /// <param name="predicate">Not used by this parser.</param>
        /// <param name="buffer">Not used by this parser.</param>
        /// <param name="attributes">Attributes added to the media element; set to <c>null</c> afterwards.</param>
        /// <exception cref="ArgumentException">
        /// The current line does not match the media pattern, or its media type is not image, video or audio.
        /// </exception>
        /// <exception cref="NotImplementedException">A name/value attribute has a name this parser does not handle.</exception>
        public override void InternalParse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                                           ref AttributeList attributes)
        {
            var mediaMatch = PatternMatcher.Media.Match(reader.Line);

            if (!mediaMatch.Success)
            {
                throw new ArgumentException("not a media");
            }

            var   path = mediaMatch.Groups["path"].Value;
            var   kind = mediaMatch.Groups["media"].Value.ToLowerInvariant();
            Media media;

            if (kind == "image")
            {
                media = new Image(path);
            }
            else if (kind == "video")
            {
                media = new Video(path);
            }
            else if (kind == "audio")
            {
                media = new Audio(path);
            }
            else
            {
                throw new ArgumentException("unrecognized media type");
            }

            media.Attributes.Add(attributes);

            var rawAttributes = mediaMatch.Groups["attributes"].Value;
            int?width         = null;
            int?height        = null;

            if (!string.IsNullOrEmpty(rawAttributes))
            {
                var entries = SplitOnCharacterOutsideQuotes(rawAttributes);

                for (var position = 0; position < entries.Length; position++)
                {
                    var entry = entries[position];

                    // Position 0 is always the alternate text.
                    if (position == 0)
                    {
                        media.AlternateText = entry;
                        continue;
                    }

                    // Positions 1 and 2 are width and height when they are integers; otherwise they
                    // fall through and are treated as name/value attributes like any later entry.
                    int parsed;
                    if (position == 1 && int.TryParse(entry, out parsed))
                    {
                        width = parsed;
                        continue;
                    }

                    if (position == 2 && int.TryParse(entry, out parsed))
                    {
                        height = parsed;
                        continue;
                    }

                    var pair = PatternMatcher.AttributeNameValue.Match(entry);
                    if (!pair.Success)
                    {
                        continue;
                    }

                    var value = pair.Groups["value"].Value;

                    switch (pair.Groups["name"].Value.ToLowerInvariant())
                    {
                    case "link":
                        media.Link = value;
                        break;

                    case "title":
                        media.Title = value;
                        break;

                    case "float":
                        media.Float = value;
                        break;

                    case "align":
                        media.Align = value;
                        break;

                    case "role":
                        media.Role = value;
                        break;

                    default:
                        throw new NotImplementedException("TODO: add attribute to media attribute list");
                    }
                }
            }

            // Dimensions are applied only when both a width and a height were given.
            if (width.HasValue && height.HasValue)
            {
                media.SetWidthAndHeight(width.Value, height.Value);
            }

            container.Add(media);
            attributes = null;

            reader.ReadLine();
        }
Example #19
0
        /// <summary>
        /// Parses an ordered list item, including its numbering and any continuation content attached to it,
        /// and appends it to the trailing <c>OrderedList</c> of <paramref name="container"/> or to a new list.
        /// </summary>
        /// <param name="container">
        /// Receives the item. The item joins the container's last element when that element is an
        /// <c>OrderedList</c> whose first item has the same level; otherwise a new list is added.
        /// </param>
        /// <param name="reader">Reader whose current line must match <c>PatternMatcher.OrderedListItem</c>.</param>
        /// <param name="predicate">Optional stop condition; the item ends at the first line it accepts.</param>
        /// <param name="buffer">Collects the item's text lines before they are passed to <c>ProcessBuffer</c>.</param>
        /// <param name="attributes">Attributes added to the item; <c>null</c> when the method returns.</param>
        /// <exception cref="ArgumentException">The current line does not match the ordered list item pattern.</exception>
        public override void InternalParse(Container container, IDocumentReader reader, Func <string, bool> predicate, ref List <string> buffer,
                                           ref AttributeList attributes)
        {
            var itemMatch = PatternMatcher.OrderedListItem.Match(reader.Line);

            if (!itemMatch.Success)
            {
                throw new ArgumentException("not an ordered list item");
            }

            var groups = itemMatch.Groups;
            var item   = new OrderedListItem(groups["level"].Value.Length);

            item.Attributes.Add(attributes);

            // NOTE(review): the group names mix casing ("upperalpha" vs "upperRoman"); confirm they
            // match the names used in the pattern definition.
            var arabic     = groups["number"].Value;
            var alphaUpper = groups["upperalpha"].Value;
            var alphaLower = groups["loweralpha"].Value;
            var romanUpper = groups["upperRoman"].Value;
            var romanLower = groups["lowerRoman"].Value;

            // The first non-empty group decides the number; a plain number leaves Numbering untouched.
            if (!string.IsNullOrEmpty(arabic))
            {
                item.Number = int.Parse(arabic);
            }
            else if (!string.IsNullOrEmpty(alphaUpper))
            {
                item.Numbering = NumberStyle.UpperAlpha;
                item.Number    = Array.IndexOf(Patterns.UpperAlphabet, alphaUpper) + 1;
            }
            else if (!string.IsNullOrEmpty(alphaLower))
            {
                item.Numbering = NumberStyle.LowerAlpha;
                item.Number    = Array.IndexOf(Patterns.LowerAlphabet, alphaLower) + 1;
            }
            else if (!string.IsNullOrEmpty(romanUpper))
            {
                item.Numbering = NumberStyle.UpperRoman;
                item.Number    = RomanNumerals.ToInt(romanUpper);
            }
            else if (!string.IsNullOrEmpty(romanLower))
            {
                item.Numbering = NumberStyle.LowerRoman;
                item.Number    = RomanNumerals.ToInt(romanLower);
            }

            buffer.Add(groups["text"].Value);
            reader.ReadLine();

            attributes = null;

            while (reader.Line != null &&
                   !PatternMatcher.BlankCharacters.IsMatch(reader.Line) &&
                   !PatternMatcher.OrderedListItem.IsMatch(reader.Line) &&
                   (predicate == null || !predicate(reader.Line)))
            {
                if (!PatternMatcher.ListItemContinuation.IsMatch(reader.Line))
                {
                    buffer.Add(reader.Line);
                    reader.ReadLine();
                    continue;
                }

                // Continuation: process the text gathered so far, skip the marker line, then parse the
                // attached content into the item until a blank line or the next ordered item.
                ProcessBuffer(item, ref buffer, ref attributes);
                reader.ReadLine();
                DescendingParse(
                    item,
                    reader,
                    line => PatternMatcher.BlankCharacters.IsMatch(line) || PatternMatcher.OrderedListItem.IsMatch(line),
                    ref buffer,
                    ref attributes);
            }

            ProcessBuffer(item, ref buffer, ref attributes);

            // Join the trailing list when it is an ordered list at the same level, else start a new one.
            var trailingList = container.Count > 0
                ? container[container.Count - 1] as OrderedList
                : null;

            if (trailingList != null && trailingList.Items.Count > 0 && trailingList.Items[0].Level == item.Level)
            {
                trailingList.Items.Add(item);
            }
            else
            {
                container.Add(new OrderedList {
                    Items = { item }
                });
            }

            attributes = null;
        }