예제 #1
0
        /// <summary>
        /// A sentence made of an emphasis run, a plain run and a bold run must parse
        /// into three sibling text nodes, in source order.
        /// </summary>
        public void Parse_TextEmphasisAndBold_Successfully()
        {
            // Arrange: "_the wrong_" -> emphasis, " barking up " -> plain, "__tree__" -> bold.
            var input = tokenizer.Tokenize("_the wrong_ barking up __tree__");

            var emphasized = new TextNode(TextType.Emphasis);
            emphasized.Add(new WordNode(WordType.SimpleWord, "the"));
            emphasized.Add(new WordNode(WordType.SpacedWord, " wrong"));

            var plain = new TextNode();
            plain.Add(new WordNode(WordType.SpacedWord, " barking"));
            plain.Add(new WordNode(WordType.SpacedWord, " up"));
            plain.Add(new WordNode(WordType.Space, " "));

            var bold = new TextNode(TextType.Bold);
            bold.Add(new WordNode(WordType.SimpleWord, "tree"));

            var expectedSentence = new SentenceNode();
            expectedSentence.Add(emphasized);
            expectedSentence.Add(plain);
            expectedSentence.Add(bold);

            // Act
            var actualSentence = Parser.ParseSentence(input);

            // Assert
            actualSentence.Should().BeEquivalentTo(expectedSentence);
        }
예제 #2
0
        /// <summary>
        /// Plain text followed by an emphasis run must parse into a plain text node
        /// (including the separating space) followed by an emphasis node.
        /// </summary>
        public void Parse_TextAndEmphasis_Successfully()
        {
            // Arrange
            var input = tokenizer.Tokenize("break the ice _go for broke_");

            var plain = new TextNode();
            plain.Add(new WordNode(WordType.SimpleWord, "break"));
            plain.Add(new WordNode(WordType.SpacedWord, " the"));
            plain.Add(new WordNode(WordType.SpacedWord, " ice"));
            plain.Add(new WordNode(WordType.Space, " "));

            var emphasized = new TextNode(TextType.Emphasis);
            emphasized.Add(new WordNode(WordType.SimpleWord, "go"));
            emphasized.Add(new WordNode(WordType.SpacedWord, " for"));
            emphasized.Add(new WordNode(WordType.SpacedWord, " broke"));

            var expectedSentence = new SentenceNode();
            expectedSentence.Add(plain);
            expectedSentence.Add(emphasized);

            // Act
            var actualSentence = Parser.ParseSentence(input);

            // Assert
            actualSentence.Should().BeEquivalentTo(expectedSentence);
        }
예제 #3
0
        /// <summary>
        /// Parametrised check that a two-word input is parsed by <c>Parser.ParseText</c>
        /// into exactly two word nodes with the given types and values.
        /// </summary>
        /// <param name="text">Raw input handed to the tokenizer.</param>
        /// <param name="firstType">Expected type of the first word node.</param>
        /// <param name="secondType">Expected type of the second word node.</param>
        /// <param name="firstExpectedValue">Expected value of the first word node.</param>
        /// <param name="secondExpectedValue">Expected value of the second word node.</param>
        public void Parse_TwoWords_Successfully(string text, WordType firstType, WordType secondType, string firstExpectedValue, string secondExpectedValue)
        {
            // Arrange
            var firstWord  = new WordNode(firstType, firstExpectedValue);
            var secondWord = new WordNode(secondType, secondExpectedValue);

            var expectedNode = new TextNode();
            expectedNode.Add(firstWord);
            expectedNode.Add(secondWord);

            var input = tokenizer.Tokenize(text);

            // Act
            var actualNode = Parser.ParseText(input);

            // Assert
            actualNode.Should().BeEquivalentTo(expectedNode);
        }
        /// <summary>
        /// An opening underscore that is directly followed by a space does not start an
        /// emphasis run: the underscore is expected back as a literal word of a plain text node.
        /// </summary>
        public void Parse_IfAfterOpenUnderscoreFollowNonWhitespace_AsText()
        {
            // Arrange: note the space right after the underscore in the input.
            var input = tokenizer.Tokenize("_ science");

            var expectedNode = new TextNode();
            expectedNode.Add(new WordNode(WordType.SimpleWord, "_"));
            expectedNode.Add(new WordNode(WordType.SpacedWord, " science"));

            // Act
            var actualNode = Parser.ParseEmphasisText(input);

            // Assert
            actualNode.Should().BeEquivalentTo(expectedNode);
        }
예제 #5
0
        /// <summary>
        /// Parametrised check that a word followed by one trailing space is parsed by
        /// <c>Parser.ParseText</c> into the word node plus a separate space node.
        /// </summary>
        /// <param name="word">The word placed before the trailing space; also the expected node value.</param>
        /// <param name="wordType">Expected type of the word node.</param>
        public void Parse_WordAndSpace_Successfully(string word, WordType wordType)
        {
            // Arrange
            var input = tokenizer.Tokenize(string.Concat(word, " "));

            var expectedNode = new TextNode();
            expectedNode.Add(new WordNode(wordType, word));
            expectedNode.Add(new WordNode(WordType.Space, " "));

            // Act
            var actualNode = Parser.ParseText(input);

            // Assert
            actualNode.Should().BeEquivalentTo(expectedNode);
        }
        /// <summary>
        /// A closing underscore that is preceded by a space does not close an emphasis run:
        /// the result is expected to be a plain text node starting with the literal opening underscore.
        /// </summary>
        public void Parse_IfBeforeCloseUnderscoreNonWhitespace_AsText()
        {
            // Arrange: note the space right before the final underscore in the input.
            var input = tokenizer.Tokenize("_norm _");

            // The expectation ends at the space; no node is expected for the trailing underscore.
            var expectedNode = new TextNode();
            expectedNode.Add(new WordNode(WordType.SimpleWord, "_"));
            expectedNode.Add(new WordNode(WordType.SimpleWord, "norm"));
            expectedNode.Add(new WordNode(WordType.Space, " "));

            // Act
            var actualNode = Parser.ParseEmphasisText(input);

            // Assert
            actualNode.Should().BeEquivalentTo(expectedNode);
        }
        /// <summary>
        /// A single opening underscore cannot be closed by a double underscore: all three
        /// pieces are expected back as literal words of a plain text node.
        /// </summary>
        public void Parse_NotPairedUnderscores_AsText()
        {
            // Arrange
            var input = tokenizer.Tokenize("_treatment__");

            var expectedNode = new TextNode();
            expectedNode.Add(new WordNode(WordType.SimpleWord, "_"));
            expectedNode.Add(new WordNode(WordType.SimpleWord, "treatment"));
            expectedNode.Add(new WordNode(WordType.SimpleWord, "__"));

            // Act
            var actualNode = Parser.ParseEmphasisText(input);

            // Assert
            actualNode.Should().BeEquivalentTo(expectedNode);
        }
예제 #8
0
        /// <summary>
        /// Reads the leading run of space, text and number tokens from <paramref name="tokens"/>
        /// and collects the resulting words into a new text node.
        /// </summary>
        /// <param name="tokens">Token deque to read from; parsing starts at its first token.</param>
        /// <returns>
        /// A text node holding the parsed words. It is empty when the deque is empty or its
        /// first token is of a kind this method does not accept.
        /// </returns>
        /// <exception cref="NotSupportedException">The token at the front is an escape character.</exception>
        public static TextNode ParseText(Deque <Token> tokens)
        {
            var words = new TextNode();

            while (tokens.Count != 0)
            {
                var head = tokens.PeekFirst();

                switch (head.Type)
                {
                case TokenType.EscapeChar:
                    throw new NotSupportedException();

                case TokenType.Space:
                    // A space starts either a spaced word or a stand-alone space node.
                    words.Add(ParseSpacedWord(tokens));
                    break;

                case TokenType.Text:
                case TokenType.Num:
                    words.Add(ParseSimpleWord(tokens));
                    break;

                default:
                    // Any other token ends the plain run; it is only peeked here, not removed.
                    return(words);
                }
            }

            return(words);
        }
        /// <summary>
        /// Text wrapped in a pair of single underscores must parse into one emphasis node
        /// that contains the inner words without the delimiters.
        /// </summary>
        public void Parse_TextSurroundedByUnderscores_Successfully()
        {
            // Arrange
            var input = tokenizer.Tokenize("_what am i, chopped liver?_");

            var innerWords = new[]
            {
                new WordNode(WordType.SimpleWord, "what"),
                new WordNode(WordType.SpacedWord, " am"),
                new WordNode(WordType.SpacedWord, " i,"),
                new WordNode(WordType.SpacedWord, " chopped"),
                new WordNode(WordType.SpacedWord, " liver?"),
            };

            var expectedNode = new TextNode(TextType.Emphasis);
            foreach (var word in innerWords)
            {
                expectedNode.Add(word);
            }

            // Act
            var actualNode = Parser.ParseEmphasisText(input);

            // Assert
            actualNode.Should().BeEquivalentTo(expectedNode);
        }
예제 #10
0
        /// <summary>
        /// A sentence without any markup must parse into a sentence node holding a single plain text node.
        /// </summary>
        public void Parse_SimpleText_Successfully()
        {
            // Arrange
            var input = tokenizer.Tokenize("hard pill to swallow");

            var plain = new TextNode();
            plain.Add(new WordNode(WordType.SimpleWord, "hard"));
            plain.Add(new WordNode(WordType.SpacedWord, " pill"));
            plain.Add(new WordNode(WordType.SpacedWord, " to"));
            plain.Add(new WordNode(WordType.SpacedWord, " swallow"));

            var expectedSentence = new SentenceNode();
            expectedSentence.Add(plain);

            // Act
            var actualSentence = Parser.ParseSentence(input);

            // Assert
            actualSentence.Should().BeEquivalentTo(expectedSentence);
        }
예제 #11
0
        /// <summary>
        /// Parses an emphasis run whose opening single underscore is the first token of
        /// <paramref name="tokens"/>.
        /// </summary>
        /// <param name="tokens">
        /// Token deque positioned on the opening underscore. The opener and every token
        /// consumed while parsing are removed from it.
        /// </param>
        /// <returns>
        /// A <c>TextType.Emphasis</c> node with the inner words when a closing underscore is
        /// found directly after a non-space word; otherwise a <c>TextType.Text</c> node in
        /// which the opening underscore is kept as a literal word.
        /// </returns>
        /// <exception cref="NotSupportedException">Propagated from <c>ParseText</c> when it meets an escape token.</exception>
        public static TextNode ParseEmphasisText(Deque <Token> tokens)
        {
            var underscore = tokens.PopFirst();

            // The underscore was the last token: there is nothing to emphasise.
            if (tokens.Count == 0)
            {
                return(ParseText(new Deque <Token> {
                    new Token(TokenType.Text, underscore.Value)
                }));
            }

            // A space right after the opener disqualifies it; put it back as ordinary text.
            if (tokens.PeekFirst().Type == TokenType.Space)
            {
                tokens.Insert(0, new Token(TokenType.Text, underscore.Value));
                return(ParseText(tokens));
            }

            var emphasisText = new TextNode(TextType.Emphasis);

            while (true)
            {
                // Input ended before a closing underscore: demote everything to plain text.
                if (tokens.Count == 0)
                {
                    emphasisText.Words.Insert(0, new WordNode(WordType.SimpleWord, underscore.Value));
                    return(new TextNode(TextType.Text, emphasisText.Words));
                }

                var currentToken = tokens.PeekFirst();

                switch (currentToken.Type)
                {
                case TokenType.Underscore:
                    var closingUnderscore = tokens.PopFirst();

                    // Closer directly after the opener: there is no word to inspect, and calling
                    // Last() on the empty word list would throw. Keep both delimiters as literal text.
                    if (emphasisText.Words.Count == 0)
                    {
                        emphasisText.Add(new WordNode(WordType.SimpleWord, underscore.Value));
                        emphasisText.Add(new WordNode(WordType.SimpleWord, closingUnderscore.Value));
                        return(new TextNode(TextType.Text, emphasisText.Words));
                    }

                    // A closer preceded by a space is not a valid closer. The result shape of this
                    // fallback (opener restored, closer not re-emitted) is left exactly as it was.
                    if (emphasisText.Words.Last().Type == WordType.Space)
                    {
                        emphasisText.Words.Insert(0, new WordNode(WordType.SimpleWord, underscore.Value));
                        return(new TextNode(TextType.Text, emphasisText.Words));
                    }

                    return(emphasisText);

                case TokenType.DoubleUnderscore:
                    // Bold markers are not interpreted inside emphasis; they become a literal word.
                    tokens.PopFirst();
                    var doubleUnderscore = new WordNode(WordType.SimpleWord, underscore.Value + underscore.Value);
                    emphasisText.Add(doubleUnderscore);
                    break;

                default:
                    // NOTE(review): if ParseText consumes nothing here (a token kind it does not
                    // accept and that is not handled above), this loop cannot advance - confirm
                    // that no such token kind can reach this point.
                    var innerText  = ParseText(tokens);
                    var innerWords = innerText.Words;

                    emphasisText.AddRange(innerWords);
                    break;
                }
            }
        }
예제 #12
0
        /// <summary>
        /// A markup-free sentence that begins with a space must parse into a single plain
        /// text node whose first word already carries that leading space.
        /// </summary>
        public void Parse_SingleTextStartingWithSpace_Successfully()
        {
            // Arrange
            var input = tokenizer.Tokenize(" an arm and a leg");

            var plain = new TextNode();
            foreach (var value in new[] { " an", " arm", " and", " a", " leg" })
            {
                plain.Add(new WordNode(WordType.SpacedWord, value));
            }

            var expectedSentence = new SentenceNode();
            expectedSentence.Add(plain);

            // Act
            var actualSentence = Parser.ParseSentence(input);

            // Assert
            actualSentence.Should().BeEquivalentTo(expectedSentence);
        }
예제 #13
0
        /// <summary>
        /// A lone double underscore with nothing after it is expected back as a plain text
        /// node containing the delimiter as a literal word.
        /// </summary>
        public void Parse_SingleDoubleUnderscore_AsText()
        {
            // Arrange
            var input = tokenizer.Tokenize("__");

            var expectedNode = new TextNode();
            expectedNode.Add(new WordNode(WordType.SimpleWord, "__"));

            // Act
            var actualNode = Parser.ParseBoldText(input);

            // Assert
            actualNode.Should().BeEquivalentTo(expectedNode);
        }
예제 #14
0
        /// <summary>
        /// Parametrised check that a one-word input is parsed by <c>Parser.ParseText</c>
        /// into a text node with exactly one word node.
        /// </summary>
        /// <param name="text">Raw input handed to the tokenizer.</param>
        /// <param name="type">Expected type of the single word node.</param>
        /// <param name="value">Expected value of the single word node.</param>
        public void Parse_SingleWord_Successfully(string text, WordType type, string value)
        {
            // Arrange
            var onlyWord = new WordNode(type, value);

            var expectedNode = new TextNode();
            expectedNode.Add(onlyWord);

            var input = tokenizer.Tokenize(text);

            // Act
            var actualNode = Parser.ParseText(input);

            // Assert
            actualNode.Should().BeEquivalentTo(expectedNode);
        }
예제 #15
0
        /// <summary>
        /// An emphasis run nested inside a bold run is expected to be flattened into the bold
        /// node, with literal <c>&lt;em&gt;</c> / <c>&lt;/em&gt;</c> words around the emphasised words.
        /// </summary>
        public void Parse_EmphasisTextInside_Successfully()
        {
            // Arrange
            var input = tokenizer.Tokenize("__a chip on _your_ shoulder__");

            var innerWords = new[]
            {
                new WordNode(WordType.SimpleWord, "a"),
                new WordNode(WordType.SpacedWord, " chip"),
                new WordNode(WordType.SpacedWord, " on"),
                new WordNode(WordType.Space, " "),
                new WordNode(WordType.SimpleWord, "<em>"),
                new WordNode(WordType.SimpleWord, "your"),
                new WordNode(WordType.SimpleWord, "</em>"),
                new WordNode(WordType.SpacedWord, " shoulder"),
            };

            var expectedNode = new TextNode(TextType.Bold);
            foreach (var word in innerWords)
            {
                expectedNode.Add(word);
            }

            // Act
            var actualNode = Parser.ParseBoldText(input);

            // Assert
            actualNode.Should().BeEquivalentTo(expectedNode);
        }
        /// <summary>
        /// Double underscores that occur inside an emphasis run are not treated as bold
        /// markers: they are expected as literal words of the emphasis node.
        /// </summary>
        public void Parse_DoubleUnderscoresInside_AsText()
        {
            // Arrange
            var input = tokenizer.Tokenize("_easy __as__ pie_");

            var innerWords = new[]
            {
                new WordNode(WordType.SimpleWord, "easy"),
                new WordNode(WordType.Space, " "),
                new WordNode(WordType.SimpleWord, "__"),
                new WordNode(WordType.SimpleWord, "as"),
                new WordNode(WordType.SimpleWord, "__"),
                new WordNode(WordType.SpacedWord, " pie"),
            };

            var expectedNode = new TextNode(TextType.Emphasis);
            foreach (var word in innerWords)
            {
                expectedNode.Add(word);
            }

            // Act
            var actualNode = Parser.ParseEmphasisText(input);

            // Assert
            actualNode.Should().BeEquivalentTo(expectedNode);
        }
예제 #17
0
        /// <summary>
        /// A markup-free sentence with both a leading and a trailing space must parse into a
        /// single plain text node: spaced words first, then a stand-alone trailing space node.
        /// </summary>
        public void Parse_SingleTextStartingAndEndingWithSpace_Successfully()
        {
            // Arrange
            var input = tokenizer.Tokenize(" everything but the kitchen sink ");

            var plain = new TextNode();
            foreach (var value in new[] { " everything", " but", " the", " kitchen", " sink" })
            {
                plain.Add(new WordNode(WordType.SpacedWord, value));
            }
            plain.Add(new WordNode(WordType.Space, " "));

            var expectedSentence = new SentenceNode();
            expectedSentence.Add(plain);

            // Act
            var actualSentence = Parser.ParseSentence(input);

            // Assert
            actualSentence.Should().BeEquivalentTo(expectedSentence);
        }
예제 #18
0
        /// <summary>
        /// A markup-free sentence that ends with a space must parse into a single plain text
        /// node whose last element is a stand-alone space node.
        /// </summary>
        public void Parse_SingleTextEndingWithSpace_Successfully()
        {
            // Arrange
            var input = tokenizer.Tokenize("barking up the wrong tree ");

            var plain = new TextNode();
            plain.Add(new WordNode(WordType.SimpleWord, "barking"));
            foreach (var value in new[] { " up", " the", " wrong", " tree" })
            {
                plain.Add(new WordNode(WordType.SpacedWord, value));
            }
            plain.Add(new WordNode(WordType.Space, " "));

            var expectedSentence = new SentenceNode();
            expectedSentence.Add(plain);

            // Act
            var actualSentence = Parser.ParseSentence(input);

            // Assert
            actualSentence.Should().BeEquivalentTo(expectedSentence);
        }
예제 #19
0
        /// <summary>
        /// Packs the words of <c>data.WordLookUpList</c> into a shared UTF-8 text blob and writes
        /// an index entry (byte offset, byte length) for every word.
        /// </summary>
        /// <remarks>
        /// Words contained in other words are attached to the containing root, and roots whose
        /// texts overlap are merged, so that the same bytes can serve several words.
        /// An empty word list writes nothing to either writer.
        /// </remarks>
        /// <param name="data">Provides the word list to encode.</param>
        /// <param name="textWriter">Receives the UTF-8 bytes of all root texts, back to back.</param>
        /// <param name="indexWriter">
        /// Receives, for each word in list order, a <c>ushort</c> byte offset into the text
        /// written to <paramref name="textWriter"/>, followed by a <c>byte</c> with the word's UTF-8 length.
        /// </param>
        /// <exception cref="InvalidOperationException">A word or root was rejected by the root it should be attached to.</exception>
        /// <exception cref="OverflowException">An offset does not fit in 16 bits, or a word's UTF-8 length does not fit in 8 bits.</exception>
        public static void WriteWordLookUp(EpsgData data, BinaryWriter textWriter, BinaryWriter indexWriter)
        {
            // Phase 1: group words so that every word hangs under a root whose text contains it.
            var roots = new List<TextNode>();
            foreach(var text in data.WordLookUpList) {
                var containerRoot = TextNode.FindContainingRoot(roots, text);
                if(null == containerRoot) {
                    // No existing root contains this word: it becomes a root and adopts
                    // every existing root whose text it contains.
                    containerRoot = new TextNode(text);
                    var containedRoots = roots.Where(r => containerRoot.Contains(r.Text)).ToList();
                    foreach(var containedRoot in containedRoots) {
                        roots.Remove(containedRoot);
                        if(!containerRoot.Add(containedRoot)) {
                            throw new InvalidOperationException();
                        }
                    }
                    roots.Add(containerRoot);
                }else {
                    if(!containerRoot.Add(text)) {
                        throw new InvalidOperationException();
                    }
                }
            }

            // Nothing to encode; Max() below would throw on an empty sequence.
            if(roots.Count == 0) {
                return;
            }

            // Phase 2: merge roots whose texts overlap. 'quality' is the minimum number of
            // characters from the overlap index to the end of the left text that is accepted
            // in a pass; passes go from the strictest threshold (at most 6) down to 0.
            // StringUtils.OverlapIndex presumably returns -1 when there is no overlap - confirm.
            for (int quality = Math.Min(6,roots.Select(x => x.Text.Length).Max()/2); quality >= 0; quality--) {
                for (int i = 0; i < roots.Count; i++) {
                    for (int j = i + 1; j < roots.Count; j++) {
                        // Try roots[i] followed by roots[j].
                        int overlapAt = StringUtils.OverlapIndex(roots[i].Text, roots[j].Text);
                        if (overlapAt >= 0 && (roots[i].Text.Length - overlapAt) >= quality) {
                            var newText = roots[i].Text.Substring(0, overlapAt) + roots[j].Text;
                            var newNode = new TextNode(newText, new[]{roots[i], roots[j]});
                            roots.RemoveAt(j);
                            roots[i] = newNode;
                            // Step back so the outer loop revisits slot i with the merged root.
                            i--;
                            break;
                        }
                        // Try the opposite order: roots[j] followed by roots[i].
                        overlapAt = StringUtils.OverlapIndex(roots[j].Text, roots[i].Text);
                        if (overlapAt >= 0 && (roots[j].Text.Length - overlapAt) >= quality) {
                            var newText = roots[j].Text.Substring(0, overlapAt) + roots[i].Text;
                            var newNode = new TextNode(newText, new[]{roots[j], roots[i]});
                            roots.RemoveAt(j);
                            roots[i] = newNode;
                            i--;
                            break;
                        }
                    }
                }
            }

            // Phase 3: write the root texts and remember the byte offset of every word.
            var offsetLookUp = new Dictionary<string, int>();
            int rootOffset = 0;
            foreach(var root in roots) {
                var rootText = root.Text;
                var rootBytes = Encoding.UTF8.GetBytes(rootText);
                textWriter.Write(rootBytes);
                foreach(var text in root.GetAllString()) {
                    int startIndex = rootText.IndexOf(text, StringComparison.Ordinal);
                    // Offsets are in bytes, so the character index is converted via the UTF-8 prefix length.
                    var localOffset = Encoding.UTF8.GetByteCount(rootText.Substring(0, startIndex));
                    offsetLookUp.Add(text, rootOffset + localOffset);
                }
                rootOffset += rootBytes.Length;
            }

            // Phase 4: write the index. The casts are checked so that a value that does not fit
            // the on-disk field fails loudly instead of being truncated into a wrong entry.
            foreach(var word in data.WordLookUpList) {
                indexWriter.Write(checked((ushort)offsetLookUp[word]));
                indexWriter.Write(checked((byte)Encoding.UTF8.GetByteCount(word)));
            }
        }
예제 #20
0
        /// <summary>
        /// Parses a bold run whose opening double underscore is the first token of
        /// <paramref name="tokens"/>.
        /// </summary>
        /// <param name="tokens">
        /// Token deque positioned on the opening double underscore. The opener and every token
        /// consumed while parsing are removed from it.
        /// </param>
        /// <returns>
        /// A <c>TextType.Bold</c> node when a closing double underscore is found directly after a
        /// non-space word. Nested emphasis is flattened into it between literal
        /// <c>&lt;em&gt;</c> and <c>&lt;/em&gt;</c> words. Otherwise a <c>TextType.Text</c> node in which the
        /// opening delimiter is kept as a literal word.
        /// </returns>
        /// <exception cref="NotSupportedException">Propagated from <c>ParseText</c> when it meets an escape token.</exception>
        public static TextNode ParseBoldText(Deque <Token> tokens)
        {
            var doubleUnderscore = tokens.PopFirst();

            // The delimiter was the last token: there is nothing to make bold.
            if (tokens.Count == 0)
            {
                return(ParseText(new Deque <Token> {
                    new Token(TokenType.Text, doubleUnderscore.Value)
                }));
            }

            // A space right after the opener disqualifies it; put it back as ordinary text.
            if (tokens.PeekFirst().Type == TokenType.Space)
            {
                tokens.Insert(0, new Token(TokenType.Text, doubleUnderscore.Value));
                return(ParseText(tokens));
            }

            var boldText = new TextNode(TextType.Bold);

            while (true)
            {
                // Input ended before a closing delimiter: demote everything to plain text.
                if (tokens.Count == 0)
                {
                    boldText.Words.Insert(0, new WordNode(WordType.SimpleWord, doubleUnderscore.Value));
                    return(new TextNode(TextType.Text, boldText.Words));
                }

                var currentToken = tokens.PeekFirst();

                switch (currentToken.Type)
                {
                case TokenType.DoubleUnderscore:
                    var closingDelimiter = tokens.PopFirst();

                    // Closer directly after the opener: there is no word to inspect, and calling
                    // Last() on the empty word list would throw. Keep both delimiters as literal text.
                    if (boldText.Words.Count == 0)
                    {
                        boldText.Add(new WordNode(WordType.SimpleWord, doubleUnderscore.Value));
                        boldText.Add(new WordNode(WordType.SimpleWord, closingDelimiter.Value));
                        return(new TextNode(TextType.Text, boldText.Words));
                    }

                    // A closer preceded by a space is not a valid closer. The result shape of this
                    // fallback (opener restored, closer not re-emitted) is left exactly as it was.
                    if (boldText.Words.Last().Type == WordType.Space)
                    {
                        boldText.Words.Insert(0, new WordNode(WordType.SimpleWord, doubleUnderscore.Value));
                        return(new TextNode(TextType.Text, boldText.Words));
                    }

                    return(boldText);

                case TokenType.Underscore:
                    var innerEmphasisText = ParseEmphasisText(tokens);

                    // The nested parse fell back to plain text: splice its words in unchanged.
                    if (innerEmphasisText.Type == TextType.Text)
                    {
                        boldText.AddRange(innerEmphasisText.Words);
                        continue;
                    }

                    // A real emphasis run is flattened into this node between literal tag words.
                    var openEmTag  = new WordNode(WordType.SimpleWord, "<em>");
                    var closeEmTag = new WordNode(WordType.SimpleWord, "</em>");

                    boldText.Add(openEmTag);
                    boldText.AddRange(innerEmphasisText.Words);
                    boldText.Add(closeEmTag);
                    break;

                default:
                    // NOTE(review): if ParseText consumes nothing here (a token kind it does not
                    // accept and that is not handled above), this loop cannot advance - confirm
                    // that no such token kind can reach this point.
                    var innerText  = ParseText(tokens);
                    var innerWords = innerText.Words;

                    boldText.AddRange(innerWords);
                    break;
                }
            }
        }
예제 #21
0
        /// <summary>
        /// Packs the words of <c>data.WordLookUpList</c> into a shared UTF-8 text blob and writes
        /// an index entry (byte offset, byte length) for every word.
        /// </summary>
        /// <remarks>
        /// Words contained in other words are attached to the containing root, and roots whose
        /// texts overlap are merged, so that the same bytes can serve several words.
        /// An empty word list writes nothing to either writer.
        /// </remarks>
        /// <param name="data">Provides the word list to encode.</param>
        /// <param name="textWriter">Receives the UTF-8 bytes of all root texts, back to back.</param>
        /// <param name="indexWriter">
        /// Receives, for each word in list order, a <c>ushort</c> byte offset into the text
        /// written to <paramref name="textWriter"/>, followed by a <c>byte</c> with the word's UTF-8 length.
        /// </param>
        /// <exception cref="InvalidOperationException">A word or root was rejected by the root it should be attached to.</exception>
        /// <exception cref="OverflowException">An offset does not fit in 16 bits, or a word's UTF-8 length does not fit in 8 bits.</exception>
        public static void WriteWordLookUp(EpsgData data, BinaryWriter textWriter, BinaryWriter indexWriter)
        {
            // Phase 1: group words so that every word hangs under a root whose text contains it.
            var roots = new List <TextNode>();

            foreach (var text in data.WordLookUpList)
            {
                var containerRoot = TextNode.FindContainingRoot(roots, text);
                if (null == containerRoot)
                {
                    // No existing root contains this word: it becomes a root and adopts
                    // every existing root whose text it contains.
                    containerRoot = new TextNode(text);
                    var containedRoots = roots.Where(r => containerRoot.Contains(r.Text)).ToList();
                    foreach (var containedRoot in containedRoots)
                    {
                        roots.Remove(containedRoot);
                        if (!containerRoot.Add(containedRoot))
                        {
                            throw new InvalidOperationException();
                        }
                    }
                    roots.Add(containerRoot);
                }
                else
                {
                    if (!containerRoot.Add(text))
                    {
                        throw new InvalidOperationException();
                    }
                }
            }

            // Nothing to encode; Max() below would throw on an empty sequence.
            if (roots.Count == 0)
            {
                return;
            }

            // Phase 2: merge roots whose texts overlap. 'quality' is the minimum number of
            // characters from the overlap index to the end of the left text that is accepted
            // in a pass; passes go from the strictest threshold (at most 6) down to 0.
            // StringUtils.OverlapIndex presumably returns -1 when there is no overlap - confirm.
            for (int quality = Math.Min(6, roots.Select(x => x.Text.Length).Max() / 2); quality >= 0; quality--)
            {
                for (int i = 0; i < roots.Count; i++)
                {
                    for (int j = i + 1; j < roots.Count; j++)
                    {
                        // Try roots[i] followed by roots[j].
                        int overlapAt = StringUtils.OverlapIndex(roots[i].Text, roots[j].Text);
                        if (overlapAt >= 0 && (roots[i].Text.Length - overlapAt) >= quality)
                        {
                            var newText = roots[i].Text.Substring(0, overlapAt) + roots[j].Text;
                            var newNode = new TextNode(newText, new[] { roots[i], roots[j] });
                            roots.RemoveAt(j);
                            roots[i] = newNode;
                            // Step back so the outer loop revisits slot i with the merged root.
                            i--;
                            break;
                        }
                        // Try the opposite order: roots[j] followed by roots[i].
                        overlapAt = StringUtils.OverlapIndex(roots[j].Text, roots[i].Text);
                        if (overlapAt >= 0 && (roots[j].Text.Length - overlapAt) >= quality)
                        {
                            var newText = roots[j].Text.Substring(0, overlapAt) + roots[i].Text;
                            var newNode = new TextNode(newText, new[] { roots[j], roots[i] });
                            roots.RemoveAt(j);
                            roots[i] = newNode;
                            i--;
                            break;
                        }
                    }
                }
            }

            // Phase 3: write the root texts and remember the byte offset of every word.
            var offsetLookUp = new Dictionary <string, int>();
            int rootOffset   = 0;

            foreach (var root in roots)
            {
                var rootText  = root.Text;
                var rootBytes = Encoding.UTF8.GetBytes(rootText);
                textWriter.Write(rootBytes);
                foreach (var text in root.GetAllString())
                {
                    int startIndex  = rootText.IndexOf(text, StringComparison.Ordinal);
                    // Offsets are in bytes, so the character index is converted via the UTF-8 prefix length.
                    var localOffset = Encoding.UTF8.GetByteCount(rootText.Substring(0, startIndex));
                    offsetLookUp.Add(text, rootOffset + localOffset);
                }
                rootOffset += rootBytes.Length;
            }

            // Phase 4: write the index. The casts are checked so that a value that does not fit
            // the on-disk field fails loudly instead of being truncated into a wrong entry.
            foreach (var word in data.WordLookUpList)
            {
                indexWriter.Write(checked((ushort)offsetLookUp[word]));
                indexWriter.Write(checked((byte)Encoding.UTF8.GetByteCount(word)));
            }
        }