Пример #1
0
        /// <summary>
        /// Swaps the <c>Positions</c> values of the two characters stored at the given indices of the character list.
        /// </summary>
        /// <param name="index1">Index of the first character in the character list.</param>
        /// <param name="index2">Index of the second character in the character list.</param>
        private void ReverseCharacterPositions(int index1, int index2)
        {
            // Read both entries first, then write them back; the write-back keeps the list up to date
            // even if the element type is copied on read.
            DocumentCharacter first  = _characters[index1];
            DocumentCharacter second = _characters[index2];

            DocumentCharacterPositions firstPositions = first.Positions;

            first.Positions  = second.Positions;
            second.Positions = firstPositions;

            _characters[index1] = first;
            _characters[index2] = second;
        }
Пример #2
0
        /// <summary>
        /// Creates a <see cref="DocumentText"/> from the characters of the text elements in an SVG document.
        /// </summary>
        /// <remarks>
        /// Only text elements are visited. A character that repeats the bounds, code and direction of the
        /// character reported immediately before it replaces that character. Whitespace characters are dropped
        /// and their position flags are merged into the closest preceding non-whitespace character.
        /// </remarks>
        /// <param name="document">
        /// SVG document to read. Its filter-elements callback is replaced while the elements are enumerated
        /// and reset to <c>null</c> afterwards, even if the enumeration throws.
        /// </param>
        /// <returns>A new <see cref="DocumentText"/> built from the collected characters.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="document"/> is <c>null</c>.</exception>
        public static DocumentText FromSvgDocument(SvgDocument document)
        {
            if (document == null)
            {
                throw new ArgumentNullException("document");
            }

            // Output list of characters
            var characters = new List<DocumentCharacter>();

            // Tracks whether the whitespace-stripping pass is needed at all
            var hasWhitespaces = false;

            // Last character added to the list, used to detect consecutive duplicates
            var prevCharacter = new DocumentCharacter();

            // Called for each element that passed the filter; collects its characters
            SvgSortElementsCallback sortCallback = (callbackDocument, info, userData) =>
            {
                var textData = info.TextData;
                if (textData == null)
                {
                    // Not text, nothing to collect
                    return true;
                }

                var length = textData.Text.Length;
                for (var i = 0; i < length; i++)
                {
                    // Translate the SVG character flags into document character positions
                    var flags     = textData.CharacterFlags[i];
                    var positions = DocumentCharacterPositions.None;
                    if ((flags & SvgTextCharacterFlags.EndOfWord) == SvgTextCharacterFlags.EndOfWord)
                    {
                        positions |= DocumentCharacterPositions.EndOfWord;
                    }
                    if ((flags & SvgTextCharacterFlags.EndOfLine) == SvgTextCharacterFlags.EndOfLine)
                    {
                        positions |= DocumentCharacterPositions.EndOfLine;
                    }

                    var character = new DocumentCharacter
                    {
                        Code      = textData.Text[i],
                        Bounds    = textData.Bounds[i],
                        Positions = positions,
                        Direction = textData.Directions[i]
                    };

                    // If the character duplicates the previous one, drop the previous one.
                    // The previous character is always the last list entry, so remove it by index:
                    // a value-based Remove would scan the whole list and could hit an earlier, equal entry.
                    if (characters.Count > 0 &&
                        prevCharacter.Bounds == character.Bounds &&
                        prevCharacter.Code == character.Code &&
                        prevCharacter.Direction == character.Direction)
                    {
                        characters.RemoveAt(characters.Count - 1);
                    }

                    characters.Add(character);

                    if (Char.IsWhiteSpace(character.Code))
                    {
                        hasWhitespaces = true;
                    }

                    prevCharacter = character;
                }

                return true;
            };

            // Accept text elements only.
            SvgFilterElementsCallback filterElementsCallback = (svgDocument, nodeHandle, userData) =>
            {
                return nodeHandle.ElementType == SvgElementType.Text;
            };

            // Setup options and sort
            var options = new SvgSortOptions
            {
                ExtractText = SvgExtractText.Character,
                SortFlags   = SvgSortFlags.Default
            };

            document.SetFilterElementsCallback(filterElementsCallback, null);
            try
            {
                document.SortElements(options, sortCallback, null);
            }
            finally
            {
                // Always detach the filter so a failure does not leave it installed on the document
                document.SetFilterElementsCallback(null, null);
            }

            // The character list is complete; before using it, strip the whitespace characters
            if (hasWhitespaces)
            {
                var stripped         = new List<DocumentCharacter>(characters.Count);
                var pendingPositions = DocumentCharacterPositions.None;

                foreach (var candidate in characters)
                {
                    // A whitespace character is not kept; only its position flags are remembered
                    if (Char.IsWhiteSpace(candidate.Code))
                    {
                        pendingPositions |= candidate.Positions;
                        continue;
                    }

                    // Hand the remembered flags to the last kept character.
                    // Flags from leading whitespace have no character to go to and are discarded.
                    if (pendingPositions != DocumentCharacterPositions.None)
                    {
                        if (stripped.Count > 0)
                        {
                            var lastKept = stripped[stripped.Count - 1];
                            lastKept.Positions |= pendingPositions;
                            stripped[stripped.Count - 1] = lastKept;
                        }

                        pendingPositions = DocumentCharacterPositions.None;
                    }

                    stripped.Add(candidate);
                }

                // Flags taken from trailing whitespace still belong to the last kept character
                if (pendingPositions != DocumentCharacterPositions.None && stripped.Count > 0)
                {
                    var lastKept = stripped[stripped.Count - 1];
                    lastKept.Positions |= pendingPositions;
                    stripped[stripped.Count - 1] = lastKept;
                }

                characters = stripped;
            }

            return new DocumentText(characters);
        }
Пример #3
0
        /// <summary>
        /// Groups the entries of the character list into words and stores the resulting list in <c>_words</c>.
        /// </summary>
        /// <remarks>
        /// A word ends at the first character flagged <c>EndOfWord</c> or <c>EndOfLine</c>, or at the end of
        /// the character list. Words containing a right-to-left character have segments of their text reversed,
        /// and consecutive such words on the same line are inserted in front of each other instead of appended.
        /// </remarks>
        public void BuildWords()
        {
            var builtWords          = new List<DocumentWord>();
            var total               = _characters.Count;
            var position            = 0;
            var previousWasReversed = false;
            var reverseRunStart     = -1;

            // Each pass of this loop consumes exactly one word
            while (position < total)
            {
                var firstIndex = position;
                var bounds     = LeadRectD.Empty;
                var isReversed = false;
                var text       = new StringBuilder();

                DocumentCharacter current;
                bool terminated;

                // Consume characters up to and including the one that ends the word
                do
                {
                    current = _characters[position];
                    text.Append(current.Code);

                    // Grow the word rectangle to include this character
                    var currentBounds = current.Bounds;
                    if (!bounds.IsEmpty)
                    {
                        bounds = LeadRectD.UnionRects(bounds, currentBounds);
                    }
                    else
                    {
                        bounds = currentBounds;
                    }

                    var flags = current.Positions;
                    terminated = (flags & DocumentCharacterPositions.EndOfWord) == DocumentCharacterPositions.EndOfWord ||
                                 (flags & DocumentCharacterPositions.EndOfLine) == DocumentCharacterPositions.EndOfLine;

                    position++;

                    // A single right-to-left character marks the whole word as reversed
                    if (current.Direction == SvgCharacterDirection.RightToLeft)
                    {
                        isReversed = true;
                    }
                }
                while (!terminated && position < total);

                var word = new DocumentWord
                {
                    Value               = text.ToString(),
                    Bounds              = bounds,
                    FirstCharacterIndex = firstIndex,
                    LastCharacterIndex  = position - 1
                };

                if (isReversed)
                {
                    var letters      = word.Value.ToCharArray();
                    var segmentStart = 0;

                    for (var offset = 0; offset < letters.Length; offset++)
                    {
                        if (_characters[firstIndex + offset].Direction != SvgCharacterDirection.RightToLeft)
                        {
                            continue;
                        }

                        // A segment closes on the last character of the word or right before a left-to-right character
                        var following   = offset + 1;
                        var segmentEnds = following >= letters.Length ||
                                          _characters[firstIndex + following].Direction == SvgCharacterDirection.LeftToRight;
                        if (segmentEnds)
                        {
                            Array.Reverse(letters, segmentStart, following - segmentStart);

                            // NOTE(review): the next segment starts at this offset, not the one after it,
                            // so consecutive segments overlap by one character - confirm this is intended.
                            segmentStart = offset;
                        }
                    }

                    word.Value = new string(letters);
                }

                if (previousWasReversed && isReversed)
                {
                    // Continue (or open) a run of reversed words: each new word goes in front of the run
                    if (reverseRunStart == -1)
                    {
                        reverseRunStart = builtWords.Count - 1;
                    }

                    builtWords.Insert(reverseRunStart, word);
                }
                else
                {
                    // Close any open run before appending: swap the position flags of the last characters
                    // of the word at the start of the run and of the last word in the list
                    if (reverseRunStart != -1)
                    {
                        ReverseCharacterPositions(
                            builtWords[reverseRunStart].LastCharacterIndex,
                            builtWords[builtWords.Count - 1].LastCharacterIndex);
                        reverseRunStart = -1;
                    }

                    builtWords.Add(word);
                }

                // A reversed word that ends its line does not carry over to the next word
                var endsLine = (current.Positions & DocumentCharacterPositions.EndOfLine) == DocumentCharacterPositions.EndOfLine;
                previousWasReversed = isReversed && !endsLine;
            }

            // Close a run that was still open when the characters ran out
            if (reverseRunStart != -1)
            {
                ReverseCharacterPositions(
                    builtWords[reverseRunStart].LastCharacterIndex,
                    builtWords[builtWords.Count - 1].LastCharacterIndex);
            }

            _words = builtWords;
        }