Пример #1
0
        /// <summary>
        /// Collects up to <paramref name="count"/> text tokens located before position
        /// <paramref name="startTokenId"/>, visiting one position at a time in descending order.
        /// </summary>
        /// <param name="bookTokenIterator">Iterator used for the scan; it is moved back to <paramref name="startTokenId"/> before returning.</param>
        /// <param name="startTokenId">Position whose preceding context is gathered.</param>
        /// <param name="count">Maximum number of text tokens to collect.</param>
        /// <returns>The preceding text tokens in reading order. Scanning stops at the first <c>NewPageToken</c> or at position 0.</returns>
        private List <TextToken> GetSearchBeforeContext(BookTokenIterator bookTokenIterator, int startTokenId, int count = 8)
        {
            var context = new List <TextToken>();

            for (var position = startTokenId - 1; position >= 0 && context.Count < count; position--)
            {
                bookTokenIterator.MoveTo(position);
                bookTokenIterator.MoveNext();

                var current = bookTokenIterator.Current;
                if (current is NewPageToken)
                {
                    break;
                }

                var word = current as TextToken;
                if (word != null)
                {
                    // The scan runs backwards, so each newly found token goes in front of the earlier finds.
                    context.Insert(0, word);
                }
            }

            // Restore the iterator to the starting token before handing control back.
            bookTokenIterator.MoveTo(startTokenId);
            bookTokenIterator.MoveNext();
            return context;
        }
Пример #2
0
        /// <summary>
        /// Reads text tokens of <paramref name="book"/> starting at <paramref name="tokenOffset"/> and
        /// returns their text joined with single spaces.
        /// </summary>
        /// <param name="book">Book whose token file is opened for the duration of the call.</param>
        /// <param name="tokenOffset">Position the iterator is moved to before reading.</param>
        /// <param name="wordsCount">Maximum number of text tokens to read.</param>
        /// <param name="lastTokenId">Receives the ID of the last text token read, or -1 when none was read.</param>
        /// <returns>The collected words separated by spaces; reading also stops at a <c>NewPageToken</c> once at least one word was collected.</returns>
        public string GetText(BookModel book, int tokenOffset, int wordsCount, out int lastTokenId)
        {
            var words = new List <string>();
            lastTokenId = -1;

            using (var tokenIterator = new BookTokenIterator(book.GetTokensPath(), TokensTool.GetTokens(book.BookID)))
            {
                tokenIterator.MoveTo(tokenOffset);

                // words.Count doubles as the running word counter.
                while (tokenIterator.MoveNext() && words.Count < wordsCount)
                {
                    var current = tokenIterator.Current;
                    if (words.Count > 0 && current is NewPageToken)
                    {
                        break;
                    }

                    var textToken = current as TextToken;
                    if (textToken != null)
                    {
                        lastTokenId = textToken.ID;
                        words.Add(textToken.Text);
                    }
                }
            }

            return string.Join(" ", words);
        }
Пример #3
0
        /// <summary>
        /// Lazily produces the blocks returned by <c>OutputLines</c> while feeding tokens from
        /// <paramref name="bookTokens"/> into <c>Append</c>. This is an iterator method, so none of
        /// the code below (including the field resets) runs until the result is enumerated.
        /// </summary>
        /// <param name="bookTokens">Token source; it is advanced with <c>MoveNext</c> until it is exhausted or <c>Append</c> returns false.</param>
        /// <param name="lastText">Forwarded unchanged to <c>Append</c>.</param>
        /// <param name="firstTokenID">Stored in <c>_firstTokenID</c> and passed to <c>BuildTree</c>.</param>
        /// <param name="stopTokenID">Forwarded to <c>Append</c>; lowered by one first when it is positive and <paramref name="stopText"/> is null or empty.</param>
        /// <param name="stopText">Forwarded unchanged to <c>Append</c>.</param>
        /// <returns>The sequence of blocks yielded from <c>OutputLines(false)</c> during reading, followed by those from <c>OutputLines(true)</c>.</returns>
        public IEnumerable<TokenBlockBase> GetLines(BookTokenIterator bookTokens, string lastText, int firstTokenID,
                                                    int stopTokenID = -1, string stopText = null)
        {
            // Reset the per-run state kept in instance fields.
            _firstTokenID = firstTokenID;

            _tree = bookTokens.BuildTree(_firstTokenID);
            _lastOpenTag = _tree.Peek();
            // NOTE(review): GetCurrentFontSize presumably reads the tree/tag state set just above - keep this order.
            _fontSize = GetCurrentFontSize();
            _separator = false;
            bool firstText = true;
            _marginLeft = _marginRight = 0.0;

            // Apply the margins of every open tag in the tree, in reverse enumeration order.
            foreach (TagOpenToken openTagToken in _tree.Reverse())
                EnterMargin(openTagToken.TextProperties);

            // Without a stop text, a positive stop ID is moved back by one.
            // NOTE(review): presumably so the stop token itself is not consumed - confirm against Append.
            if (string.IsNullOrEmpty(stopText) && stopTokenID > 0)
                --stopTokenID;

            while (bookTokens.MoveNext())
            {
                // Emit whatever OutputLines(false) returns before the current token is appended.
                foreach (TokenBlockBase baseTokenLine in OutputLines(false))
                    yield return baseTokenLine;

                // Append returning false ends the reading loop.
                if (!Append(bookTokens, lastText, stopTokenID, stopText, ref firstText)) 
                    break;
            }
            // Final pass with the flag set to true.
            // NOTE(review): presumably flushes the remaining buffered content - confirm in OutputLines.
            foreach (TokenBlockBase baseTokenLine in OutputLines(true))
                yield return baseTokenLine;
        }
Пример #4
0
        /// <summary>
        /// Collects up to <paramref name="count"/> text tokens located after position
        /// <paramref name="endTokenId"/>, visiting one position at a time in ascending order.
        /// </summary>
        /// <param name="bookTokenIterator">Iterator used for the scan; it is moved back to <paramref name="endTokenId"/> before returning.</param>
        /// <param name="endTokenId">Position whose following context is gathered.</param>
        /// <param name="count">Maximum number of text tokens to collect.</param>
        /// <returns>The following text tokens in reading order. Scanning stops at the first <c>NewPageToken</c> or when the position reaches <c>bookTokenIterator.Count</c>.</returns>
        private List <TextToken> GetSearchAfterContext(BookTokenIterator bookTokenIterator, int endTokenId, int count = 8)
        {
            var context = new List <TextToken>();

            for (var position = endTokenId + 1;
                 position < bookTokenIterator.Count && context.Count < count;
                 position++)
            {
                bookTokenIterator.MoveTo(position);
                bookTokenIterator.MoveNext();

                var current = bookTokenIterator.Current;
                if (current is NewPageToken)
                {
                    break;
                }

                var word = current as TextToken;
                if (word != null)
                {
                    context.Add(word);
                }
            }

            // Restore the iterator to the end token before handing control back.
            bookTokenIterator.MoveTo(endTokenId);
            bookTokenIterator.MoveNext();
            return context;
        }
Пример #5
0
        /// <summary>
        /// Scans the remaining tokens for text tokens whose text contains <paramref name="query"/>
        /// (invariant culture, case-insensitive) and records each hit with its surrounding context.
        /// </summary>
        /// <param name="bookTokenIterator">Iterator to scan from its current position.</param>
        /// <param name="query">Substring to look for.</param>
        /// <param name="count">The scan stops once the number of hits reaches this value (checked after each hit is added).</param>
        /// <returns>The hits found, each with the context from <c>GetSearchBeforeContext</c> and <c>GetSearchAfterContext</c>.</returns>
        private List <BookSearchResult> SearchOneWord(BookTokenIterator bookTokenIterator, string query, int count)
        {
            var hits = new List <BookSearchResult>();

            while (bookTokenIterator.MoveNext())
            {
                var candidate = bookTokenIterator.Current as TextToken;
                if (candidate == null ||
                    candidate.Text.IndexOf(query, StringComparison.InvariantCultureIgnoreCase) < 0)
                {
                    continue;
                }

                var before = GetSearchBeforeContext(bookTokenIterator, candidate.ID);
                var after  = GetSearchAfterContext(bookTokenIterator, candidate.ID);

                var matchedTokens = new List <TextToken>();
                matchedTokens.Add(candidate);

                var hit = new BookSearchResult();
                hit.PreviousContext = before;
                hit.SearchResult    = matchedTokens;
                hit.NextContext     = after;
                hits.Add(hit);

                if (hits.Count >= count)
                {
                    break;
                }
            }

            return hits;
        }
Пример #6
0
        /// <summary>
        /// Searches the remaining tokens for a run of text tokens matching the words of
        /// <paramref name="query"/>: the first word is located with <c>FindFirstWord</c>, every middle
        /// word is verified with <c>CheckIntermediateWord</c> and the last one with <c>CheckLastWord</c>.
        /// </summary>
        /// <param name="bookTokenIterator">Iterator to scan from its current position.</param>
        /// <param name="query">Words of the phrase; must contain at least one element (the caller passes two or more).</param>
        /// <param name="count">The scan stops once the number of hits reaches this value (checked after each hit is added).</param>
        /// <returns>The phrase hits found, each with its preceding and following context.</returns>
        private List <BookSearchResult> SearchGroupWords(BookTokenIterator bookTokenIterator, List <string> query, int count)
        {
            var result = new List <BookSearchResult>();

            var       firstWordQuery = query[0];
            var       lastWordQuery  = query.Last();
            TextToken firstWordToken;

            while ((firstWordToken = FindFirstWord(bookTokenIterator, firstWordQuery)) != null)
            {
                var resultSequence = new List <TextToken> {
                    firstWordToken
                };

                bool matched = true;
                for (int i = 1; i < query.Count - 1; i++)
                {
                    TextToken intermediateToken;
                    if (!CheckIntermediateWord(bookTokenIterator, query[i], out intermediateToken))
                    {
                        matched = false;
                        break;
                    }

                    resultSequence.Add(intermediateToken);
                }

                TextToken lastToken = null;
                if (matched)
                {
                    matched = CheckLastWord(bookTokenIterator, lastWordQuery, out lastToken);
                }

                if (!matched)
                {
                    // The failed checks consumed tokens that may themselves start a match
                    // (query "the cat" in "the the cat"). Step back to the candidate first word
                    // so the next FindFirstWord resumes right after it instead of skipping them.
                    bookTokenIterator.MoveTo(firstWordToken.ID);
                    bookTokenIterator.MoveNext();
                    continue;
                }

                resultSequence.Add(lastToken);

                // Both context helpers reposition the iterator; the second leaves it on lastToken,
                // so the search continues after the matched phrase.
                var previousContext = GetSearchBeforeContext(bookTokenIterator, firstWordToken.ID);
                var afterContext    = GetSearchAfterContext(bookTokenIterator, lastToken.ID);

                result.Add(new BookSearchResult
                {
                    PreviousContext = previousContext,
                    SearchResult    = resultSequence,
                    NextContext     = afterContext
                });

                if (result.Count >= count)
                {
                    break;
                }
            }
            return(result);
        }
Пример #7
0
 /// <summary>
 /// Replaces the token iterator with a fresh one for the current book and moves the new
 /// iterator to the token the old one was on. Does nothing when no iterator or book is set.
 /// </summary>
 public void Init()
 {
     if (_bookTokenIterator == null || _book == null)
     {
         return;
     }

     // Remember the position before the old iterator is disposed.
     int resumeTokenId = _bookTokenIterator.Current.ID;
     _bookTokenIterator.Dispose();

     var tokensPath = _book.GetTokensPath();
     var tokens = TokensTool.GetTokens(_book.BookID);
     _bookTokenIterator = new BookTokenIterator(tokensPath, tokens);

     _bookTokenIterator.MoveTo(resumeTokenId);
     _bookTokenIterator.MoveNext();
 }
Пример #8
0
 /// <summary>
 /// Rebuilds the token iterator for the current book, keeping the current token position.
 /// The call is a no-op unless both the iterator and the book are already set.
 /// </summary>
 public void Init()
 {
     bool canReinitialize = _bookTokenIterator != null && _book != null;
     if (!canReinitialize)
     {
         return;
     }

     // The position has to be read before the old iterator is disposed.
     int position = _bookTokenIterator.Current.ID;
     _bookTokenIterator.Dispose();

     _bookTokenIterator = new BookTokenIterator(
         _book.GetTokensPath(),
         TokensTool.GetTokens(_book.BookID));
     _bookTokenIterator.MoveTo(position);
     _bookTokenIterator.MoveNext();
 }
Пример #9
0
 /// <summary>
 /// Starts a task (via <c>Task.Factory.StartNew</c>) that opens a token iterator for the current
 /// book, obtains a parser from <c>BookFactory.GetBookParser</c> and asks it for one page.
 /// </summary>
 /// <param name="tokenID">Passed to <c>GetPage</c>.</param>
 /// <param name="startText">Passed to <c>GetPage</c>.</param>
 /// <returns>A task producing the page; the iterator is disposed when the task body finishes.</returns>
 public Task <PageInfo> GetPageAsync(int tokenID, string startText)
 {
     return Task.Factory.StartNew <PageInfo>(() =>
     {
         using (var tokens = new BookTokenIterator(_book.GetTokensPath(), TokensTool.GetTokens(_book.BookID)))
         {
             var parser = BookFactory.GetBookParser(_book.Type, tokens, _fontSize, _pageSize, _images);
             return parser.GetPage(tokenID, startText);
         }
     });
 }
Пример #10
0
 /// <summary>
 /// Produces a page on a task created with <c>StartNew</c>: a token iterator is opened for the
 /// current book, handed to the parser returned by <c>BookFactory.GetBookParser</c>, and disposed
 /// once <c>GetPage</c> has returned.
 /// </summary>
 /// <param name="tokenID">Forwarded to <c>GetPage</c>.</param>
 /// <param name="startText">Forwarded to <c>GetPage</c>.</param>
 /// <returns>A task whose result is the page returned by the parser.</returns>
 public Task<PageInfo> GetPageAsync(int tokenID, string startText)
 {
     return Task<PageInfo>.Factory.StartNew(() =>
     {
         using (var tokens = new BookTokenIterator(_book.GetTokensPath(), TokensTool.GetTokens(_book.BookID)))
         {
             return BookFactory
                 .GetBookParser(_book.Type, tokens, _fontSize, _pageSize, _images)
                 .GetPage(tokenID, startText);
         }
     });
 }
Пример #11
0
 /// <summary>
 /// Creates the book builder for the given book type.
 /// </summary>
 /// <param name="bookType">One of "fb2", "txt", "epub" or "html".</param>
 /// <param name="bookTokens">Token iterator handed to the builder.</param>
 /// <param name="fontSize">Text size handed to the builder.</param>
 /// <param name="pageSize">Page size handed to the builder.</param>
 /// <param name="images">Book images handed to the builder.</param>
 /// <returns>A <c>BookBuilder</c> configured with the header sizes 24/32/42, the active font helper and the current application settings.</returns>
 /// <exception cref="NotSupportedException">The book type is not one of the four supported values.</exception>
 public static IBookBuilder GetBookParser(string bookType, BookTokenIterator bookTokens, int fontSize, Size pageSize, IEnumerable<BookImage> images)
 {
     var headerSizes = new ReadOnlyCollection<double>(new List<double> {24, 32, 42});
     IFontHelper activeFontHelper = GetActiveFontMetrics(AppSettings.Default.FontSettings.FontFamily.Source);

     bool supported = bookType == "fb2"
                      || bookType == "txt"
                      || bookType == "epub"
                      || bookType == "html";
     if (!supported)
     {
         throw new NotSupportedException("Book type '" + bookType + "' is not supported!");
     }

     // All supported formats share the same builder.
     return new BookBuilder(
         bookTokens,
         images,
         headerSizes,
         activeFontHelper,
         pageSize,
         fontSize,
         AppSettings.Default.Hyphenation,
         AppSettings.Default.UseCSSFontSize);
 }
Пример #12
0
        /// <summary>
        /// Starts a search for <paramref name="query"/> in <paramref name="book"/> on a new task.
        /// Any iterator held from an earlier call is disposed and replaced with one for this book.
        /// </summary>
        /// <param name="book">Book to search; null yields an empty result.</param>
        /// <param name="query">Raw query text; null or empty yields an empty result.</param>
        /// <param name="count">Forwarded to <c>Load</c>.</param>
        /// <returns>A task producing the search results.</returns>
        public Task<List<BookSearchResult>> Search(BookModel book, string query, int count)
        {
            var taskFactory = Task<List<BookSearchResult>>.Factory;

            if (book == null || string.IsNullOrEmpty(query))
            {
                return taskFactory.StartNew(() => new List<BookSearchResult>());
            }

            if (_bookTokenIterator != null)
            {
                _bookTokenIterator.Dispose();
            }

            _book = book;
            _bookTokenIterator = new BookTokenIterator(_book.GetTokensPath(), TokensTool.GetTokens(_book.BookID));
            _query = PrepareQuery(query);

            // The lambda reads the fields when the task runs, not when it is created.
            return taskFactory.StartNew(() => Load(_bookTokenIterator, _query, count));
        }
Пример #13
0
        /// <summary>
        /// Advances the iterator until it reaches a text token whose text ends with
        /// <paramref name="query"/> (invariant culture, case-insensitive).
        /// </summary>
        /// <param name="bookTokenIterator">Iterator to advance; it is left on the returned token.</param>
        /// <param name="query">Suffix to look for.</param>
        /// <returns>The matching token, or null when the iterator runs out of tokens.</returns>
        private TextToken FindFirstWord(BookTokenIterator bookTokenIterator, string query)
        {
            while (bookTokenIterator.MoveNext())
            {
                var candidate = bookTokenIterator.Current as TextToken;
                if (candidate != null &&
                    candidate.Text.EndsWith(query, StringComparison.InvariantCultureIgnoreCase))
                {
                    return candidate;
                }
            }

            return null;
        }
Пример #14
0
 /// <summary>
 /// Replaces the token iterator with a fresh one for the current book and moves it to the
 /// token the old iterator was on, falling back to position 0 when that token cannot be read.
 /// Does nothing when no iterator or book is set.
 /// </summary>
 public void Init()
 {
     bool ready = _bookTokenIterator != null && _book != null;
     if (!ready)
     {
         return;
     }

     int resumeFrom;
     try
     {
         resumeFrom = _bookTokenIterator.Current.ID;
     }
     catch (Exception)
     {
         // Best effort: any failure to read the current token restarts from the beginning.
         resumeFrom = 0;
     }

     _bookTokenIterator.Dispose();
     _bookTokenIterator = new BookTokenIterator(
         _book.GetTokensPath(),
         TokensTool.GetTokens(_book.BookID));
     _bookTokenIterator.MoveTo(resumeFrom);
     _bookTokenIterator.MoveNext();
 }
Пример #15
0
        /// <summary>
        /// Returns the builder used to lay out a book of the given type.
        /// </summary>
        /// <param name="bookType">Book format; "fb2", "txt", "epub" and "html" are accepted.</param>
        /// <param name="bookTokens">Token iterator passed to the builder.</param>
        /// <param name="fontSize">Text size passed to the builder.</param>
        /// <param name="pageSize">Page size passed to the builder.</param>
        /// <param name="images">Book images passed to the builder.</param>
        /// <returns>A <c>BookBuilder</c> set up with header sizes 24/32/42, the active font helper and the current application settings.</returns>
        /// <exception cref="NotSupportedException">Thrown for any other book type.</exception>
        public static IBookBuilder GetBookParser(string bookType, BookTokenIterator bookTokens, int fontSize, Size pageSize, IEnumerable <BookImage> images)
        {
            var headerSizeValues = new List <double> { 24, 32, 42 };
            var headerSizes      = new ReadOnlyCollection <double>(headerSizeValues);
            IFontHelper activeFontHelper = GetActiveFontMetrics(AppSettings.Default.FontSettings.FontFamily.Source);

            var supportedTypes = new[] { "fb2", "txt", "epub", "html" };
            if (Array.IndexOf(supportedTypes, bookType) < 0)
            {
                throw new NotSupportedException("Book type '" + bookType + "' is not supported!");
            }

            // Every supported format is handled by the same builder.
            var hyphenation    = AppSettings.Default.Hyphenation;
            var useCssFontSize = AppSettings.Default.UseCSSFontSize;
            return new BookBuilder(bookTokens, images, headerSizes, activeFontHelper, pageSize, fontSize, hyphenation, useCssFontSize);
        }
Пример #16
0
 /// <summary>
 /// Creates a builder and stores every argument in the corresponding private field.
 /// No validation or other work is done here.
 /// </summary>
 /// <param name="bookTokens">Token iterator for the book.</param>
 /// <param name="images">Images of the book.</param>
 /// <param name="headerSizes">Header sizes.</param>
 /// <param name="helper">Font helper.</param>
 /// <param name="pageSize">Page size.</param>
 /// <param name="textSize">Text size.</param>
 /// <param name="hyphenation">Hyphenation flag.</param>
 /// <param name="useCssFontSize">CSS font size flag.</param>
 public BookBuilder(BookTokenIterator bookTokens, IEnumerable<BookImage> images, IList<double> headerSizes,
                    IFontHelper helper, Size pageSize, double textSize, bool hyphenation, bool useCssFontSize)
 {
     // Content sources.
     this._bookTokens = bookTokens;
     this._images = images;

     // Typography.
     this._headerSizes = headerSizes;
     this._helper = helper;

     // Layout.
     this._pageSize = pageSize;
     this._textSize = textSize;

     // Behaviour switches.
     this._hyphenation = hyphenation;
     this._useCssFontSize = useCssFontSize;
 }
Пример #17
0
        /// <summary>
        /// Advances to the next text token and tests whether its text starts with
        /// <paramref name="query"/> (invariant culture, case-insensitive). Only that one text
        /// token is examined; non-text tokens in between are skipped.
        /// </summary>
        /// <param name="bookTokenIterator">Iterator to advance.</param>
        /// <param name="query">Prefix to look for.</param>
        /// <param name="result">Receives the token on a match, otherwise null.</param>
        /// <returns>True on a match; false on a mismatch or when no text token is left.</returns>
        private bool CheckLastWord(BookTokenIterator bookTokenIterator, string query, out TextToken result)
        {
            result = null;

            while (bookTokenIterator.MoveNext())
            {
                var candidate = bookTokenIterator.Current as TextToken;
                if (candidate == null)
                {
                    continue;
                }

                // The first text token reached decides the outcome either way.
                var matches = candidate.Text.StartsWith(query, StringComparison.InvariantCultureIgnoreCase);
                if (matches)
                {
                    result = candidate;
                }
                return matches;
            }

            return false;
        }
Пример #18
0
        /// <summary>
        /// Begins searching <paramref name="book"/> for <paramref name="query"/> on a new task.
        /// A previously held iterator is disposed and a new one is opened for this book.
        /// </summary>
        /// <param name="book">Book to search; null produces an empty result.</param>
        /// <param name="query">Raw query text; null or empty produces an empty result.</param>
        /// <param name="count">Passed through to <c>Load</c>.</param>
        /// <returns>A task producing the list of search results.</returns>
        public Task <List <BookSearchResult> > Search(BookModel book, string query, int count)
        {
            var nothingToSearch = book == null || string.IsNullOrEmpty(query);
            if (nothingToSearch)
            {
                return Task <List <BookSearchResult> > .Factory.StartNew(() => new List <BookSearchResult>());
            }

            var previousIterator = _bookTokenIterator;
            if (previousIterator != null)
            {
                previousIterator.Dispose();
            }

            _book = book;
            var tokensPath = _book.GetTokensPath();
            var tokens     = TokensTool.GetTokens(_book.BookID);
            _bookTokenIterator = new BookTokenIterator(tokensPath, tokens);

            _query = PrepareQuery(query);

            // The fields are read inside the lambda, i.e. when the task actually runs.
            return Task <List <BookSearchResult> > .Factory.StartNew(() => Load(_bookTokenIterator, _query, count));
        }
Пример #19
0
        /// <summary>
        /// Locates an opening tag near <paramref name="tokenOffset"/> with <c>FindNewToken</c> and
        /// returns the text that follows it, joined with single spaces.
        /// </summary>
        /// <param name="book">Book whose token file is opened for the duration of the call.</param>
        /// <param name="tokenOffset">Position handed to <c>FindNewToken</c>.</param>
        /// <param name="pointer">Receives the <c>Pointer</c> of the tag that was found.</param>
        /// <returns>The text tokens read from the tag onwards; reading stops at the next <c>TagOpenToken</c> once at least one word was collected.</returns>
        public string GetLastParagraphByToken(BookModel book, int tokenOffset, out string pointer)
        {
            var words = new List<string>();

            using (var tokenIterator = new BookTokenIterator(book.GetTokensPath(), TokensTool.GetTokens(book.BookID)))
            {
                var startTag = FindNewToken(tokenOffset, tokenIterator);
                pointer = startTag.Pointer;

                tokenIterator.MoveTo(startTag.ID);
                while (tokenIterator.MoveNext())
                {
                    var current = tokenIterator.Current;
                    if (words.Count > 0 && current is TagOpenToken)
                    {
                        break;
                    }

                    var textToken = current as TextToken;
                    if (textToken != null)
                    {
                        words.Add(textToken.Text);
                    }
                }
            }

            return string.Join(" ", words);
        }
Пример #20
0
        /// <summary>
        /// Runs the search that fits the number of query words: <c>SearchOneWord</c> for a single
        /// word, <c>SearchGroupWords</c> for several, and an empty result for an empty query.
        /// </summary>
        /// <param name="bookTokenIterator">Iterator handed to the search routine.</param>
        /// <param name="query">Prepared query words.</param>
        /// <param name="count">Handed to the search routine.</param>
        /// <returns>The search results.</returns>
        /// <exception cref="SearchInBookInterruptedException">Wraps a <c>TokenIteratorUnableMoveNextException</c> raised during the search.</exception>
        private List<BookSearchResult> Load(BookTokenIterator bookTokenIterator, List<string> query, int count)
        {
            try
            {
                if (query.Count == 1)
                {
                    return SearchOneWord(bookTokenIterator, query[0], count);
                }

                if (query.Count > 1)
                {
                    return SearchGroupWords(bookTokenIterator, query, count);
                }

                return new List<BookSearchResult>();
            }
            catch (TokenIteratorUnableMoveNextException tokenExp)
            {
                throw new SearchInBookInterruptedException("Book tokenizer exception has occured", tokenExp);
            }
        }
Пример #21
0
        /// <summary>
        /// Dispatches to the single-word or multi-word search depending on how many words
        /// <paramref name="query"/> holds; an empty query gives an empty result.
        /// </summary>
        /// <param name="bookTokenIterator">Iterator passed to the search routine.</param>
        /// <param name="query">Prepared query words.</param>
        /// <param name="count">Passed to the search routine.</param>
        /// <returns>The search results.</returns>
        /// <exception cref="SearchInBookInterruptedException">Thrown in place of a <c>TokenIteratorUnableMoveNextException</c>, which becomes the inner exception.</exception>
        private List <BookSearchResult> Load(BookTokenIterator bookTokenIterator, List <string> query, int count)
        {
            List <BookSearchResult> found;

            try
            {
                var wordCount = query.Count;
                if (wordCount > 1)
                {
                    found = SearchGroupWords(bookTokenIterator, query, count);
                }
                else if (wordCount == 1)
                {
                    found = SearchOneWord(bookTokenIterator, query[0], count);
                }
                else
                {
                    found = new List <BookSearchResult>();
                }
            }
            catch (TokenIteratorUnableMoveNextException tokenExp)
            {
                throw new SearchInBookInterruptedException("Book tokenizer exception has occured", tokenExp);
            }

            return found;
        }
Пример #22
0
        /// <summary>
        /// Returns the text of up to <paramref name="wordsCount"/> text tokens of
        /// <paramref name="book"/>, read from <paramref name="tokenOffset"/> onwards and joined with spaces.
        /// </summary>
        /// <param name="book">Book whose token file is opened while reading.</param>
        /// <param name="tokenOffset">Position the iterator is moved to first.</param>
        /// <param name="wordsCount">Upper bound on the number of text tokens read.</param>
        /// <param name="lastTokenId">Set to the ID of the last text token read, or -1 if there was none.</param>
        /// <returns>The words separated by single spaces; a <c>NewPageToken</c> ends the reading once at least one word is present.</returns>
        public string GetText(BookModel book, int tokenOffset, int wordsCount, out int lastTokenId)
        {
            lastTokenId = -1;
            var collected = new List<string>();

            using (var tokenIterator = new BookTokenIterator(book.GetTokensPath(), TokensTool.GetTokens(book.BookID)))
            {
                tokenIterator.MoveTo(tokenOffset);

                for (var taken = 0; tokenIterator.MoveNext() && taken < wordsCount;)
                {
                    var current = tokenIterator.Current;

                    var pageEnded = current is NewPageToken && collected.Count > 0;
                    if (pageEnded)
                    {
                        break;
                    }

                    var word = current as TextToken;
                    if (word == null)
                    {
                        continue;
                    }

                    lastTokenId = word.ID;
                    collected.Add(word.Text);
                    taken++;
                }
            }

            return string.Join(" ", collected);
        }
Пример #23
0
        /// <summary>
        /// Skips ahead to the next text token and reports whether its text starts with
        /// <paramref name="query"/> (invariant culture, case-insensitive). Exactly one text token is tested.
        /// </summary>
        /// <param name="bookTokenIterator">Iterator to advance.</param>
        /// <param name="query">Prefix to look for.</param>
        /// <param name="result">The tested token when it matches, otherwise null.</param>
        /// <returns>True when the token matches; false when it does not or when no text token remains.</returns>
        private bool CheckLastWord(BookTokenIterator bookTokenIterator, string query, out TextToken result)
        {
            result = null;

            // Step over non-text tokens until the first text token is reached.
            TextToken nextText = null;
            while (nextText == null && bookTokenIterator.MoveNext())
            {
                nextText = bookTokenIterator.Current as TextToken;
            }

            if (nextText == null)
            {
                return false;
            }

            if (!nextText.Text.StartsWith(query, StringComparison.InvariantCultureIgnoreCase))
            {
                return false;
            }

            result = nextText;
            return true;
        }
Пример #24
0
        /// <summary>
        /// Advances the iterator until it reaches a text token whose text ends with
        /// <paramref name="query"/>, compared with invariant culture and ignoring case.
        /// </summary>
        /// <param name="bookTokenIterator">Iterator to advance; it is left on the returned token.</param>
        /// <param name="query">Suffix to look for.</param>
        /// <returns>The matching token, or null when the iterator runs out of tokens.</returns>
        private TextToken FindFirstWord(BookTokenIterator bookTokenIterator, string query)
        {
            while (bookTokenIterator.MoveNext())
            {
                var textToken = bookTokenIterator.Current as TextToken;
                if (textToken == null)
                    continue;

                // EndsWith(string) alone is case-sensitive and uses the current culture, so results
                // varied with the device locale and disagreed with the case-insensitive checks used
                // for the other words of the query. The comparison is now explicit.
                if (textToken.Text.EndsWith(query, System.StringComparison.InvariantCultureIgnoreCase))
                {
                    return textToken;
                }
            }
            return null;
        }
Пример #25
0
        /// <summary>
        /// Feeds the iterator's current token to the matching append routine and reports whether
        /// reading should continue.
        /// </summary>
        /// <param name="bookTokens">Iterator whose <c>Current</c> token is processed.</param>
        /// <param name="lastText">Forwarded to <c>AppendTextToken</c>.</param>
        /// <param name="stopTokenID">Forwarded to <c>AppendTextToken</c>; when non-negative, a token ID at or above it ends the reading.</param>
        /// <param name="stopText">Forwarded to <c>AppendTextToken</c>.</param>
        /// <param name="firstText">Forwarded by reference to <c>AppendTextToken</c>.</param>
        /// <returns>False when <c>AppendTextToken</c> returned true or the stop ID was reached; true otherwise.</returns>
        private bool Append(BookTokenIterator bookTokens, string lastText, int stopTokenID, string stopText, ref bool firstText)
        {
            TokenBase current = bookTokens.Current;

            // Each kind is tested independently; the typed locals select the AppendToLine overload.
            var pageBreak = current as NewPageToken;
            if (pageBreak != null)
            {
                AppendToLine(pageBreak);
            }

            var picture = current as PictureToken;
            if (picture != null)
            {
                AppendToLine(picture);
            }

            var tagOpen = current as TagOpenToken;
            if (tagOpen != null)
            {
                AppendToLine(tagOpen);
            }

            var tagClose = current as TagCloseToken;
            if (tagClose != null)
            {
                AppendToLine(tagClose);
            }

            if (current is WhitespaceToken)
            {
                AppendSeparator();
            }

            var text = current as TextToken;
            if (text != null && AppendTextToken(text, lastText, stopTokenID, stopText, ref firstText))
            {
                return false;
            }

            var reachedStop = stopTokenID >= 0 && current.ID >= stopTokenID;
            return !reachedStop;
        }
Пример #26
0
        /// <summary>
        /// Advances to the next text token and tests whether its text equals
        /// <paramref name="query"/>, compared with invariant culture and ignoring case.
        /// Only that one text token is examined; non-text tokens in between are skipped.
        /// </summary>
        /// <param name="bookTokenIterator">Iterator to advance.</param>
        /// <param name="query">Word the token text must equal.</param>
        /// <param name="result">Receives the token on a match, otherwise null.</param>
        /// <returns>True on a match; false on a mismatch or when no text token is left.</returns>
        private bool CheckIntermediateWord(BookTokenIterator bookTokenIterator, string query, out TextToken result)
        {
            result = null;
            while (bookTokenIterator.MoveNext())
            {
                var textToken = bookTokenIterator.Current as TextToken;
                if (textToken == null)
                    continue;

                // Text.Equals(query) was an ordinal, case-sensitive test, while the first and last
                // words of a phrase are matched ignoring case; a phrase therefore failed whenever
                // only a middle word differed in case. The static overload also tolerates null text.
                if (string.Equals(textToken.Text, query, System.StringComparison.InvariantCultureIgnoreCase))
                {
                    result = textToken;
                    return true;
                }
                return false;
            }
            return false;
        }
Пример #27
0
 /// <summary>
 /// Creates a search object for <paramref name="book"/> and opens a token iterator over its tokens.
 /// </summary>
 /// <param name="book">Book to search.</param>
 public BookSearch(BookModel book)
 {
     _book = book;

     var tokensPath = book.GetTokensPath();
     var tokens = TokensTool.GetTokens(book.BookID);
     _bookTokenIterator = new BookTokenIterator(tokensPath, tokens);
 }
Пример #28
0
        /// <summary>
        /// Walks the remaining tokens and records every text token whose text contains
        /// <paramref name="query"/> (invariant culture, case-insensitive) together with its context.
        /// </summary>
        /// <param name="bookTokenIterator">Iterator to scan from its current position.</param>
        /// <param name="query">Substring to look for.</param>
        /// <param name="count">Scanning ends once the number of hits reaches this value (checked after each hit is added).</param>
        /// <returns>The hits found.</returns>
        private List<BookSearchResult> SearchOneWord(BookTokenIterator bookTokenIterator, string query, int count)
        {
            var hits = new List<BookSearchResult>();

            while (bookTokenIterator.MoveNext())
            {
                var word = bookTokenIterator.Current as TextToken;
                var isHit = word != null
                            && word.Text.IndexOf(query, StringComparison.InvariantCultureIgnoreCase) >= 0;
                if (!isHit)
                {
                    continue;
                }

                var before = GetSearchBeforeContext(bookTokenIterator, word.ID);
                var after = GetSearchAfterContext(bookTokenIterator, word.ID);

                hits.Add(new BookSearchResult
                {
                    PreviousContext = before,
                    SearchResult = new List<TextToken> { word },
                    NextContext = after
                });

                if (hits.Count >= count)
                {
                    return hits;
                }
            }

            return hits;
        }
Пример #29
0
        /// <summary>
        /// Searches the remaining tokens for a run of text tokens matching the words of
        /// <paramref name="query"/>: the first word is located with <c>FindFirstWord</c>, every middle
        /// word is verified with <c>CheckIntermediateWord</c> and the last one with <c>CheckLastWord</c>.
        /// </summary>
        /// <param name="bookTokenIterator">Iterator to scan from its current position.</param>
        /// <param name="query">Words of the phrase; must contain at least one element (the caller passes two or more).</param>
        /// <param name="count">The scan stops once the number of hits reaches this value (checked after each hit is added).</param>
        /// <returns>The phrase hits found, each with its preceding and following context.</returns>
        private List<BookSearchResult> SearchGroupWords(BookTokenIterator bookTokenIterator, List<string> query, int count)
        {
            var result = new List<BookSearchResult>();

            var firstWordQuery = query[0];
            var lastWordQuery = query.Last();
            TextToken firstWordToken;
            while ((firstWordToken = FindFirstWord(bookTokenIterator, firstWordQuery)) != null)
            {
                var resultSequence = new List<TextToken> { firstWordToken };

                bool matched = true;
                for (int i = 1; i < query.Count - 1; i++)
                {
                    TextToken intermediateToken;
                    if (!CheckIntermediateWord(bookTokenIterator, query[i], out intermediateToken))
                    {
                        matched = false;
                        break;
                    }

                    resultSequence.Add(intermediateToken);
                }

                TextToken lastToken = null;
                if (matched)
                    matched = CheckLastWord(bookTokenIterator, lastWordQuery, out lastToken);

                if (!matched)
                {
                    // The failed checks consumed tokens that may themselves start a match
                    // (query "the cat" in "the the cat"). Step back to the candidate first word
                    // so the next FindFirstWord resumes right after it instead of skipping them.
                    bookTokenIterator.MoveTo(firstWordToken.ID);
                    bookTokenIterator.MoveNext();
                    continue;
                }

                resultSequence.Add(lastToken);

                // Both context helpers reposition the iterator; the second leaves it on lastToken,
                // so the search continues after the matched phrase.
                var previousContext = GetSearchBeforeContext(bookTokenIterator, firstWordToken.ID);
                var afterContext = GetSearchAfterContext(bookTokenIterator, lastToken.ID);

                result.Add(new BookSearchResult
                               {
                                   PreviousContext = previousContext,
                                   SearchResult = resultSequence,
                                   NextContext = afterContext
                               });

                if (result.Count >= count)
                    break;
            }
            return result;
        }
Пример #30
0
        /// <summary>
        /// Moves the iterator forward to the first text token whose text ends with
        /// <paramref name="query"/> (invariant culture, case-insensitive).
        /// </summary>
        /// <param name="bookTokenIterator">Iterator to advance; it stays on the token that is returned.</param>
        /// <param name="query">Suffix to look for.</param>
        /// <returns>The matching token, or null when no further token matches.</returns>
        private TextToken FindFirstWord(BookTokenIterator bookTokenIterator, string query)
        {
            TextToken match = null;

            // The match test comes first so MoveNext is not called again once a token is found.
            while (match == null && bookTokenIterator.MoveNext())
            {
                var candidate = bookTokenIterator.Current as TextToken;
                if (candidate != null && candidate.Text.EndsWith(query, StringComparison.InvariantCultureIgnoreCase))
                {
                    match = candidate;
                }
            }

            return match;
        }
Пример #31
0
 /// <summary>
 /// Finds the last opening tag whose name contains "p" in the 40 positions before
 /// <paramref name="tokenOffset"/>; when that window holds none, the search moves to the
 /// preceding 40-position window, and so on towards the start of the token stream.
 /// </summary>
 /// <param name="tokenOffset">Position before which the tag is searched.</param>
 /// <param name="tokenIterator">Iterator used for the scan; it is left wherever the scan stopped.</param>
 /// <returns>The last matching tag of the first window that contains one. Never null.</returns>
 /// <exception cref="System.InvalidOperationException">
 /// No matching tag exists in a window that already starts at position 0. The previous recursive
 /// version re-entered itself with the same arguments in that case and overflowed the stack.
 /// </exception>
 private static TagOpenToken FindNewToken(int tokenOffset, BookTokenIterator tokenIterator)
 {
     const int windowSize = 40;
     var upperBound = tokenOffset;

     while (true)
     {
         // The counter starts from the unclamped value, so a window clamped to position 0
         // still covers up to 40 tokens - the same range the original scanned.
         var counter = upperBound - windowSize;
         var windowStart = counter < 0 ? 0 : counter;

         TagOpenToken lastMatch = null;
         tokenIterator.MoveTo(windowStart);
         while (tokenIterator.MoveNext() && counter < upperBound)
         {
             var tagToken = tokenIterator.Current as TagOpenToken;
             // NOTE(review): Contains("p") accepts any tag name with a 'p' in it, not only "p" - confirm intent.
             if (tagToken != null && tagToken.Name.Contains("p"))
             {
                 lastMatch = tagToken;
             }
             counter++;
         }

         if (lastMatch != null)
         {
             return lastMatch;
         }

         if (windowStart == 0)
         {
             // Nothing earlier is left to scan; another pass would repeat this one forever.
             throw new System.InvalidOperationException("No opening tag containing 'p' was found before the requested token offset.");
         }

         upperBound = windowStart;
     }
 }
Пример #32
0
        /// <summary>
        /// Gathers up to <paramref name="count"/> text tokens that come before position
        /// <paramref name="startTokenId"/>, stepping backwards one position at a time.
        /// </summary>
        /// <param name="bookTokenIterator">Iterator used for the scan; repositioned on <paramref name="startTokenId"/> before returning.</param>
        /// <param name="startTokenId">Position whose preceding context is wanted.</param>
        /// <param name="count">Maximum number of text tokens to gather.</param>
        /// <returns>The preceding text tokens in reading order; the scan ends at a <c>NewPageToken</c> or at position 0.</returns>
        private List<TextToken> GetSearchBeforeContext(BookTokenIterator bookTokenIterator, int startTokenId, int count = 8)
        {
            var collected = new List<TextToken>();
            var position = startTokenId - 1;

            while (position >= 0 && collected.Count < count)
            {
                bookTokenIterator.MoveTo(position);
                bookTokenIterator.MoveNext();
                position--;

                var current = bookTokenIterator.Current;
                if (current is NewPageToken)
                {
                    break;
                }

                var word = current as TextToken;
                if (word != null)
                {
                    collected.Add(word);
                }
            }

            // Tokens were gathered nearest-first; flip them into reading order.
            collected.Reverse();

            bookTokenIterator.MoveTo(startTokenId);
            bookTokenIterator.MoveNext();
            return collected;
        }
Пример #33
0
        /// <summary>
        /// Gathers up to <paramref name="count"/> text tokens that come after position
        /// <paramref name="endTokenId"/>, stepping forwards one position at a time.
        /// </summary>
        /// <param name="bookTokenIterator">Iterator used for the scan; repositioned on <paramref name="endTokenId"/> before returning.</param>
        /// <param name="endTokenId">Position whose following context is wanted.</param>
        /// <param name="count">Maximum number of text tokens to gather.</param>
        /// <returns>The following text tokens in reading order; the scan ends at a <c>NewPageToken</c> or when the position reaches <c>bookTokenIterator.Count</c>.</returns>
        private List<TextToken> GetSearchAfterContext(BookTokenIterator bookTokenIterator, int endTokenId, int count = 8)
        {
            var collected = new List<TextToken>();
            var position = endTokenId + 1;

            while (position < bookTokenIterator.Count && collected.Count < count)
            {
                bookTokenIterator.MoveTo(position);
                bookTokenIterator.MoveNext();
                position++;

                var current = bookTokenIterator.Current;
                if (current is NewPageToken)
                {
                    break;
                }

                var word = current as TextToken;
                if (word != null)
                {
                    collected.Add(word);
                }
            }

            bookTokenIterator.MoveTo(endTokenId);
            bookTokenIterator.MoveNext();
            return collected;
        }