/// <summary>
/// Gathers the text tokens that precede <paramref name="startTokenId"/>, visiting token ids in
/// descending order until <paramref name="count"/> tokens were found, id 0 was passed, or a
/// <c>NewPageToken</c> was encountered.
/// </summary>
/// <param name="bookTokenIterator">Iterator that is repositioned for every visited id.</param>
/// <param name="startTokenId">Id whose predecessors are collected (the token itself is excluded).</param>
/// <param name="count">Maximum number of text tokens to return.</param>
/// <returns>The collected tokens, nearest predecessor last.</returns>
private List<TextToken> GetSearchBeforeContext(BookTokenIterator bookTokenIterator, int startTokenId, int count = 8)
{
    var collected = new List<TextToken>();

    for (int id = startTokenId - 1; id >= 0 && collected.Count < count; id--)
    {
        bookTokenIterator.MoveTo(id);
        bookTokenIterator.MoveNext();

        if (bookTokenIterator.Current is NewPageToken)
        {
            break;
        }

        var word = bookTokenIterator.Current as TextToken;
        if (word != null)
        {
            collected.Add(word);
        }
    }

    // Tokens were gathered walking backwards; flip them so the nearest predecessor comes last.
    collected.Reverse();

    // Reposition the iterator on the start token before handing control back to the caller.
    bookTokenIterator.MoveTo(startTokenId);
    bookTokenIterator.MoveNext();
    return collected;
}
/// <summary>
/// Reads up to <paramref name="wordsCount"/> text tokens of <paramref name="book"/> starting at
/// <paramref name="tokenOffset"/> and joins them with single spaces.
/// Reading stops at a <c>NewPageToken</c> once at least one word has been collected.
/// </summary>
/// <param name="book">Book whose token stream is opened for the duration of the call.</param>
/// <param name="tokenOffset">Position passed to the iterator's <c>MoveTo</c>.</param>
/// <param name="wordsCount">Maximum number of text tokens to collect.</param>
/// <param name="lastTokenId">Id of the last collected text token, or -1 when none was collected.</param>
/// <returns>The collected words separated by spaces (empty when nothing was collected).</returns>
public string GetText(BookModel book, int tokenOffset, int wordsCount, out int lastTokenId)
{
    lastTokenId = -1;
    var words = new List<string>();

    using (var tokenIterator = new BookTokenIterator(book.GetTokensPath(), TokensTool.GetTokens(book.BookID)))
    {
        tokenIterator.MoveTo(tokenOffset);

        // words.Count doubles as the "words read so far" counter.
        while (tokenIterator.MoveNext() && words.Count < wordsCount)
        {
            if (words.Count > 0 && tokenIterator.Current is NewPageToken)
            {
                break;
            }

            var word = tokenIterator.Current as TextToken;
            if (word != null)
            {
                lastTokenId = word.ID;
                words.Add(word.Text);
            }
        }
    }

    return string.Join(" ", words);
}
/// <summary>
/// Iterator method that yields the <c>TokenBlockBase</c> items produced by <c>OutputLines</c>
/// while consuming <paramref name="bookTokens"/>.
/// </summary>
/// <remarks>
/// Because this method uses <c>yield return</c>, nothing below runs until the result is enumerated.
/// On enumeration it first resets per-run state: stores <paramref name="firstTokenID"/>, builds
/// <c>_tree</c> via <c>bookTokens.BuildTree</c>, takes <c>_tree.Peek()</c> as the last open tag,
/// refreshes <c>_fontSize</c>, clears <c>_separator</c> and zeroes both margins, then calls
/// <c>EnterMargin</c> for every open tag of <c>_tree.Reverse()</c>.
/// When <paramref name="stopText"/> is null or empty and <paramref name="stopTokenID"/> is positive,
/// the stop id is decremented by one before the loop.
/// For each token: pending lines from <c>OutputLines(false)</c> are yielded first, then the token is
/// passed to <c>Append</c>; a <c>false</c> result ends the loop. Finally <c>OutputLines(true)</c> is yielded.
/// </remarks>
/// <param name="bookTokens">Token iterator; advanced with <c>MoveNext</c> by this method.</param>
/// <param name="lastText">Forwarded unchanged to <c>Append</c>.</param>
/// <param name="firstTokenID">Token id used to build the tag tree.</param>
/// <param name="stopTokenID">Stop id forwarded to <c>Append</c> (possibly decremented, see remarks).</param>
/// <param name="stopText">Stop text forwarded to <c>Append</c>.</param>
public IEnumerable<TokenBlockBase> GetLines(BookTokenIterator bookTokens, string lastText, int firstTokenID, int stopTokenID = -1, string stopText = null) { _firstTokenID = firstTokenID; _tree = bookTokens.BuildTree(_firstTokenID); _lastOpenTag = _tree.Peek(); _fontSize = GetCurrentFontSize(); _separator = false; bool firstText = true; _marginLeft = _marginRight = 0.0; foreach (TagOpenToken openTagToken in _tree.Reverse()) EnterMargin(openTagToken.TextProperties); if (string.IsNullOrEmpty(stopText) && stopTokenID > 0) --stopTokenID; while (bookTokens.MoveNext()) { foreach (TokenBlockBase baseTokenLine in OutputLines(false)) yield return baseTokenLine; if (!Append(bookTokens, lastText, stopTokenID, stopText, ref firstText)) break; } foreach (TokenBlockBase baseTokenLine in OutputLines(true)) yield return baseTokenLine; }
/// <summary>
/// Gathers the text tokens that follow <paramref name="endTokenId"/>, visiting token ids in
/// ascending order until <paramref name="count"/> tokens were found, the iterator's <c>Count</c>
/// was reached, or a <c>NewPageToken</c> was encountered.
/// </summary>
/// <param name="bookTokenIterator">Iterator that is repositioned for every visited id.</param>
/// <param name="endTokenId">Id whose successors are collected (the token itself is excluded).</param>
/// <param name="count">Maximum number of text tokens to return.</param>
/// <returns>The collected tokens in the order they were visited.</returns>
private List<TextToken> GetSearchAfterContext(BookTokenIterator bookTokenIterator, int endTokenId, int count = 8)
{
    var collected = new List<TextToken>();

    for (int id = endTokenId + 1; id < bookTokenIterator.Count && collected.Count < count; id++)
    {
        bookTokenIterator.MoveTo(id);
        bookTokenIterator.MoveNext();

        if (bookTokenIterator.Current is NewPageToken)
        {
            break;
        }

        var word = bookTokenIterator.Current as TextToken;
        if (word != null)
        {
            collected.Add(word);
        }
    }

    // Reposition the iterator on the end token before handing control back to the caller.
    bookTokenIterator.MoveTo(endTokenId);
    bookTokenIterator.MoveNext();
    return collected;
}
/// <summary>
/// Scans the remaining tokens for text tokens whose text contains <paramref name="query"/>
/// (invariant culture, case-insensitive) and wraps each hit together with its surrounding context.
/// </summary>
/// <param name="bookTokenIterator">Iterator to scan; advanced by this method.</param>
/// <param name="query">Substring to look for.</param>
/// <param name="count">Scanning stops once this many results were collected (checked after each hit).</param>
/// <returns>The hits found, in scan order.</returns>
private List<BookSearchResult> SearchOneWord(BookTokenIterator bookTokenIterator, string query, int count)
{
    var hits = new List<BookSearchResult>();

    while (bookTokenIterator.MoveNext())
    {
        var word = bookTokenIterator.Current as TextToken;
        bool isHit = word != null
                     && word.Text.IndexOf(query, StringComparison.InvariantCultureIgnoreCase) >= 0;
        if (!isHit)
        {
            continue;
        }

        var before = GetSearchBeforeContext(bookTokenIterator, word.ID);
        var after = GetSearchAfterContext(bookTokenIterator, word.ID);

        hits.Add(new BookSearchResult
        {
            PreviousContext = before,
            SearchResult = new List<TextToken> { word },
            NextContext = after
        });

        if (hits.Count >= count)
        {
            return hits;
        }
    }

    return hits;
}
/// <summary>
/// Searches the remaining tokens for consecutive text tokens that match the words of
/// <paramref name="query"/>: the first word via <c>FindFirstWord</c>, every middle word via
/// <c>CheckIntermediateWord</c> and the final word via <c>CheckLastWord</c>.
/// </summary>
/// <param name="bookTokenIterator">Iterator to scan; advanced (and repositioned) by this method.</param>
/// <param name="query">Words of the phrase; the visible caller only passes lists with more than one entry.</param>
/// <param name="count">Scanning stops once this many results were collected (checked after each hit).</param>
/// <returns>The phrase hits with their surrounding context, in scan order.</returns>
private List <BookSearchResult> SearchGroupWords(BookTokenIterator bookTokenIterator, List <string> query, int count)
{
    var result = new List <BookSearchResult>();
    var firstWordQuery = query[0];
    var lastWordQuery = query.Last();

    TextToken firstWordToken;
    while ((firstWordToken = FindFirstWord(bookTokenIterator, firstWordQuery)) != null)
    {
        var resultSequence = new List <TextToken> { firstWordToken };
        bool matched = true;

        for (int i = 1; i < query.Count - 1 && matched; i++)
        {
            TextToken intermediateToken;
            matched = CheckIntermediateWord(bookTokenIterator, query[i], out intermediateToken);
            if (matched)
            {
                resultSequence.Add(intermediateToken);
            }
        }

        TextToken lastToken = null;
        if (matched)
        {
            matched = CheckLastWord(bookTokenIterator, lastWordQuery, out lastToken);
        }

        if (!matched)
        {
            // The failed check consumed tokens that may themselves start the phrase
            // (e.g. query "a b" against text "a a b"). Reposition on the first-word token,
            // using the same MoveTo + MoveNext idiom as the context helpers, so the next
            // FindFirstWord call resumes with the token right after it.
            bookTokenIterator.MoveTo(firstWordToken.ID);
            bookTokenIterator.MoveNext();
            continue;
        }

        resultSequence.Add(lastToken);
        var previousContext = GetSearchBeforeContext(bookTokenIterator, firstWordToken.ID);
        var afterContext = GetSearchAfterContext(bookTokenIterator, lastToken.ID);
        result.Add(new BookSearchResult
        {
            PreviousContext = previousContext,
            SearchResult = resultSequence,
            NextContext = afterContext
        });

        if (result.Count >= count)
        {
            break;
        }
    }

    return(result);
}
/// <summary>
/// Re-creates the token iterator for the current book and repositions it on the token it was
/// on before. Does nothing when no iterator or no book is set.
/// </summary>
public void Init()
{
    if (_bookTokenIterator == null || _book == null)
    {
        return;
    }

    // Current can be null (presumably when the iterator was never advanced — TODO confirm);
    // dereferencing it unguarded would throw and abort the re-creation. Fall back to token 0.
    var current = _bookTokenIterator.Current;
    int tokenId = current != null ? current.ID : 0;

    _bookTokenIterator.Dispose();
    _bookTokenIterator = new BookTokenIterator(_book.GetTokensPath(), TokensTool.GetTokens(_book.BookID));
    _bookTokenIterator.MoveTo(tokenId);
    _bookTokenIterator.MoveNext();
}
/// <summary>
/// Starts a task (via <c>Task.Factory.StartNew</c>) that opens a fresh token iterator for the
/// current book, obtains a parser from <c>BookFactory.GetBookParser</c> and asks it for the page
/// identified by <paramref name="tokenID"/> and <paramref name="startText"/>.
/// </summary>
/// <param name="tokenID">Token id forwarded to <c>GetPage</c>.</param>
/// <param name="startText">Start text forwarded to <c>GetPage</c>.</param>
/// <returns>A task producing the requested page.</returns>
public Task<PageInfo> GetPageAsync(int tokenID, string startText)
{
    return Task.Factory.StartNew<PageInfo>(() =>
    {
        // The iterator lives only for the duration of this page build.
        using (var tokenIterator = new BookTokenIterator(_book.GetTokensPath(), TokensTool.GetTokens(_book.BookID)))
        {
            var parser = BookFactory.GetBookParser(_book.Type, tokenIterator, _fontSize, _pageSize, _images);
            return parser.GetPage(tokenID, startText);
        }
    });
}
/// <summary>
/// Builds the page for <paramref name="tokenID"/> / <paramref name="startText"/> on a task started
/// with <c>Task.Factory.StartNew</c>. The task opens its own token iterator for the current book
/// and disposes it when the page has been produced.
/// </summary>
/// <param name="tokenID">Token id forwarded to the parser's <c>GetPage</c>.</param>
/// <param name="startText">Start text forwarded to the parser's <c>GetPage</c>.</param>
/// <returns>A task producing the requested page.</returns>
public Task<PageInfo> GetPageAsync(int tokenID, string startText)
{
    return Task.Factory.StartNew<PageInfo>(() =>
    {
        using (var bookTokens = new BookTokenIterator(_book.GetTokensPath(), TokensTool.GetTokens(_book.BookID)))
        {
            var builder = BookFactory.GetBookParser(_book.Type, bookTokens, _fontSize, _pageSize, _images);
            PageInfo result = builder.GetPage(tokenID, startText);
            return result;
        }
    });
}
/// <summary>
/// Creates the builder used to lay out a book of the given type. The types "fb2", "txt", "epub"
/// and "html" all map to <c>BookBuilder</c>.
/// </summary>
/// <param name="bookType">Book type identifier (compared case-sensitively).</param>
/// <param name="bookTokens">Token iterator handed to the builder.</param>
/// <param name="fontSize">Text size handed to the builder.</param>
/// <param name="pageSize">Page size handed to the builder.</param>
/// <param name="images">Book images handed to the builder.</param>
/// <returns>A configured <c>BookBuilder</c>.</returns>
/// <exception cref="NotSupportedException">The book type is not one of the supported identifiers.</exception>
public static IBookBuilder GetBookParser(string bookType, BookTokenIterator bookTokens, int fontSize, Size pageSize, IEnumerable<BookImage> images)
{
    var headerSizes = new ReadOnlyCollection<double>(new List<double> { 24, 32, 42 });
    IFontHelper fontHelper = GetActiveFontMetrics(AppSettings.Default.FontSettings.FontFamily.Source);

    bool isSupported = bookType == "fb2"
                       || bookType == "txt"
                       || bookType == "epub"
                       || bookType == "html";
    if (!isSupported)
    {
        throw new NotSupportedException("Book type '" + bookType + "' is not supported!");
    }

    return new BookBuilder(
        bookTokens,
        images,
        headerSizes,
        fontHelper,
        pageSize,
        fontSize,
        AppSettings.Default.Hyphenation,
        AppSettings.Default.UseCSSFontSize);
}
/// <summary>
/// Starts a search for <paramref name="query"/> in <paramref name="book"/>. Any previously held
/// token iterator is disposed and replaced by a fresh one for the given book.
/// </summary>
/// <param name="book">Book to search; when null an empty result is returned.</param>
/// <param name="query">Raw query text; when null or empty an empty result is returned.</param>
/// <param name="count">Result limit forwarded to <c>Load</c>.</param>
/// <returns>A task producing the search results.</returns>
public Task<List<BookSearchResult>> Search(BookModel book, string query, int count)
{
    if (string.IsNullOrEmpty(query) || book == null)
    {
        // Nothing to compute: hand back an already-completed task instead of scheduling work.
        return Task.FromResult(new List<BookSearchResult>());
    }

    _bookTokenIterator?.Dispose();
    _book = book;
    _bookTokenIterator = new BookTokenIterator(_book.GetTokensPath(), TokensTool.GetTokens(_book.BookID));
    _query = PrepareQuery(query);

    // Capture the state of THIS call in locals. The lambda runs later on another thread; reading
    // the fields there would pick up whatever a subsequent Search call has stored in them.
    var iterator = _bookTokenIterator;
    var preparedQuery = _query;

    return Task<List<BookSearchResult>>
        .Factory.StartNew(() => Load(iterator, preparedQuery, count));
}
/// <summary>
/// Advances the iterator until it reaches a text token whose text ends with
/// <paramref name="query"/> (invariant culture, case-insensitive).
/// </summary>
/// <param name="bookTokenIterator">Iterator to advance.</param>
/// <param name="query">Suffix to look for.</param>
/// <returns>The matching token, or null when the iterator ran out of tokens.</returns>
private TextToken FindFirstWord(BookTokenIterator bookTokenIterator, string query)
{
    while (bookTokenIterator.MoveNext())
    {
        var candidate = bookTokenIterator.Current as TextToken;
        if (candidate != null
            && candidate.Text.EndsWith(query, StringComparison.InvariantCultureIgnoreCase))
        {
            return candidate;
        }
    }

    return null;
}
/// <summary>
/// Replaces the token iterator with a fresh one for the current book and moves it back to the
/// token it was on. When the current token id cannot be read, token 0 is used instead.
/// Does nothing when no iterator or no book is set.
/// </summary>
public void Init()
{
    if (_bookTokenIterator != null && _book != null)
    {
        int resumeTokenId;
        try
        {
            resumeTokenId = _bookTokenIterator.Current.ID;
        }
        catch (Exception)
        {
            // Deliberate best effort: any failure to read the current token restarts at token 0.
            resumeTokenId = 0;
        }

        _bookTokenIterator.Dispose();
        _bookTokenIterator = new BookTokenIterator(_book.GetTokensPath(), TokensTool.GetTokens(_book.BookID));
        _bookTokenIterator.MoveTo(resumeTokenId);
        _bookTokenIterator.MoveNext();
    }
}
/// <summary>
/// Returns the builder for a book of type <paramref name="bookType"/>. All supported types
/// ("fb2", "txt", "epub", "html") share the same <c>BookBuilder</c> implementation.
/// </summary>
/// <param name="bookType">Book type identifier (compared case-sensitively).</param>
/// <param name="bookTokens">Token iterator handed to the builder.</param>
/// <param name="fontSize">Text size handed to the builder.</param>
/// <param name="pageSize">Page size handed to the builder.</param>
/// <param name="images">Book images handed to the builder.</param>
/// <returns>A configured <c>BookBuilder</c>.</returns>
/// <exception cref="NotSupportedException">The book type is not one of the supported identifiers.</exception>
public static IBookBuilder GetBookParser(string bookType, BookTokenIterator bookTokens, int fontSize, Size pageSize, IEnumerable<BookImage> images)
{
    var headerSizes = new ReadOnlyCollection<double>(new List<double> { 24, 32, 42 });
    IFontHelper metrics = GetActiveFontMetrics(AppSettings.Default.FontSettings.FontFamily.Source);

    if (bookType == "fb2" || bookType == "txt" || bookType == "epub" || bookType == "html")
    {
        var settings = AppSettings.Default;
        return new BookBuilder(bookTokens, images, headerSizes, metrics, pageSize, fontSize,
                               settings.Hyphenation, settings.UseCSSFontSize);
    }

    throw new NotSupportedException("Book type '" + bookType + "' is not supported!");
}
/// <summary>
/// Creates a builder and stores every argument in the corresponding private field.
/// </summary>
/// <param name="bookTokens">Token iterator of the book.</param>
/// <param name="images">Images of the book.</param>
/// <param name="headerSizes">Header sizes.</param>
/// <param name="helper">Font helper.</param>
/// <param name="pageSize">Page size.</param>
/// <param name="textSize">Text size.</param>
/// <param name="hyphenation">Hyphenation flag.</param>
/// <param name="useCssFontSize">CSS font size flag.</param>
public BookBuilder(
    BookTokenIterator bookTokens,
    IEnumerable<BookImage> images,
    IList<double> headerSizes,
    IFontHelper helper,
    Size pageSize,
    double textSize,
    bool hyphenation,
    bool useCssFontSize)
{
    // Content sources.
    _bookTokens = bookTokens;
    _images = images;

    // Measurement inputs.
    _helper = helper;
    _headerSizes = headerSizes;
    _textSize = textSize;
    _pageSize = pageSize;

    // Switches.
    _useCssFontSize = useCssFontSize;
    _hyphenation = hyphenation;
}
/// <summary>
/// Advances to the next text token and tests whether its text starts with
/// <paramref name="query"/> (invariant culture, case-insensitive). Only that one text token is
/// examined; non-text tokens in front of it are skipped.
/// </summary>
/// <param name="bookTokenIterator">Iterator to advance.</param>
/// <param name="query">Prefix to look for.</param>
/// <param name="result">The examined token when it matched; otherwise null.</param>
/// <returns>True when the next text token matched; false on mismatch or when no text token is left.</returns>
private bool CheckLastWord(BookTokenIterator bookTokenIterator, string query, out TextToken result)
{
    result = null;

    while (bookTokenIterator.MoveNext())
    {
        var word = bookTokenIterator.Current as TextToken;
        if (word == null)
        {
            continue;
        }

        // The first text token decides the outcome either way.
        if (!word.Text.StartsWith(query, StringComparison.InvariantCultureIgnoreCase))
        {
            return false;
        }

        result = word;
        return true;
    }

    return false;
}
/// <summary>
/// Starts a search for <paramref name="query"/> in <paramref name="book"/>. Any previously held
/// token iterator is disposed and replaced by a fresh one for the given book.
/// </summary>
/// <param name="book">Book to search; when null an empty result is returned.</param>
/// <param name="query">Raw query text; when null or empty an empty result is returned.</param>
/// <param name="count">Result limit forwarded to <c>Load</c>.</param>
/// <returns>A task producing the search results.</returns>
public Task <List <BookSearchResult> > Search(BookModel book, string query, int count)
{
    if (string.IsNullOrEmpty(query) || book == null)
    {
        // Nothing to compute: hand back an already-completed task instead of scheduling work.
        return(Task.FromResult(new List <BookSearchResult>()));
    }

    if (_bookTokenIterator != null)
    {
        _bookTokenIterator.Dispose();
    }

    _book = book;
    _bookTokenIterator = new BookTokenIterator(_book.GetTokensPath(), TokensTool.GetTokens(_book.BookID));
    _query = PrepareQuery(query);

    // Capture the state of THIS call in locals. The lambda runs later on another thread; reading
    // the fields there would pick up whatever a subsequent Search call has stored in them.
    var iterator = _bookTokenIterator;
    var preparedQuery = _query;

    return(Task <List <BookSearchResult> > .Factory.StartNew(() => Load(iterator, preparedQuery, count)));
}
/// <summary>
/// Locates an opening tag near <paramref name="tokenOffset"/> via <c>FindNewToken</c> and returns
/// the text tokens that follow it, joined by single spaces, up to the next opening tag that
/// appears after at least one word was collected.
/// </summary>
/// <param name="book">Book whose token stream is opened for the duration of the call.</param>
/// <param name="tokenOffset">Token offset handed to <c>FindNewToken</c>.</param>
/// <param name="pointer">Receives the <c>Pointer</c> of the located opening tag.</param>
/// <returns>The collected words separated by spaces.</returns>
public string GetLastParagraphByToken(BookModel book, int tokenOffset, out string pointer)
{
    var words = new List<string>();

    using (var tokenIterator = new BookTokenIterator(book.GetTokensPath(), TokensTool.GetTokens(book.BookID)))
    {
        var startTag = FindNewToken(tokenOffset, tokenIterator);
        pointer = startTag.Pointer;

        tokenIterator.MoveTo(startTag.ID);
        while (tokenIterator.MoveNext())
        {
            // An opening tag only terminates the paragraph once some text has been read.
            if (words.Count > 0 && tokenIterator.Current is TagOpenToken)
            {
                break;
            }

            var word = tokenIterator.Current as TextToken;
            if (word != null)
            {
                words.Add(word.Text);
            }
        }
    }

    return string.Join(" ", words);
}
/// <summary>
/// Runs the search that fits the number of query words: <c>SearchOneWord</c> for a single word,
/// <c>SearchGroupWords</c> for several, and an empty result for an empty query.
/// </summary>
/// <param name="bookTokenIterator">Iterator to search.</param>
/// <param name="query">Prepared query words.</param>
/// <param name="count">Result limit forwarded to the search routine.</param>
/// <returns>The search results.</returns>
/// <exception cref="SearchInBookInterruptedException">
/// Wraps a <c>TokenIteratorUnableMoveNextException</c> raised during the search.
/// </exception>
private List<BookSearchResult> Load(BookTokenIterator bookTokenIterator, List<string> query, int count)
{
    try
    {
        if (query.Count == 1)
        {
            return SearchOneWord(bookTokenIterator, query[0], count);
        }

        if (query.Count > 1)
        {
            return SearchGroupWords(bookTokenIterator, query, count);
        }

        return new List<BookSearchResult>();
    }
    catch (TokenIteratorUnableMoveNextException tokenExp)
    {
        throw new SearchInBookInterruptedException("Book tokenizer exception has occured", tokenExp);
    }
}
/// <summary>
/// Dispatches to <c>SearchOneWord</c> (exactly one query word) or <c>SearchGroupWords</c>
/// (more than one); an empty query yields an empty list.
/// </summary>
/// <param name="bookTokenIterator">Iterator to search.</param>
/// <param name="query">Prepared query words.</param>
/// <param name="count">Result limit forwarded to the search routine.</param>
/// <returns>The search results.</returns>
/// <exception cref="SearchInBookInterruptedException">
/// Wraps a <c>TokenIteratorUnableMoveNextException</c> raised during the search.
/// </exception>
private List<BookSearchResult> Load(BookTokenIterator bookTokenIterator, List<string> query, int count)
{
    List<BookSearchResult> found;

    try
    {
        int wordCount = query.Count;
        if (wordCount > 1)
        {
            found = SearchGroupWords(bookTokenIterator, query, count);
        }
        else if (wordCount == 1)
        {
            found = SearchOneWord(bookTokenIterator, query[0], count);
        }
        else
        {
            found = new List<BookSearchResult>();
        }
    }
    catch (TokenIteratorUnableMoveNextException tokenExp)
    {
        throw new SearchInBookInterruptedException("Book tokenizer exception has occured", tokenExp);
    }

    return found;
}
/// <summary>
/// Collects at most <paramref name="wordsCount"/> text tokens of <paramref name="book"/>, starting
/// at <paramref name="tokenOffset"/>, and returns them joined by single spaces. A
/// <c>NewPageToken</c> ends the collection once at least one word is present.
/// </summary>
/// <param name="book">Book whose token stream is opened for the duration of the call.</param>
/// <param name="tokenOffset">Position passed to the iterator's <c>MoveTo</c>.</param>
/// <param name="wordsCount">Maximum number of text tokens to collect.</param>
/// <param name="lastTokenId">Id of the last collected text token, or -1 when none was collected.</param>
/// <returns>The collected words separated by spaces (empty when nothing was collected).</returns>
public string GetText(BookModel book, int tokenOffset, int wordsCount, out int lastTokenId)
{
    var collected = new List<string>();
    lastTokenId = -1;

    using (var tokenIterator = new BookTokenIterator(book.GetTokensPath(), TokensTool.GetTokens(book.BookID)))
    {
        tokenIterator.MoveTo(tokenOffset);

        for (int taken = 0; tokenIterator.MoveNext() && taken < wordsCount; )
        {
            bool pageBreakAfterText = tokenIterator.Current is NewPageToken && collected.Count > 0;
            if (pageBreakAfterText)
            {
                break;
            }

            var word = tokenIterator.Current as TextToken;
            if (word == null)
            {
                continue;
            }

            collected.Add(word.Text);
            lastTokenId = word.ID;
            taken++;
        }
    }

    return string.Join(" ", collected);
}
/// <summary>
/// Skips ahead to the next text token and reports whether its text starts with
/// <paramref name="query"/> (invariant culture, case-insensitive).
/// </summary>
/// <param name="bookTokenIterator">Iterator to advance.</param>
/// <param name="query">Prefix to look for.</param>
/// <param name="result">The examined token when it matched; otherwise null.</param>
/// <returns>True when the next text token matched; false on mismatch or when no text token is left.</returns>
private bool CheckLastWord(BookTokenIterator bookTokenIterator, string query, out TextToken result)
{
    result = null;

    // Advance until a text token shows up or the iterator is exhausted.
    TextToken nextWord = null;
    while (nextWord == null && bookTokenIterator.MoveNext())
    {
        nextWord = bookTokenIterator.Current as TextToken;
    }

    if (nextWord == null)
    {
        return false;
    }

    if (nextWord.Text.StartsWith(query, StringComparison.InvariantCultureIgnoreCase))
    {
        result = nextWord;
        return true;
    }

    return false;
}
/// <summary>
/// Advances the iterator until it reaches a text token whose text ends with
/// <paramref name="query"/>.
/// </summary>
/// <param name="bookTokenIterator">Iterator to advance.</param>
/// <param name="query">Suffix to look for.</param>
/// <returns>The matching token, or null when the iterator ran out of tokens.</returns>
private TextToken FindFirstWord(BookTokenIterator bookTokenIterator, string query)
{
    while (bookTokenIterator.MoveNext())
    {
        var textToken = bookTokenIterator.Current as TextToken;
        if (textToken == null)
        {
            continue;
        }

        // Compare case-insensitively with an explicit StringComparison, matching the single-word
        // and last-word checks; the parameterless EndsWith is case-sensitive and depends on the
        // current thread culture.
        if (textToken.Text.EndsWith(query, StringComparison.InvariantCultureIgnoreCase))
        {
            return textToken;
        }
    }

    return null;
}
/// <summary>
/// Processes the iterator's current token and tells the caller whether to keep going.
/// </summary>
/// <remarks>
/// The token is tested against each token type in turn (the checks are independent, not an
/// else-if chain): page breaks, pictures and open/close tags go to the matching
/// <c>AppendToLine</c> overload, a <c>WhitespaceToken</c> triggers <c>AppendSeparator</c>.
/// The final condition relies on operator precedence (<c>&amp;&amp;</c> binds tighter than
/// <c>||</c>): the method returns <c>false</c> when the token is a text token and
/// <c>AppendTextToken</c> returns <c>true</c>, OR when <paramref name="stopTokenID"/> is
/// non-negative and the token's id has reached it. Otherwise it returns <c>true</c>.
/// </remarks>
/// <param name="bookTokens">Iterator whose <c>Current</c> token is processed.</param>
/// <param name="lastText">Forwarded to <c>AppendTextToken</c>.</param>
/// <param name="stopTokenID">Token id at which processing stops; negative disables the id check.</param>
/// <param name="stopText">Forwarded to <c>AppendTextToken</c>.</param>
/// <param name="firstText">Passed by reference to <c>AppendTextToken</c>.</param>
/// <returns><c>false</c> to stop consuming tokens, <c>true</c> to continue.</returns>
private bool Append(BookTokenIterator bookTokens, string lastText, int stopTokenID, string stopText, ref bool firstText) { TokenBase token = bookTokens.Current; var pageBreakToken = token as NewPageToken; if (pageBreakToken != null) AppendToLine(pageBreakToken); var imageToken = token as PictureToken; if (imageToken != null) AppendToLine(imageToken); var openTagToken = token as TagOpenToken; if (openTagToken != null) AppendToLine(openTagToken); var closeTagToken = token as TagCloseToken; if (closeTagToken != null) AppendToLine(closeTagToken); var textSeparatorToken = token as WhitespaceToken; if (textSeparatorToken != null) AppendSeparator(); var textToken = token as TextToken; if (textToken != null && AppendTextToken(textToken, lastText, stopTokenID, stopText, ref firstText) || stopTokenID >= 0 && token.ID >= stopTokenID) return false; return true; }
/// <summary>
/// Advances to the next text token and tests whether its text equals <paramref name="query"/>.
/// Only that one text token is examined; non-text tokens in front of it are skipped.
/// </summary>
/// <param name="bookTokenIterator">Iterator to advance.</param>
/// <param name="query">Word the next text token must equal.</param>
/// <param name="result">The examined token when it matched; otherwise null.</param>
/// <returns>True when the next text token matched; false on mismatch or when no text token is left.</returns>
private bool CheckIntermediateWord(BookTokenIterator bookTokenIterator, string query, out TextToken result)
{
    result = null;

    while (bookTokenIterator.MoveNext())
    {
        var textToken = bookTokenIterator.Current as TextToken;
        if (textToken == null)
        {
            continue;
        }

        // Ignore case like the first-word and last-word checks do; the parameterless Equals is an
        // ordinal, case-sensitive comparison, which made phrase matches depend on the case of the
        // middle words only.
        if (string.Equals(textToken.Text, query, StringComparison.InvariantCultureIgnoreCase))
        {
            result = textToken;
            return true;
        }

        return false;
    }

    return false;
}
/// <summary>
/// Creates a search helper for <paramref name="book"/> and opens a token iterator over it.
/// </summary>
/// <param name="book">Book to search.</param>
public BookSearch(BookModel book)
{
    var tokensPath = book.GetTokensPath();
    var tokens = TokensTool.GetTokens(book.BookID);

    _bookTokenIterator = new BookTokenIterator(tokensPath, tokens);
    _book = book;
}
/// <summary>
/// Walks the remaining tokens and records every text token whose text contains
/// <paramref name="query"/> (invariant culture, case-insensitive), together with the text tokens
/// before and after it.
/// </summary>
/// <param name="bookTokenIterator">Iterator to scan; advanced by this method.</param>
/// <param name="query">Substring to look for.</param>
/// <param name="count">Scanning stops once this many results were collected (checked after each hit).</param>
/// <returns>The hits found, in scan order.</returns>
private List<BookSearchResult> SearchOneWord(BookTokenIterator bookTokenIterator, string query, int count)
{
    var found = new List<BookSearchResult>();
    bool limitReached = false;

    while (!limitReached && bookTokenIterator.MoveNext())
    {
        var token = bookTokenIterator.Current as TextToken;
        if (token == null)
        {
            continue;
        }

        if (token.Text.IndexOf(query, StringComparison.InvariantCultureIgnoreCase) < 0)
        {
            continue;
        }

        var leading = GetSearchBeforeContext(bookTokenIterator, token.ID);
        var trailing = GetSearchAfterContext(bookTokenIterator, token.ID);

        var entry = new BookSearchResult
        {
            PreviousContext = leading,
            SearchResult = new List<TextToken> { token },
            NextContext = trailing
        };
        found.Add(entry);

        limitReached = found.Count >= count;
    }

    return found;
}
/// <summary>
/// Searches the remaining tokens for consecutive text tokens that match the words of
/// <paramref name="query"/>: the first word via <c>FindFirstWord</c>, every middle word via
/// <c>CheckIntermediateWord</c> and the final word via <c>CheckLastWord</c>.
/// </summary>
/// <param name="bookTokenIterator">Iterator to scan; advanced (and repositioned) by this method.</param>
/// <param name="query">Words of the phrase; the visible caller only passes lists with more than one entry.</param>
/// <param name="count">Scanning stops once this many results were collected (checked after each hit).</param>
/// <returns>The phrase hits with their surrounding context, in scan order.</returns>
private List<BookSearchResult> SearchGroupWords(BookTokenIterator bookTokenIterator, List<string> query, int count)
{
    var result = new List<BookSearchResult>();
    var firstWordQuery = query[0];
    var lastWordQuery = query.Last();

    TextToken firstWordToken;
    while ((firstWordToken = FindFirstWord(bookTokenIterator, firstWordQuery)) != null)
    {
        var resultSequence = new List<TextToken> { firstWordToken };
        bool matched = true;

        for (int i = 1; i < query.Count - 1 && matched; i++)
        {
            TextToken intermediateToken;
            matched = CheckIntermediateWord(bookTokenIterator, query[i], out intermediateToken);
            if (matched)
            {
                resultSequence.Add(intermediateToken);
            }
        }

        TextToken lastToken = null;
        if (matched)
        {
            matched = CheckLastWord(bookTokenIterator, lastWordQuery, out lastToken);
        }

        if (!matched)
        {
            // The failed check consumed tokens that may themselves start the phrase
            // (e.g. query "a b" against text "a a b"). Reposition on the first-word token,
            // using the same MoveTo + MoveNext idiom as the context helpers, so the next
            // FindFirstWord call resumes with the token right after it.
            bookTokenIterator.MoveTo(firstWordToken.ID);
            bookTokenIterator.MoveNext();
            continue;
        }

        resultSequence.Add(lastToken);
        var previousContext = GetSearchBeforeContext(bookTokenIterator, firstWordToken.ID);
        var afterContext = GetSearchAfterContext(bookTokenIterator, lastToken.ID);
        result.Add(new BookSearchResult
        {
            PreviousContext = previousContext,
            SearchResult = resultSequence,
            NextContext = afterContext
        });

        if (result.Count >= count)
        {
            break;
        }
    }

    return result;
}
/// <summary>
/// Moves the iterator forward to the next text token whose text ends with
/// <paramref name="query"/> (invariant culture, case-insensitive).
/// </summary>
/// <param name="bookTokenIterator">Iterator to advance.</param>
/// <param name="query">Suffix to look for.</param>
/// <returns>The matching token, or null when the iterator ran out of tokens.</returns>
private TextToken FindFirstWord(BookTokenIterator bookTokenIterator, string query)
{
    TextToken match = null;

    while (match == null && bookTokenIterator.MoveNext())
    {
        var word = bookTokenIterator.Current as TextToken;
        bool endsWithQuery = word != null
                             && word.Text.EndsWith(query, StringComparison.InvariantCultureIgnoreCase);
        if (endsWithQuery)
        {
            match = word;
        }
    }

    return match;
}
/// <summary>
/// Looks backwards from <paramref name="tokenOffset"/>, in windows of 40 tokens, for the last
/// <c>TagOpenToken</c> whose name contains "p".
/// </summary>
/// <remarks>
/// Each window is scanned forwards and the last match in it wins; when a window has no match the
/// search continues with the 40 tokens before it. The search ends when a window that already
/// starts at token 0 has no match. The original implementation recursed with the same arguments
/// in that case and never returned (stack overflow).
/// NOTE(review): when the window is clamped to token 0 the loop counter still starts at the
/// unclamped (negative) value, so up to 40 tokens are scanned and tokens at or after
/// <paramref name="tokenOffset"/> can be examined. That behaviour is kept as-is because callers
/// near the start of a book may depend on it — confirm whether it is intended.
/// NOTE(review): <c>Name.Contains("p")</c> matches any tag name containing the letter, not only
/// a tag named "p" — confirm against the tag names the tokenizer emits.
/// </remarks>
/// <param name="tokenOffset">Exclusive upper bound of the first search window.</param>
/// <param name="tokenIterator">Iterator that is repositioned and advanced by the search.</param>
/// <returns>The located tag, or null when no matching tag exists.</returns>
private static TagOpenToken FindNewToken(int tokenOffset, BookTokenIterator tokenIterator)
{
    const int windowSize = 40;
    int upperBound = tokenOffset;

    while (true)
    {
        // idx intentionally starts at the unclamped position (see remarks).
        int idx = upperBound - windowSize;
        int windowStart = idx < 0 ? 0 : idx;

        tokenIterator.MoveTo(windowStart);

        TagOpenToken lastMatch = null;
        while (tokenIterator.MoveNext() && idx < upperBound)
        {
            var tagToken = tokenIterator.Current as TagOpenToken;
            if (tagToken != null && tagToken.Name.Contains("p"))
            {
                lastMatch = tagToken;
            }

            idx++;
        }

        if (lastMatch != null)
        {
            return lastMatch;
        }

        if (windowStart == 0)
        {
            // Reached the beginning without a match: stop instead of retrying the same window forever.
            return null;
        }

        upperBound = windowStart;
    }
}
/// <summary>
/// Returns up to <paramref name="count"/> text tokens located before
/// <paramref name="startTokenId"/>. Ids are visited one by one in descending order; the walk ends
/// at id 0, at a <c>NewPageToken</c>, or when enough tokens were found.
/// </summary>
/// <param name="bookTokenIterator">Iterator that is repositioned for every visited id.</param>
/// <param name="startTokenId">Id whose predecessors are collected (the token itself is excluded).</param>
/// <param name="count">Maximum number of text tokens to return.</param>
/// <returns>The collected tokens, nearest predecessor last.</returns>
private List<TextToken> GetSearchBeforeContext(BookTokenIterator bookTokenIterator, int startTokenId, int count = 8)
{
    var context = new List<TextToken>();

    for (int id = startTokenId - 1; id >= 0 && context.Count < count; id--)
    {
        bookTokenIterator.MoveTo(id);
        bookTokenIterator.MoveNext();

        var current = bookTokenIterator.Current;
        if (current is NewPageToken)
        {
            break;
        }

        var word = current as TextToken;
        if (word != null)
        {
            // Prepend so the list ends with the token closest to the start token.
            context.Insert(0, word);
        }
    }

    // Reposition the iterator on the start token before returning.
    bookTokenIterator.MoveTo(startTokenId);
    bookTokenIterator.MoveNext();
    return context;
}
/// <summary>
/// Returns up to <paramref name="count"/> text tokens located after
/// <paramref name="endTokenId"/>. Ids are visited one by one in ascending order; the walk ends
/// at the iterator's <c>Count</c>, at a <c>NewPageToken</c>, or when enough tokens were found.
/// </summary>
/// <param name="bookTokenIterator">Iterator that is repositioned for every visited id.</param>
/// <param name="endTokenId">Id whose successors are collected (the token itself is excluded).</param>
/// <param name="count">Maximum number of text tokens to return.</param>
/// <returns>The collected tokens in the order they were visited.</returns>
private List<TextToken> GetSearchAfterContext(BookTokenIterator bookTokenIterator, int endTokenId, int count = 8)
{
    var context = new List<TextToken>();

    for (int id = endTokenId + 1; id < bookTokenIterator.Count && context.Count < count; id++)
    {
        bookTokenIterator.MoveTo(id);
        bookTokenIterator.MoveNext();

        var current = bookTokenIterator.Current;
        if (current is NewPageToken)
        {
            break;
        }

        var word = current as TextToken;
        if (word != null)
        {
            context.Add(word);
        }
    }

    // Reposition the iterator on the end token before returning.
    bookTokenIterator.MoveTo(endTokenId);
    bookTokenIterator.MoveNext();
    return context;
}