/// <summary>
/// Tries to work out the ISBN of a book from the path of its file.
/// </summary>
/// <remarks>
/// Three strategies are tried in order and the first hit wins:
/// (1) look for a 10- or 13-character ISBN inside the file name, first as a
/// plain digit run and then as a digit run broken up by '.', '-', '_' or spaces;
/// (2) if the file has a .pdf extension, ask <c>PdfIsbnParser</c> to find one;
/// (3) search Google for the file name and take the first ISBN-looking segment
/// of an amazon.com URL in the returned page text.
/// No checksum validation is performed on the candidate.
/// </remarks>
/// <param name="fullPath">Path of the book file.</param>
/// <returns>
/// The guessed ISBN; <c>null</c> when <paramref name="fullPath"/> has no file
/// name component; <see cref="string.Empty"/> when no strategy found anything.
/// </returns>
public string GuessBookIsbn(string fullPath)
{
    var fileName = Path.GetFileName(fullPath);
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    // Step 1a: an unbroken run of digits (optionally ending in 'X') of ISBN length.
    for (var m = Regex.Match(fileName, @"[\d]+X?"); m.Success; m = m.NextMatch())
    {
        var candidate = m.Value;
        if (candidate.Length == 10 || candidate.Length == 13)
        {
            return candidate;
        }
    }

    // Step 1b: digits separated by '.', '-', '_' or spaces, e.g. "978-0-13-110362-7".
    for (var m = Regex.Match(fileName, @"[\d\.\-_ ]+X?"); m.Success; m = m.NextMatch())
    {
        var candidate = m.Value;
        if (candidate.Length < 10)
        {
            // Too short to hold an ISBN even before the separators are removed.
            continue;
        }

        // Remove every separator the pattern accepts. The hyphen must be removed
        // as well, otherwise hyphenated ISBNs can never reach a length of 10 or 13.
        candidate = candidate.Replace(".", string.Empty);
        candidate = candidate.Replace(" ", string.Empty);
        candidate = candidate.Replace("_", string.Empty);
        candidate = candidate.Replace("-", string.Empty);

        if (candidate.Length == 10 || candidate.Length == 13)
        {
            return candidate;
        }
    }

    // Step 2: for PDF files, let the PDF parser look for an ISBN.
    var extension = Path.GetExtension(fullPath);
    if (string.Equals(extension, ".PDF", System.StringComparison.OrdinalIgnoreCase))
    {
        var pdfTextParser = new PdfIsbnParser();
        try
        {
            var foundIsbn = pdfTextParser.Go(fullPath);
            // Treat a null result like an empty one and fall through to the web search.
            if (!string.IsNullOrEmpty(foundIsbn))
            {
                return foundIsbn;
            }
        }
        catch (BookieException ex)
        {
            // Best effort: a PDF that cannot be parsed must not stop the web search.
            Logger.Log.Error("Unable to parse PDF", ex);
        }
    }

    // Step 3: use the file name as a book title and search the web for it.
    // Dots are escaped so that only a literal "www.amazon.com" host matches.
    const string amazonIsbnPattern = @"www\.amazon\.com/.*?/(\d{9}X|\d{10,13})";

    var bookTitle = fileName.Replace(".", " ");
    bookTitle = bookTitle.Replace("_", " ");
    bookTitle = bookTitle.Replace("-", " ");
    var encodedTitle = HttpUtility.UrlEncode(bookTitle);

    // First attempt: plain title search.
    var searchUrl = "http://www.google.com/search?hl=en&q=" + encodedTitle;
    var documentText = SimpleBrowseToPage(searchUrl);
    var plainSearch = Regex.Match(documentText, amazonIsbnPattern);
    if (plainSearch.Success)
    {
        return plainSearch.Groups[1].Value;
    }

    // Second attempt: quoted title plus the word "amazon".
    searchUrl = "http://www.google.com/search?hl=en&q=%22" + encodedTitle + "%22+amazon";
    documentText = SimpleBrowseToPage(searchUrl);
    var quotedSearch = Regex.Match(documentText, amazonIsbnPattern);
    if (quotedSearch.Success)
    {
        return quotedSearch.Groups[1].Value;
    }

    return string.Empty;
}
/// <summary>
/// Guesses the ISBN of a book, given the path of the book file.
/// </summary>
/// <remarks>
/// The file name is inspected first (plain digit runs, then digit runs that
/// contain '.', '-', '_' or space separators). If that fails and the file has a
/// .pdf extension, <c>PdfIsbnParser</c> is consulted. As a last resort the file
/// name is used as a Google query, once as-is and once quoted with "amazon"
/// appended, and the ISBN-looking part of the first amazon.com URL found in the
/// page text is returned. Candidates are accepted on length alone; no check
/// digit is verified.
/// </remarks>
/// <param name="fullPath">Path of the book file.</param>
/// <returns>
/// The guessed ISBN; <c>null</c> if <paramref name="fullPath"/> contains no
/// file name; <see cref="string.Empty"/> if nothing could be found.
/// </returns>
public string GuessBookIsbn(string fullPath)
{
    var fileName = Path.GetFileName(fullPath);
    if (string.IsNullOrEmpty(fileName))
    {
        return null;
    }

    // Step 1: check whether the file name itself carries the ISBN.
    // First pass: contiguous digits, optionally terminated by 'X'.
    foreach (Match plain in Regex.Matches(fileName, @"[\d]+X?"))
    {
        var digits = plain.Value;
        if (digits.Length == 10 || digits.Length == 13)
        {
            return digits;
        }
    }

    // Second pass: digits interleaved with '.', '-', '_' or spaces.
    foreach (Match separated in Regex.Matches(fileName, @"[\d\.\-_ ]+X?"))
    {
        var raw = separated.Value;
        if (raw.Length < 10)
        {
            // Cannot contain 10 digits, whatever the separators are.
            continue;
        }

        // Drop all separators accepted by the pattern. Hyphens are included so
        // that forms such as "0-13-110362-8" collapse to a bare 10/13 characters.
        var compact = raw
            .Replace(".", string.Empty)
            .Replace(" ", string.Empty)
            .Replace("_", string.Empty)
            .Replace("-", string.Empty);

        if (compact.Length == 10 || compact.Length == 13)
        {
            return compact;
        }
    }

    // Step 2: if the book is a PDF, let the PDF parser search its content.
    var extension = Path.GetExtension(fullPath);
    if (string.Equals(extension, ".PDF", System.StringComparison.OrdinalIgnoreCase))
    {
        try
        {
            var foundIsbn = new PdfIsbnParser().Go(fullPath);
            // Null and empty both mean "nothing found"; continue with the web search.
            if (!string.IsNullOrEmpty(foundIsbn))
            {
                return foundIsbn;
            }
        }
        catch (BookieException ex)
        {
            // Parsing is best effort; log and continue with the web search.
            Logger.Log.Error("Unable to parse PDF", ex);
        }
    }

    // Step 3: treat the file name as a title and search for it.
    var bookTitle = fileName.Replace(".", " ").Replace("_", " ").Replace("-", " ");
    var encodedTitle = HttpUtility.UrlEncode(bookTitle);

    // Tried in order; the second, stricter query is only issued if the first
    // page yields no amazon.com link.
    var searchUrls = new[]
    {
        "http://www.google.com/search?hl=en&q=" + encodedTitle,
        "http://www.google.com/search?hl=en&q=%22" + encodedTitle + "%22+amazon"
    };

    foreach (var searchUrl in searchUrls)
    {
        var documentText = SimpleBrowseToPage(searchUrl);
        // Dots are escaped so '.' only matches a literal dot in the host name.
        var hit = Regex.Match(documentText, @"www\.amazon\.com/.*?/(\d{9}X|\d{10,13})");
        if (hit.Success)
        {
            return hit.Groups[1].Value;
        }
    }

    return string.Empty;
}