/// <summary>
        /// Tries to determine a book's ISBN from its file, using progressively more
        /// expensive strategies: (1) an ISBN-shaped token in the file name, (2) for
        /// <c>.pdf</c> files, whatever <c>PdfIsbnParser.Go</c> reports, and (3) a web
        /// search for the file name, looking for an amazon.com link that carries an
        /// ISBN-like path segment.
        /// </summary>
        /// <param name="fullPath">Path of the book file.</param>
        /// <returns>
        /// <c>null</c> when <paramref name="fullPath"/> has no file-name part; the first
        /// candidate found (separators removed); otherwise <see cref="string.Empty"/>.
        /// No check-digit validation is performed, so the result is a guess.
        /// </returns>
        public string GuessBookIsbn(string fullPath)
        {
            var fileName = Path.GetFileName(fullPath);

            if (string.IsNullOrEmpty(fileName))
            {
                return null;
            }

            // Step 1: Check if the file name itself carries the ISBN.
            var isbn = FindIsbnInFileName(fileName);
            if (isbn != null)
            {
                return isbn;
            }

            // Step 2: For PDFs, delegate to the PDF parser.
            // Ordinal, case-insensitive comparison avoids culture-dependent ToUpper().
            if (string.Equals(Path.GetExtension(fullPath), ".pdf", System.StringComparison.OrdinalIgnoreCase))
            {
                var pdfTextParser = new PdfIsbnParser();
                try
                {
                    var foundIsbn = pdfTextParser.Go(fullPath);

                    // Treat null like "nothing found" so the web search still gets a chance.
                    if (!string.IsNullOrEmpty(foundIsbn))
                    {
                        return foundIsbn;
                    }
                }
                catch (BookieException ex)
                {
                    // Best effort: a PDF that cannot be parsed must not stop the lookup.
                    Logger.Log.Error("Unable to parse PDF", ex);
                }
            }

            // Step 3: Use the file name as a book title and search the web for it.
            var bookTitle    = fileName.Replace(".", " ").Replace("_", " ").Replace("-", " ");
            var encodedTitle = HttpUtility.UrlEncode(bookTitle);
            const string searchPrefix = "http://www.google.com/search?hl=en&q=";

            isbn = FindIsbnInSearchResults(searchPrefix + encodedTitle);
            if (isbn == null)
            {
                // Second attempt: exact-phrase query (%22 = double quote) plus "amazon".
                isbn = FindIsbnInSearchResults(searchPrefix + "%22" + encodedTitle + "%22+amazon");
            }

            return isbn ?? string.Empty;
        }

        // Scans a file name for an ISBN-shaped token: first as one unbroken run of
        // digits, then as a run grouped by '.', '-', '_' or spaces. Returns null if none.
        private static string FindIsbnInFileName(string fileName)
        {
            foreach (Match run in Regex.Matches(fileName, @"\d+X?"))
            {
                if (LooksLikeIsbn(run.Value))
                {
                    return run.Value;
                }
            }

            foreach (Match run in Regex.Matches(fileName, @"[\d\.\-_ ]+X?"))
            {
                // Strip every separator the pattern allows, hyphens included, so that
                // "0-306-40615-2" yields "0306406152" rather than the raw 13-char text.
                var candidate = Regex.Replace(run.Value, @"[.\-_ ]", string.Empty);
                if (LooksLikeIsbn(candidate))
                {
                    return candidate;
                }
            }

            return null;
        }

        // True for 9 digits followed by a digit or 'X' (ISBN-10 shape) or exactly
        // 13 digits (ISBN-13 shape). The check digit itself is not verified.
        private static bool LooksLikeIsbn(string candidate)
        {
            return Regex.IsMatch(candidate, @"\A(?:\d{9}[\dX]|\d{13})\z");
        }

        // Fetches one search page and returns the first ISBN-like path segment found
        // in an amazon.com link, or null when the page is empty or has no such link.
        private string FindIsbnInSearchResults(string searchUrl)
        {
            var documentText = SimpleBrowseToPage(searchUrl);
            if (string.IsNullOrEmpty(documentText))
            {
                return null;
            }

            // Dots are escaped so that only the literal host name matches.
            var link = Regex.Match(documentText, @"www\.amazon\.com/.*?/(\d{9}X|\d{10,13})");
            return link.Success ? link.Groups[1].Value : null;
        }
// Example #2
0
        /// <summary>
        /// Guesses the ISBN of the book stored at <paramref name="fullPath"/>.
        /// The file name is inspected first; for <c>.pdf</c> files the result of
        /// <c>PdfIsbnParser.Go</c> is used next; as a last resort the file name is
        /// submitted as a web search and the response is scanned for an amazon.com
        /// link containing an ISBN-like path segment.
        /// </summary>
        /// <param name="fullPath">Path of the book file.</param>
        /// <returns>
        /// <c>null</c> if the path has no file-name part, the guessed ISBN (without
        /// separators) if one was found, otherwise <see cref="string.Empty"/>.
        /// Check digits are not validated.
        /// </returns>
        public string GuessBookIsbn(string fullPath)
        {
            var fileName = Path.GetFileName(fullPath);
            if (string.IsNullOrEmpty(fileName))
            {
                return null;
            }

            // ISBN-10 shape (9 digits + digit or 'X') or ISBN-13 shape (13 digits).
            const string isbnShape = @"\A(?:\d{9}[\dX]|\d{13})\z";

            // Step 1a: an unbroken run of digits (optionally ending in 'X') in the name.
            for (var m = Regex.Match(fileName, @"\d+X?"); m.Success; m = m.NextMatch())
            {
                if (Regex.IsMatch(m.Value, isbnShape))
                {
                    return m.Value;
                }
            }

            // Step 1b: the same, but allowing '.', '-', '_' and spaces between digit groups.
            for (var m = Regex.Match(fileName, @"[\d\.\-_ ]+X?"); m.Success; m = m.NextMatch())
            {
                // Every separator the pattern admits is removed; leaving '-' in place
                // would let a hyphenated ISBN-10 pass the length check unnormalised.
                var token = m.Value
                    .Replace(".", string.Empty)
                    .Replace(" ", string.Empty)
                    .Replace("_", string.Empty)
                    .Replace("-", string.Empty);

                if (Regex.IsMatch(token, isbnShape))
                {
                    return token;
                }
            }

            // Step 2: if the book is a PDF, let the PDF parser look for the ISBN.
            var extension = Path.GetExtension(fullPath);
            if (string.Equals(extension, ".pdf", System.StringComparison.OrdinalIgnoreCase))
            {
                var pdfTextParser = new PdfIsbnParser();
                try
                {
                    var foundIsbn = pdfTextParser.Go(fullPath);

                    // A null result counts as "not found" and falls through to Step 3.
                    if (!string.IsNullOrEmpty(foundIsbn))
                    {
                        return foundIsbn;
                    }
                }
                catch (BookieException ex)
                {
                    // Logged and ignored on purpose so the web search can still run.
                    Logger.Log.Error("Unable to parse PDF", ex);
                }
            }

            // Step 3: use the file name as a book title and search for it.
            var bookTitle = fileName.Replace(".", " ");
            bookTitle = bookTitle.Replace("_", " ");
            bookTitle = bookTitle.Replace("-", " ");
            var encodedTitle = HttpUtility.UrlEncode(bookTitle);

            // Plain query first, then an exact-phrase query (%22 = double quote) plus "amazon".
            var searchUrls = new[]
            {
                "http://www.google.com/search?hl=en&q=" + encodedTitle,
                "http://www.google.com/search?hl=en&q=%22" + encodedTitle + "%22+amazon"
            };

            foreach (var searchUrl in searchUrls)
            {
                var documentText = SimpleBrowseToPage(searchUrl);
                if (string.IsNullOrEmpty(documentText))
                {
                    // Nothing to scan; Regex.Match would throw on null input.
                    continue;
                }

                // Dots are escaped so only the literal amazon host name matches.
                var hit = Regex.Match(documentText, @"www\.amazon\.com/.*?/(\d{9}X|\d{10,13})");
                if (hit.Success)
                {
                    return hit.Groups[1].Value;
                }
            }

            return string.Empty;
        }