예제 #1
0
        /// <summary>
        /// Creates an independent copy of this epub: the underlying zip archive is
        /// round-tripped through memory and every html entry is duplicated.
        /// </summary>
        /// <returns>A new epub carrying the same author, title and entries</returns>
        public Epub Clone()
        {
            // serialise the current archive into memory and re-open it from there;
            // the stream is not disposed here because the re-opened zip file was created from it
            MemoryStream buffer = new MemoryStream();
            file.Save(buffer);
            buffer.Position = 0;
            Ionic.Zip.ZipFile archiveCopy = Ionic.Zip.ZipFile.Read(buffer);

            // carry over the general info
            Epub copy = new Epub(archiveCopy);
            copy.Author = Author;
            copy.Title = Title;

            // start with empty entry collections and fill them in reading order
            copy.Entries = new Dictionary<string, HtmlEntry>();
            copy.EntryOrder = new List<HtmlEntry>();

            foreach (HtmlEntry source in EntryOrder)
            {
                HtmlEntry duplicate = new HtmlEntry();
                duplicate.Href = source.Href;
                duplicate.Html = source.Html;
                duplicate.MimeType = source.MimeType;

                // the same duplicate instance is registered both by href and by position
                copy.Entries.Add(source.Href, duplicate);
                copy.EntryOrder.Add(duplicate);
            }

            return copy;
        }
예제 #2
0
        /// <summary>
        /// Builds the word entries for a whole epub.
        /// </summary>
        /// <param name="epub">The epub to analyse</param>
        /// <returns>The word entries created from all word occurrences found in the epub</returns>
        public Dictionary<string, WordEntry> AnalyseEpub(Epub epub)
        {
            // step 1: bucket every word occurrence under its lower case text
            Dictionary<string, List<Word>> occurrencesByText = GetWordsByText(epub);

            // step 2: turn the buckets into word entries
            return CreateWordEntriesFromOccurrences(occurrencesByText);
        }
        /// <summary>
        /// Initializes the component for the given epub and shows the given word entries in the grid.
        /// </summary>
        /// <param name="epub">The epub the word entries belong to</param>
        /// <param name="dataSource">The binding list whose original entries are copied and displayed</param>
        public WordDistributionAnalysis(Epub epub, SortableBindingList<WordEntry> dataSource)
        {
            InitializeComponent();

            this.epub = epub;

            // keep our own list (a shallow copy) of the caller's original entries
            this.wordEntries = dataSource.OriginalList.ToList();

            // wrap every entry in a CheckableWordEntry before handing it to the grid
            var checkableEntries = new List<CheckableWordEntry>();
            foreach (var entry in wordEntries)
            {
                checkableEntries.Add(new CheckableWordEntry(entry));
            }

            grid.DataSource = new SortableBindingList<CheckableWordEntry>(checkableEntries);
        }
예제 #4
0
        /// <summary>
        /// Opens the epub at the given path, lets the loader analyse its words and
        /// binds the resulting word entries to the grid.
        /// </summary>
        /// <param name="path">The full path to the epub file</param>
        public void OpenEpub(string path)
        {
            // start from a clean UI and abort whatever the loader is still working on
            grid.DataSource = null;
            lstOccurrences.Items.Clear();
            loader.CancelAll();

            // the epub structure itself is read right away
            Epub epub = Epub.FromFile(path);

            // show the file name in the caption
            Text = "Epub spell checker - " + System.IO.Path.GetFileName(path);

            // hand the word analysis to the loader; the second callback receives its result
            loader.LoadAsync<Dictionary<string, WordEntry>>(
                state =>
                {
                    // a progress of -1 shows a marquee
                    state.Text = "Loading epub...";
                    state.Progress = -1;

                    return manager.AnalyseEpub(epub);
                },
                loadedEntries =>
                {
                    // release the epub that was open before, then switch to the new one
                    if (currentEpub != null)
                    {
                        currentEpub.Dispose();
                    }

                    currentEpub = epub;

                    // show the word entries in the datagridview
                    grid.DataSource = new SortableBindingList<WordEntry>(loadedEntries.Values);

                    // bring filter and statistics in line with the new list
                    ApplyFilter(false);
                    UpdateStatistics();

                    // carry on with loading suggestions for the unknown words
                    FillSuggestions(loadedEntries);

                    CheckEditMenuItemAvailibility();
                });
        }
예제 #5
0
        /// <summary>
        /// Writes the fixed text of the given word entries into the html of the epub entries.
        /// </summary>
        /// <param name="epub">The epub file to change</param>
        /// <param name="wordEntries">The word entry collection</param>
        public void Apply(Epub epub, IEnumerable<WordEntry> wordEntries)
        {
            // pair every occurrence with the word entry it belongs to
            var entryOccurrencePairs =
                from wordEntry in wordEntries
                from occurrence in wordEntry.Occurrences
                select new KeyValuePair<WordEntry, Word>(wordEntry, occurrence);

            // bucket the pairs per href; inside a bucket the highest character offset comes first,
            // so replacing a word never shifts the offsets of the words that are still to be replaced
            var pairsPerHref = entryOccurrencePairs
                               .GroupBy(p => p.Value.Href)
                               .ToDictionary(
                                   bucket => bucket.Key,
                                   bucket => bucket.OrderByDescending(p => p.Value.CharOffset).ToArray());

            foreach (var bucket in pairsPerHref)
            {
                // swap the html of the matching epub entry for the version with the replaced words
                var target = (Epub.HtmlEntry)epub.Entries[bucket.Key];
                target.Html = GetReplacedHtml(target.Html, bucket.Value);
            }
        }
예제 #6
0
        /// <summary>
        /// Collects the words of every html entry and buckets them by their lower case text.
        /// </summary>
        /// <param name="epub">The epub file</param>
        /// <returns>A dictionary from lower case word text to all occurrences of that word</returns>
        private Dictionary<string, List<Word>> GetWordsByText(Epub epub)
        {
            var occurrencesByText = new Dictionary<string, List<Word>>();

            // only html entries are looked at
            foreach (var htmlEntry in epub.Entries.Values.OfType<Epub.HtmlEntry>())
            {
                foreach (var word in GetWords(htmlEntry.Href, htmlEntry.Html))
                {
                    string key = word.Text.ToLower();

                    // open a new bucket the first time a text is seen
                    List<Word> bucket;
                    if (!occurrencesByText.TryGetValue(key, out bucket))
                    {
                        bucket = new List<Word>();
                        occurrencesByText[key] = bucket;
                    }

                    bucket.Add(word);
                }
            }

            return occurrencesByText;
        }
예제 #7
0
        /// <summary>
        /// Fully reads an epub file to memory and keeps the text entries and some general info
        /// like Title and Author separate.
        /// </summary>
        /// <param name="path">The path of the epub file</param>
        /// <returns>An epub object read from the given file</returns>
        /// <exception cref="System.IO.InvalidDataException">
        /// A zip entry referenced by the epub is missing from the archive, or a required xml attribute is absent
        /// </exception>
        /// <exception cref="System.Exception">The container xml has no rootfile element</exception>
        public static Epub FromFile(string path)
        {
            // read the entire file, and interpret it as a zip file
            var epubBytes = System.IO.File.ReadAllBytes(path);
            var file      = Ionic.Zip.ZipFile.Read(epubBytes);

            try
            {
                Epub epub = new Epub(file);

                // read the metadata container xml info
                XmlDocument doc = new XmlDocument();
                using (MemoryStream ms = ExtractZipEntryToMemory(file, @"META-INF\container.xml"))
                {
                    doc.Load(ms);
                }

                // determine the href of the content manifest, which is stored in the full-path attribute of the rootfile tag
                var node = doc.ChildNodes.GetAllNodes().FirstOrDefault(n => n.Name == "rootfile");
                if (node == null)
                {
                    throw new Exception("No content metadata");
                }

                string contentPath = GetRequiredAttribute(node, "full-path");

                // keep the relative path to the manifest file, because all entries in the manifest will be relative
                string basePath = System.IO.Path.GetDirectoryName(contentPath);
                using (MemoryStream ms = ExtractZipEntryToMemory(file, contentPath))
                {
                    // namespaces are stripped so the plain xpath expressions below match
                    doc = new XmlDocument();
                    doc.LoadXml(XDocument.Load(ms).Root.StripNamespaces().ToString());
                }

                // read the title if present
                var titleNode = doc.SelectSingleNode("package/metadata/title");
                if (titleNode != null)
                {
                    epub.Title = titleNode.InnerText;
                }

                // read the author if present
                var authorNode = doc.SelectSingleNode("package/metadata/creator");
                if (authorNode != null)
                {
                    epub.Author = authorNode.InnerText;
                }

                // read all the entries in the manifest
                var items = doc.SelectNodes("package/manifest/item");

                var entries    = new Dictionary<string, HtmlEntry>(items.Count);
                var entryOrder = new List<HtmlEntry>(items.Count);

                foreach (var item in items.Cast<XmlNode>())
                {
                    string href     = System.IO.Path.Combine(basePath, GetRequiredAttribute(item, "href"));
                    string mimeType = GetRequiredAttribute(item, "media-type");

                    // every media type mentioning html or xml is read as text
                    // (this includes "application/xhtml+xml", but also other xml based types)
                    if (!mimeType.Contains("html") && !mimeType.Contains("xml"))
                    {
                        continue;
                    }

                    // the zip entry is looked up by the unescaped href, the entry itself keeps the href as combined above
                    using (MemoryStream ms = ExtractZipEntryToMemory(file, Uri.UnescapeDataString(href)))
                    using (StreamReader reader = new StreamReader(ms))
                    {
                        var te = new HtmlEntry()
                        {
                            Href     = href,
                            MimeType = mimeType,
                            Html     = reader.ReadToEnd()
                        };

                        entries.Add(href, te);
                        entryOrder.Add(te);
                    }
                }

                epub.Entries    = entries;
                epub.EntryOrder = entryOrder;

                return epub;
            }
            catch
            {
                // on failure the archive never reaches a caller, so release it here before rethrowing
                file.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Extracts a single zip entry into a new memory stream positioned at its start.
        /// </summary>
        /// <param name="file">The zip file to read from</param>
        /// <param name="entryName">The name of the entry inside the zip file</param>
        /// <returns>A memory stream holding the entry content; the caller disposes it</returns>
        /// <exception cref="System.IO.InvalidDataException">The zip file has no entry with the given name</exception>
        private static MemoryStream ExtractZipEntryToMemory(Ionic.Zip.ZipFile file, string entryName)
        {
            var zipEntry = file[entryName];
            if (zipEntry == null)
            {
                throw new System.IO.InvalidDataException("The epub does not contain the entry '" + entryName + "'");
            }

            MemoryStream ms = new MemoryStream();
            zipEntry.Extract(ms);
            ms.Position = 0;
            return ms;
        }

        /// <summary>
        /// Reads the value of an attribute that has to be present on the given xml node.
        /// </summary>
        /// <param name="node">The xml node carrying the attribute</param>
        /// <param name="name">The attribute name</param>
        /// <returns>The attribute value</returns>
        /// <exception cref="System.IO.InvalidDataException">The node has no attribute with the given name</exception>
        private static string GetRequiredAttribute(XmlNode node, string name)
        {
            var attribute = node.Attributes == null ? null : node.Attributes[name];
            if (attribute == null)
            {
                throw new System.IO.InvalidDataException("The '" + node.Name + "' element has no '" + name + "' attribute");
            }

            return attribute.Value;
        }