/// <summary>
/// Creates a new <see cref="WordOnPage"/> whose <c>CombinedWords</c> list holds
/// <c>wop1.Word</c> followed by every entry already present in <c>wop1.CombinedWords</c>.
/// </summary>
/// <param name="wop1">Entry whose word and previously combined words are copied into the result.</param>
/// <param name="wop2">Not read by the current implementation (see review note in the body).</param>
/// <param name="IndexAs">Not read by the current implementation (see review note in the body).</param>
/// <returns>A new entry with only <c>CombinedWords</c> populated by this method.</returns>
public static WordOnPage Combine(WordOnPage wop1, WordOnPage wop2, string IndexAs)
        {
            WordOnPage wop = new WordOnPage();

            wop.CombinedWords = new List<string>();
            wop.CombinedWords.Add(wop1.Word);

            // CombinedWords has to be assigned explicitly on a fresh instance (as done above),
            // so an entry that never had words combined into it may still carry null here.
            if (wop1.CombinedWords != null)
            {
                foreach (string previous in wop1.CombinedWords)
                {
                    wop.CombinedWords.Add(previous);
                }
            }

            // NOTE(review): wop2 and IndexAs are never used, so nothing from the second entry
            // (and no index label) reaches the result. Confirm the intended merge semantics
            // before extending this; behavior for wop2/IndexAs is left unchanged here.
            return wop;
        }
// Example #2
// 0
        /// <summary>
        /// Builds <c>RawWordList</c> on a background task: optionally extracts page text from a PDF
        /// stream into <c>PagesInBook</c>, indexes every word by the pages it occurs on, sorts the
        /// index by word, and optionally tags each entry with a word type from <c>DictionaryWords</c>.
        /// Progress and errors are reported through <c>FireUpdateEvent</c>.
        /// </summary>
        /// <param name="DocumentToLoad">Stream holding the document; only read when <paramref name="docType"/> is PDF.</param>
        /// <param name="docType">Document kind. Only PDF triggers text extraction; for any other value the current <c>PagesInBook</c> is indexed as it stands.</param>
        /// <param name="DictionaryMatch">When true, each indexed word gets the <c>WordType</c> of its first match in <c>DictionaryWords</c>, or "Unknown" when there is none.</param>
        /// <param name="ExcludeNumberWords">When true, words for which <c>ContainsNumber</c> returns true are left out of the index.</param>
        /// <returns>
        /// True when processing ran to completion; false when an exception was caught
        /// (the exception message is also sent as an Error update event).
        /// </returns>
        public async Task<bool> ProcessIt(Stream DocumentToLoad, DocumentTypes docType, bool DictionaryMatch, bool ExcludeNumberWords)
        {
            // The work is CPU-bound and synchronous, so the lambda is a plain Func<bool>
            // rather than an async lambda with nothing to await.
            return await Task.Run(() =>
            {
                try
                {
                    if (docType == DocumentTypes.PDF)
                    {
                        LoadPagesFromPdf(DocumentToLoad);
                    }

                    RawWordList = BuildRawIndex(ExcludeNumberWords).OrderBy(p => p.Word).ToList();

                    if (DictionaryMatch)
                    {
                        ApplyDictionaryWordTypes();
                    }

                    return true;
                }
                catch (Exception ex)
                {
                    Debug.WriteLine("ERROR:" + ex.Message);
                    FireUpdateEvent(EventUpdateTypes.Error, ex.Message, 0, 0, 0);

                    // Report the failure to the caller instead of always returning true.
                    return false;
                }
            });
        }

        // Replaces PagesInBook with one BookPage per page of the PDF read from the given stream.
        private void LoadPagesFromPdf(Stream documentToLoad)
        {
            PagesInBook = new List<BookPage>();

            using (PdfDocument pdfDoc = new PdfDocument(new PdfReader(documentToLoad)))
            {
                // Page count is fixed for the open document; read it once.
                int maxPages = pdfDoc.GetNumberOfPages();

                // PDF page numbers are 1-based.
                for (int pageNumber = 1; pageNumber <= maxPages; pageNumber++)
                {
                    FireUpdateEvent(EventUpdateTypes.ParsingFromPDF, "Page: " + pageNumber.ToString(), maxPages, pageNumber, Warnings.Count());

                    PagesInBook.Add(new BookPage(PdfTextExtractor.GetTextFromPage(pdfDoc.GetPage(pageNumber))));
                }
            }
        }

        // Walks PagesInBook and returns one WordOnPage per distinct word (compared lower-cased and
        // trimmed), in first-seen order, each listing the 1-based positions of the pages it occurs on.
        private List<WordOnPage> BuildRawIndex(bool excludeNumberWords)
        {
            List<WordOnPage> index = new List<WordOnPage>();

            // Same entries as 'index', keyed by the normalized word, so each word costs one hash
            // lookup instead of a scan (with re-normalization) of the whole index.
            Dictionary<string, WordOnPage> entriesByKey = new Dictionary<string, WordOnPage>();

            int pageCount = 0;
            int pageTotal = PagesInBook.Count();

            foreach (BookPage bp in PagesInBook)
            {
                pageCount++;

                FireUpdateEvent(EventUpdateTypes.Indexing, "Page: " + pageCount.ToString(), pageTotal, pageCount, Warnings.Count());

                foreach (string word in bp.Words)
                {
                    if (ContainsNumber(word) && excludeNumberWords)
                    {
                        continue;
                    }

                    string key = word.ToLower().Trim();

                    WordOnPage entry;
                    if (entriesByKey.TryGetValue(key, out entry))
                    {
                        // Known word: record this page only once, even if the word repeats on it.
                        if (!entry.Page.Any(p => p.Page == pageCount))
                        {
                            entry.Page.Add(new PageClass(pageCount));
                        }
                    }
                    else
                    {
                        // First sighting: the stored Word keeps the casing of this first occurrence.
                        entry      = new WordOnPage();
                        entry.Word = word.Trim();
                        entry.Page = new List<PageClass>();
                        entry.Page.Add(new PageClass(pageCount));

                        index.Add(entry);
                        entriesByKey.Add(key, entry);
                    }
                }
            }

            return index;
        }

        // Sets WordType on every RawWordList entry from the first DictionaryWords item whose word
        // matches (lower-cased and trimmed), or "Unknown" when no item matches.
        private void ApplyDictionaryWordTypes()
        {
            // Built once so each indexed word is a single lookup rather than a scan of the dictionary.
            Dictionary<string, DictionaryWord> dictionaryByKey = new Dictionary<string, DictionaryWord>();

            foreach (DictionaryWord dic in DictionaryWords)
            {
                // An item without a word can never match anything; skip it rather than fail on it.
                if (dic.Word == null)
                {
                    continue;
                }

                string key = dic.Word.ToLower().Trim();

                // Keep the first item per key, matching a first-match search in list order.
                if (!dictionaryByKey.ContainsKey(key))
                {
                    dictionaryByKey.Add(key, dic);
                }
            }

            int processed = 0;

            foreach (WordOnPage wop in RawWordList)
            {
                processed++;

                FireUpdateEvent(EventUpdateTypes.DictionarySearch, wop.Word, RawWordList.Count, processed, Warnings.Count());

                DictionaryWord match;
                if (dictionaryByKey.TryGetValue(wop.Word.ToLower().Trim(), out match))
                {
                    wop.WordType = match.WordType;
                }
                else
                {
                    wop.WordType = "Unknown";
                }
            }
        }