コード例 #1
0
        /// <summary>
        /// Downloads a web page and writes its text rendering to standard output.
        /// </summary>
        /// <param name="options">
        /// Command options. <c>WebsiteAddress</c> is used as the URL when it is non-empty; otherwise
        /// <c>SearchEngineQuery</c> is URL-encoded into a Google search URL.
        /// </param>
        /// <returns>
        /// 0 when the page was downloaded and printed; 1 when no address or query was supplied,
        /// or when any exception was thrown (details are written to standard error).
        /// </returns>
        public static Int32 QueryWeb(QueryWebCommandOptions options)
        {
            try
            {
                String queryUrl;

                if (!String.IsNullOrEmpty(options.WebsiteAddress))
                {
                    queryUrl = options.WebsiteAddress;
                }
                else if (!String.IsNullOrEmpty(options.SearchEngineQuery))
                {
                    queryUrl = $"https://www.google.com/search?q={WebUtility.UrlEncode(options.SearchEngineQuery)}";
                }
                else
                {
                    // Without this branch an empty URL would reach DownloadString and surface as an
                    // "unhandled exception", hiding the real cause (missing input).
                    Console.Error.WriteLine("Either a website address or a search engine query must be specified.");
                    return(1);
                }

                String queryResult;

                using (var webClient = new WebClient())
                {
                    queryResult = webClient.DownloadString(queryUrl);
                }

                var websiteText = HtmlToTextConverter.ConvertHtmlToText(queryResult);
                Console.WriteLine(websiteText);

                return(0);
            }
            catch (Exception ex)
            {
                // Top-level command boundary: report the failure and signal it through the exit code.
                Console.Error.WriteLine("Unhandled exception:");
                Console.Error.WriteLine(ex.ToString());
                return(1);
            }
        }
コード例 #2
0
        /// <summary>
        /// Converts <paramref name="input"/> with a new <c>HtmlToTextConverter</c> and asserts that the
        /// result equals <paramref name="expected"/>.
        /// </summary>
        /// <param name="input">Text handed to <c>Convert</c>.</param>
        /// <param name="expected">Text the conversion must produce.</param>
        public void RunConversion(string input, string expected)
        {
            string actual = new HtmlToTextConverter().Convert(input);

            Assert.Equal(expected, actual);
        }
コード例 #3
0
ファイル: HtmlInjection.cs プロジェクト: yankaics/cms-1
        /// <summary>
        /// Disposes and clears the fragment-to-HTML converter, the fragment-to-text converter and the
        /// fragment parser (each only if currently set), then calls <c>Reset</c> and the base
        /// <c>Dispose</c>.
        /// </summary>
        /// <param name="disposing">Forwarded unchanged to <c>base.Dispose</c>; not inspected here.</param>
        protected override void Dispose(bool disposing)
        {
            if (this.fragmentToHtmlConverter != null)
            {
                var htmlConverter = (IDisposable)this.fragmentToHtmlConverter;
                htmlConverter.Dispose();
                this.fragmentToHtmlConverter = null;
            }

            if (this.fragmentToTextConverter != null)
            {
                var textConverter = (IDisposable)this.fragmentToTextConverter;
                textConverter.Dispose();
                this.fragmentToTextConverter = null;
            }
#if false
            if (this.fragmentToRtfConverter != null)
            {
                ((IDisposable)this.fragmentToRtfConverter).Dispose();
                this.fragmentToRtfConverter = null;
            }
#endif
            if (this.fragmentParser != null)
            {
                var parser = (IDisposable)this.fragmentParser;
                parser.Dispose();
                this.fragmentParser = null;
            }

            this.Reset();
            base.Dispose(disposing);
        }
        /// <summary>
        /// Parses <paramref name="html"/> into a document using a default-configured browsing context
        /// and converts the document body with a new <c>HtmlToTextConverter</c>.
        /// </summary>
        /// <param name="html">Markup supplied as the content of the opened document.</param>
        /// <returns>The converter's output for the parsed document's <c>Body</c>.</returns>
        public static async Task <string> HtmlToText(string html)
        {
            var browsingContext = BrowsingContext.New(Configuration.Default);
            var parsedDocument  = await browsingContext.OpenAsync(request => request.Content(html));

            return new HtmlToTextConverter().Convert(parsedDocument.Body);
        }
コード例 #5
0
 /// <summary>
 /// Fills this instance's discussion properties from a user discussion topic.
 /// </summary>
 /// <param name="ud">Source of the topic data and the child response counts.</param>
 protected void SetupFromTopic(UserDiscussionTopic ud)
 {
     UserTopic      = ud;
     DiscussionDate = null; // topics carry no date

     var topic  = ud.Topic;
     var counts = ud.ChildResponseCounts;

     // Title is HTML-decoded; description has its markup stripped.
     DiscussionTitle       = HttpUtility.HtmlDecode(topic.Title);
     DiscussionDescription = HtmlToTextConverter.StripHtml(topic.Description);

     DiscussionResponseCount = counts.TotalResponseCount;
     UnreadResponseCount     = counts.UnreadResponseCount;

     // Uses the value stored in DiscussionResponseCount just above.
     TotalResponsesLine = CalculateTotalResponsesLine(DiscussionResponseCount);
     MyResponsesLine    = CalculateMyResponsesLine(counts.PersonalResponseCount);

     NavigationPath = "/Views/DiscussionPage.xaml?topicId=" + topic.ID;
     IconTemplate   = CalculateIconFromResponseCounts(counts);
 }
コード例 #6
0
 /// <summary>
 /// Fills this instance's discussion properties from a user discussion response.
 /// </summary>
 /// <param name="ud">Source of the response data and the child response counts.</param>
 protected void SetupFromResponse(UserDiscussionResponse ud)
 {
     UserResponse = ud;

     var response = ud.Response;
     DiscussionDate = response.PostedDate;

     // Title is HTML-decoded; description has its markup stripped.
     DiscussionTitle       = HttpUtility.HtmlDecode(response.Title);
     DiscussionDescription = HtmlToTextConverter.StripHtml(response.Description);

     var counts = ud.ChildResponseCounts;
     DiscussionResponseCount = counts.TotalResponseCount;
     UnreadResponseCount     = counts.UnreadResponseCount;

     // Uses the value stored in DiscussionResponseCount just above.
     TotalResponsesLine = CalculateTotalResponsesLine(DiscussionResponseCount);
     MyResponsesLine    = CalculateMyResponsesLine(counts.PersonalResponseCount);

     AuthorName     = response.Author.DisplayName;
     NavigationPath = "/Views/DiscussionPage.xaml?responseId=" + response.ID;
     IconTemplate   = CalculateIconFromResponseCounts(counts);
 }
コード例 #7
0
        /// <summary>
        /// Builds one <c>ImageItem</c> for every <c>BlogEntryDTO</c> found in the day's objects.
        /// </summary>
        /// <param name="day">Training day whose <c>Objects</c> are scanned for blog entries.</param>
        /// <returns>The items in enumeration order; an empty array when the day has no blog entries.</returns>
        public ImageItem[] GetDayContents(TrainingDayDTO day)
        {
            var converter = new HtmlToTextConverter();

            return day.Objects
                      .OfType <BlogEntryDTO>()
                      .Select(entry => new ImageItem
                      {
                          BackBrush = EntryObjectColors.Blog,
                          // The blog comment is passed through the converter before being shown as content.
                          Content   = (string)converter.Convert(entry.Comment, typeof(string), null, CultureInfo.CurrentCulture),
                          Entry     = entry,
                          ToolTip   = Name,
                          Image     = Image
                      })
                      .ToArray();
        }
コード例 #8
0
 /// <summary>
 /// Runs the pending head or tail injection fragment through an <c>HtmlToTextConverter</c>.
 /// </summary>
 /// <param name="head">True to process the head fragment, false to process the tail fragment.</param>
 /// <param name="output">Output handed to a newly created converter.</param>
 public override void Inject(bool head, TextOutput output)
 {
     if (head)
     {
         // Nothing to do without a head fragment, or when it has already been injected.
         if (injectHead == null || headInjected)
         {
             return;
         }

         bool headIsText = injectionFormat == HeaderFooterFormat.Text;
         HtmlParser headParser = new HtmlParser(new ConverterBufferInput(injectHead, progressMonitor), false, headIsText, 64, 8, testBoundaryConditions);
         fragmentToTextConverter = new HtmlToTextConverter(headParser, output, null, true, headIsText, false, null, true, 0);

         // Keep flushing until the converter reports completion.
         bool headFlushed;
         do
         {
             headFlushed = fragmentToTextConverter.Flush();
         }
         while (!headFlushed);

         headInjected = true;

         // The converter is kept for the tail only when a tail fragment exists.
         if (injectTail == null)
         {
             ((IDisposable)fragmentToTextConverter).Dispose();
             fragmentToTextConverter = null;
         }
         return;
     }

     // A head that was never injected is marked as done once the tail is requested.
     if (injectHead != null && !headInjected)
     {
         headInjected = true;
     }

     if (injectTail == null || tailInjected)
     {
         return;
     }

     bool tailIsText = injectionFormat == HeaderFooterFormat.Text;
     if (fragmentToTextConverter != null)
     {
         // Reuse the converter left over from the head injection.
         fragmentToTextConverter.Initialize(injectTail, tailIsText);
     }
     else
     {
         HtmlParser tailParser = new HtmlParser(new ConverterBufferInput(injectTail, progressMonitor), false, tailIsText, 64, 8, testBoundaryConditions);
         fragmentToTextConverter = new HtmlToTextConverter(tailParser, output, null, true, tailIsText, false, null, true, 0);
     }

     // Keep flushing until the converter reports completion.
     bool tailFlushed;
     do
     {
         tailFlushed = fragmentToTextConverter.Flush();
     }
     while (!tailFlushed);

     ((IDisposable)fragmentToTextConverter).Dispose();
     fragmentToTextConverter = null;
     tailInjected            = true;
 }
コード例 #9
0
ファイル: Program.cs プロジェクト: zackmark29/Textify
        /// <summary>
        /// Downloads a sample page, converts it with <c>HtmlToTextConverter</c>, writes the result to
        /// <c>out.txt</c> and echoes it to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            string url = "https://openfiber.it/mondo-open-fiber/comunicati-stampa/";
            // Alternative sample pages:
            //url = "https://event.unitn.it/cerimonia-laurea/";
            //url = "https://blog.botfactory.it";
            //url = "https://www.trentinoinrete.it/Documentazioni-per-gli-Enti-Locali/Previsione-degli-interventi-per-comune";

            string html;

            // HttpClient is IDisposable; release it as soon as the single download is done.
            using (HttpClient http = new HttpClient())
            {
                html = await http.GetStringAsync(url);
            }

            HtmlToTextConverter converter = new HtmlToTextConverter();
            string output = converter.Convert(html);

            File.WriteAllText("out.txt", output);

            Console.WriteLine(output);
        }
コード例 #10
0
 /// <summary>
 /// Executes a <c>FetchMyCurrentCoursesService</c> request (expiration set to one day) and, in its
 /// callback, stores the resulting courses in <c>MyCourses</c> and <c>CourseIdMap</c>.
 /// </summary>
 /// <param name="successCallback">Invoked after <c>MyCourses</c> has been assigned; may be null.</param>
 public void Load(Action successCallback)
 {
     var request = App.Model.BuildService(new FetchMyCurrentCoursesService()).SetExpiration(TimeSpan.FromDays(1.0));

     request.Execute(service =>
     {
         var courses = new ObservableCollection <Course>();

         foreach (var course in service.Result)
         {
             // Titles are shown as plain text, so any HTML in them is stripped.
             course.Title = HtmlToTextConverter.StripHtml(course.Title);
             courses.Add(course);
             CourseIdMap[course.ID] = course;
         }

         this.MyCourses = courses;

         if (successCallback == null)
         {
             return;
         }
         successCallback();
     });
 }
コード例 #11
0
        /// <summary>
        /// Loads the page at <c>Uri</c>, removes script and style nodes, and saves the title and body
        /// text through <c>Worker.SaveDocument</c>.
        /// </summary>
        /// <returns>
        /// The loaded document; <c>null</c> when loading threw (the message is printed). A document whose
        /// parsed text is missing or shorter than 10 characters is returned without being saved.
        /// </returns>
        public HtmlDocument Load()
        {
            Print.Show("Loading url: " + Uri.AbsoluteUri);
            HtmlDocument doc;

            try
            {
                doc = new HtmlWeb().Load(Uri.AbsoluteUri);
            }
            catch (Exception ex)
            {
                Print.Show(ex.Message);
                return(null);
            }

            // Read the length only after the null check; a missing ParsedText counts as size 0.
            int parsedLength = doc.ParsedText == null ? 0 : doc.ParsedText.Length;

            Print.Show("Saving url: " + Uri.AbsoluteUri + " Size:" + parsedLength);
            if (parsedLength < 10)
            {
                return(doc);
            }

            //get text from title and body (either node may be absent)
            var title = doc.DocumentNode.SelectSingleNode("//head//title");
            var body  = doc.DocumentNode.SelectSingleNode("//body");

            //remove script; SelectNodes yields null rather than an empty collection when nothing matches
            var nodes = body != null ? body.SelectNodes("//script|//style") : null;

            if (nodes != null)
            {
                foreach (var node in nodes)
                {
                    node.ParentNode.RemoveChild(node);
                }
            }

            HtmlToTextConverter textConverter = new HtmlToTextConverter();

            // A missing title or body is stored as an empty string instead of crashing the load.
            Worker.SaveDocument(new SiteInfo
            {
                BodyContent  = body != null ? textConverter.ToText(body.InnerText) : String.Empty,
                TitleContent = title != null ? textConverter.ToText(title.InnerText) : String.Empty,
                Url          = Uri.AbsoluteUri
            });
            return(doc);
        }
コード例 #12
0
 /// <summary>
 /// Disposes and clears the fragment-to-HTML converter, the fragment-to-text converter and the
 /// fragment parser (each only if currently set), then calls the base <c>Reset</c> and the base
 /// <c>Dispose</c>.
 /// </summary>
 /// <param name="disposing">Forwarded unchanged to <c>base.Dispose</c>; not inspected here.</param>
 protected override void Dispose(bool disposing)
 {
     if (this.fragmentToHtmlConverter != null)
     {
         var htmlConverter = (IDisposable)this.fragmentToHtmlConverter;
         htmlConverter.Dispose();
         this.fragmentToHtmlConverter = null;
     }

     if (this.fragmentToTextConverter != null)
     {
         var textConverter = (IDisposable)this.fragmentToTextConverter;
         textConverter.Dispose();
         this.fragmentToTextConverter = null;
     }

     if (this.fragmentParser != null)
     {
         var parser = (IDisposable)this.fragmentParser;
         parser.Dispose();
         this.fragmentParser = null;
     }

     base.Reset();
     base.Dispose(disposing);
 }
コード例 #13
0
        /// <summary>
        /// Runs the pending head or tail injection fragment through an <c>HtmlToTextConverter</c>.
        /// </summary>
        /// <param name="head">True to process the head fragment, false to process the tail fragment.</param>
        /// <param name="output">Output handed to a newly created converter.</param>
        public override void Inject(bool head, TextOutput output)
        {
            if (head)
            {
                // Nothing to do without a head fragment, or when it has already been injected.
                if (this.injectHead == null || this.headInjected)
                {
                    return;
                }

                bool headIsText = this.injectionFormat == HeaderFooterFormat.Text;
                HtmlParser headParser = new HtmlParser(new ConverterBufferInput(this.injectHead, this.progressMonitor), false, headIsText, 64, 8, this.testBoundaryConditions);

                this.fragmentToTextConverter = new HtmlToTextConverter(headParser, output, null, true, headIsText, false, true, 0);

                // Keep flushing until the converter reports completion.
                while (true)
                {
                    if (this.fragmentToTextConverter.Flush())
                    {
                        break;
                    }
                }

                this.headInjected = true;

                // The converter is kept for the tail only when a tail fragment exists.
                if (this.injectTail == null)
                {
                    ((IDisposable)this.fragmentToTextConverter).Dispose();
                    this.fragmentToTextConverter = null;
                }

                return;
            }

            if (this.injectHead != null && !this.headInjected)
            {
                // Reaching the tail with the head still pending is flagged in debug builds,
                // then the head is marked as done.
                InternalDebug.Assert(false);
                this.headInjected = true;
            }

            if (this.injectTail == null || this.tailInjected)
            {
                return;
            }

            bool tailIsText = this.injectionFormat == HeaderFooterFormat.Text;

            if (this.fragmentToTextConverter != null)
            {
                // Reuse the converter left over from the head injection.
                this.fragmentToTextConverter.Initialize(this.injectTail, tailIsText);
            }
            else
            {
                HtmlParser tailParser = new HtmlParser(new ConverterBufferInput(this.injectTail, this.progressMonitor), false, tailIsText, 64, 8, this.testBoundaryConditions);

                this.fragmentToTextConverter = new HtmlToTextConverter(tailParser, output, null, true, tailIsText, false, true, 0);
            }

            // Keep flushing until the converter reports completion.
            while (true)
            {
                if (this.fragmentToTextConverter.Flush())
                {
                    break;
                }
            }

            ((IDisposable)this.fragmentToTextConverter).Dispose();
            this.fragmentToTextConverter = null;

            this.tailInjected = true;
        }
コード例 #14
0
        /// <summary>
        /// Adds one index document per existing file: a path field, a last-modified field and one
        /// content field per word extracted from the file.
        /// </summary>
        /// <param name="writer">The index writer that receives the documents.</param>
        /// <param name="directoryInfo">The directory whose root name is removed from each file path before it is stored.</param>
        /// <param name="files">The file paths to add; paths that do not exist are skipped.</param>
        /// <param name="documents">The document settings: content tokenize/store flags and the commit size threshold.</param>
        public void AddDocuments(Lucene.Net.Index.IndexWriter writer, DirectoryInfo directoryInfo, string[] files, SupportedDocumentExtension documents)
        {
            Nequeo.Html.HtmlToTextConverter htmlConverter = new HtmlToTextConverter();

            // Path-like fields are stored and indexed as a single untokenized value.
            FieldType pathFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed      = true,
                Tokenized    = false,
                Stored       = true,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };

            // Content fields take their tokenize/store behavior from the caller's settings.
            FieldType contentFieldType = new Lucene.Net.Documents.FieldType()
            {
                Indexed      = true,
                Tokenized    = documents.TokenizeContent,
                Stored       = documents.StoreContent,
                IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            };

            foreach (string filePath in files)
            {
                // Skip entries that are not present on disk.
                if (!File.Exists(filePath))
                {
                    continue;
                }

                Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
                FileInfo fileInfo = new FileInfo(filePath);

                // Stored path: root removed, lower-cased, forward slashes.
                string storedPath = filePath.Replace(directoryInfo.Root.FullName, "").ToLower().Replace("\\", "/");
                DateTime lastWrite = fileInfo.LastWriteTime;

                document.Add(new Field("path", storedPath, pathFieldType));
                document.Add(new Field("modified", lastWrite.ToShortDateString() + " " + lastWrite.ToShortTimeString(), pathFieldType));

                // Convert the file and split the result into words.
                string   content = htmlConverter.Convert(filePath);
                string[] words   = String.IsNullOrEmpty(content) ? null : content.Words();

                if (words != null)
                {
                    foreach (string rawWord in words)
                    {
                        // Lower-case the word and trim punctuation from both ends.
                        string word = rawWord.ToLower().RemovePunctuationFromStartAndEnd();

                        if (String.IsNullOrEmpty(word))
                        {
                            continue;
                        }

                        document.Add(new Field("content", word, contentFieldType));
                    }
                }

                writer.AddDocument(document.Fields);

                // Commit once the accumulated file size exceeds the configured threshold.
                documents.TotalDocumentSize += fileInfo.Length;
                if (documents.TotalDocumentSize > documents.MaxDocumentSizePerCommit)
                {
                    writer.Commit();
                    documents.TotalDocumentSize = 0;
                }
            }
        }
コード例 #15
0
        /// <summary>
        /// Checks that <c>HtmlToTextConverter.StripHtml</c> turns the <c>source</c> fixture into the
        /// expected plain text.
        /// </summary>
        public void StripeHtml_RemovesAllHtml_FromString()
        {
            const string expected = "\r\r Lorem ipsum dolor sit amet, potenti nec quam non ut in, suspendisse maecenas nisl commodo nec. Auctor mollis sollicitudin orci orci, leo donec condimentum elementum dui, suscipit elit. Accumsan massa id, ut vivamus. Accumsan gravida risus, pellentesque quisque malesuada, quam eget orci sollicitudin, pede pharetra. Dui felis viverra et pellentesque minima, sem arcu wisi, quasi leo vitae orci netus praesent, nunc vulputate consequatur molestie, lacus ipsum dui massa accumsan interdum. Semper dolorem. Justo sit justo. Eros penatibus, dictum pellentesque, eget dolor tortor, pede sodales adipiscing.\r\rPurus eget, eveniet eu id non in nonummy, est nunc sed hac est turpis ut. Lectus commodo donec nulla parturient morbi morbi, interdum fermentum ac taciti, commodo in neque porta per aliquet, pellentesque consequat at primis vitae, dolor vitae. Ut nullam penatibus et blandit mattis euismod, cupiditate lacinia non et ullamcorper blandit morbi, eros wisi tincidunt velit. Dapibus dui libero, incidunt integer. Lacus metus bibendum sit adipiscing eget, vitae pede venenatis magna, tincidunt consectetuer bibendum, aliquam suspendisse libero quam, non massa mauris lorem in. Rhoncus lacus lobortis dui, dignissim nec est ligula lacinia, et ligula metus. Mauris dictum, adipiscing a nonummy, purus et auctor eu at est dolor. Commodo lobortis duis libero, tempor ac nibh metus turpis donec integer. Elit non arcu ut, dapibus sem tristique felis consequat platea sapien, ligula sociis tempus posuere dignissim odio, ornare ab nibh quis odio ut lacus.\r\rThe end";

            var actual = HtmlToTextConverter.StripHtml(source);

            Assert.Equal(expected, actual);
        }