/// <summary>
/// Command-line entry point. Reads the Markdown file named by the first argument,
/// writes the transformed HTML to the second argument's path, then converts that
/// HTML file to text, prints the text and writes it to the third argument's path.
/// Exits with code -1 when fewer than three arguments are given or the input
/// file does not exist.
/// </summary>
/// <param name="args">input_file, html_output_file, text_output_file (in that order).</param>
private static void Main(string[] args)
        {
            bool hasAllArguments = args.Length >= 3;
            if (!hasAllArguments)
            {
                Console.WriteLine("usage: MarkdownParser input_file html_output_file text_output_file");
                Environment.Exit(-1);
            }

            string inputPath = args[0];
            if (File.Exists(inputPath) == false)
            {
                Console.WriteLine("{0} does not exist.", inputPath);
                Environment.Exit(-1);
            }

            string htmlPath = args[1];
            string textPath = args[2];

            // Stage 1: Markdown -> HTML, persisted to disk because the next stage
            // is handed the HTML file's path rather than its contents.
            string markdown = File.ReadAllText(inputPath);
            string html = new Markdown().Transform(markdown);
            File.WriteAllText(htmlPath, html);

            // Stage 2: HTML file -> plain text.
            HtmlToText converter = new HtmlToText();
            string plainText = converter.Convert(htmlPath);

            // Rewrite every line feed not already preceded by a carriage return
            // to the platform's newline sequence.
            const string bareLineFeed = "(?<!\r)\n";
            plainText = Regex.Replace(plainText, bareLineFeed, Environment.NewLine);

            Console.WriteLine(plainText);
            File.WriteAllText(textPath, plainText);
        }
Esempio n. 2
0
		/// <summary>
		/// Passes the path <c>..\..\mshome.htm</c> to <c>HtmlToText.Convert</c> and
		/// writes the returned string to <c>mshome.txt</c>.
		/// </summary>
		static void Test()
		{
			HtmlToText htt = new HtmlToText();
			string s = htt.Convert(@"..\..\mshome.htm");

			// The using statement flushes and closes the writer even if Write throws,
			// so the file handle is never left open.
			using (StreamWriter sw = new StreamWriter("mshome.txt"))
			{
				sw.Write(s);
			}
		}
 /// <summary>
 /// Fetches the content for <paramref name="url"/> through the service proxy,
 /// converts it from HTML to text, and returns up to ten of the most frequently
 /// occurring words that are not in the stop-word collection.
 /// </summary>
 /// <param name="url">Address passed to the proxy's <c>GetWebContent</c> call.</param>
 /// <returns>At most ten distinct words, ordered by descending occurrence count.</returns>
 public string[] Top10ContentWords(string url)
 {
     // NOTE(review): the proxy is not closed if GetWebContent throws — confirm
     // whether that matters for this client type.
     var client = new Web2StringProxy.ServiceClient();
     var rawHtml = client.GetWebContent(url);
     client.Close();

     var converter = new HtmlToText();
     var plainText = converter.ConvertHtml(rawHtml);

     // Split on runs of non-word characters and digits, then discard blank
     // fragments and stop words.
     var tokens = Regex.Split(plainText, @"[\W\d]+");
     var candidates = tokens
         .Where(token => !String.IsNullOrWhiteSpace(token))
         .Where(token => !_stopWords.Contains(token));

     // Count occurrences per word and keep the ten most frequent.
     return candidates
         .GroupBy(word => word)
         .OrderByDescending(occurrences => occurrences.Count())
         .Select(occurrences => occurrences.Key)
         .Take(10)
         .ToArray();
 }