/// <summary>
/// Parses the "htmlrtf2.rtf" sample with <c>RTFTextParser</c> and checks that
/// the extracted text is neither null nor empty.
/// </summary>
public void TestReadRTF_Html()
{
    var context = new ParserContext(TestDataSample.GetRTFPath("htmlrtf2.rtf"));

    string extractedText = new RTFTextParser(context).Parse();

    Assert.IsNotNullOrEmpty(extractedText);
}
/// <summary>
/// Parses the "Formated text.rtf" sample with <c>RTFTextParser</c> and checks
/// the number of extracted lines plus the content of selected lines.
/// </summary>
public void TestReadRTF_FormattedText()
{
    string path = TestDataSample.GetRTFPath("Formated text.rtf");
    var parser = new RTFTextParser(new ParserContext(path));

    string result = parser.Parse();

    // Collapse CRLF to LF first so the split yields the same lines regardless
    // of which line ending the parser emitted.
    string[] lines = result.Replace("\r\n", "\n").Split('\n');

    // Expected value goes first, actual second, so a failure message reports
    // the two values the right way round.
    Assert.AreEqual(11, lines.Length);
    Assert.AreEqual("11111111111", lines[0]);
    Assert.AreEqual("22222222222", lines[1]);
    Assert.AreEqual("张三李四王五", lines[2]);
    // NOTE(review): "[email protected]" in the expected text below looks like an
    // e-mail obfuscation artifact rather than real sample content - confirm
    // against the sample file before relying on this assertion.
    Assert.AreEqual("RTF Sample , Author : yuans , contact : [email protected] , site : http://www.cnblogs.com/xdesigner .", lines[7]);
}
/// <summary>
/// Downloads attachments one id at a time, starting just after the highest id
/// reported by the Elastic index, extracts their text according to the file
/// extension, and saves every attachment with a non-empty body to the index.
/// </summary>
/// <remarks>
/// The loop ends when a response carries no content type, or once
/// <c>_source.retries</c> (default 5) consecutive iterations have failed.
/// The id advances on every iteration, failed ones included, so a "retry"
/// moves on to the next id rather than repeating the same request.
/// </remarks>
/// <returns>
/// The attachment list created by this method. Nothing is added to it here
/// (attachments are saved straight to the index), so it is always empty.
/// </returns>
public List<Attatchment> FetchAttachementsData()
{
    string index = _source.index;
    Elastic elastic = new Elastic(index);
    List<Attatchment> lstAttachments = new List<Attatchment>();

    int id = elastic.GetMaxId() + 1;
    Console.WriteLine("===> Max id:" + id);

    int maxRetries = _source.retries ?? 5;
    // Loop-invariant: "{0}" is replaced by the attachment id for each request.
    string urlTemplate = _source.url ?? "{0}";

    bool end = false;
    int retry = 0;

    // WebClient is IDisposable; dispose it once the crawl is over.
    using (WebClient wc = new WebClient())
    {
        do
        {
            string tempFileName = Path.GetTempFileName();
            try
            {
                // The id is advanced before the download so that a failing id
                // is skipped on the next iteration.
                string url = string.Format(urlTemplate, id++);
                Console.WriteLine("=> Get " + url);
                wc.DownloadFile(url, tempFileName);

                string mimeType = wc.ResponseHeaders["content-type"];
                Console.WriteLine("=> Mimetype " + mimeType);

                string fileName = ParseAttachmentFileName(wc.ResponseHeaders["Content-Disposition"]);
                Console.WriteLine("=> Filename " + fileName);

                // A response without a content type ends the crawl after this
                // iteration; the current download is still processed below.
                if (string.IsNullOrEmpty(mimeType))
                {
                    end = true;
                }

                string body = ExtractAttachmentText(fileName, tempFileName);
                if (!string.IsNullOrEmpty(body))
                {
                    // NOTE(review): `id` was already incremented above, so the
                    // stored id is one higher than the id used in `url` -
                    // confirm this offset is intended.
                    Attatchment attatch = new Attatchment(id.ToString(), fileName, url, mimeType, body, DateTime.Now);
                    elastic.SaveItem(attatch);
                }

                // Any iteration that completes without throwing resets the
                // consecutive-failure counter.
                retry = 0;
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
                retry++;
            }
            finally
            {
                try
                {
                    File.Delete(tempFileName);
                }
                catch (Exception)
                {
                    // Best effort: a temp file that cannot be removed must not
                    // abort the crawl.
                }
            }
        } while (!end && retry < maxRetries);
    }

    return lstAttachments;
}

/// <summary>
/// Extracts the file name from a Content-Disposition header value: the text
/// after "filename=" with all double quotes removed.
/// </summary>
/// <param name="contentDisposition">Raw header value; may be null when the header is absent.</param>
/// <returns>
/// The file name, or the whole header value (quotes removed) when it contains
/// no "filename=" marker, so extension detection still has something to match.
/// </returns>
/// <exception cref="InvalidOperationException">The header is absent.</exception>
private static string ParseAttachmentFileName(string contentDisposition)
{
    if (contentDisposition == null)
    {
        throw new InvalidOperationException("Response has no Content-Disposition header.");
    }

    const string marker = "filename=";
    int markerIndex = contentDisposition.IndexOf(marker, StringComparison.Ordinal);

    // Without the marker, IndexOf returns -1; use the full value instead of
    // slicing the header at an arbitrary offset.
    string rawName = markerIndex >= 0
        ? contentDisposition.Substring(markerIndex + marker.Length)
        : contentDisposition;

    return rawName.Replace("\"", "");
}

/// <summary>
/// Picks a text parser from the extension of <paramref name="fileName"/>
/// (.pdf, .docx, .rtf or .doc, case-insensitive) and runs it on the file at
/// <paramref name="path"/>.
/// </summary>
/// <param name="fileName">Attachment file name used only for extension detection.</param>
/// <param name="path">Path of the downloaded file to parse.</param>
/// <returns>The parser output, or an empty string for any other extension.</returns>
private static string ExtractAttachmentText(string fileName, string path)
{
    if (fileName.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
    {
        return new PDFTextParser(new Toxy.ParserContext(path)).Parse();
    }

    bool isDocx = fileName.EndsWith(".docx", StringComparison.OrdinalIgnoreCase);
    bool isRtf = fileName.EndsWith(".rtf", StringComparison.OrdinalIgnoreCase);
    bool isDoc = fileName.EndsWith(".doc", StringComparison.OrdinalIgnoreCase);
    if (!isDocx && !isRtf && !isDoc)
    {
        return string.Empty;
    }

    // The code-pages provider is registered before any of the Word/RTF parsers
    // run, and only when one of them is actually needed.
    System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);

    if (isDocx)
    {
        return new Word2007TextParser(new Toxy.ParserContext(path)).Parse();
    }

    if (isRtf)
    {
        return new RTFTextParser(new Toxy.ParserContext(path)).Parse();
    }

    return new Word2003TextParser(new Toxy.ParserContext(path)).Parse();
}