Esempio n. 1
0
        /// <summary>
        /// Smoke test: parsing the "htmlrtf2.rtf" sample must yield a non-null,
        /// non-empty string.
        /// </summary>
        public void TestReadRTF_Html()
        {
            // Arrange: point a parser at the sample file.
            var context   = new ParserContext(TestDataSample.GetRTFPath("htmlrtf2.rtf"));
            var rtfParser = new RTFTextParser(context);

            // Act
            string text = rtfParser.Parse();

            // Assert: only the presence of content is checked, not its value.
            Assert.IsNotNullOrEmpty(text);
        }
Esempio n. 2
0
        /// <summary>
        /// Parses the "Formated text.rtf" sample and checks the number of extracted
        /// lines plus the content of selected lines (0, 1, 2 and 7).
        /// </summary>
        public void TestReadRTF_FormattedText()
        {
            string path   = TestDataSample.GetRTFPath("Formated text.rtf");
            var    parser = new RTFTextParser(new ParserContext(path));
            string result = parser.Parse();

            // Normalise Windows line endings so the split is the same for "\r\n" and "\n" output.
            string[] lines = result.Replace("\r\n", "\n").Split('\n');

            // Expected value goes first so a failure reports "expected 11 but was N".
            Assert.AreEqual(11, lines.Length);
            Assert.AreEqual("11111111111", lines[0]);
            Assert.AreEqual("22222222222", lines[1]);
            Assert.AreEqual("张三李四王五", lines[2]);
            // NOTE(review): "[email protected]" looks like an e-mail obfuscation artifact rather
            // than the text in the fixture - confirm against the sample file.
            Assert.AreEqual("RTF Sample , Author : yuans , contact : [email protected] , site : http://www.cnblogs.com/xdesigner .", lines[7]);
        }
Esempio n. 3
0
        /// <summary>
        /// Downloads attachments one id at a time, extracts their text and saves every
        /// attachment with a non-empty body to the index named by <c>_source.index</c>.
        /// </summary>
        /// <remarks>
        /// Ids start at <c>elastic.GetMaxId() + 1</c> and increase by one per attempt,
        /// whether or not the attempt succeeds. The download URL is built by formatting
        /// <c>_source.url</c> (or "{0}" when it is null) with the current id.
        /// The loop ends when a response has no content-type header, or after
        /// <c>_source.retries</c> (default 5) consecutive failed attempts; any successful
        /// attempt resets the failure counter.
        /// Text is extracted only for .pdf, .docx, .rtf and .doc file names.
        /// </remarks>
        /// <returns>
        /// A list that is currently always empty: items are written to the index
        /// but are not added to the returned list.
        /// </returns>
        public List <Attatchment> FetchAttachementsData()
        {
            string  index   = _source.index;
            Elastic elastic = new Elastic(index);

            List <Attatchment> lstAttachments = new List <Attatchment>();

            int id = elastic.GetMaxId() + 1;

            Console.WriteLine("===> Max id:" + id);

            int    maxRetries  = _source.retries ?? 5;
            string urlTemplate = _source.url ?? "{0}";

            // WebClient is IDisposable; scope it so it is released on every exit path.
            using (WebClient wc = new WebClient())
            {
                bool end   = false;
                int  retry = 0;

                do
                {
                    var tempFileName = Path.GetTempFileName();

                    try
                    {
                        // Remember the id used for this request so the saved item carries
                        // the same id as the URL it was fetched from.
                        int    currentId = id++;
                        string url       = string.Format(urlTemplate, currentId);
                        Console.WriteLine("=> Get " + url);

                        wc.DownloadFile(url, tempFileName);
                        var mimeType = wc.ResponseHeaders["content-type"];
                        Console.WriteLine("=> Mimetype " + mimeType);

                        var fileName = ParseContentDispositionFileName(wc.ResponseHeaders["Content-Disposition"]);
                        if (fileName == null)
                        {
                            // No usable file name: count it as a failed attempt instead of
                            // relying on a NullReferenceException / bad Substring index.
                            Console.WriteLine("=> No filename in Content-Disposition header for id " + currentId);
                            retry++;
                            continue;
                        }
                        Console.WriteLine("=> Filename " + fileName);

                        // A response without a content type ends the loop after this iteration.
                        if (string.IsNullOrEmpty(mimeType))
                        {
                            end = true;
                        }

                        var body = ExtractAttachmentText(fileName, tempFileName);

                        if (!string.IsNullOrEmpty(body))
                        {
                            Attatchment attatch = new Attatchment(currentId.ToString(), fileName, url, mimeType, body, DateTime.Now);
                            elastic.SaveItem(attatch);
                        }

                        retry = 0;
                    }
                    catch (Exception ex)
                    {
                        // Any failure (download, parsing, saving) counts as one failed attempt.
                        Console.WriteLine(ex);
                        retry++;
                    }
                    finally
                    {
                        // Best effort: a leftover temp file must not abort the run.
                        try
                        {
                            File.Delete(tempFileName);
                        }
                        catch (Exception deleteEx)
                        {
                            Console.WriteLine("=> Could not delete temp file " + tempFileName + ": " + deleteEx.Message);
                        }
                    }
                } while (!end && retry < maxRetries);
            }

            return(lstAttachments);
        }

        /// <summary>
        /// Returns the text following "filename=" in a Content-Disposition header value,
        /// with double quotes removed, or null when the header is missing or has no
        /// "filename=" part.
        /// </summary>
        private static string ParseContentDispositionFileName(string contentDisposition)
        {
            const string marker = "filename=";

            if (string.IsNullOrEmpty(contentDisposition))
            {
                return null;
            }

            int markerIndex = contentDisposition.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
            if (markerIndex < 0)
            {
                return null;
            }

            return contentDisposition.Substring(markerIndex + marker.Length).Replace("\"", "");
        }

        /// <summary>
        /// Picks a text parser from the extension of <paramref name="fileName"/> and runs
        /// it on the file at <paramref name="path"/>. Returns an empty string for
        /// extensions other than .pdf, .docx, .rtf and .doc.
        /// </summary>
        private static string ExtractAttachmentText(string fileName, string path)
        {
            if (fileName.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
            {
                var pdf = new PDFTextParser(new Toxy.ParserContext(path));
                return pdf.Parse();
            }

            if (fileName.EndsWith(".docx", StringComparison.OrdinalIgnoreCase))
            {
                System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
                var docx = new Word2007TextParser(new Toxy.ParserContext(path));
                return docx.Parse();
            }

            if (fileName.EndsWith(".rtf", StringComparison.OrdinalIgnoreCase))
            {
                System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
                var rtf = new RTFTextParser(new Toxy.ParserContext(path));
                return rtf.Parse();
            }

            if (fileName.EndsWith(".doc", StringComparison.OrdinalIgnoreCase))
            {
                System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
                var doc = new Word2003TextParser(new Toxy.ParserContext(path));
                return doc.Parse();
            }

            return string.Empty;
        }