/// <summary>
        /// Parses the given Excel sample with the "HasHeader" property set to "1" and verifies that
        /// the first table exposes one header row (A, B, C, D) followed by three data rows.
        /// </summary>
        /// <param name="filename">Excel sample file name, resolved through <c>TestDataSample.GetExcelPath</c>.</param>
        public void BaseTestWithHeaderRow(string filename)
        {
            string             excelPath   = TestDataSample.GetExcelPath(filename);
            ParserContext      ctx         = new ParserContext(excelPath);
            ISpreadsheetParser sheetParser = ParserFactory.CreateSpreadsheet(ctx);

            sheetParser.Context.Properties.Add("HasHeader", "1");
            ToxySpreadsheet workbook = sheetParser.Parse();

            // Expected content of the first table, laid out exactly as it is asserted below.
            string[]   expectedHeader = { "A", "B", "C", "D" };
            string[][] expectedRows   =
            {
                new[] { "1", "2", "3", "4" },
                new[] { "A1", "A2", "A3", "A4" },
                new[] { "B1", "B2", "B3", "B4" },
            };

            var firstTable = workbook.Tables[0];

            Assert.AreEqual(1, firstTable.HeaderRows.Count);
            for (int col = 0; col < expectedHeader.Length; col++)
            {
                Assert.AreEqual(expectedHeader[col], firstTable.HeaderRows[0].Cells[col].Value);
            }

            Assert.AreEqual(3, firstTable.Rows.Count);
            for (int row = 0; row < expectedRows.Length; row++)
            {
                for (int col = 0; col < expectedRows[row].Length; col++)
                {
                    Assert.AreEqual(expectedRows[row][col], firstTable.Rows[row].Cells[col].Value);
                }
            }
        }
Example #2
0
        /// <summary>
        /// Subscribes to the <c>ParseLine</c> event of the plain-text parser and checks the text
        /// reported for line numbers 0 through 3 of "utf8.txt", then verifies that the parsed
        /// text is neither null nor empty.
        /// </summary>
        public void TestParseLineEvent()
        {
            ParserContext   ctx        = new ParserContext(TestDataSample.GetTextPath("utf8.txt"));
            PlainTextParser textParser = (PlainTextParser)ParserFactory.CreateText(ctx);

            textParser.ParseLine += (sender, args) =>
            {
                // Only line numbers 0-3 are checked; any other line number is ignored.
                switch (args.LineNumber)
                {
                    case 0:
                        Assert.AreEqual("hello world", args.Text);
                        break;
                    case 1:
                        Assert.AreEqual("a2", args.Text);
                        break;
                    case 2:
                        Assert.AreEqual("a3", args.Text);
                        break;
                    case 3:
                        Assert.AreEqual("bbb4", args.Text);
                        break;
                }
            };

            string parsed = textParser.Parse();
            Assert.IsNotNullOrEmpty(parsed);
        }
Example #3
0
        /// <summary>
        /// Parses "mshome.html" into a DOM and verifies the number of nodes matched by "//meta",
        /// the single attribute of the node selected by "//a", and the title and meta nodes that
        /// are reached by index from the document root.
        /// </summary>
        public void TestParseHtml()
        {
            string htmlPath = Path.GetFullPath(TestDataSample.GetHtmlPath("mshome.html"));

            IDomParser domParser = (IDomParser)ParserFactory.CreateDom(new ParserContext(htmlPath));
            ToxyDom    dom       = domParser.Parse();

            // Path queries issued against the document root.
            List<ToxyNode> metas = dom.Root.SelectNodes("//meta");
            Assert.AreEqual(7, metas.Count);

            ToxyNode anchor = dom.Root.SingleSelect("//a");
            Assert.AreEqual(1, anchor.Attributes.Count);
            Assert.AreEqual("href", anchor.Attributes[0].Name);
            Assert.AreEqual("http://www.microsoft.com/en/us/default.aspx?redir=true", anchor.Attributes[0].Value);

            // Positional navigation: both nodes checked below are children of Root -> child 0 -> child 0.
            var container = dom.Root.ChildrenNodes[0].ChildrenNodes[0];

            ToxyNode title = container.ChildrenNodes[0];
            Assert.AreEqual("title", title.Name);
            Assert.AreEqual("Microsoft Corporation", title.ChildrenNodes[0].InnerText);

            ToxyNode meta = container.ChildrenNodes[7];
            Assert.AreEqual("meta", meta.Name);
            Assert.AreEqual(3, meta.Attributes.Count);
            Assert.AreEqual("name", meta.Attributes[0].Name);
            Assert.AreEqual("SearchDescription", meta.Attributes[0].Value);
            Assert.AreEqual("scheme", meta.Attributes[2].Name);
            Assert.AreEqual(string.Empty, meta.Attributes[2].Value);
        }
Example #4
0
        /// <summary>
        /// Parses "Employee.xls", converts the spreadsheet to a <c>DataSet</c> and verifies the
        /// three table names plus a selection of cells in the first table.
        /// </summary>
        public void TestReadExcelAndConvertToDataSet()
        {
            ParserContext ctx      = new ParserContext(TestDataSample.GetExcelPath("Employee.xls"));
            var           workbook = ParserFactory.CreateSpreadsheet(ctx).Parse();
            DataSet       data     = workbook.ToDataSet();

            string[] sheetNames = { "Sheet1", "Sheet2", "Sheet3" };

            Assert.AreEqual(3, data.Tables.Count);
            for (int t = 0; t < sheetNames.Length; t++)
            {
                Assert.AreEqual(sheetNames[t], data.Tables[t].TableName);
            }

            var firstSheet = data.Tables[0];

            // The first three cells of the first row are expected to hold DBNull.
            for (int col = 0; col < 3; col++)
            {
                Assert.AreEqual(System.DBNull.Value, firstSheet.Rows[0][col]);
            }

            Assert.AreEqual("Employee Info", firstSheet.Rows[1][1]);

            // Label in column 1, value in column 2.
            Assert.AreEqual("Last name:", firstSheet.Rows[3][1]);
            Assert.AreEqual("lastName", firstSheet.Rows[3][2]);
            Assert.AreEqual("First name:", firstSheet.Rows[4][1]);
            Assert.AreEqual("firstName", firstSheet.Rows[4][2]);
            Assert.AreEqual("SSN:", firstSheet.Rows[5][1]);
            Assert.AreEqual("ssn", firstSheet.Rows[5][2]);
        }
Example #5
0
        /// <summary>
        /// Extracts plain text from "Sample1.PDF" and verifies that it begins with "LA MARCHE".
        /// </summary>
        public void TestParsePlainTextFromPDF()
        {
            string path   = TestDataSample.GetPdfPath("Sample1.PDF");
            var    parser = new PDFTextParser(new ParserContext(path));
            string result = parser.Parse();

            // Fail with a clear assertion instead of a NullReferenceException if nothing was parsed.
            Assert.IsNotNull(result);

            // Ordinal comparison: the prefix check must not depend on the current culture.
            Assert.IsTrue(result.StartsWith("LA MARCHE", System.StringComparison.Ordinal));
        }
Example #6
0
        /// <summary>
        /// Parses "htmlrtf2.rtf" with the RTF text parser and verifies that the extracted text
        /// is neither null nor empty.
        /// </summary>
        public void TestReadRTF_Html()
        {
            ParserContext ctx       = new ParserContext(TestDataSample.GetRTFPath("htmlrtf2.rtf"));
            RTFTextParser rtfParser = new RTFTextParser(ctx);

            string extracted = rtfParser.Parse();

            Assert.IsNotNullOrEmpty(extracted);
        }
Example #7
0
        /// <summary>
        /// Smoke test: parses a large PDF specification document. The only assertion always
        /// passes, so the test fails only if construction or parsing throws.
        /// </summary>
        public void TestReadBigPDFFile()
        {
            ParserContext ctx       = new ParserContext(TestDataSample.GetPdfPath("Word97-2007BinaryFileFormat(doc)Specification.pdf"));
            PDFTextParser pdfParser = new PDFTextParser(ctx);

            // The extracted text itself is not inspected.
            pdfParser.Parse();

            Assert.IsTrue(true);
        }
Example #8
0
        /// <summary>
        /// Parses "Sample1.PDF" into a document and verifies the paragraph count and the text
        /// of the first paragraph.
        /// </summary>
        public void TestParseToxyDocumentFromPDF()
        {
            ParserContext ctx       = new ParserContext(TestDataSample.GetPdfPath("Sample1.PDF"));
            var           docParser = new PDFDocumentParser(ctx);

            var document = docParser.Parse();

            Assert.AreEqual(1474, document.Paragraphs.Count);
            Assert.AreEqual("LA MARCHE MONDIALE DES FEMMES : UN MOUVEMENT IRRÉVERSIBLE", document.Paragraphs[0].Text);
        }
Example #9
0
        /// <summary>
        /// Reads the metadata of "sample.ogg" and verifies the number of metadata entries.
        /// </summary>
        public void TestParseOgg()
        {
            string        fullPath = Path.GetFullPath(TestDataSample.GetAudioPath("sample.ogg"));
            ParserContext ctx      = new ParserContext(fullPath);

            IMetadataParser metaParser = (IMetadataParser)ParserFactory.CreateMetadata(ctx);
            ToxyMetadata    metadata   = metaParser.Parse();

            Assert.AreEqual(15, metadata.Count);
        }
Example #10
0
        /// <summary>
        /// Reads the metadata of "sample_v1_only.mp3" and verifies the number of metadata entries.
        /// </summary>
        public void TestParseMp3_Id3v1Only()
        {
            string        fullPath = Path.GetFullPath(TestDataSample.GetAudioPath("sample_v1_only.mp3"));
            ParserContext ctx      = new ParserContext(fullPath);

            IMetadataParser metaParser = (IMetadataParser)ParserFactory.CreateMetadata(ctx);
            ToxyMetadata    metadata   = metaParser.Parse();

            Assert.AreEqual(11, metadata.Count);
        }
Example #11
0
        /// <summary>
        /// Extracts text from "Employee.xls" and verifies that the labels "Last name" and
        /// "First name" appear somewhere in the output.
        /// </summary>
        public void TestExcel2003TextParser()
        {
            ParserContext context = new ParserContext(TestDataSample.GetExcelPath("Employee.xls"));
            ITextParser   parser  = ParserFactory.CreateText(context);
            string        result  = parser.Parse();

            Assert.IsNotNull(result);

            // Contains (ordinal) instead of IndexOf(..) > 0: a match at index 0 is still a match,
            // and the check no longer depends on the current culture.
            Assert.IsTrue(result.Contains("Last name"));
            Assert.IsTrue(result.Contains("First name"));
        }
Example #12
0
        /// <summary>
        /// Parses "utf8.txt" as a whole and verifies that the result is the four expected
        /// lines, each one terminated by <c>Environment.NewLine</c>.
        /// </summary>
        public void TestReadWholeText()
        {
            ParserContext ctx        = new ParserContext(TestDataSample.GetTextPath("utf8.txt"));
            ITextParser   textParser = ParserFactory.CreateText(ctx);

            string[] expectedLines = { "hello world", "a2", "a3", "bbb4" };

            // Join places a newline between the lines; the final line is terminated as well.
            string expected = string.Join(Environment.NewLine, expectedLines) + Environment.NewLine;

            string actual = textParser.Parse();

            Assert.AreEqual(expected, actual);
        }
Example #13
0
        /// <summary>
        /// Extracts text from "raw text mail demo.msg" and verifies that the result is neither
        /// null nor empty.
        /// </summary>
        public void PureTextMsg_ReadTextTest()
        {
            ParserContext ctx = new ParserContext(TestDataSample.GetEmailPath("raw text mail demo.msg"));

            var    textParser = ParserFactory.CreateText(ctx);
            string body       = textParser.Parse();

            Assert.IsNotNullOrEmpty(body);
        }
Example #14
0
        /// <summary>
        /// Reads the metadata of "Test_Humor-Generation.ppt" and verifies the number of entries
        /// and the "Title" value.
        /// </summary>
        public void TestPowerPoint()
        {
            string        fullPath = Path.GetFullPath(TestDataSample.GetOLE2Path("Test_Humor-Generation.ppt"));
            ParserContext ctx      = new ParserContext(fullPath);

            IMetadataParser metaParser = (IMetadataParser)ParserFactory.CreateMetadata(ctx);
            ToxyMetadata    metadata   = metaParser.Parse();

            Assert.AreEqual(8, metadata.Count);
            Assert.AreEqual("Funny Factory", metadata.Get("Title").Value);
        }
Example #15
0
        /// <summary>
        /// Reads the metadata of "comments.xls" and verifies the number of entries and the
        /// "ApplicationName" value.
        /// </summary>
        public void TestExcelFile()
        {
            string        fullPath = Path.GetFullPath(TestDataSample.GetExcelPath("comments.xls"));
            ParserContext ctx      = new ParserContext(fullPath);

            IMetadataParser metaParser = (IMetadataParser)ParserFactory.CreateMetadata(ctx);
            ToxyMetadata    metadata   = metaParser.Parse();

            Assert.AreEqual(8, metadata.Count);
            Assert.AreEqual("Microsoft Excel", metadata.Get("ApplicationName").Value);
        }
Example #16
0
        /// <summary>
        /// Runs the text parser over "toxy.zip" and verifies that the output splits into 68
        /// non-empty lines.
        /// </summary>
        public void TestParseDirectoryFromZip()
        {
            string        zipPath    = TestDataSample.GetFilePath("toxy.zip", null);
            ITextParser   textParser = ParserFactory.CreateText(new ParserContext(zipPath));

            string listing = textParser.Parse();
            Assert.IsNotNull(listing);

            // Split on the platform newline and drop empty entries before counting.
            string[] separators = { Environment.NewLine };
            string[] entries    = listing.Split(separators, StringSplitOptions.RemoveEmptyEntries);

            Assert.AreEqual(68, entries.Length);
        }
Example #17
0
        /// <summary>
        /// Extracts plain text from "Sample5.PDF" and verifies the first two lines obtained by
        /// splitting the text on '\n'.
        /// </summary>
        public void TestParsePlainTextFromSample5()
        {
            ParserContext ctx       = new ParserContext(TestDataSample.GetPdfPath("Sample5.PDF"));
            PDFTextParser pdfParser = new PDFTextParser(ctx);

            string   extracted = pdfParser.Parse();
            string[] lines     = extracted.Split('\n');

            Assert.AreEqual("License income by market (%)", lines[0]);
            Assert.AreEqual("Philadelphia, Atlanta, Dallas, San Diego, and New", lines[1]);
        }
Example #18
0
        /// <summary>
        /// Reads the metadata of "TestCorel.shw" and verifies the number of entries together
        /// with the "Author" and "LastAuthor" values.
        /// </summary>
        public void TestCorelDrawFile()
        {
            string        fullPath = Path.GetFullPath(TestDataSample.GetOLE2Path("TestCorel.shw"));
            ParserContext ctx      = new ParserContext(fullPath);

            IMetadataParser metaParser = (IMetadataParser)ParserFactory.CreateMetadata(ctx);
            ToxyMetadata    metadata   = metaParser.Parse();

            Assert.AreEqual(6, metadata.Count);
            Assert.AreEqual("thorsteb", metadata.Get("Author").Value);
            Assert.AreEqual("thorsteb", metadata.Get("LastAuthor").Value);
        }
Example #19
0
        /// <summary>
        /// Parses "PalmAgentSamples.vcf" and verifies the number of cards and the full name
        /// stored on the card at index 10.
        /// </summary>
        public void TestForeignNames()
        {
            ParserContext       ctx         = new ParserContext(TestDataSample.GetVCardPath("PalmAgentSamples.vcf"));
            VCardDocumentParser vcardParser = new VCardDocumentParser(ctx);

            var parsed = vcardParser.Parse();

            Assert.AreEqual(20, parsed.Cards.Count);
            Assert.AreEqual("John Doe4", parsed.Cards[10].Name.FullName);
        }
Example #20
0
        /// <summary>
        /// Extracts text from "WithVariousData.xlsx" with the "IncludeSheetNames" property set
        /// to "0" and verifies that "Sheet1" does not occur in the output.
        /// </summary>
        public void TestExcel2007TextParserWithoutSheetNames()
        {
            string        xlsxPath = TestDataSample.GetExcelPath("WithVariousData.xlsx");
            ParserContext ctx      = new ParserContext(xlsxPath);
            ctx.Properties.Add("IncludeSheetNames", "0");

            ITextParser textParser = ParserFactory.CreateText(ctx);
            string      extracted  = textParser.Parse();

            Assert.IsNotNull(extracted);

            // A negative index means the sheet name was not found anywhere in the text.
            int sheetNameIndex = extracted.IndexOf("Sheet1");
            Assert.IsTrue(sheetNameIndex < 0);
        }
Example #21
0
        /// <summary>
        /// Extracts text from "WithVariousData.xlsx" with the "IncludeHeaderFooter" property set
        /// to "1" and verifies that the header text appears in the output.
        /// </summary>
        public void TestExcel2007TextParserWithHeaderFooter()
        {
            ParserContext context = new ParserContext(TestDataSample.GetExcelPath("WithVariousData.xlsx"));

            context.Properties.Add("IncludeHeaderFooter", "1");
            ITextParser parser = ParserFactory.CreateText(context);
            string      result = parser.Parse();

            Assert.IsNotNull(result);

            // Contains (ordinal) instead of IndexOf(..) > 0: the header may legitimately be the
            // very first text in the output (index 0), which the old check rejected.
            Assert.IsTrue(result.Contains("This is the header"));
        }
Example #22
0
        /// <summary>
        /// Reads the metadata of "SampleShow.pptx" and verifies the number of entries together
        /// with the "Title" and "Application" values.
        /// </summary>
        public void TestPptx()
        {
            string        fullPath = Path.GetFullPath(TestDataSample.GetOOXMLPath("SampleShow.pptx"));
            ParserContext ctx      = new ParserContext(fullPath);

            IMetadataParser metaParser = (IMetadataParser)ParserFactory.CreateMetadata(ctx);
            ToxyMetadata    metadata   = metaParser.Parse();

            Assert.AreEqual(8, metadata.Count);
            Assert.AreEqual("SlideShow Sample", metadata.Get("Title").Value);
            Assert.AreEqual("Microsoft Office PowerPoint", metadata.Get("Application").Value);
        }
Example #23
0
        /// <summary>
        /// Reads the metadata of "MultipleCoreProperties.docx" and verifies the number of
        /// entries together with the "Title" and "Application" values.
        /// </summary>
        public void TestDocx()
        {
            string        fullPath = Path.GetFullPath(TestDataSample.GetOOXMLPath("MultipleCoreProperties.docx"));
            ParserContext ctx      = new ParserContext(fullPath);

            IMetadataParser metaParser = (IMetadataParser)ParserFactory.CreateMetadata(ctx);
            ToxyMetadata    metadata   = metaParser.Parse();

            Assert.AreEqual(12, metadata.Count);
            Assert.AreEqual("Format", metadata.Get("Title").Value);
            Assert.AreEqual("Microsoft Macintosh Word", metadata.Get("Application").Value);
        }
Example #24
0
        /// <summary>
        /// Reads the metadata of "TestSolidWorks.sldprt" and verifies the number of entries
        /// together with the "ClassID" and "LastAuthor" values.
        /// </summary>
        public void TestSolidWorksFile()
        {
            string        fullPath = Path.GetFullPath(TestDataSample.GetOLE2Path("TestSolidWorks.sldprt"));
            ParserContext ctx      = new ParserContext(fullPath);

            IMetadataParser metaParser = (IMetadataParser)ParserFactory.CreateMetadata(ctx);
            ToxyMetadata    metadata   = metaParser.Parse();

            Assert.AreEqual(10, metadata.Count);
            Assert.AreEqual("{F29F85E0-4FF9-1068-AB91-08002B27B3D9}", metadata.Get("ClassID").Value);
            Assert.AreEqual("scj", metadata.Get("LastAuthor").Value);
        }
Example #25
0
        /// <summary>
        /// Parses the given Excel sample without any header option and verifies that the first
        /// table has no page header, no header rows and nine data rows.
        /// </summary>
        /// <param name="filename">Excel sample file name, resolved through <c>TestDataSample.GetExcelPath</c>.</param>
        public void BaseTestWithoutHeader(string filename)
        {
            string             excelPath   = TestDataSample.GetExcelPath(filename);
            ISpreadsheetParser sheetParser = ParserFactory.CreateSpreadsheet(new ParserContext(excelPath));
            ToxySpreadsheet    workbook    = sheetParser.Parse();

            var firstTable = workbook.Tables[0];

            Assert.IsNull(firstTable.PageHeader);
            Assert.AreEqual(0, firstTable.HeaderRows.Count);
            Assert.AreEqual(9, firstTable.Rows.Count);
        }
Example #26
0
        /// <summary>
        /// Reads the metadata of "sample.xlsx" and verifies the number of entries together with
        /// the "Application" and "AppVersion" values.
        /// </summary>
        public void TestXlsx()
        {
            string        fullPath = Path.GetFullPath(TestDataSample.GetOOXMLPath("sample.xlsx"));
            ParserContext ctx      = new ParserContext(fullPath);

            IMetadataParser metaParser = (IMetadataParser)ParserFactory.CreateMetadata(ctx);
            ToxyMetadata    metadata   = metaParser.Parse();

            Assert.AreEqual(4, metadata.Count);
            Assert.AreEqual("Microsoft Excel", metadata.Get("Application").Value);
            Assert.AreEqual("12.0000", metadata.Get("AppVersion").Value);
        }
Example #27
0
        /// <summary>
        /// Extracts plain text from "Sample1.PDF", verifies that it begins with "LA MARCHE" and
        /// passes several expected passages to the <c>ContainText</c> helper.
        /// </summary>
        public void TestParsePlainTextFromSample1()
        {
            string path   = TestDataSample.GetPdfPath("Sample1.PDF");
            var    parser = new PDFTextParser(new ParserContext(path));
            string result = parser.Parse();

            // Fail with a clear assertion instead of a NullReferenceException if nothing was parsed.
            Assert.IsNotNull(result);

            // Ordinal comparison: the prefix check must not depend on the current culture.
            Assert.IsTrue(result.StartsWith("LA MARCHE", System.StringComparison.Ordinal));

            ContainText(result, "Toute discussion stratégique sur nos actions nécessite un rappel de ce que nous avons fait en");
            ContainText(result, "l’an 2000 et depuis. Au niveau mondial, en l’an 2000, nous avons mené une campagne de");
            ContainText(result, "Une structure pour nous amener à 2005");
            ContainText(result, "Lors de la 4e rencontre qui aura lieu en Inde, nous avons deux objectifs majeurs");
        }
Example #28
0
        /// <summary>
        /// Parses "RfcAuthors.vcf" and verifies both business cards: full name, organization,
        /// the single address and every contact entry (value and name) in order.
        /// </summary>
        public void TestRead2Cards()
        {
            ParserContext       ctx         = new ParserContext(TestDataSample.GetVCardPath("RfcAuthors.vcf"));
            VCardDocumentParser vcardParser = new VCardDocumentParser(ctx);
            var                 parsed      = vcardParser.Parse();

            Assert.AreEqual(2, parsed.Cards.Count);

            // First card. Each expected contact is a { value, name } pair, in card order.
            string[][] firstContacts =
            {
                new[] { "+1-919-676-9515", "MessagingService, WorkVoice" },
                new[] { "+1-919-676-9564", "WorkFax" },
                new[] { "*****@*****.**", "Internet" },
                new[] { "*****@*****.**", "Internet" },
                new[] { "http://home.earthlink.net/~fdawson", "Url-Default" },
            };

            ToxyBusinessCard first = parsed.Cards[0];

            Assert.AreEqual("Frank Dawson", first.Name.FullName);
            Assert.AreEqual(1, first.Addresses.Count);
            Assert.AreEqual(5, first.Contacts.Count);
            Assert.AreEqual("6544 Battleford Drive;Raleigh;NC;27613-3502;U.S.A.", first.Addresses[0].ToString());

            for (int c = 0; c < firstContacts.Length; c++)
            {
                Assert.AreEqual(firstContacts[c][0], first.Contacts[c].Value);
                Assert.AreEqual(firstContacts[c][1], first.Contacts[c].Name);
            }

            Assert.AreEqual("Lotus Development Corporation", first.Orgnization);

            // Second card.
            string[][] secondContacts =
            {
                new[] { "+1-415-937-3419", "MessagingService, WorkVoice" },
                new[] { "+1-415-528-4164", "WorkFax" },
                new[] { "*****@*****.**", "Internet" },
            };

            ToxyBusinessCard second = parsed.Cards[1];

            Assert.AreEqual("Tim Howes", second.Name.FullName);
            Assert.AreEqual("Netscape Communications Corp.", second.Orgnization);
            Assert.AreEqual(1, second.Addresses.Count);
            Assert.AreEqual(3, second.Contacts.Count);
            Assert.AreEqual("501 E. Middlefield Rd.;Mountain View;CA;94043;U.S.A.", second.Addresses[0].ToString());

            for (int c = 0; c < secondContacts.Length; c++)
            {
                Assert.AreEqual(secondContacts[c][0], second.Contacts[c].Value);
                Assert.AreEqual(secondContacts[c][1], second.Contacts[c].Name);
            }
        }
Example #29
0
        /// <summary>
        /// Parses "Sample1.PDF" into a document and verifies the paragraph count and the text
        /// of the first five paragraphs.
        /// </summary>
        public void TestParseToxyDocumentFromPDF()
        {
            ParserContext ctx       = new ParserContext(TestDataSample.GetPdfPath("Sample1.PDF"));
            var           docParser = new PDFDocumentParser(ctx);
            var           document  = docParser.Parse();

            string[] leadingParagraphs =
            {
                "LA MARCHE MONDIALE DES FEMMES : UN MOUVEMENT IRRÉVERSIBLE",
                "DOCUMENT PRÉPARATOIRE",
                "e",    // this is the superscript 'e'
                "4 Rencontre internationale de la Marche mondiale des femmes",
                "du 18-22 Mars 2003",
            };

            Assert.AreEqual(1474, document.Paragraphs.Count);
            for (int p = 0; p < leadingParagraphs.Length; p++)
            {
                Assert.AreEqual(leadingParagraphs[p], document.Paragraphs[p].Text);
            }
        }
Example #30
0
        /// <summary>
        /// Extracts text from "Formated text.rtf", normalizes "\r\n" to "\n", and verifies the
        /// number of lines as well as the content of lines 0, 1, 2 and 7.
        /// </summary>
        public void TestReadRTF_FormattedText()
        {
            string path   = TestDataSample.GetRTFPath("Formated text.rtf");
            var    parser = new RTFTextParser(new ParserContext(path));
            string result = parser.Parse();

            // Normalize Windows line endings so the split works for both "\r\n" and "\n".
            string[] lines = result.Replace("\r\n", "\n").Split('\n');

            // Expected value goes first so a failure reports "expected 11 but was N".
            Assert.AreEqual(11, lines.Length);
            Assert.AreEqual("11111111111", lines[0]);
            Assert.AreEqual("22222222222", lines[1]);
            Assert.AreEqual("张三李四王五", lines[2]);
            Assert.AreEqual("RTF Sample , Author : yuans , contact : [email protected] , site : http://www.cnblogs.com/xdesigner .", lines[7]);
        }