/// <summary>
/// Parses the given Excel sample with the "HasHeader" property set to "1" and checks that
/// the first table exposes one header row (A-D) followed by three data rows.
/// </summary>
/// <param name="filename">Excel sample file name, resolved through TestDataSample.GetExcelPath.</param>
public void BaseTestWithHeaderRow(string filename)
{
    ParserContext ctx = new ParserContext(TestDataSample.GetExcelPath(filename));
    ISpreadsheetParser spreadsheetParser = ParserFactory.CreateSpreadsheet(ctx);
    spreadsheetParser.Context.Properties.Add("HasHeader", "1");
    ToxySpreadsheet sheet = spreadsheetParser.Parse();

    // Expected cell values, laid out exactly as they are checked below.
    string[] expectedHeader = { "A", "B", "C", "D" };
    string[][] expectedRows =
    {
        new[] { "1", "2", "3", "4" },
        new[] { "A1", "A2", "A3", "A4" },
        new[] { "B1", "B2", "B3", "B4" },
    };

    Assert.AreEqual(1, sheet.Tables[0].HeaderRows.Count);
    for (int col = 0; col < expectedHeader.Length; col++)
    {
        Assert.AreEqual(expectedHeader[col], sheet.Tables[0].HeaderRows[0].Cells[col].Value);
    }

    Assert.AreEqual(3, sheet.Tables[0].Rows.Count);
    for (int row = 0; row < expectedRows.Length; row++)
    {
        for (int col = 0; col < expectedRows[row].Length; col++)
        {
            Assert.AreEqual(expectedRows[row][col], sheet.Tables[0].Rows[row].Cells[col].Value);
        }
    }
}
/// <summary>
/// Subscribes to PlainTextParser.ParseLine while parsing "utf8.txt" and checks the text
/// reported for line numbers 0-3, then verifies that the event was actually raised.
/// </summary>
public void TestParseLineEvent()
{
    string path = TestDataSample.GetTextPath("utf8.txt");
    ParserContext context = new ParserContext(path);
    PlainTextParser parser = (PlainTextParser)ParserFactory.CreateText(context);

    string[] expectedLines = { "hello world", "a2", "a3", "bbb4" };
    int raisedCount = 0;

    parser.ParseLine += (sender, args) =>
    {
        raisedCount++;

        // Only the first four line numbers have a known expected text; others are ignored,
        // exactly as before.
        if (args.LineNumber >= 0 && args.LineNumber < expectedLines.Length)
        {
            Assert.AreEqual(expectedLines[args.LineNumber], args.Text);
        }
    };

    string text = parser.Parse();

    Assert.IsNotNullOrEmpty(text);
    // Without this check the test would pass even if ParseLine were never raised,
    // because every other assertion lives inside the handler.
    Assert.IsTrue(raisedCount > 0, "ParseLine event was never raised");
}
/// <summary>
/// Parses "mshome.html" into a ToxyDom and checks the meta node count, the first anchor's
/// href attribute, the title node and the attributes of the node at child index 7.
/// </summary>
public void TestParseHtml()
{
    string htmlPath = Path.GetFullPath(TestDataSample.GetHtmlPath("mshome.html"));
    IDomParser domParser = (IDomParser)ParserFactory.CreateDom(new ParserContext(htmlPath));
    ToxyDom dom = domParser.Parse();

    // XPath queries against the document root.
    List<ToxyNode> metaTags = dom.Root.SelectNodes("//meta");
    Assert.AreEqual(7, metaTags.Count);

    ToxyNode anchor = dom.Root.SingleSelect("//a");
    Assert.AreEqual(1, anchor.Attributes.Count);
    Assert.AreEqual("href", anchor.Attributes[0].Name);
    Assert.AreEqual("http://www.microsoft.com/en/us/default.aspx?redir=true", anchor.Attributes[0].Value);

    // Positional navigation: Root -> first child -> first child -> indexed children.
    ToxyNode container = dom.Root.ChildrenNodes[0].ChildrenNodes[0];

    ToxyNode title = container.ChildrenNodes[0];
    Assert.AreEqual("title", title.Name);
    Assert.AreEqual("Microsoft Corporation", title.ChildrenNodes[0].InnerText);

    ToxyNode eighthChild = container.ChildrenNodes[7];
    Assert.AreEqual("meta", eighthChild.Name);
    Assert.AreEqual(3, eighthChild.Attributes.Count);
    Assert.AreEqual("name", eighthChild.Attributes[0].Name);
    Assert.AreEqual("SearchDescription", eighthChild.Attributes[0].Value);
    Assert.AreEqual("scheme", eighthChild.Attributes[2].Name);
    Assert.AreEqual(string.Empty, eighthChild.Attributes[2].Value);
}
/// <summary>
/// Parses "Employee.xls", converts the spreadsheet to a DataSet and checks the three
/// table names plus selected cells of the first table.
/// </summary>
public void TestReadExcelAndConvertToDataSet()
{
    ParserContext context = new ParserContext(TestDataSample.GetExcelPath("Employee.xls"));
    DataSet dataSet = ParserFactory.CreateSpreadsheet(context).Parse().ToDataSet();

    Assert.AreEqual(3, dataSet.Tables.Count);
    Assert.AreEqual("Sheet1", dataSet.Tables[0].TableName);
    Assert.AreEqual("Sheet2", dataSet.Tables[1].TableName);
    Assert.AreEqual("Sheet3", dataSet.Tables[2].TableName);

    DataTable firstSheet = dataSet.Tables[0];

    // The first three cells of row 0 are expected to be DBNull.
    for (int col = 0; col <= 2; col++)
    {
        Assert.AreEqual(System.DBNull.Value, firstSheet.Rows[0][col]);
    }

    Assert.AreEqual("Employee Info", firstSheet.Rows[1][1]);

    // Label / value pairs in columns 1 and 2.
    Assert.AreEqual("Last name:", firstSheet.Rows[3][1]);
    Assert.AreEqual("lastName", firstSheet.Rows[3][2]);
    Assert.AreEqual("First name:", firstSheet.Rows[4][1]);
    Assert.AreEqual("firstName", firstSheet.Rows[4][2]);
    Assert.AreEqual("SSN:", firstSheet.Rows[5][1]);
    Assert.AreEqual("ssn", firstSheet.Rows[5][2]);
}
/// <summary>
/// Extracts plain text from "Sample1.PDF" with PDFTextParser and checks that the result
/// begins with "LA MARCHE".
/// </summary>
public void TestParsePlainTextFromPDF()
{
    string path = TestDataSample.GetPdfPath("Sample1.PDF");
    var parser = new PDFTextParser(new ParserContext(path));

    string result = parser.Parse();

    // Ordinal comparison: the expected prefix is fixed test data, so the outcome must not
    // depend on the current thread culture.
    Assert.IsTrue(result.StartsWith("LA MARCHE", System.StringComparison.Ordinal));
}
/// <summary>
/// Parses "htmlrtf2.rtf" with RTFTextParser and checks that some text is returned.
/// </summary>
public void TestReadRTF_Html()
{
    ParserContext context = new ParserContext(TestDataSample.GetRTFPath("htmlrtf2.rtf"));
    RTFTextParser rtfParser = new RTFTextParser(context);

    string extracted = rtfParser.Parse();

    Assert.IsNotNullOrEmpty(extracted);
}
/// <summary>
/// Parses a large PDF sample with PDFTextParser and checks that text is returned.
/// </summary>
public void TestReadBigPDFFile()
{
    string path = TestDataSample.GetPdfPath("Word97-2007BinaryFileFormat(doc)Specification.pdf");
    var parser = new PDFTextParser(new ParserContext(path));

    string result = parser.Parse();

    // The previous Assert.IsTrue(true) could never fail and ignored the parse result;
    // check the extracted text instead, as the sibling RTF/MSG tests do.
    Assert.IsNotNullOrEmpty(result);
}
/// <summary>
/// Parses "Sample1.PDF" with PDFDocumentParser and checks the paragraph count and the
/// text of the first paragraph.
/// </summary>
public void TestParseToxyDocumentFromPDF()
{
    ParserContext context = new ParserContext(TestDataSample.GetPdfPath("Sample1.PDF"));
    var documentParser = new PDFDocumentParser(context);

    var document = documentParser.Parse();

    Assert.AreEqual(1474, document.Paragraphs.Count);
    Assert.AreEqual("LA MARCHE MONDIALE DES FEMMES : UN MOUVEMENT IRRÉVERSIBLE", document.Paragraphs[0].Text);
}
/// <summary>
/// Reads metadata from "sample.ogg" and checks that 15 entries are returned.
/// </summary>
public void TestParseOgg()
{
    string audioPath = Path.GetFullPath(TestDataSample.GetAudioPath("sample.ogg"));
    var metadataParser = (IMetadataParser)ParserFactory.CreateMetadata(new ParserContext(audioPath));

    ToxyMetadata metadata = metadataParser.Parse();

    Assert.AreEqual(15, metadata.Count);
}
/// <summary>
/// Reads metadata from "sample_v1_only.mp3" and checks that 11 entries are returned.
/// </summary>
public void TestParseMp3_Id3v1Only()
{
    string audioPath = Path.GetFullPath(TestDataSample.GetAudioPath("sample_v1_only.mp3"));
    var metadataParser = (IMetadataParser)ParserFactory.CreateMetadata(new ParserContext(audioPath));

    ToxyMetadata metadata = metadataParser.Parse();

    Assert.AreEqual(11, metadata.Count);
}
/// <summary>
/// Extracts text from "Employee.xls" through the text parser factory and checks that the
/// "Last name" and "First name" labels appear in the output.
/// </summary>
public void TestExcel2003TextParser()
{
    ParserContext context = new ParserContext(TestDataSample.GetExcelPath("Employee.xls"));
    ITextParser parser = ParserFactory.CreateText(context);

    string result = parser.Parse();

    Assert.IsNotNull(result);
    // string.Contains is an ordinal search and also accepts a match at index 0, which the
    // previous "IndexOf(...) > 0" check wrongly treated as "not found".
    Assert.IsTrue(result.Contains("Last name"));
    Assert.IsTrue(result.Contains("First name"));
}
/// <summary>
/// Parses "utf8.txt" through the text parser factory and checks the full returned text:
/// four lines, each terminated by Environment.NewLine.
/// </summary>
public void TestReadWholeText()
{
    ParserContext context = new ParserContext(TestDataSample.GetTextPath("utf8.txt"));
    ITextParser textParser = ParserFactory.CreateText(context);

    string actual = textParser.Parse();

    // The trailing empty element produces the final line terminator after "bbb4".
    string expected = string.Join(
        Environment.NewLine,
        new[] { "hello world", "a2", "a3", "bbb4", string.Empty });
    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Parses "raw text mail demo.msg" through the text parser factory and checks that some
/// text is returned.
/// </summary>
public void PureTextMsg_ReadTextTest()
{
    ParserContext msgContext = new ParserContext(TestDataSample.GetEmailPath("raw text mail demo.msg"));
    var textParser = ParserFactory.CreateText(msgContext);

    string body = textParser.Parse();

    Assert.IsNotNullOrEmpty(body);
}
/// <summary>
/// Reads metadata from "Test_Humor-Generation.ppt" and checks the entry count and the
/// "Title" value.
/// </summary>
public void TestPowerPoint()
{
    string pptPath = Path.GetFullPath(TestDataSample.GetOLE2Path("Test_Humor-Generation.ppt"));
    var metadataParser = (IMetadataParser)ParserFactory.CreateMetadata(new ParserContext(pptPath));

    ToxyMetadata metadata = metadataParser.Parse();

    Assert.AreEqual(8, metadata.Count);
    Assert.AreEqual("Funny Factory", metadata.Get("Title").Value);
}
/// <summary>
/// Reads metadata from "comments.xls" and checks the entry count and the
/// "ApplicationName" value.
/// </summary>
public void TestExcelFile()
{
    string xlsPath = Path.GetFullPath(TestDataSample.GetExcelPath("comments.xls"));
    var metadataParser = (IMetadataParser)ParserFactory.CreateMetadata(new ParserContext(xlsPath));

    ToxyMetadata metadata = metadataParser.Parse();

    Assert.AreEqual(8, metadata.Count);
    Assert.AreEqual("Microsoft Excel", metadata.Get("ApplicationName").Value);
}
/// <summary>
/// Parses "toxy.zip" through the text parser factory and checks that the output contains
/// 68 non-empty lines when split on Environment.NewLine.
/// </summary>
public void TestParseDirectoryFromZip()
{
    string zipPath = TestDataSample.GetFilePath("toxy.zip", null);
    ITextParser zipParser = ParserFactory.CreateText(new ParserContext(zipPath));

    string listing = zipParser.Parse();
    Assert.IsNotNull(listing);

    string[] separators = { Environment.NewLine };
    string[] entries = listing.Split(separators, StringSplitOptions.RemoveEmptyEntries);
    Assert.AreEqual(68, entries.Length);
}
/// <summary>
/// Extracts plain text from "Sample5.PDF", splits it on '\n' and checks the first two lines.
/// </summary>
public void TestParsePlainTextFromSample5()
{
    ParserContext context = new ParserContext(TestDataSample.GetPdfPath("Sample5.PDF"));
    PDFTextParser pdfParser = new PDFTextParser(context);

    string[] lines = pdfParser.Parse().Split('\n');

    Assert.AreEqual("License income by market (%)", lines[0]);
    Assert.AreEqual("Philadelphia, Atlanta, Dallas, San Diego, and New", lines[1]);
}
/// <summary>
/// Reads metadata from "TestCorel.shw" and checks the entry count and that both
/// "Author" and "LastAuthor" are "thorsteb".
/// </summary>
public void TestCorelDrawFile()
{
    string shwPath = Path.GetFullPath(TestDataSample.GetOLE2Path("TestCorel.shw"));
    var metadataParser = (IMetadataParser)ParserFactory.CreateMetadata(new ParserContext(shwPath));

    ToxyMetadata metadata = metadataParser.Parse();

    Assert.AreEqual(6, metadata.Count);
    foreach (string key in new[] { "Author", "LastAuthor" })
    {
        Assert.AreEqual("thorsteb", metadata.Get(key).Value);
    }
}
/// <summary>
/// Parses "PalmAgentSamples.vcf" with VCardDocumentParser and checks the card count and
/// the full name of the card at index 10.
/// </summary>
public void TestForeignNames()
{
    ParserContext vcfContext = new ParserContext(TestDataSample.GetVCardPath("PalmAgentSamples.vcf"));
    var vcardParser = new VCardDocumentParser(vcfContext);

    var parsed = vcardParser.Parse();

    Assert.AreEqual(20, parsed.Cards.Count);
    Assert.AreEqual("John Doe4", parsed.Cards[10].Name.FullName);
}
/// <summary>
/// Extracts text from "WithVariousData.xlsx" with the "IncludeSheetNames" property set to
/// "0" and checks that "Sheet1" does not appear in the output.
/// </summary>
public void TestExcel2007TextParserWithoutSheetNames()
{
    ParserContext context = new ParserContext(TestDataSample.GetExcelPath("WithVariousData.xlsx"));
    context.Properties.Add("IncludeSheetNames", "0");
    ITextParser parser = ParserFactory.CreateText(context);

    string result = parser.Parse();

    Assert.IsNotNull(result);
    // string.Contains performs an ordinal search, so the outcome does not depend on the
    // current culture the way IndexOf(string) does.
    Assert.IsFalse(result.Contains("Sheet1"));
}
/// <summary>
/// Extracts text from "WithVariousData.xlsx" with the "IncludeHeaderFooter" property set
/// to "1" and checks that the header text appears in the output.
/// </summary>
public void TestExcel2007TextParserWithHeaderFooter()
{
    ParserContext context = new ParserContext(TestDataSample.GetExcelPath("WithVariousData.xlsx"));
    context.Properties.Add("IncludeHeaderFooter", "1");
    ITextParser parser = ParserFactory.CreateText(context);

    string result = parser.Parse();

    Assert.IsNotNull(result);
    // string.Contains is an ordinal search and also accepts a match at index 0, which the
    // previous "IndexOf(...) > 0" check wrongly treated as "not found".
    Assert.IsTrue(result.Contains("This is the header"));
}
/// <summary>
/// Reads metadata from "SampleShow.pptx" and checks the entry count plus the "Title" and
/// "Application" values.
/// </summary>
public void TestPptx()
{
    string pptxPath = Path.GetFullPath(TestDataSample.GetOOXMLPath("SampleShow.pptx"));
    var metadataParser = (IMetadataParser)ParserFactory.CreateMetadata(new ParserContext(pptxPath));

    ToxyMetadata metadata = metadataParser.Parse();

    Assert.AreEqual(8, metadata.Count);
    Assert.AreEqual("SlideShow Sample", metadata.Get("Title").Value);
    Assert.AreEqual("Microsoft Office PowerPoint", metadata.Get("Application").Value);
}
/// <summary>
/// Reads metadata from "MultipleCoreProperties.docx" and checks the entry count plus the
/// "Title" and "Application" values.
/// </summary>
public void TestDocx()
{
    string docxPath = Path.GetFullPath(TestDataSample.GetOOXMLPath("MultipleCoreProperties.docx"));
    var metadataParser = (IMetadataParser)ParserFactory.CreateMetadata(new ParserContext(docxPath));

    ToxyMetadata metadata = metadataParser.Parse();

    Assert.AreEqual(12, metadata.Count);
    Assert.AreEqual("Format", metadata.Get("Title").Value);
    Assert.AreEqual("Microsoft Macintosh Word", metadata.Get("Application").Value);
}
/// <summary>
/// Reads metadata from "TestSolidWorks.sldprt" and checks the entry count plus the
/// "ClassID" and "LastAuthor" values.
/// </summary>
public void TestSolidWorksFile()
{
    string partPath = Path.GetFullPath(TestDataSample.GetOLE2Path("TestSolidWorks.sldprt"));
    var metadataParser = (IMetadataParser)ParserFactory.CreateMetadata(new ParserContext(partPath));

    ToxyMetadata metadata = metadataParser.Parse();

    Assert.AreEqual(10, metadata.Count);
    Assert.AreEqual("{F29F85E0-4FF9-1068-AB91-08002B27B3D9}", metadata.Get("ClassID").Value);
    Assert.AreEqual("scj", metadata.Get("LastAuthor").Value);
}
/// <summary>
/// Parses the given Excel sample with default properties and checks that the first table
/// has no page header, no header rows and nine data rows.
/// </summary>
/// <param name="filename">Excel sample file name, resolved through TestDataSample.GetExcelPath.</param>
public void BaseTestWithoutHeader(string filename)
{
    string excelPath = TestDataSample.GetExcelPath(filename);
    ISpreadsheetParser spreadsheetParser = ParserFactory.CreateSpreadsheet(new ParserContext(excelPath));

    ToxySpreadsheet sheet = spreadsheetParser.Parse();

    Assert.IsNull(sheet.Tables[0].PageHeader);
    Assert.AreEqual(0, sheet.Tables[0].HeaderRows.Count);
    Assert.AreEqual(9, sheet.Tables[0].Rows.Count);
}
/// <summary>
/// Reads metadata from "sample.xlsx" and checks the entry count plus the "Application"
/// and "AppVersion" values.
/// </summary>
public void TestXlsx()
{
    string xlsxPath = Path.GetFullPath(TestDataSample.GetOOXMLPath("sample.xlsx"));
    var metadataParser = (IMetadataParser)ParserFactory.CreateMetadata(new ParserContext(xlsxPath));

    ToxyMetadata metadata = metadataParser.Parse();

    Assert.AreEqual(4, metadata.Count);
    Assert.AreEqual("Microsoft Excel", metadata.Get("Application").Value);
    Assert.AreEqual("12.0000", metadata.Get("AppVersion").Value);
}
/// <summary>
/// Extracts plain text from "Sample1.PDF" and checks that it begins with "LA MARCHE" and
/// that several known sentences pass the ContainText check.
/// </summary>
public void TestParsePlainTextFromSample1()
{
    string path = TestDataSample.GetPdfPath("Sample1.PDF");
    var parser = new PDFTextParser(new ParserContext(path));

    string result = parser.Parse();

    // Ordinal comparison: the expected prefix is fixed test data, so the outcome must not
    // depend on the current thread culture.
    Assert.IsTrue(result.StartsWith("LA MARCHE", System.StringComparison.Ordinal));
    ContainText(result, "Toute discussion stratégique sur nos actions nécessite un rappel de ce que nous avons fait en");
    ContainText(result, "l’an 2000 et depuis. Au niveau mondial, en l’an 2000, nous avons mené une campagne de");
    ContainText(result, "Une structure pour nous amener à 2005");
    ContainText(result, "Lors de la 4e rencontre qui aura lieu en Inde, nous avons deux objectifs majeurs");
}
/// <summary>
/// Parses "RfcAuthors.vcf" with VCardDocumentParser and checks the name, organization,
/// address and contact entries of both cards it contains.
/// </summary>
public void TestRead2Cards()
{
    string path = TestDataSample.GetVCardPath("RfcAuthors.vcf");
    ParserContext context = new ParserContext(path);
    VCardDocumentParser parser = new VCardDocumentParser(context);

    var cards = parser.Parse();
    Assert.AreEqual(2, cards.Cards.Count);

    // First card.
    ToxyBusinessCard tbc1 = cards.Cards[0];
    Assert.AreEqual("Frank Dawson", tbc1.Name.FullName);
    Assert.AreEqual(1, tbc1.Addresses.Count);
    Assert.AreEqual(5, tbc1.Contacts.Count);
    Assert.AreEqual("6544 Battleford Drive;Raleigh;NC;27613-3502;U.S.A.", tbc1.Addresses[0].ToString());

    Assert.AreEqual("+1-919-676-9515", tbc1.Contacts[0].Value);
    Assert.AreEqual("MessagingService, WorkVoice", tbc1.Contacts[0].Name);
    Assert.AreEqual("+1-919-676-9564", tbc1.Contacts[1].Value);
    Assert.AreEqual("WorkFax", tbc1.Contacts[1].Name);
    // NOTE(review): the e-mail literals below look masked ("*****@*****.**") - confirm
    // they match the values stored in the sample file.
    Assert.AreEqual("*****@*****.**", tbc1.Contacts[2].Value);
    Assert.AreEqual("Internet", tbc1.Contacts[2].Name);
    Assert.AreEqual("*****@*****.**", tbc1.Contacts[3].Value);
    Assert.AreEqual("Internet", tbc1.Contacts[3].Name);
    Assert.AreEqual("http://home.earthlink.net/~fdawson", tbc1.Contacts[4].Value);
    Assert.AreEqual("Url-Default", tbc1.Contacts[4].Name);
    Assert.AreEqual("Lotus Development Corporation", tbc1.Orgnization);

    // Second card. This comment sits on its own line so that it cannot swallow the
    // statements that follow it.
    ToxyBusinessCard tbc2 = cards.Cards[1];
    Assert.AreEqual("Tim Howes", tbc2.Name.FullName);
    Assert.AreEqual("Netscape Communications Corp.", tbc2.Orgnization);
    Assert.AreEqual(1, tbc2.Addresses.Count);
    Assert.AreEqual(3, tbc2.Contacts.Count);
    Assert.AreEqual("501 E. Middlefield Rd.;Mountain View;CA;94043;U.S.A.", tbc2.Addresses[0].ToString());

    Assert.AreEqual("+1-415-937-3419", tbc2.Contacts[0].Value);
    Assert.AreEqual("MessagingService, WorkVoice", tbc2.Contacts[0].Name);
    Assert.AreEqual("+1-415-528-4164", tbc2.Contacts[1].Value);
    Assert.AreEqual("WorkFax", tbc2.Contacts[1].Name);
    Assert.AreEqual("*****@*****.**", tbc2.Contacts[2].Value);
    Assert.AreEqual("Internet", tbc2.Contacts[2].Name);
}
/// <summary>
/// Parses "Sample1.PDF" with PDFDocumentParser and checks the paragraph count and the
/// text of the first five paragraphs.
/// </summary>
public void TestParseToxyDocumentFromPDF()
{
    string path = TestDataSample.GetPdfPath("Sample1.PDF");
    var parser = new PDFDocumentParser(new ParserContext(path));

    var result = parser.Parse();

    Assert.AreEqual(1474, result.Paragraphs.Count);
    Assert.AreEqual("LA MARCHE MONDIALE DES FEMMES : UN MOUVEMENT IRRÉVERSIBLE", result.Paragraphs[0].Text);
    Assert.AreEqual("DOCUMENT PRÉPARATOIRE", result.Paragraphs[1].Text);
    // Paragraph 2 is the superscript 'e'. The comment is kept on its own line so that it
    // cannot swallow the assertions that follow.
    Assert.AreEqual("e", result.Paragraphs[2].Text);
    Assert.AreEqual("4 Rencontre internationale de la Marche mondiale des femmes", result.Paragraphs[3].Text);
    Assert.AreEqual("du 18-22 Mars 2003", result.Paragraphs[4].Text);
}
/// <summary>
/// Parses "Formated text.rtf" with RTFTextParser, normalizes "\r\n" to "\n", and checks
/// the line count and the content of lines 0, 1, 2 and 7.
/// </summary>
public void TestReadRTF_FormattedText()
{
    string path = TestDataSample.GetRTFPath("Formated text.rtf");
    var parser = new RTFTextParser(new ParserContext(path));

    string result = parser.Parse();
    string[] lines = result.Replace("\r\n", "\n").Split('\n');

    // Expected value goes first so that a failure reports expected/actual the right way round.
    Assert.AreEqual(11, lines.Length);
    Assert.AreEqual("11111111111", lines[0]);
    Assert.AreEqual("22222222222", lines[1]);
    Assert.AreEqual("张三李四王五", lines[2]);
    // NOTE(review): "[email protected]" looks like an e-mail obfuscation artifact rather than
    // the text stored in the sample file - confirm against the fixture.
    Assert.AreEqual("RTF Sample , Author : yuans , contact : [email protected] , site : http://www.cnblogs.com/xdesigner .", lines[7]);
}