// Parses the schema of the UTF-8 e-mail fixture, locates the FROM_ADDR
// entity, and checks that the samples collected for that entity contain
// the expected address.
public void TestCsvCollect()
{
    const string fixturePath = "../../../tests/EMAILS-UTF8.CSV";

    var container = new DataContainer();
    var collection = new DataCollection(container, "EMAILS-UTF8.CSV");
    var processor = new CsvFileProcessor();

    long rowCount; // required by the out parameter; not inspected by this test
    List<DataEntity> entities;

    // Open read-only: File.Open(path, FileMode.Open) asks for read/write
    // access with no sharing, which fails on read-only fixtures and when
    // another test has the same file open.
    using (var stream = File.OpenRead(fixturePath))
    {
        entities = processor.ParseFileSchema(container, collection, stream, out rowCount);
    }

    var entity = entities.FirstOrDefault(x => x.Name.Equals("FROM_ADDR"));
    // Fail here with a clear message instead of passing null / index -1 on.
    Assert.NotNull(entity);
    int index = entities.IndexOf(entity);

    // The schema stream was disposed above, so sampling uses a fresh stream.
    using (var stream = File.OpenRead(fixturePath))
    {
        // NOTE(review): the trailing 5 and 1 look like a sample limit and a
        // start/skip value — confirm against CollectSamples' signature.
        var samples = processor.CollectSamples(container, collection, entity, index, stream, 5, 1);
        Assert.Contains("*****@*****.**", samples);
    }
}
// Collects FROM_ADDR samples from the UTF-8 and the UTF-16 fixture files and
// checks that both yield the same expected addresses, in the same order.
// The schema (entity and column index) is parsed from the UTF-8 file only
// and reused for the UTF-16 file.
public void TestCsvEncodings()
{
    const string utf8Path = "../../../tests/EMAILS-UTF8.CSV";
    const string utf16Path = "../../../tests/EMAILS-UTF16.CSV";

    var container = new DataContainer();
    var collection1 = new DataCollection(container, "EMAILS-UTF8.CSV");
    var collection2 = new DataCollection(container, "EMAILS-UTF16.CSV");
    var emails = new string[] { "*****@*****.**", "*****@*****.**", "*****@*****.**" };
    var processor = new CsvFileProcessor();

    long rowCount; // required by the out parameter; not inspected by this test
    List<DataEntity> entities;

    // Open read-only: File.Open(path, FileMode.Open) asks for read/write
    // access with no sharing, which fails on read-only fixtures and when
    // another test has the same file open.
    using (var stream = File.OpenRead(utf8Path))
    {
        entities = processor.ParseFileSchema(container, collection1, stream, out rowCount);
    }

    var entity = entities.FirstOrDefault(x => x.Name.Equals("FROM_ADDR"));
    // Fail here with a clear message instead of passing null / index -1 on.
    Assert.NotNull(entity);
    int index = entities.IndexOf(entity);

    List<string> samples1;
    List<string> samples2;

    using (var stream = File.OpenRead(utf8Path))
    {
        samples1 = processor.CollectSamples(container, collection1, entity, index, stream, emails.Length, 1);
    }

    using (var stream = File.OpenRead(utf16Path))
    {
        samples2 = processor.CollectSamples(container, collection2, entity, index, stream, emails.Length, 1);
    }

    // Check the counts first so the indexed comparison below cannot run past
    // the end of either sample list.
    Assert.Equal(emails.Length, samples1.Count);
    Assert.Equal(emails.Length, samples2.Count);

    for (int i = 0; i < emails.Length; i++)
    {
        Assert.Equal(emails[i], samples1[i]);
        Assert.Equal(emails[i], samples2[i]);
    }
}