/// <summary>
/// Creates an iterator over the rows delivered by <paramref name="reader"/>.
/// The constructor works out at which position in a CSV row each column
/// configured on <paramref name="datasource"/> is found, and then calls
/// <c>FindNextRecord()</c>.
/// </summary>
/// <param name="datasource">
/// Supplies the configured columns, the number of leading lines to skip
/// (<c>SkipLines</c>) and whether a header line is present (<c>HasHeader</c>).
/// </param>
/// <param name="reader">
/// Row source. One row is consumed for every skipped line, plus one more for
/// the header line when the data source has one.
/// </param>
/// <exception cref="InvalidOperationException">
/// The header line could not be read, or a configured column does not occur
/// in the header.
/// </exception>
/// <exception cref="FormatException">
/// The data source has no header line and a column name is not an integer.
/// </exception>
public CsvRecordIterator(CsvDataSource datasource, CsvReader reader)
{
    _reader = reader;
    _builder = new RecordBuilder(datasource);

    // Fetch the configured columns once instead of once per use.
    var columns = datasource.GetColumns();

    // _index[k] is the zero-based position within a CSV row of the value
    // belonging to _column[k]. The two arrays are filled in parallel.
    int columnSize = columns.Count;
    _index = new int[columnSize];
    _column = new Column[columnSize];

    // Skip the required number of lines before getting to the data.
    for (int skipped = 0; skipped < datasource.SkipLines; skipped++)
    {
        _reader.Next();
    }

    // Learn the column positions from the header line (if there is one).
    string[] header;
    if (datasource.HasHeader)
    {
        header = _reader.Next();
        if (header == null)
        {
            // Without this check the lookup below fails with a bare
            // NullReferenceException.
            throw new InvalidOperationException(
                "CSV input has no header line to read column names from");
        }
    }
    else
    {
        // Without a header the column names are 1-based column numbers.
        // They are machine-readable, so parse them culture-independently.
        // The appended 0 keeps Max() from throwing when no columns are configured.
        int high = columns
            .Select(c => Int32.Parse(c.GetName(), System.Globalization.CultureInfo.InvariantCulture))
            .Concat(new[] { 0 })
            .Max();

        // Synthesize the header "1", "2", ..., "high".
        header = new string[high];
        for (int ix = 0; ix < high; ix++)
        {
            header[ix] = (ix + 1).ToString(System.Globalization.CultureInfo.InvariantCulture);
        }
    }

    // Build the parallel '_index' and '_column' arrays.
    int count = 0;
    foreach (var column in columns)
    {
        string name = column.GetName();

        // First header cell equal to the column name (ordinal comparison);
        // unlike header[ix].Equals(...) this tolerates null header cells.
        int position = Array.IndexOf(header, name);
        if (position < 0)
        {
            // Silently continuing would leave null entries at the end of
            // _column (and zeros in _index), so fail with a clear message.
            throw new InvalidOperationException(
                "Column '" + name + "' not found in CSV header");
        }

        _index[count] = position;
        _column[count] = column;
        count++;
    }

    FindNextRecord();
}
/// <summary>
/// Creates an iterator over the rows delivered by <paramref name="reader"/>.
/// The constructor works out at which position in a CSV row each column
/// configured on <paramref name="datasource"/> is found, and then calls
/// <c>FindNextRecord()</c>.
/// </summary>
/// <param name="datasource">
/// Supplies the configured columns, the number of leading lines to skip
/// (<c>SkipLines</c>) and whether a header line is present (<c>HasHeader</c>).
/// </param>
/// <param name="reader">
/// Row source. One row is consumed for every skipped line, plus one more for
/// the header line when the data source has one.
/// </param>
/// <exception cref="InvalidOperationException">
/// The header line could not be read, or a configured column does not occur
/// in the header.
/// </exception>
/// <exception cref="FormatException">
/// The data source has no header line and a column name is not an integer.
/// </exception>
public CsvRecordIterator(CsvDataSource datasource, CsvReader reader)
{
    _reader = reader;
    _builder = new RecordBuilder(datasource);

    // Fetch the configured columns once instead of once per use.
    var columns = datasource.GetColumns();

    // _index[k] is the zero-based position within a CSV row of the value
    // belonging to _column[k]. The two arrays are filled in parallel.
    int columnSize = columns.Count;
    _index = new int[columnSize];
    _column = new Column[columnSize];

    // Skip the required number of lines before getting to the data.
    for (int skipped = 0; skipped < datasource.SkipLines; skipped++)
    {
        _reader.Next();
    }

    // Learn the column positions from the header line (if there is one).
    string[] header;
    if (datasource.HasHeader)
    {
        header = _reader.Next();
        if (header == null)
        {
            // Without this check the lookup below fails with a bare
            // NullReferenceException.
            throw new InvalidOperationException(
                "CSV input has no header line to read column names from");
        }
    }
    else
    {
        // Without a header the column names are 1-based column numbers.
        // They are machine-readable, so parse them culture-independently.
        // The appended 0 keeps Max() from throwing when no columns are configured.
        int high = columns
            .Select(c => Int32.Parse(c.GetName(), System.Globalization.CultureInfo.InvariantCulture))
            .Concat(new[] { 0 })
            .Max();

        // Synthesize the header "1", "2", ..., "high".
        header = new string[high];
        for (int ix = 0; ix < high; ix++)
        {
            header[ix] = (ix + 1).ToString(System.Globalization.CultureInfo.InvariantCulture);
        }
    }

    // Build the parallel '_index' and '_column' arrays.
    int count = 0;
    foreach (var column in columns)
    {
        string name = column.GetName();

        // First header cell equal to the column name (ordinal comparison);
        // unlike header[ix].Equals(...) this tolerates null header cells.
        int position = Array.IndexOf(header, name);
        if (position < 0)
        {
            // Silently continuing would leave null entries at the end of
            // _column (and zeros in _index), so fail with a clear message.
            throw new InvalidOperationException(
                "Column '" + name + "' not found in CSV header");
        }

        _index[count] = position;
        _column[count] = column;
        count++;
    }

    FindNextRecord();
}
/// <summary>
/// Builds a <see cref="Configuration"/> from the XML document at
/// <paramref name="file"/>.
/// </summary>
/// <remarks>
/// The threshold and the properties are read from the <c>schema</c> children
/// of the root element; every <c>csv</c> child of the root becomes a
/// <see cref="CsvDataSource"/> that is added to the configuration. Numbers in
/// the document are parsed with the invariant culture, so <c>0.85</c> means
/// the same on every machine.
/// </remarks>
/// <param name="file">Location of the XML document, passed as-is to <c>XElement.Load</c>.</param>
/// <returns>The populated configuration.</returns>
/// <exception cref="InvalidOperationException">
/// A <c>csv</c> data source has no <c>input-file</c> parameter, or a property
/// has no <c>name</c> element.
/// </exception>
public static Configuration Load(string file)
{
    var cfg = new Configuration();
    var properties = new List<Property>();

    // Configuration files are machine-readable: never parse their numbers
    // with the culture of whoever happens to run the program.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    Func<XElement, double> parseDouble = x => double.Parse(x.Value, invariant);

    XElement xml = XElement.Load(file);

    // First <threshold> below a <schema>; 0 when there is none.
    cfg.Threshold = xml.Elements("schema")
                       .Descendants("threshold")
                       .Select(parseDouble)
                       .FirstOrDefault();

    // All <property> elements below a <schema>.
    IEnumerable<XElement> xmlProperties = xml.Elements("schema").Descendants("property");
    foreach (XElement propertyElement in xmlProperties)
    {
        string propName = propertyElement.Descendants("name").First().Value;
        var property = new Property(propName);

        // type="id" marks the identifying property; it carries no
        // comparator or probabilities.
        XAttribute typeAttribute = propertyElement.Attribute("type");
        bool isId = typeAttribute != null && typeAttribute.Value == "id";
        if (isId)
        {
            property.IsIdProperty = true;
        }
        else
        {
            // A missing <comparator> leaves Comparator at its default, the
            // same way missing <low>/<high> fall back to 0 below.
            XElement comparatorElement = propertyElement.Descendants("comparator").FirstOrDefault();
            if (comparatorElement != null)
            {
                property.Comparator = GetComparatorFromString(comparatorElement.Value);
            }

            property.LowProbability = propertyElement.Descendants("low").Select(parseDouble).FirstOrDefault();
            property.HighProbability = propertyElement.Descendants("high").Select(parseDouble).FirstOrDefault();
        }

        // Register every property. Previously only properties without a
        // 'type' attribute were added, so the id property was lost.
        properties.Add(property);
    }

    cfg.SetProperties(properties);

    // Data sources: only <csv> children of the root are handled.
    foreach (XElement dataSource in xml.Elements().Where(d => d.Name == "csv"))
    {
        var csvDs = new CsvDataSource();
        Hashtable csvParams = GetParametersTable(dataSource);

        // The Hashtable indexer yields null for an absent key; report that
        // clearly instead of failing with a NullReferenceException.
        object inputFile = csvParams["input-file"];
        if (inputFile == null)
        {
            throw new InvalidOperationException(
                "csv data source is missing the required 'input-file' parameter");
        }

        csvDs.File = inputFile.ToString();

        if (csvParams.Contains("header-line"))
        {
            csvDs.HasHeader = string.Equals(
                csvParams["header-line"].ToString(), "true", StringComparison.OrdinalIgnoreCase);
        }

        if (csvParams.Contains("skip-lines"))
        {
            // TryParse leaves 0 in skipLines when the text is not a number.
            int skipLines;
            Int32.TryParse(
                csvParams["skip-lines"].ToString(),
                System.Globalization.NumberStyles.Integer,
                invariant,
                out skipLines);
            csvDs.SkipLines = skipLines;
        }

        csvDs.FileEncoding = csvParams.Contains("encoding")
            ? GetTextEncodingFromString(csvParams["encoding"].ToString())
            : Encoding.Default;

        foreach (Column column in GetDataSourceColumns(dataSource))
        {
            csvDs.AddColumn(column);
        }

        cfg.AddDataSource(0, csvDs);
    }

    return cfg;
}