/// <summary>
/// Extracts a <see cref="DataTable"/> from <c>Content</c> using the strategy that matches
/// the concrete type of <paramref name="format"/>.
/// </summary>
/// <param name="format">
/// The format describing what to extract. Handled types, checked in this order:
/// <c>PathSeriesFormat</c>, <c>PathTableFormat</c>, <c>PathSingleValueFormat</c>.
/// </param>
/// <returns>
/// The table produced by the format's <c>ToFormattedTable</c> for series and table formats,
/// or the table built by <c>CreateTableForScalar</c> for a single-value format.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="format"/> is null.</exception>
/// <exception cref="Exception">The extraction result reports <c>Success == false</c>.</exception>
/// <exception cref="NotSupportedException"><paramref name="format"/> is of none of the handled types.</exception>
public DataTable ExtractTable(IFormat format)
{
    // Without this guard a null format falls through every type check below and
    // ends in a NullReferenceException from format.GetType() while the
    // NotSupportedException message is being built.
    if (format == null)
    {
        throw new ArgumentNullException("format");
    }

    PathSeriesFormat seriesFormat = format as PathSeriesFormat;
    if (seriesFormat != null)
    {
        var htmlSettings = new HtmlExtractionSettings
        {
            ExtractLinkUrl = seriesFormat.ExtractLinkUrl
        };

        var seriesResult = Content.ExtractTable(
            HtmlPath.Parse(seriesFormat.Path),
            seriesFormat.ToExtractionSettings(),
            htmlSettings);

        if (!seriesResult.Success)
        {
            throw new Exception("Failed to extract table from document: " + seriesResult.FailureReason);
        }

        return seriesFormat.ToFormattedTable(seriesResult.Value);
    }

    PathTableFormat tableFormat = format as PathTableFormat;
    if (tableFormat != null)
    {
        var tableResult = Content.ExtractTable(HtmlPath.Parse(tableFormat.Path), true);

        if (!tableResult.Success)
        {
            throw new Exception("Failed to extract table from document: " + tableResult.FailureReason);
        }

        return tableFormat.ToFormattedTable(tableResult.Value);
    }

    PathSingleValueFormat singleValueFormat = format as PathSingleValueFormat;
    if (singleValueFormat != null)
    {
        var text = Content.GetTextByPath(HtmlPath.Parse(singleValueFormat.Path));
        var value = singleValueFormat.ValueFormat.Convert(text);

        // XXX: ugly - a table has to be created around the single value only
        // because this method's return type is DataTable.
        return CreateTableForScalar(singleValueFormat.ValueFormat.Type, value);
    }

    throw new NotSupportedException("Format not supported for Html documents: " + format.GetType());
}
/// <summary>
/// Reads <paramref name="file"/> through <c>CsvReader.Read</c> and lets
/// <paramref name="format"/> turn the raw result into its formatted table.
/// </summary>
/// <param name="format">The series format whose <c>ToFormattedTable</c> shapes the raw table.</param>
/// <param name="file">The file handed to <c>CsvReader.Read</c>.</param>
/// <returns>The table returned by <c>format.ToFormattedTable</c>.</returns>
private DataTable Parse(PathSeriesFormat format, string file)
{
    // NOTE(review): ";" is presumably the column separator expected by CsvReader.Read - confirm.
    return format.ToFormattedTable(CsvReader.Read(file, ";"));
}