/// <summary>
/// Reads one input through <paramref name="reader"/> and writes it out as XML,
/// or hands the reader to the debug / crawl paths when those modes are enabled.
/// </summary>
/// <param name="reader">The SGML reader to configure and read from.</param>
/// <param name="uri">Input location; when null, standard input is used.</param>
/// <param name="loadAsStream">
/// When true the file or web resource is opened here and passed to the reader as a
/// stream; when false the reader is given the URI through its Href property.
/// </param>
void Process(SgmlReader reader, string uri, bool loadAsStream)
{
    if (uri == null)
    {
        reader.InputStream = Console.In;
    }
    else if (loadAsStream)
    {
        Uri location = new Uri(uri);
        if (location.IsFile)
        {
            reader.InputStream = new StreamReader(uri);
        }
        else
        {
            WebRequest wr = WebRequest.Create(location);
            reader.InputStream = new StreamReader(wr.GetResponse().GetResponseStream());
        }
    }
    else
    {
        reader.Href = uri;
    }

    if (debug)
    {
        Debug(reader);
        reader.Close();
        return;
    }
    if (crawl)
    {
        StartCrawl(reader, uri, basify);
        return;
    }

    if (this.encoding == null)
    {
        this.encoding = reader.GetEncoding();
    }

    XmlTextWriter w;
    if (output != null)
    {
        w = new XmlTextWriter(output, this.encoding);
    }
    else
    {
        w = new XmlTextWriter(Console.Out);
    }

    // The writer may own an output file, so it is closed even when the
    // conversion below throws.
    try
    {
        if (formatted)
        {
            w.Formatting = Formatting.Indented;
        }
        if (!noxmldecl)
        {
            w.WriteStartDocument();
        }

        if (testdoc)
        {
            // Round-trip through XmlDocument; XML errors are reported, not rethrown.
            XmlDocument doc = new XmlDocument();
            try
            {
                doc.Load(reader);
                doc.WriteTo(w);
            }
            catch (XmlException e)
            {
                Console.WriteLine("Error:" + e.Message);
                Console.WriteLine("at line " + e.LineNumber + " column " + e.LinePosition);
            }
        }
        else
        {
            // Stream nodes straight from the reader to the writer.
            reader.Read();
            while (!reader.EOF)
            {
                w.WriteNode(reader, true);
            }
        }
        w.Flush();
    }
    finally
    {
        w.Close();
    }
}
/// <summary>
/// Builds an <see cref="XmlDocument"/> from HTML content. The content is wrapped in
/// a &lt;root&gt; element before it is parsed.
/// </summary>
/// <param name="sContent">HTML text to parse.</param>
/// <returns>The document loaded from the converted XML text.</returns>
public static XmlDocument ParseHtml(string sContent)
{
    // Destination for the XML text produced from the SGML parse.
    StringWriter xmlText = new StringWriter();
    XmlTextWriter xmlOut = new XmlTextWriter(xmlText);
    xmlOut.Formatting = Formatting.Indented;
    xmlOut.WriteStartDocument();

    // Source: the caller's HTML wrapped in a root element.
    StringReader wrapped = new StringReader(string.Concat("<root>", sContent, "</root>"));
    SgmlReader sgml = new SgmlReader();
    sgml.WhitespaceHandling = WhitespaceHandling.All;
    sgml.CaseFolding = Sgml.CaseFolding.ToLower;
    sgml.InputStream = wrapped;

    // Copy every node from the reader to the writer.
    for (sgml.Read(); !sgml.EOF; )
    {
        xmlOut.WriteNode(sgml, true);
    }

    xmlOut.Flush();
    xmlOut.Close();
    xmlText.Flush();

    // Load the generated XML text; whitespace is kept and no resolver is used.
    XmlDocument result = new XmlDocument();
    result.PreserveWhitespace = true;
    result.XmlResolver = null;
    result.LoadXml(xmlText.ToString());

    sgml.Close();
    return result;
}
/// <summary>
/// Processes HTML markup.
/// </summary>
/// <param name="input">The string to process.</param>
/// <param name="skipHtmlNode">Whether to skip the html node.</param>
/// <param name="clearTag">Whether to strip HTML tags and output plain text only.</param>
/// <param name="maxCount">Number of text characters to copy; if maxCount &lt;= 0, all text is copied.</param>
/// <param name="endStr">Suffix appended after the text when only part of it was copied, e.g. "...".</param>
/// <returns>The processed HTML; a null or empty input is returned unchanged.</returns>
public static string ProcessHtml(string input, bool skipHtmlNode, bool clearTag, int maxCount, string endStr)
{
    // Nothing to convert.
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    StringWriter buffer = new StringWriter();
    XmlTextWriter xmlOut = new XmlTextWriter(buffer);
    xmlOut.Formatting = Formatting.Indented;

    SgmlReader sgml = new SgmlReader();
    sgml.DocType = "HTML";
    sgml.InputStream = new StringReader(input);

    // The copy itself is delegated to WriteXml.
    WriteXml(xmlOut, sgml, true, skipHtmlNode, clearTag, maxCount, endStr);

    xmlOut.Flush();
    xmlOut.Close();
    sgml.Close();

    string processed = buffer.ToString();
    return processed;
}
/// <summary>
/// Converts HTML to XML text with SgmlReader and returns either the whole XML or
/// the values of the nodes selected by an XPath expression.
/// </summary>
/// <param name="html">HTML text to convert.</param>
/// <param name="xpathNavPath">
/// XPath expression to evaluate against the converted XML; when null the whole XML
/// text is returned.
/// </param>
/// <returns>
/// The resulting text with "\r" replaced by "\n" and "\n\n" replaced by "\n". If any
/// exception is thrown, the exception's message is returned instead.
/// </returns>
public static string GetWellFormedHTML(string html, string xpathNavPath)
{
    StringWriter sw = null;
    SgmlReader reader = null;
    XmlTextWriter writer = null;
    try
    {
        reader = new SgmlReader();
        reader.DocType = "HTML";
        reader.InputStream = new StringReader(html);

        sw = new StringWriter();
        writer = new XmlTextWriter(sw);
        writer.Formatting = Formatting.Indented;

        while (reader.Read())
        {
            if (reader.NodeType != XmlNodeType.Whitespace)
            {
                writer.WriteNode(reader, true);
            }
        }

        string result;
        if (xpathNavPath == null)
        {
            result = sw.ToString();
        }
        else
        {
            // Filter out nodes from the converted XML, one value per line.
            StringBuilder sb = new StringBuilder();
            XPathDocument doc = new XPathDocument(new StringReader(sw.ToString()));
            XPathNavigator nav = doc.CreateNavigator();
            XPathNodeIterator nodes = nav.Select(xpathNavPath);
            while (nodes.MoveNext())
            {
                sb.Append(nodes.Current.Value).Append("\n");
            }
            result = sb.ToString();
        }

        // Same newline normalisation for both result shapes.
        result = result.Replace("\r", "\n");
        return result.Replace("\n\n", "\n");
    }
    catch (Exception exp)
    {
        return exp.Message;
    }
    finally
    {
        // Any of these can still be null if construction failed part-way, so
        // each is checked; cleanup now also runs on the success path.
        if (writer != null)
        {
            writer.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
        if (sw != null)
        {
            sw.Close();
        }
    }
}
/// <summary>
/// Converts HTML to XML text with SgmlReader and returns either the whole XML or
/// the outer XML of the nodes selected by an XPath expression.
/// </summary>
/// <param name="htmlString">HTML text to convert.</param>
/// <param name="xpath">
/// XPath expression to evaluate against the converted XML; when null the whole XML
/// text is returned.
/// </param>
/// <returns>
/// The XML text, or the selected nodes' outer XML each followed by a space. An empty
/// string is returned when the input is null, trims to fewer than 10 characters, or
/// an exception is thrown.
/// </returns>
public static string GetWellFormedHTML(string htmlString, string xpath)
{
    if (htmlString == null || htmlString.Trim().Length < 10)
    {
        return "";
    }

    // Every occurrence of "xmlns" in the input is renamed before parsing.
    // NOTE(review): presumably done so un-prefixed XPath matches; it also alters
    // any text content containing "xmlns" - confirm this is acceptable.
    htmlString = htmlString.Replace("xmlns", "buyao");

    StringWriter sw = null;
    SgmlReader reader = null;
    XmlTextWriter writer = null;
    try
    {
        reader = new SgmlReader();
        reader.DocType = "HTML";
        reader.InputStream = new StringReader(htmlString);

        sw = new StringWriter();
        writer = new XmlTextWriter(sw);
        writer.Formatting = Formatting.Indented;
        writer.WriteStartDocument();

        while (reader.Read())
        {
            if (reader.NodeType != XmlNodeType.Whitespace)
            {
                try
                {
                    writer.WriteNode(reader, true);
                }
                catch (Exception)
                {
                    // Deliberate best effort: a node that fails to copy is discarded.
                }
            }
        }

        if (xpath == null)
        {
            return sw.ToString();
        }

        StringBuilder sb = new StringBuilder();
        XPathDocument doc = new XPathDocument(new StringReader(sw.ToString()));
        XPathNavigator nav = doc.CreateNavigator();
        XPathNodeIterator nodes = nav.Select(xpath);
        while (nodes.MoveNext())
        {
            sb.Append(nodes.Current.OuterXml).Append(" ");
        }
        return sb.ToString();
    }
    catch (Exception)
    {
        return "";
    }
    finally
    {
        // Any of these can still be null if construction failed part-way, so
        // each is checked; cleanup now also runs on the success path.
        if (writer != null)
        {
            writer.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
        if (sw != null)
        {
            sw.Close();
        }
    }
}
/// <summary>
/// Converts the returned HTML page to XML and collects (time, state) pairs from the
/// cells of the table nested under the 'ctl00_ContentPlaceHolder1_TrackDetail' table.
/// </summary>
/// <param name="backstring">HTML text of the result page.</param>
/// <returns>A ResultInfo holding the pairs that were read.</returns>
private ResultInfo getDetail(string backstring)
{
    SgmlReader html = new SgmlReader();
    html.DocType = "HTML";
    html.InputStream = new StringReader(backstring);
    html.WhitespaceHandling = WhitespaceHandling.None;

    StringWriter xmlText = new StringWriter();
    XmlTextWriter xmlOut = new XmlTextWriter(xmlText);
    xmlOut.Formatting = Formatting.Indented;

    // HTML -> XML text, ignoring whitespace nodes.
    while (html.Read())
    {
        if (html.NodeType == XmlNodeType.Whitespace)
        {
            continue;
        }
        xmlOut.WriteNode(html, true);
    }

    XmlDocument page = new XmlDocument();
    page.Load(new StringReader(xmlText.ToString()));

    XmlNamespaceManager namespaces = new XmlNamespaceManager(page.NameTable);
    namespaces.AddNamespace("bottum", "http://www.w3.org/1999/xhtml");

    // The expression is tied to the layout of the returned page.
    XPathNodeIterator cells = page.CreateNavigator().Select(
        "//bottum:table[@id='ctl00_ContentPlaceHolder1_TrackDetail']/bottum:tr/bottum:td/bottum:div[8]/bottum:table/bottum:tr/bottum:td",
        namespaces);

    ResultInfo details = new ResultInfo(querynum);
    int cellCount = cells.Count;

    // Step over the first three cells when there are more than three.
    if (cellCount > 3)
    {
        cells.MoveNext();
        cells.MoveNext();
        cells.MoveNext();
    }

    // Each remaining group of three cells gives: time, (skipped), state.
    int groups = cellCount / 3 - 1;
    while (groups-- > 0)
    {
        cells.MoveNext();
        string time = cells.Current.Value;
        cells.MoveNext();
        cells.MoveNext();
        string state = cells.Current.Value;
        details.add(time, state);
    }

    html.Close();
    xmlOut.Close();
    xmlText.Close();
    return details;
}
/// <summary>
/// Converts the returned HTML page to XML and collects (time, state) pairs from the
/// cells of the 'GridView1' table.
/// </summary>
/// <param name="backstring">HTML text of the result page.</param>
/// <returns>A ResultInfo holding the pairs that were read.</returns>
public ResultInfo getDetail(string backstring)
{
    SgmlReader html = new SgmlReader();
    html.DocType = "HTML";
    html.InputStream = new StringReader(backstring);
    html.WhitespaceHandling = WhitespaceHandling.None;

    StringWriter xmlText = new StringWriter();
    XmlTextWriter xmlOut = new XmlTextWriter(xmlText);
    xmlOut.Formatting = Formatting.Indented;

    // HTML -> XML text, ignoring whitespace nodes.
    while (html.Read())
    {
        if (html.NodeType == XmlNodeType.Whitespace)
        {
            continue;
        }
        xmlOut.WriteNode(html, true);
    }

    XmlDocument page = new XmlDocument();
    page.Load(new StringReader(xmlText.ToString()));

    // The conversion objects are no longer needed once the document is loaded.
    html.Close();
    xmlOut.Close();
    xmlText.Close();

    XmlNamespaceManager namespaces = new XmlNamespaceManager(page.NameTable);
    namespaces.AddNamespace("bottum", "http://www.w3.org/1999/xhtml");

    XPathNodeIterator cells = page.CreateNavigator().Select(
        "//bottum:table[@id='GridView1']/bottum:tr/bottum:td",
        namespaces);

    if (cells == null)
    {
        return new ResultInfo(queryNumber);
    }

    int rows = cells.Count / 3;
    ResultInfo details = new ResultInfo(queryNumber);

    // Each group of three cells gives: (skipped), time, state.
    while (rows-- > 0)
    {
        cells.MoveNext();
        cells.MoveNext();
        string time = cells.Current.Value;
        cells.MoveNext();
        string state = cells.Current.Value;
        details.add(time, state);
    }
    return details;
}
/// <summary>
/// Converts the returned HTML page to XML and collects (time, state) pairs from the
/// cells selected by "//table[1]/tr/td".
/// </summary>
/// <param name="backstring">HTML text of the result page.</param>
/// <returns>A ResultInfo holding the pairs that were read.</returns>
private ResultInfo getDetail(string backstring)
{
    SgmlReader reader = new SgmlReader();
    StringWriter sw = new StringWriter();
    XmlTextWriter writer = new XmlTextWriter(sw);
    try
    {
        reader.DocType = "HTML";
        reader.InputStream = new StringReader(backstring);
        reader.WhitespaceHandling = WhitespaceHandling.None;
        writer.Formatting = Formatting.Indented;

        // HTML -> XML text, ignoring whitespace nodes.
        while (reader.Read())
        {
            if (reader.NodeType != XmlNodeType.Whitespace)
            {
                writer.WriteNode(reader, true);
            }
        }

        XmlDocument doc = new XmlDocument();
        doc.Load(new StringReader(sw.ToString()));
        XmlNamespaceManager xnm = new XmlNamespaceManager(doc.NameTable);
        XPathNavigator nav = doc.CreateNavigator();

        // The expression is tied to the layout of the returned page.
        string xpath = "//table[1]/tr/td";
        XPathNodeIterator nodes = nav.Select(xpath, xnm);
        ResultInfo backinfo = new ResultInfo(querynum);

        int total = nodes.Count;

        // Step over the first four cells when there are at least four.
        if (total >= 4)
        {
            nodes.MoveNext();
            nodes.MoveNext();
            nodes.MoveNext();
            nodes.MoveNext();
        }

        // NOTE(review): the bound (from 4 up to Count / 2) is kept from the original,
        // but each iteration consumes three cells, so for larger tables it would run
        // past the last cell - confirm the intended bound against the page layout.
        // Checking MoveNext() stops the loop once the cells are exhausted, so the
        // last cell's value is no longer added again as stale rows.
        for (int i = 4; i < total / 2; i++)
        {
            if (!nodes.MoveNext())
            {
                break;
            }
            string time = nodes.Current.Value;

            if (!nodes.MoveNext())
            {
                break;
            }
            string state = nodes.Current.Value;

            backinfo.add(time, state);

            // The third cell of each group is skipped.
            nodes.MoveNext();
        }

        return backinfo;
    }
    finally
    {
        reader.Close();
        writer.Close();
        sw.Close();
    }
}
/// <summary>
/// Converts the returned HTML page to XML and collects (time, state) pairs from the
/// cells selected by "/html/body/table[8]/tr/td". The XHTML namespace declaration is
/// removed from the input text first.
/// </summary>
/// <param name="backstring">HTML text of the result page.</param>
/// <returns>A ResultInfo holding the pairs that were read.</returns>
public ResultInfo getDetail(string backstring)
{
    // Drop the default XHTML namespace declaration from the markup.
    string markup = backstring.Replace("xmlns=\"http://www.w3.org/1999/xhtml\"", "");

    SgmlReader html = new SgmlReader();
    html.DocType = "HTML";
    html.InputStream = new StringReader(markup);
    html.WhitespaceHandling = WhitespaceHandling.None;

    StringWriter xmlText = new StringWriter();
    XmlTextWriter xmlOut = new XmlTextWriter(xmlText);
    xmlOut.Formatting = Formatting.Indented;

    // HTML -> XML text, ignoring whitespace nodes.
    while (html.Read())
    {
        if (html.NodeType == XmlNodeType.Whitespace)
        {
            continue;
        }
        xmlOut.WriteNode(html, true);
    }

    XmlDocument page = new XmlDocument();
    page.Load(new StringReader(xmlText.ToString()));

    XmlNamespaceManager namespaces = new XmlNamespaceManager(page.NameTable);
    namespaces.AddNamespace("bottum", "http://www.w3.org/1999/xhtml");

    XPathNodeIterator cells = page.CreateNavigator().Select("/html/body/table[8]/tr/td", namespaces);
    int cellCount = cells.Count;

    // Step over the first two cells when there are at least two.
    if (cellCount >= 2)
    {
        cells.MoveNext();
        cells.MoveNext();
    }

    ResultInfo details = new ResultInfo(querynum);

    // Each remaining pair of cells gives: time, state.
    int pairs = cellCount / 2 - 1;
    while (pairs-- > 0)
    {
        cells.MoveNext();
        string time = cells.Current.Value;
        cells.MoveNext();
        string state = cells.Current.Value;
        details.add(time, state);
    }

    html.Close();
    xmlOut.Close();
    xmlText.Close();
    return details;
}
// Useful debugging code...

/// <summary>
/// Loads the starting document from <paramref name="reader"/>, optionally makes sure
/// it contains a base element, and then crawls it.
/// </summary>
/// <param name="reader">Reader positioned on the starting document; it is closed after loading.</param>
/// <param name="uri">Not used by this method.</param>
/// <param name="basify">
/// When true and the document has no base element, one is inserted whose href is the
/// document's BaseURI.
/// </param>
void StartCrawl(SgmlReader reader, string uri, bool basify)
{
    Console.WriteLine("Loading '" + reader.BaseURI + "'");

    XmlDocument doc = new XmlDocument();
    try
    {
        doc.XmlResolver = null; // don't do any downloads!
        doc.Load(reader);
    }
    catch (Exception e)
    {
        Console.WriteLine("Error loading document\n" + e.Message);
    }
    reader.Close();

    if (basify)
    {
        // html and head are optional; if they are there use them, otherwise not.
        XmlElement be = (XmlElement)doc.SelectSingleNode("//base");
        if (be == null)
        {
            be = doc.CreateElement("base");
            be.SetAttribute("href", doc.BaseURI);

            XmlNode parent = doc.SelectSingleNode("//head");
            if (parent == null)
            {
                parent = doc.SelectSingleNode("//html");
            }
            if (parent == null)
            {
                parent = doc.DocumentElement;
            }

            // After a failed load the document can be empty, in which case there
            // is no element to attach the base tag to.
            if (parent != null)
            {
                parent.InsertBefore(be, parent.FirstChild);
            }
        }
    }

    try
    {
        Crawl(reader.Dtd, doc, reader.ErrorLog);
    }
    catch (Exception e)
    {
        Console.WriteLine("Uncaught exception: " + e.Message);
    }
}
/// <summary>
/// Fetches the text at <paramref name="url"/> via FetchWebText and loads it into an
/// <see cref="XmlDocument"/> through an SgmlReader.
/// </summary>
/// <param name="url">Location of the page to fetch.</param>
/// <returns>The loaded document.</returns>
XmlDocument FetchXmlDocument(Uri url)
{
    var sr = FetchWebText(url);
    try
    {
        var xr = new SgmlReader() { InputStream = sr };
        try
        {
            var doc = new XmlDocument();
            doc.Load(xr);
            return doc;
        }
        finally
        {
            // Closed even when Load throws.
            xr.Close();
        }
    }
    finally
    {
        if (sr != null)
        {
            sr.Close();
        }
    }
}
/// <summary>
/// Parses <c>pageText</c> into <c>document</c> using an SgmlReader and then parses
/// the forms. On an XML parse failure the page text is written to standard out and
/// a ParseException is thrown.
/// </summary>
private void ParsePageText()
{
    SgmlReader sgml = new SgmlReader();
    try
    {
        string fixedHtml = FixHtmlToAvoidParseErrors(pageText);
        sgml.InputStream = new StringReader(fixedHtml);

        // note: this is last-found performance bottleneck; not yet fixed. Retest before fixing.
        sgml.Dtd = ParseDtd(sgml.NameTable);
        sgml.ErrorLog = Console.Error;
        sgml.DocType = "HTML";

        document = new XhtmlDocument(sgml.NameTable);
        try
        {
            document.Load(sgml);
        }
        catch (WebException webFailure)
        {
            // A web failure during load is reported as a DTD problem.
            throw new DoctypeDtdException(webFailure);
        }

        ParseForms();
    }
    catch (XmlException parseFailure)
    {
        Console.WriteLine("vvvvvv The following HTML could not be parsed by NUnitAsp vvvvvv");
        Console.WriteLine(pageText);
        Console.WriteLine("^^^^^^ The preceding HTML could not be parsed by NUnitAsp ^^^^^^");

        string message = "Could not parse HTML. See standard out for the HTML and use a validator (such as the one at validator.w3.org) to troubleshoot. Parser error was: " + parseFailure.Message;
        throw new ParseException(message);
    }
    finally
    {
        sgml.Close();
    }
}
/// <summary>
/// Converts the returned HTML page to XML with the SGML library and collects
/// (time, state) pairs from the 'font_c' rows of the second table inside the
/// 'ess_ctr1579_TrackResult_DivBill' div.
/// </summary>
/// <param name="backstring">HTML text of the result page.</param>
/// <returns>A ResultInfo holding the pairs that were read.</returns>
private ResultInfo getDetail(string backstring)
{
    SgmlReader html = new SgmlReader();
    html.DocType = "HTML";
    html.InputStream = new StringReader(backstring);
    html.WhitespaceHandling = WhitespaceHandling.None;

    StringWriter xmlText = new StringWriter();
    XmlTextWriter xmlOut = new XmlTextWriter(xmlText);
    xmlOut.Formatting = Formatting.Indented;

    // HTML -> XML text: advance the reader, then copy unless it is on whitespace.
    while (!html.EOF)
    {
        html.Read();
        if (html.NodeType == XmlNodeType.Whitespace)
        {
            continue;
        }
        xmlOut.WriteNode(html, true);
    }

    XmlDocument page = new XmlDocument();
    page.Load(new StringReader(xmlText.ToString()));

    XmlNamespaceManager namespaces = new XmlNamespaceManager(page.NameTable);
    XPathNodeIterator cells = page.CreateNavigator().Select(
        "//div[@id='ess_ctr1579_TrackResult_DivBill']/table[2]/tr[@class='font_c']/td",
        namespaces);

    ResultInfo details = new ResultInfo(queryNumber);

    // Each pair of cells gives: time, state.
    int pairs = cells.Count / 2;
    while (pairs-- > 0)
    {
        cells.MoveNext();
        string time = cells.Current.Value;
        cells.MoveNext();
        string state = cells.Current.Value;
        details.add(time, state);
    }

    html.Close();
    xmlOut.Close();
    xmlText.Close();
    return details;
}
/// <summary>
/// Converts HTML to XHTML.
/// </summary>
/// <param name="html">HTML markup.</param>
/// <returns>XHTML markup.</returns>
public static string ToXhtml(string html)
{
    SgmlReader reader = new SgmlReader();
    reader.CaseFolding = CaseFolding.ToLower;
    reader.DocType = "HTML";
    reader.InputStream = new StringReader(html);
    reader.WhitespaceHandling = WhitespaceHandling.None;

    StringWriter sw = new StringWriter(CultureInfo.InvariantCulture);
    XmlTextWriter writer = new XmlTextWriter(sw);
    try
    {
        writer.Formatting = Formatting.Indented;
        while (!reader.EOF)
        {
            writer.WriteNode(reader, true);
        }
    }
    finally
    {
        // The XML writer is closed first, while the StringWriter it writes into is
        // still open; closing it also closes the StringWriter.
        writer.Close();
        reader.Close();
    }

    // The accumulated text is still readable after the StringWriter is closed.
    return sw.ToString();
}
/// <summary>
/// Parses <c>pageText</c> into <c>document</c> using an SgmlReader and then parses
/// the initial form values. On an XML parse failure the page text is written to
/// standard out and a ParseException is thrown.
/// </summary>
private void ParsePageText()
{
    SgmlReader sgml = new SgmlReader();
    try
    {
        string fixedHtml = FixHtmlToAvoidParseErrors(pageText);
        sgml.InputStream = new StringReader(fixedHtml);
        sgml.Dtd = ParseDtd(sgml.NameTable);

        // EP 02/01/07 - The reader's error log is left unset: there are too many errors to output.
        sgml.DocType = "HTML";

        document = new XhtmlDocument(sgml.NameTable);
        try
        {
            document.Load(sgml);
        }
        catch (WebException webFailure)
        {
            // A web failure during load is reported as a DTD problem.
            throw new DoctypeDtdException(webFailure);
        }

        ParseInitialFormValues();
    }
    catch (XmlException parseFailure)
    {
        Console.WriteLine("vvvvvv The following HTML could not be parsed by NUnitAsp vvvvvv");
        Console.WriteLine(pageText);
        Console.WriteLine("^^^^^^ The preceding HTML could not be parsed by NUnitAsp ^^^^^^");

        string message = "Could not parse HTML. See standard out for the HTML and use a validator (such as the one at validator.w3.org) to troubleshoot. Parser error was: " + parseFailure.Message;
        throw new ParseException(message);
    }
    finally
    {
        sgml.Close();
    }
}
/// <summary>
/// Run the SgmlReader command line tool with the given command line arguments.
/// </summary>
/// <param name="args">
/// Options (prefixed with '-' or '/') followed by an optional input URI and an
/// optional output file. An unrecognised option, or an option missing its value,
/// prints the usage text and returns.
/// </param>
public void Run(string[] args)
{
    SgmlReader reader = new SgmlReader();
    string inputUri = null;

    for (int i = 0; i < args.Length; i++)
    {
        string arg = args[i];
        if (string.IsNullOrEmpty(arg))
        {
            // An empty argument has no first character to inspect; ignore it.
            continue;
        }

        if (arg[0] == '-' || arg[0] == '/')
        {
            string option = arg.Substring(1);

            // Options that take a value consume the following argument.
            string value = null;
            if (OptionRequiresValue(option))
            {
                if (i + 1 >= args.Length)
                {
                    Console.WriteLine("Missing value for option '" + arg + "'");
                    PrintUsageText();
                    return;
                }
                value = args[++i];
            }

            switch (option)
            {
                case "e":
                    if ("$stderr".Equals(value, StringComparison.OrdinalIgnoreCase))
                    {
                        reader.ErrorLog = Console.Error;
                    }
                    else
                    {
                        reader.ErrorLog = new StreamWriter(value);
                    }
                    break;
                case "html":
                    reader.DocType = "HTML";
                    break;
                case "dtd":
                    reader.SystemLiteral = value;
                    break;
                case "proxy":
                    proxy = value;
                    reader.WebProxy = new WebProxy(proxy);
                    break;
                case "encoding":
                    encoding = Encoding.GetEncoding(value);
                    break;
                case "nobom":
                    noUtf8Bom = true;
                    break;
                case "f":
                    formatted = true;
                    reader.WhitespaceHandling = WhitespaceHandling.None;
                    break;
                case "trimtext":
                    reader.TextWhitespace = TextWhitespaceHandling.TrimBoth;
                    break;
                case "noxml":
                    noxmldecl = true;
                    break;
                case "doctype":
                    reader.StripDocType = false;
                    break;
                case "lower":
                    reader.CaseFolding = CaseFolding.ToLower;
                    break;
                case "upper":
                    reader.CaseFolding = CaseFolding.ToUpper;
                    break;
                default:
                    PrintUsageText();
                    return;
            }
        }
        else if (inputUri == null)
        {
            inputUri = arg;
            string ext = Path.GetExtension(arg);
            if (string.Equals(ext, ".htm", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(ext, ".html", StringComparison.OrdinalIgnoreCase))
            {
                reader.DocType = "HTML";
            }
        }
        else if (output == null)
        {
            output = arg;
        }
    }

    // A '?' in a web URL is a query string, not a wildcard; both http and https
    // URLs are excluded from wildcard processing.
    bool isWebUri = inputUri != null &&
        (inputUri.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
         inputUri.StartsWith("https://", StringComparison.OrdinalIgnoreCase));

    if (inputUri != null && !isWebUri && inputUri.IndexOfAny(new char[] { '*', '?' }) >= 0)
    {
        // wild card processing of a directory of files.
        string path = Path.GetDirectoryName(inputUri);
        if (string.IsNullOrEmpty(path))
        {
            path = "." + Path.DirectorySeparatorChar;
        }

        string ext = ".xml";
        if (output != null)
        {
            ext = Path.GetExtension(output);
        }

        foreach (string uri in Directory.GetFiles(path, Path.GetFileName(inputUri)))
        {
            Console.WriteLine("Processing: " + uri);
            string file = Path.GetFileName(uri);
            output = Path.GetDirectoryName(uri) + Path.DirectorySeparatorChar + Path.GetFileNameWithoutExtension(file) + ext;
            Process(reader, uri);
            reader.Close();
        }
        return;
    }

    Process(reader, inputUri);
    reader.Close();
}

/// <summary>Returns true for the options that consume the next argument as their value.</summary>
private static bool OptionRequiresValue(string option)
{
    return option == "e" || option == "dtd" || option == "proxy" || option == "encoding";
}

/// <summary>Writes the tool name, version and command-line usage to standard out.</summary>
private static void PrintUsageText()
{
    string exeName = Environment.GetCommandLineArgs()[0];
    string exeVersion = typeof(CommandLine).Assembly.GetName().Version?.ToString();
    Console.WriteLine("{0} - version {1}", exeName, exeVersion);
    Console.WriteLine(" https://github.com/lovettchris/SgmlReader");
    Console.WriteLine();
    Console.WriteLine("Usage: {0} <options> [InputUri] [OutputFile]", exeName);
    Console.WriteLine();
    Console.WriteLine("<options>:");
    Console.WriteLine(" -help Prints this list of command-line options");
    Console.WriteLine(" -e log Optional log file name, name of '$STDERR' will write errors to stderr");
    Console.WriteLine(" -f Whether to pretty print the output.");
    Console.WriteLine(" -html Specify the built in HTML dtd");
    Console.WriteLine(" -dtd url Specify other SGML dtd to use");
    Console.WriteLine(" -base Add base tag to output HTML");
    Console.WriteLine(" -noxml Do not add XML declaration to the output");
    Console.WriteLine(" -proxy svr:80 Proxy server to use for http requests");
    Console.WriteLine(" -encoding name Specify an encoding for the output file (default UTF-8)");
    Console.WriteLine(" -nobom Prevents output of the BOM when using UTF-8");
    Console.WriteLine(" -f Produce indented formatted output");
    Console.WriteLine(" -trimtext SGML `#text` nodes will be trimmed of outer whitespace");
    Console.WriteLine(" -lower Convert input tags to lower case");
    Console.WriteLine(" -upper Convert input tags to UPPER CASE");
    Console.WriteLine();
    Console.WriteLine(" InputUri The input file or http URL (defaults to stdin if not specified)");
    Console.WriteLine(" Supports wildcards for local file names.");
    Console.WriteLine(" OutputFile Output file name (defaults to stdout if not specified)");
    Console.WriteLine(" If input file contains wildcards then this just specifies the output file extension (default .xml)");
}
/// <summary>
/// Follows the links (a elements with an href) of <paramref name="doc"/>, loading
/// each not-yet-visited HTML page through an SgmlReader and crawling it in turn.
/// Links are only followed while the nesting depth is below 5.
/// </summary>
/// <param name="dtd">DTD assigned to every reader created for a linked page.</param>
/// <param name="doc">The document whose links are followed; its BaseURI must be a valid URI.</param>
/// <param name="log">Receives progress and error messages.</param>
/// <returns>True, unless a nested crawl returned false.</returns>
bool Crawl(SgmlDtd dtd, XmlDocument doc, TextWriter log)
{
    depth++;
    try
    {
        StringBuilder indent = new StringBuilder();
        for (int i = 0; i < depth; i++)
        {
            indent.Append(" ");
        }

        count++;
        Uri baseUri = new Uri(doc.BaseURI);

        // A base element with an href, when present, overrides the document URI.
        XmlElement baseElmt = (XmlElement)doc.SelectSingleNode("/html/head/base");
        if (baseElmt != null)
        {
            string href = baseElmt.GetAttribute("href");
            if (href != "")
            {
                try
                {
                    baseUri = new Uri(href);
                }
                catch (Exception)
                {
                    Console.WriteLine("### Error parsing BASE href '" + href + "'");
                }
            }
        }

        foreach (XmlElement a in doc.SelectNodes("//a"))
        {
            string href = a.GetAttribute("href");
            if (string.IsNullOrEmpty(href) || depth >= 5)
            {
                continue;
            }

            Uri local;
            try
            {
                local = new Uri(baseUri, href);
            }
            catch (UriFormatException)
            {
                // One malformed link should not abort the remaining links of this page.
                log.WriteLine(indent + "### Error parsing href '" + href + "'");
                log.Flush();
                continue;
            }

            if (domain && baseUri.Host != local.Host)
            {
                continue;
            }

            // Culture-invariant lower-casing so the comparison does not depend on the current culture.
            string ext = Path.GetExtension(local.AbsolutePath).ToLowerInvariant();
            if (ext == ".jpg" || ext == ".gif" || ext == ".mpg")
            {
                continue;
            }

            string url = local.AbsoluteUri;
            if (visited.ContainsKey(url))
            {
                continue;
            }
            visited.Add(url, url);

            log.WriteLine(indent + "Loading '" + url + "'");
            log.Flush();

            StreamReader stm = null;
            try
            {
                HttpWebRequest wr = (HttpWebRequest)WebRequest.Create(url);
                wr.Timeout = 10000;
                if (proxy != null)
                {
                    wr.Proxy = new WebProxy(proxy);
                }
                wr.PreAuthenticate = false;
                // Pass the credentials of the process.
                wr.Credentials = CredentialCache.DefaultCredentials;

                WebResponse resp = wr.GetResponse();
                Uri actual = resp.ResponseUri;
                if (actual.AbsoluteUri != url)
                {
                    local = new Uri(actual.AbsoluteUri);
                    log.WriteLine(indent + "Redirected to '" + actual.AbsoluteUri + "'");
                    log.Flush();
                }

                // The header value may carry parameters (e.g. "text/html; charset=utf-8"),
                // so only the media-type prefix is compared.
                string contentType = resp.ContentType;
                bool isHtml = contentType != null &&
                    contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase);
                if (!isHtml)
                {
                    log.WriteLine(indent + "Skipping ContentType=" + resp.ContentType);
                    log.Flush();
                    resp.Close();
                }
                else
                {
                    stm = new StreamReader(resp.GetResponseStream());
                }
            }
            catch (Exception e)
            {
                log.WriteLine(indent + "### Error opening URL: " + e.Message);
                log.Flush();
            }

            if (stm == null)
            {
                continue;
            }

            SgmlReader reader = new SgmlReader();
            reader.Dtd = dtd;
            reader.SetBaseUri(local.AbsoluteUri);
            reader.InputStream = stm;
            reader.WebProxy = proxy;

            XmlDocument d2 = new XmlDocument();
            d2.XmlResolver = null; // don't do any downloads!
            try
            {
                try
                {
                    d2.Load(reader);
                }
                finally
                {
                    // Released before recursing, and also when the load fails.
                    reader.Close();
                    stm.Close();
                }

                if (!Crawl(dtd, d2, log))
                {
                    return false;
                }
            }
            catch (Exception e)
            {
                log.WriteLine(indent + "### Error parsing document '" + local.AbsoluteUri + "', " + e.Message);
                log.Flush();
            }
        }

        return true;
    }
    finally
    {
        // Restored on every exit path, so an exception caught by the caller's
        // handler does not leave the depth counter permanently incremented.
        depth--;
    }
}
/// <summary>
/// Runs the SgmlReader command line tool with the given command line arguments.
/// </summary>
/// <param name="args">
/// Options (prefixed with '-' or '/') followed by an optional input URI and an
/// optional output file. An unrecognised option, or an option missing its value,
/// prints the usage text and returns.
/// </param>
public void Run(string[] args)
{
    SgmlReader reader = new SgmlReader();
    string inputUri = null;

    for (int i = 0; i < args.Length; i++)
    {
        string arg = args[i];
        if (string.IsNullOrEmpty(arg))
        {
            // An empty argument has no first character to inspect; ignore it.
            continue;
        }

        if (arg[0] == '-' || arg[0] == '/')
        {
            string option = arg.Substring(1);

            // Options that take a value consume the following argument.
            string value = null;
            if (OptionRequiresValue(option))
            {
                if (i + 1 >= args.Length)
                {
                    Console.WriteLine("Missing value for option '" + arg + "'");
                    PrintUsageText();
                    return;
                }
                value = args[++i];
            }

            switch (option)
            {
                case "e":
                    if (string.Equals(value, "$stderr", StringComparison.OrdinalIgnoreCase))
                    {
                        reader.ErrorLog = Console.Error;
                    }
                    else
                    {
                        reader.ErrorLogFile = value;
                    }
                    break;
                case "html":
                    reader.DocType = "HTML";
                    break;
                case "dtd":
                    reader.SystemLiteral = value;
                    break;
                case "proxy":
                    proxy = value;
                    reader.WebProxy = proxy;
                    break;
                case "encoding":
                    encoding = Encoding.GetEncoding(value);
                    break;
                case "f":
                    formatted = true;
                    reader.WhitespaceHandling = WhitespaceHandling.None;
                    break;
                case "noxml":
                    noxmldecl = true;
                    break;
                case "doctype":
                    reader.StripDocType = false;
                    break;
                case "lower":
                    reader.CaseFolding = CaseFolding.ToLower;
                    break;
                case "upper":
                    reader.CaseFolding = CaseFolding.ToUpper;
                    break;
                default:
                    PrintUsageText();
                    return;
            }
        }
        else if (inputUri == null)
        {
            inputUri = arg;
            string ext = Path.GetExtension(arg);
            if (string.Equals(ext, ".htm", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(ext, ".html", StringComparison.OrdinalIgnoreCase))
            {
                reader.DocType = "HTML";
            }
        }
        else if (output == null)
        {
            output = arg;
        }
    }

    // A '?' in a web URL is a query string, not a wildcard; both http and https
    // URLs are excluded from wildcard processing.
    bool isWebUri = inputUri != null &&
        (inputUri.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
         inputUri.StartsWith("https://", StringComparison.OrdinalIgnoreCase));

    if (inputUri != null && !isWebUri && inputUri.IndexOfAny(new char[] { '*', '?' }) >= 0)
    {
        // wild card processing of a directory of files.
        string path = Path.GetDirectoryName(inputUri);
        if (string.IsNullOrEmpty(path))
        {
            path = "." + Path.DirectorySeparatorChar;
        }

        string ext = ".xml";
        if (output != null)
        {
            ext = Path.GetExtension(output);
        }

        foreach (string uri in Directory.GetFiles(path, Path.GetFileName(inputUri)))
        {
            Console.WriteLine("Processing: " + uri);
            string file = Path.GetFileName(uri);
            output = Path.GetDirectoryName(uri) + Path.DirectorySeparatorChar + Path.GetFileNameWithoutExtension(file) + ext;
            Process(reader, uri);
            reader.Close();
        }
        return;
    }

    Process(reader, inputUri);
    reader.Close();
}

/// <summary>Returns true for the options that consume the next argument as their value.</summary>
private static bool OptionRequiresValue(string option)
{
    return option == "e" || option == "dtd" || option == "proxy" || option == "encoding";
}

/// <summary>Writes the command-line usage to standard out.</summary>
private static void PrintUsageText()
{
    Console.WriteLine("Usage: SgmlReader <options> [InputUri] [OutputFile]");
    Console.WriteLine("-e log Optional log file name, name of '$STDERR' will write errors to stderr");
    Console.WriteLine("-f Whether to pretty print the output.");
    Console.WriteLine("-html Specify the built in HTML dtd");
    Console.WriteLine("-dtd url Specify other SGML dtd to use");
    Console.WriteLine("-base Add base tag to output HTML");
    Console.WriteLine("-noxml Do not add XML declaration to the output");
    Console.WriteLine("-proxy svr:80 Proxy server to use for http requests");
    Console.WriteLine("-encoding name Specify an encoding for the output file (default UTF-8)");
    Console.WriteLine("-lower Convert input tags to lower case");
    Console.WriteLine("-upper Convert input tags to upper case");
    Console.WriteLine();
    Console.WriteLine("InputUri The input file or http URL (default stdin). ");
    Console.WriteLine(" Supports wildcards for local file names.");
    Console.WriteLine("OutputFile Output file name (default stdout)");
    Console.WriteLine(" If input file contains wildcards then this just specifies the output file extension (default .xml)");
}