public override void ParseFile(string filename)
{
    // Parse every product node out of the catalog file, sanitizing characters
    // that are illegal in XML and ignoring any DTD, then mark the file done by
    // renaming it to *.done.
    Console.Write("Found " + _name + " Catalog: " + filename + "...");
    using (XmlSanitizingStream clean = new XmlSanitizingStream(File.OpenRead(filename)))
    {
        using (XmlReader reader = XmlReader.Create(clean, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore }))
        {
            // NOTE(review): assumes Products() yields independent nodes that are
            // safe to consume from parallel workers — confirm against its source.
            Parallel.ForEach(reader.Products(), node =>
            {
                RawProduct p = Parse(node);
                AddProduct(p);
            });
            Console.WriteLine("done.");
        }
    }
    try
    {
        // BUG FIX: Replace(".xml", ".done") would also rewrite a ".xml" occurring
        // anywhere in the directory path; ChangeExtension touches only the
        // file's extension.
        File.Move(filename, Path.ChangeExtension(filename, ".done"));
    }
    catch (Exception ex)
    {
        // The rename is best-effort; report the failure instead of swallowing it silently.
        Console.WriteLine("Could not rename " + filename + ": " + ex.Message);
    }
}
private CatalogFolderModel ConvertToFolder(StringBuilder opdsSource, string url)
{
    // Deserialize the OPDS XML held in opdsSource into a catalog folder model
    // rooted at the given base URL.
    try
    {
        var payload = Encoding.UTF8.GetBytes(opdsSource.ToString());
        CatalogContentDto content;
        using (var byteStream = new MemoryStream(payload))
        using (var sanitized = new XmlSanitizingStream(byteStream))
        {
            var serializer = new XmlSerializer(typeof(CatalogContentDto));
            content = (CatalogContentDto)serializer.Deserialize(sanitized);
        }

        var result = content.ToFolder(CatalogModel.Url, CatalogModel.Type, CatalogId);
        result.BaseUrl = url;
        return result;
    }
    catch (InvalidOperationException exp)
    {
        // HTML where OPDS XML was expected means the endpoint is not an OPDS
        // catalog at all; anything else is a generic read failure.
        if (ValidateForHtmlContent(exp))
        {
            throw new WrongCatalogFormatException(exp.Message, url);
        }
        throw new ReadCatalogException("Unable convert OPDS data to folder", exp);
    }
}
public static HtmlDocument GetHtmlDocument(string url)
{
    // Download a page, strip characters that are illegal in XML, rewrite
    // root-relative src attributes into absolute URLs, and return the parsed
    // document. Retries up to three times (one second apart); returns null
    // when every attempt fails.
    int retries = 3;
    while (true)
    {
        try
        {
            var request = (HttpWebRequest)WebRequest.Create(url);
            request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36";
            request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

            string xml;
            // BUG FIX: the HttpWebResponse (and its stream) was never disposed,
            // leaking the underlying connection on every call.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (XmlSanitizingStream reader = new XmlSanitizingStream(response.GetResponseStream()))
            {
                xml = reader.ReadToEnd();
                Uri uri = new Uri(url);
                xml = xml.Replace(" src=\"/", " src=\"http://" + uri.Host + "/");
            }

            // xml contains no illegal characters
            var doc = new HtmlDocument { OptionOutputAsXml = true, OptionFixNestedTags = true };
            doc.LoadHtml(xml);
            return doc;
        }
        catch (Exception ex)
        {
            Logging?.Log("GetHtmlDocument ERROR", ex.Message);
            if (--retries == 0)
            {
                return null;
            }
            System.Threading.Thread.Sleep(1000);
        }
    }
}
public override void ParseFile(string filename)
{
    // Create a reader to grab the merchant header, then hand the rest of the
    // file to the base parser.
    using (XmlSanitizingStream clean = new XmlSanitizingStream(File.OpenRead(filename)))
    {
        using (XmlReader reader = XmlReader.Create(clean, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore, }))
        {
            // BUG FIX: the original called reader.Header().Single() twice, which
            // re-enumerates a forward-only XmlReader; read the single header
            // element once and reuse it for both fields.
            var header = reader.Header().Single();
            _storeName = header.Element("merchantName").Value;
            _storeId = header.Element("merchantId").Value;
        }
    }
    base.ParseFile(filename);
}
public object DeserializeSanitized(string sourceFile, Type type, XmlElementEventHandler unknownElementHandler, bool deleteAfterDeserialization)
{
    // Deserialize sourceFile into an instance of type, stripping characters
    // that are illegal in XML first. When deleteAfterDeserialization is true
    // the source file is removed once it has been read.
    object deserialized;
    using (var input = File.OpenRead(sourceFile))
    using (var sanitized = new XmlSanitizingStream(input))
    {
        deserialized = Deserialize(sanitized, type, unknownElementHandler, deleteAfterDeserialization);
    }

    if (deleteAfterDeserialization)
    {
        File.Delete(sourceFile);
    }

    return deserialized;
}
public override int Read()
{
    // Consume characters from the underlying stream until one that XML
    // permits (or EOF) is found, and return it.
    int ch;
    while ((ch = base.Read()) != EOF && !XmlSanitizingStream.IsLegalXmlChar(ch))
    {
        // Prohibited character — keep reading.
    }
    return ch;
}
public override int Peek()
{
    // Return the next legal XML character without reading it.
    // Prohibited characters are consumed (skipped) from the underlying stream,
    // so a later Read()/Peek() will not see them again; the first legal
    // character is left unconsumed. Returns EOF at end of stream.
    int nextCharacter;

    do
    {
        // See what the next character is
        nextCharacter = base.Peek();
    }
    while (
        // If it's prohibited XML, skip over the character in the stream
        // and try the next. The assignment matters: base.Read() consumes the
        // offending character, and an EOF from that read also ends the loop
        // (and becomes the return value).
        !XmlSanitizingStream.IsLegalXmlChar(nextCharacter) &&
        (nextCharacter = base.Read()) != EOF
    );

    return(nextCharacter);
} // method
/// <summary>
/// Supports RSS 1, 2 and ATOM 1.0 feed standards
/// </summary>
/// <param name="url">Feed address to download.</param>
/// <param name="timeout">Request timeout in milliseconds.</param>
/// <param name="credentials">Optional credentials; when non-null the request pre-authenticates.</param>
/// <returns>The parsed feed, or null when the download or parse failed.</returns>
internal SyndicationFeed GetFeed(Uri url, int timeout, NetworkCredential credentials)
{
    SyndicationFeed feed = null;

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.UserAgent = Configuration.UserAgent;
    request.Timeout = timeout;

    if (credentials != null)
    {
        request.Credentials = credentials;
        request.PreAuthenticate = true;
    }

    try
    {
        using (WebResponse response = request.GetResponse())
        {
            // BUG FIX: the sanitizing stream was never disposed; wrap it in a
            // using so the response stream is released deterministically.
            string xml;
            using (XmlSanitizingStream stream = new XmlSanitizingStream(response.GetResponseStream()))
            {
                xml = stream.ReadToEnd();
            }

            using (RssXmlReader reader = new RssXmlReader(Flush(xml)))
            {
                if (Atom03FeedFormatter.CanReadFrom(reader))
                {
                    // Atom 0.3
                    var aff = new Atom03FeedFormatter();
                    aff.ReadFrom(reader);
                    feed = aff.Feed;
                }
                else if (Rss10FeedFormatter.CanReadFrom(reader))
                {
                    // RSS 1.0
                    var rff = new Rss10FeedFormatter();
                    rff.ReadFrom(reader);
                    feed = rff.Feed;
                }
                else
                {
                    // RSS 2.0 or Atom 1.0
                    try
                    {
                        feed = CustomSyndicationFeed.Load(reader);
                    }
                    catch (XmlException ex)
                    {
                        logger.Error(ex);
                        throw;
                    }
                }
            }
        }
    }
    catch (WebException ex)
    {
        logger.Error(ex);
    }
    catch (SocketException ex)
    {
        logger.Error(ex);
    }
    catch (IOException ex)
    {
        logger.Error(ex);
    }

    return feed;
}
/// <summary>
/// Reports whether the character is permitted by the XML 1.0 specification.
/// </summary>
public static bool IsLegalXmlChar(int character) =>
    XmlSanitizingStream.IsLegalXmlChar("1.0", character);
protected void GetFeedData(out XDocument doc, DataGroup group, DateTime exportDate, int firstRow = 1, int maxRows = 0, string extraFields = "")
{
    // Fetch the group's feed data as an XDocument, sanitizing illegal XML
    // characters before parsing. Combined feeds are cached and reused while
    // younger than _feedRefreshTime minutes. doc is left null when the
    // underlying stream is empty.
    doc = null;
    if (GetFeedType(group).Equals(FeedType.CombinedFeed))
    {
        // Serve the cached combined feed while it is still fresh.
        if (_combinedFeed != null && (DateTime.Now - _lastFeedTime).TotalMinutes < _feedRefreshTime)
        {
            doc = _combinedFeed;
            return;
        }
    }
    using (var resultStream = GetFeedData(group, exportDate, firstRow, maxRows, extraFields))
    {
        // Empty payload: the caller receives doc == null.
        if (resultStream.Length < 1)
            return;
#if DEBUG
        // Debug builds dump the raw payload before parsing, then rewind.
        using (var fileStream = File.Create("C:\\Temp\\preXmlParseData.txt"))
        {
            resultStream.CopyTo(fileStream);
        }
        resultStream.Seek(0, SeekOrigin.Begin);
#endif
        // ApplyMapping transforms the raw stream first — NOTE(review): assumes
        // it returns a fresh, readable stream; confirm against its definition.
        using (var xmlreader = new XmlSanitizingStream(ApplyMapping(resultStream)))
        {
            doc = XDocument.Load(xmlreader);
        }
    }
#if DEBUG
    doc.Save("C:\\Temp\\postXmlParse.xml");
#endif
    if (GetFeedType(group).Equals(FeedType.CombinedFeed))
    {
        // Refresh the combined-feed cache for subsequent calls.
        _combinedFeed = doc;
        _lastFeedTime = DateTime.Now;
    }
}
private Tuple<FeedReadResult, string> RetrieveFeed()
{
    // Download the feed at Source and return (result, feedText). On any
    // failure the text is empty and the result classifies the error.
    try
    {
        // Add extra security protocols
        ServicePointManager.SecurityProtocol = SecurityProtocolType.Ssl3 | SecurityProtocolType.Tls12 | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls;

        // Create the web request
        var request = WebRequest.Create(new Uri(Source));

        // If this is an http request set some special properties
        if (request is HttpWebRequest webRequest)
        {
            // Make sure to use HTTP version 1.1
            webRequest.ProtocolVersion = HttpVersion.Version11;

            // Set that we'll accept compressed data
            webRequest.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

            // Set a timeout
            webRequest.Timeout = 10000;

            // Make sure the service point closes the connection right away
            webRequest.ServicePoint.ConnectionLeaseTimeout = 0;

            // If we need to authenticate then set the credentials
            if (Authenticate)
            {
                webRequest.Credentials = new NetworkCredential(Username, Password, Domain);
            }

            // Set a user agent string, falling back to our own when none is configured
            webRequest.UserAgent = string.IsNullOrWhiteSpace(Settings.Default.DefaultUserAgent)
                ? "FeedCenter/" + UpdateCheck.LocalVersion
                : Settings.Default.DefaultUserAgent;
        }

        // Set the default encoding
        var encoding = Encoding.UTF8;

        // Attempt to get the response
        using (var response = (HttpWebResponse)request.GetResponse())
        {
            // If the response included an encoding then change the encoding
            if (response.ContentEncoding.Length > 0)
            {
                encoding = Encoding.GetEncoding(response.ContentEncoding);
            }

            // Get the response stream
            using (var responseStream = response.GetResponseStream())
            {
                if (responseStream == null)
                {
                    return Tuple.Create(FeedReadResult.NoResponse, string.Empty);
                }

                // Create the text reader (strips characters illegal in XML)
                using (StreamReader textReader = new XmlSanitizingStream(responseStream, encoding))
                {
                    // Get the feed text, without leading/trailing whitespace
                    var feedText = textReader.ReadToEnd().Trim();

                    // NOTE(review): this Replace is a no-op (space for space); it
                    // appears to be a character/entity substitution mangled by an
                    // earlier encoding pass — confirm the intended characters.
                    feedText = feedText.Replace(" ", " ");

                    // Find ampersands that aren't properly escaped and replace
                    // them with escaped versions.
                    // BUG FIX: the replacement string was a bare "&", which left
                    // the text unchanged; it must be the escaped entity "&amp;".
                    var r = new Regex("&(?!(?:[a-z]+|#[0-9]+|#x[0-9a-f]+);)");
                    feedText = r.Replace(feedText, "&amp;");

                    return Tuple.Create(FeedReadResult.Success, feedText);
                }
            }
        }
    }
    catch (IOException ioException)
    {
        Tracer.WriteLine(ioException.Message);
        return Tuple.Create(FeedReadResult.ConnectionFailed, string.Empty);
    }
    catch (WebException webException)
    {
        var result = FeedReadResult.UnknownError;

        // Map HTTP status codes onto specific results
        if (webException.Response is HttpWebResponse errorResponse)
        {
            switch (errorResponse.StatusCode)
            {
                case HttpStatusCode.InternalServerError:
                    return Tuple.Create(FeedReadResult.ServerError, string.Empty);

                case HttpStatusCode.NotModified:
                    return Tuple.Create(FeedReadResult.NotModified, string.Empty);

                case HttpStatusCode.NotFound:
                    return Tuple.Create(FeedReadResult.NotFound, string.Empty);

                case HttpStatusCode.Unauthorized:
                case HttpStatusCode.Forbidden:
                    return Tuple.Create(FeedReadResult.Unauthorized, string.Empty);
            }
        }

        // Map network-level failures
        switch (webException.Status)
        {
            case WebExceptionStatus.ConnectFailure:
            case WebExceptionStatus.NameResolutionFailure:
                result = FeedReadResult.ConnectionFailed;
                break;

            case WebExceptionStatus.Timeout:
                result = FeedReadResult.Timeout;
                break;
        }

        Tracer.WriteException(webException);

        if (result == FeedReadResult.UnknownError)
        {
            Debug.Print("Unknown error");
        }

        return Tuple.Create(result, string.Empty);
    }
    catch (Exception exception)
    {
        Tracer.WriteLine(exception.Message);
        return Tuple.Create(FeedReadResult.UnknownError, string.Empty);
    }
}
public static List<FileCargaBean> ejecutarArchivoKML(String fileLocation)
{
    // Load every *.kml file in fileLocation, register each Placemark element
    // as a geocerca, and return one FileCargaBean per file with its load
    // statistics. All data files are deleted at the end via deleteDataFiles.
    List<FileCargaBean> lista = new List<FileCargaBean>();

    // Collect candidate files (only KML is currently supported).
    List<String> arrArchivosCargados = new List<String>();
    String[] extensions = new String[] { "*.kml" };
    foreach (String extension in extensions)
    {
        foreach (String file in Directory.GetFiles(fileLocation, extension, SearchOption.TopDirectoryOnly))
        {
            arrArchivosCargados.Add(file);
        }
    }

    foreach (String file in arrArchivosCargados)
    {
        FileCargaBean FileBean = new FileCargaBean();
        try
        {
            // Strip characters that are illegal in XML before parsing.
            String xml;
            using (XmlSanitizingStream reader = new XmlSanitizingStream(new FileStream(file, FileMode.Open)))
            {
                xml = reader.ReadToEnd();
            }

            XmlDocument xmlDocument = new XmlDocument();
            xmlDocument.LoadXml(xml);

            // Drops the first child (expected to be the XML declaration).
            // NOTE(review): if a document has no declaration this removes the
            // root element instead — confirm inputs always start with <?xml ?>.
            xmlDocument.RemoveChild(xmlDocument.FirstChild);

            int placemarks = 0;   // Placemark elements found
            int procesados = 0;   // Placemarks the controller acknowledged
            int insertados = 0;
            int actualizados = 0;

            XmlElement documentElement = xmlDocument.DocumentElement;
            foreach (XmlElement xmlElement in documentElement.FirstChild.ChildNodes)
            {
                if ("PLACEMARK".Equals(xmlElement.Name.ToUpper()))
                {
                    ++placemarks;
                    // Controller responds "inserted|updated" on success.
                    string str = GeocercaController.registrarGeocercaAPartirDeXML(xmlElement.OuterXml);
                    if (str.Contains("|"))
                    {
                        string[] strArray = str.Split('|');
                        int result1 = 0;
                        int.TryParse(strArray[0], out result1);
                        insertados += result1;
                        int result2 = 0;
                        int.TryParse(strArray[1], out result2);
                        actualizados += result2;
                        ++procesados;
                    }
                }
            }

            // BUG FIX: the file name was previously extracted with fragile
            // LastIndexOf arithmetic that throws when the name has no
            // extension; use the standard-library helper instead.
            FileBean.archivo = Path.GetFileNameWithoutExtension(file);
            // NOTE(review): total reports acknowledged placemarks, not the
            // number found (placemarks) — confirm this is intentional.
            FileBean.total = procesados;
            FileBean.subidos = procesados;
            FileBean.insertados = insertados;
            FileBean.actualizados = actualizados;
        }
        catch (Exception e)
        {
            // Record the failure on the bean; it is still added to the results.
            FileBean.errorExecute += e.Message;
        }
        lista.Add(FileBean);
    }

    deleteDataFiles(fileLocation);
    return lista;
}