/// <summary>
/// Loads the reader's current node as an <see cref="XmlDocument"/>.
/// </summary>
/// <param name="reader"><see cref="XmlReader"/> positioned on the element whose content is to be loaded into the document.
/// If it is positioned on an attribute, it is first moved back to the owning element.</param>
/// <returns>
/// An <see cref="XmlDocument"/> built from the current element; <c>null</c> when the reader is not positioned
/// on an element; or a document in whatever state it reached (possibly empty) when copying the node fails.
/// </returns>
public static XmlDocument ReadNodeAsXmlDocument(XmlReader reader)
{
    if (reader.NodeType == XmlNodeType.Attribute)
    {
        reader.MoveToElement();
    }
    if (reader.NodeType != XmlNodeType.Element)
    {
        return null;
    }

    XmlDocument xd = new XmlDocument();

    // Name and depth identify the end tag (or the empty-element tag) that terminates the copied subtree.
    string nodeName = reader.Name;
    int depth = reader.Depth;

    using (MemoryStream memoryStream = new MemoryStream())
    {
        XmlWriterSettings settings = new XmlWriterSettings();
        settings.OmitXmlDeclaration = true;
        using (XmlWriter xw = XmlWriter.Create(memoryStream, settings))
        {
            try
            {
                // IsEmptyElement is sampled before SerializeNode is called, exactly as in the original flow.
                bool isEmpty = reader.IsEmptyElement;
                Transformace.SerializeNode(reader, xw);
                if (isEmpty && reader.Name == nodeName && reader.Depth == depth)
                {
                    // The start element is an empty element, so it is the whole subtree.
                    LoadXmlString(xw, memoryStream, xd);
                    return xd;
                }

                while (reader.Read())
                {
                    isEmpty = reader.IsEmptyElement;
                    Transformace.SerializeNode(reader, xw);

                    bool sameNode = reader.Name == nodeName && reader.Depth == depth;
                    bool closesNode = isEmpty || reader.NodeType == XmlNodeType.EndElement;
                    if (sameNode && closesNode)
                    {
                        LoadXmlString(xw, memoryStream, xd);
                        break;
                    }
                }
            }
            catch (Exception)
            {
                // Deliberate best effort: a failure while copying the node is not propagated;
                // the caller receives the document in whatever state it reached (typically empty).
            }
        }
    }
    return xd;
}
/// <summary>
/// Reads the input file and writes an <c>entries</c> document to the output file containing one element
/// for every <c>entry</c> element whose <c>use</c> attribute is <c>public</c> and whose <c>type</c>
/// attribute is <c>excl</c>.
/// </summary>
/// <exception cref="ArgumentNullException">The input or the output file name is not set.</exception>
public void VypisHeslaTypeUse()
{
    if (base.VstupniSoubor == null || base.VystupniSoubor == null)
    {
        // The single-string constructor of ArgumentNullException interprets its argument as the
        // parameter name, so the message has to go through the (paramName, message) overload.
        string missing = base.VstupniSoubor == null ? "VstupniSoubor" : "VystupniSoubor";
        throw new ArgumentNullException(missing, "Nebyly zadány vhodné názvy vstupního nebo výstupního souboru.");
    }

    using (XmlReader r = Objekty.VytvorXmlReader(base.VstupniSoubor))
    using (XmlWriter xw = Objekty.VytvorXmlWriter(base.VystupniSoubor))
    {
        xw.WriteStartDocument(true);
        xw.WriteStartElement("entries");

        while (r.Read())
        {
            if (r.NodeType != XmlNodeType.Element || r.Name != "entry")
            {
                continue;
            }

            string sUse = r.GetAttribute("use");
            string sType = r.GetAttribute("type");
            if (sUse == "public" && sType == "excl")
            {
                // NOTE(review): DPXT.SerializeNode is defined elsewhere; presumably it writes only the
                // start tag of the entry, which is closed right away here - confirm for empty elements.
                DPXT.SerializeNode(r, xw);
                xw.WriteEndElement();
            }
        }

        xw.WriteEndElement(); // entries
    }
}
/// <summary>
/// Adjusts the boundaries of dictionary entries. Each part starting with an <c>entryhead</c> element is
/// wrapped in an <c>entry</c> element, and within an entry the senses subordinate to a <c>senseGrp</c>
/// element are grouped (consecutive <c>sense</c> elements are wrapped in <c>senses</c>).
/// </summary>
/// <param name="inputFile">Path of the XML file to read.</param>
/// <param name="outputFile">Path of the XML file to write.</param>
/// <exception cref="ArgumentNullException"><paramref name="inputFile"/> or <paramref name="outputFile"/> is <c>null</c>.</exception>
public override void UpravitHraniceHesloveStati(string inputFile, string outputFile)
{
    if (inputFile == null || outputFile == null)
    {
        // The single-string constructor of ArgumentNullException interprets its argument as the
        // parameter name, so the message has to go through the (paramName, message) overload.
        throw new ArgumentNullException(
            inputFile == null ? "inputFile" : "outputFile",
            "Nebyly zadány vhodné názvy vstupního nebo výstupního souboru.");
    }

    using (XmlReader r = Objekty.VytvorXmlReader(inputFile))
    {
        using (XmlWriter xw = Objekty.VytvorXmlWriter(outputFile))
        {
            xw.WriteStartDocument(true);

            bool blnPrvniEntryhead = true;          // no <entry> has been opened yet (in the current div1)
            bool blnVyskytlySeVyznamy = false;      // a <sense> was seen since the last <senseGrp>
            string strTypSenseGrp = null;           // "type" attribute of the senseGrp being processed
            Stack<string> gstTypSenseGrp = new Stack<string>();
            int intPocetOtevrenychSenseGrp = 0;     // senseGrp start tags written but not yet closed
            bool blnPrvniVyznam = false;            // a <senses> wrapper is currently open

            // Element nesting counters for the input and the output. WriteStartElement/WriteEndElement
            // are helpers defined elsewhere; they receive the writer counter by reference and
            // presumably keep it in step with the elements they write - confirm.
            int readerDepth = 0;
            int writerDepth = 0;

            while (r.Read())
            {
                if (r.NodeType == XmlNodeType.Element)
                {
                    readerDepth++;
                    if (r.IsEmptyElement)
                    {
                        readerDepth--;
                    }

                    switch (r.Name)
                    {
                        case "entryhead":
                            if (blnPrvniVyznam)
                            {
                                WriteEndElement(xw, ref writerDepth); // senses
                                xw.WriteWhitespace("\r\n");
                            }
                            blnPrvniVyznam = false;
                            blnVyskytlySeVyznamy = false;

                            int iPocetSenseGrp = gstTypSenseGrp.Count;
                            if (iPocetSenseGrp > 0)
                            {
                                // Starts at 1 on purpose (kept from the original): one group is left
                                // for the loop below, which closes whatever is still counted as open.
                                for (int i = 1; i < iPocetSenseGrp; i++)
                                {
                                    intPocetOtevrenychSenseGrp--;
                                    WriteEndElement(xw, ref writerDepth); // senseGrp
                                    xw.WriteWhitespace("\r\n");
                                }
                                gstTypSenseGrp.Clear();
                            }
                            while (intPocetOtevrenychSenseGrp > 0)
                            {
                                WriteEndElement(xw, ref writerDepth);
                                xw.WriteWhitespace("\r\n");
                                intPocetOtevrenychSenseGrp--;
                            }

                            if (!blnPrvniEntryhead)
                            {
                                WriteEndElement(xw, ref writerDepth); // entry
                                xw.WriteWhitespace("\r\n");
                            }
                            else
                            {
                                blnPrvniEntryhead = false;
                            }
                            WriteStartElement(xw, "entry", ref writerDepth);
                            xw.WriteWhitespace("\r\n");
                            goto default;

                        case "senseGrp":
                            strTypSenseGrp = r.GetAttribute("type");
                            intPocetOtevrenychSenseGrp++;
                            if (gstTypSenseGrp.Count > 0)
                            {
                                if (strTypSenseGrp == gstTypSenseGrp.Peek())
                                {
                                    // Same type as the group on top of the stack: close that group first.
                                    WriteEndElement(xw, ref writerDepth); // senseGrp
                                    xw.WriteWhitespace("\r\n");
                                    intPocetOtevrenychSenseGrp--;
                                }
                                else
                                {
                                    if (blnVyskytlySeVyznamy)
                                    {
                                        WriteEndElement(xw, ref writerDepth);
                                        xw.WriteWhitespace("\r\n");
                                        intPocetOtevrenychSenseGrp--;
                                    }
                                    else
                                    {
                                        gstTypSenseGrp.Push(strTypSenseGrp);
                                    }
                                }
                            }
                            else
                            {
                                gstTypSenseGrp.Push(strTypSenseGrp);
                            }
                            blnVyskytlySeVyznamy = false;
                            goto default;

                        case "sense":
                            if (!blnPrvniVyznam)
                            {
                                WriteStartElement(xw, "senses", ref writerDepth);
                                xw.WriteWhitespace("\r\n");
                                blnPrvniVyznam = true;
                            }
                            blnVyskytlySeVyznamy = true;
                            goto default;

                        case "note":
                        case "appendix":
                            if (r.IsEmptyElement)
                            {
                                goto default;
                            }
                            // A non-empty note/appendix ends all open sense groups and the senses wrapper.
                            while (gstTypSenseGrp.Count > 0)
                            {
                                WriteEndElement(xw, ref writerDepth);
                                xw.WriteWhitespace("\r\n");
                                intPocetOtevrenychSenseGrp--;
                                gstTypSenseGrp.Pop();
                            }
                            if (blnPrvniVyznam)
                            {
                                WriteEndElement(xw, ref writerDepth); // senses
                                xw.WriteWhitespace("\r\n");
                                blnPrvniVyznam = false;
                            }
                            goto default;

                        default:
                            if (!r.IsEmptyElement)
                            {
                                writerDepth++;
                            }
                            DPXT.SerializeNode(r, xw);
                            break;
                    }
                }
                else
                {
                    if (r.NodeType == XmlNodeType.EndElement)
                    {
                        readerDepth--;
                        switch (r.Name)
                        {
                            case "senseGrp":
                                // The input end tag is not copied here; only an open senses wrapper is closed.
                                if (blnPrvniVyznam)
                                {
                                    WriteEndElement(xw, ref writerDepth); // senses
                                    xw.WriteWhitespace("\r\n");
                                    blnPrvniVyznam = false;
                                }
                                break;

                            case "div1":
                                // Close everything still open in the output below the div1 level
                                // (the last entry and anything nested in it).
                                while (writerDepth > readerDepth + 1)
                                {
                                    WriteEndElement(xw, ref writerDepth);
                                }
                                blnPrvniEntryhead = true;
                                blnPrvniVyznam = false;
                                goto default;

                            default:
                                writerDepth--;
                                DPXT.SerializeNode(r, xw);
                                break;
                        }
                    }
                    else
                    {
                        // The XML declaration is not special-cased; an earlier attempt to do so was
                        // abandoned (per the original note, it produced the error "cannot write the
                        // declaration, the document has already been started").
                        DPXT.SerializeNode(r, xw);
                    }
                }
            }
        }
    }
}
/// <summary>
/// Copies the XML document read from <c>Input</c> to <c>Output</c> while tokenizing its text nodes:
/// words are flushed through <c>WriteWordElement</c>, separators are written as <c>c</c> elements of
/// type <c>space</c>, and characters listed in <c>Punctation</c> are written as <c>pc</c> elements.
/// Text inside elements named in <c>IgnoredElements</c> is copied unchanged.
/// </summary>
/// <remarks>
/// The first element read additionally receives an <c>xmlns:nlp</c> attribute with the value
/// <c>NsNlp</c>.
/// </remarks>
public void Run()
{
    int id = 0;
    XmlElementInfo currentElement = null;

    XmlWriterSettings xmlWriterSettings = new XmlWriterSettings();
    xmlWriterSettings.Indent = true;

    List<string> ignoredElements = IgnoredElements ?? new List<string>();
    XmlElementInfo ignoredElement = null;
    string punctation = Punctation ?? String.Empty;
    bool ignoreText = false;

    // Tri-state flag: null = no element read yet, true = the first element has just been read and
    // still needs its xmlns:nlp attribute, false = the attribute was written / a later element.
    bool? isRoot = null;

    using (XmlReader xmlReader = XmlReader.Create(Input))
    {
        using (XmlWriter xmlWriter = XmlWriter.Create(Output, xmlWriterSettings))
        {
            xmlWriter.WriteStartDocument();
            while (xmlReader.Read())
            {
                XmlNodeType nodeType = xmlReader.NodeType;

                if (nodeType == XmlNodeType.Element)
                {
                    bool isEmptyElement = xmlReader.IsEmptyElement;
                    if (!isEmptyElement)
                    {
                        currentElement = XmlElementInfo.GetInfo(xmlReader, currentElement);
                    }
                    WriteXmlElementInfo(currentElement);
                    isRoot = !isRoot.HasValue;

                    // Only a non-empty element can open an ignored region. For an empty element
                    // currentElement still refers to the parent, so recording it would switch off
                    // tokenization for the rest of the parent's content.
                    if (ignoredElement == null && !isEmptyElement && ignoredElements.Contains(xmlReader.LocalName))
                    {
                        ignoredElement = currentElement;
                        ignoreText = true;
                    }
                }

                if (nodeType == XmlNodeType.EndElement)
                {
                    // Leaving the element that opened the ignored region (matched by name and depth).
                    if (currentElement != null && ignoreText && ignoredElement != null
                        && currentElement.Name == ignoredElement.Name
                        && currentElement.Depth == ignoredElement.Depth)
                    {
                        ignoredElement = null;
                        ignoreText = false;
                    }
                    if (currentElement != null)
                    {
                        currentElement = currentElement.Parent;
                    }
                    WriteXmlElementInfo(currentElement);
                }

                if (nodeType == XmlNodeType.Text)
                {
                    if (ignoreText)
                    {
                        Transformace.SerializeNode(xmlReader, xmlWriter);
                        continue;
                    }

                    string text = xmlReader.Value;
                    // NOTE(review): this shortcut only looks for a space, so text without a space is
                    // written as a single <w> even if it contains a tab or punctuation - confirm intent.
                    if (text.IndexOf(' ') == -1)
                    {
                        xmlWriter.WriteElementString("w", NsTei, text);
                    }
                    else
                    {
                        _capacity = text.Length;
                        StringBuilder stringBuilder = new StringBuilder(_capacity);
                        foreach (char c in text)
                        {
                            // NOTE(review): the original comment says "space and non-breaking space",
                            // but the code tests U+0001, not U+00A0 - confirm which one is intended.
                            if (c == ' ' || c == '\u0001' || c == '\t')
                            {
                                // WriteWordElement is defined elsewhere; presumably it writes the
                                // buffered word (if any) and resets the buffer - confirm.
                                WriteWordElement(ref stringBuilder, xmlWriter, _capacity, id++, XmlIdFormat);
                                xmlWriter.WriteStartElement("c");
                                xmlWriter.WriteAttributeString("type", "space");
                                xmlWriter.WriteAttributeString("space", NsXml, "preserve");
                                if (c == '\t')
                                {
                                    xmlWriter.WriteString("\t");
                                }
                                else
                                {
                                    xmlWriter.WriteString(" ");
                                }
                                xmlWriter.WriteEndElement();
                            }
                            else if (punctation.IndexOf(c) > -1)
                            {
                                WriteWordElement(ref stringBuilder, xmlWriter, _capacity, id++, XmlIdFormat);
                                xmlWriter.WriteElementString("pc", NsTei, c.ToString());
                            }
                            else
                            {
                                stringBuilder.Append(c);
                            }
                        }
                        // Write out the last word left in the buffer.
                        WriteWordElement(ref stringBuilder, xmlWriter, 0, id++, XmlIdFormat);
                    }
                }
                else
                {
                    Transformace.SerializeNode(xmlReader, xmlWriter);
                    if (isRoot.HasValue && isRoot.Value)
                    {
                        xmlWriter.WriteAttributeString("xmlns", "nlp", null, NsNlp);
                        isRoot = false;
                    }
                }
            }
        }
    }
}
/// <summary>
/// Adjusts the boundaries of dictionary entries: groups all parts of an entry into an <c>entry</c>
/// element, using the <c>entryend</c> marker to close it. Consecutive <c>sense</c> elements are
/// wrapped in a <c>senses</c> element.
/// </summary>
/// <remarks>
/// Errors raised while processing are reported on the console and are not rethrown.
/// According to the original note, this default implementation suits ESSČ.
/// </remarks>
/// <param name="inputFile">Path of the XML file to read.</param>
/// <param name="outputFile">Path of the XML file to write.</param>
/// <exception cref="ArgumentNullException"><paramref name="inputFile"/> or <paramref name="outputFile"/> is <c>null</c>.</exception>
public override void UpravitHraniceHesloveStati(string inputFile, string outputFile)
{
    if (inputFile == null || outputFile == null)
    {
        // The single-string constructor of ArgumentNullException interprets its argument as the
        // parameter name, so the message has to go through the (paramName, message) overload.
        throw new ArgumentNullException(
            inputFile == null ? "inputFile" : "outputFile",
            "Nebyly zadány vhodné názvy vstupního nebo výstupního souboru.");
    }

    using (XmlReader r = Objekty.VytvorXmlReader(inputFile))
    {
        using (XmlWriter xw = Objekty.VytvorXmlWriter(outputFile))
        {
            bool blnJeSenseGrp = false;     // a senseGrp start tag is open in the output
            bool blnJeSense = false;        // a <senses> wrapper is open in the output
            xw.WriteStartDocument(true);
            bool blnPrvniEntryhead = true;  // the next entryhead must open a new <entry>

            try
            {
                while (r.Read())
                {
                    if (r.NodeType == XmlNodeType.Element)
                    {
                        switch (r.Name)
                        {
                            case "entryhead":
                                if (blnPrvniEntryhead)
                                {
                                    xw.WriteStartElement("entry");
                                    xw.WriteWhitespace("\r\n");
                                    blnPrvniEntryhead = false;
                                }
                                DPXT.SerializeNode(r, xw);
                                blnJeSenseGrp = blnJeSense = false;
                                break;

                            case "entryend":
                                // The marker itself is not copied; it only closes the entry.
                                xw.WriteEndElement(); // entry
                                xw.WriteWhitespace("\r\n");
                                blnPrvniEntryhead = true;
                                break;

                            case "senseGrp":
                                if (blnJeSense)
                                {
                                    xw.WriteEndElement(); // senses
                                }
                                if (blnJeSenseGrp)
                                {
                                    xw.WriteEndElement(); // senseGrp
                                }
                                blnJeSenseGrp = true;
                                blnJeSense = false;
                                goto default;

                            case "sense":
                                if (!blnJeSense)
                                {
                                    xw.WriteStartElement("senses");
                                }
                                blnJeSense = true;
                                goto default;

                            default:
                                // Any other element at depth 2 ends the open senses wrapper / sense group.
                                if (blnJeSense && r.Depth == 2 && !(r.Name == "sense"))
                                {
                                    xw.WriteEndElement(); // senses
                                    blnJeSense = false;
                                }
                                if (blnJeSenseGrp && r.Depth == 2 && !r.Name.StartsWith("sense", StringComparison.Ordinal))
                                {
                                    blnJeSenseGrp = false;
                                    xw.WriteEndElement(); // senseGrp
                                }
                                DPXT.SerializeNode(r, xw);
                                break;
                        }
                    }
                    else if (r.NodeType == XmlNodeType.EndElement)
                    {
                        if (r.Depth == 2)
                        {
                            switch (r.Name)
                            {
                                case "senseGrp":
                                    // The input end tag is intentionally not copied; the group is
                                    // closed by the element handling above.
                                    break;

                                case "sense":
                                    DPXT.SerializeNode(r, xw);
                                    break;

                                default:
                                    if (blnJeSense)
                                    {
                                        xw.WriteEndElement(); // senses
                                        blnJeSense = false;
                                    }
                                    DPXT.SerializeNode(r, xw);
                                    break;
                            }
                        }
                        else
                        {
                            DPXT.SerializeNode(r, xw);
                        }
                    }
                    else
                    {
                        DPXT.SerializeNode(r, xw);
                    }
                }
            }
            catch (Exception ex)
            {
                // Best-effort diagnostics (not rethrown). The exception message used to be overwritten
                // before it was ever reported; it is now printed ahead of the input context.
                Console.WriteLine(ex.Message);

                // Find the nearest non-blank piece of input following the failure position.
                string sChyba = r.ReadInnerXml();
                while (sChyba.Trim().Length == 0)
                {
                    if (r.Read())
                    {
                        sChyba = r.ReadInnerXml();
                    }
                    else
                    {
                        sChyba = "XmlReader je na konci souboru.";
                    }
                }
                Console.WriteLine(sChyba);
            }
            finally
            {
                if (xw.WriteState != WriteState.Error && xw.WriteState != WriteState.Closed)
                {
                    xw.WriteEndDocument();
                }
                xw.Flush();
                xw.Close();
            }
        }
    }
}