/// <summary>
/// Streams the input XML to the output XML. Every non-empty <c>entry</c> element is loaded
/// into its own <see cref="XmlDocument"/> and handed to <c>ZkonsolidujEntry</c>; it is written
/// out only when that call returns true. All other nodes are copied with
/// <c>Transformace.SerializeNode</c>.
/// </summary>
/// <param name="inputFile">Input passed to <c>Objekty.VytvorXmlReader</c>.</param>
/// <param name="outputFile">Output passed to <c>Objekty.VytvorXmlWriter</c>.</param>
public override void KonsolidovatHeslovouStat(string inputFile, string outputFile)
{
    int entryCounter = 0;
    string dictionaryName = null;

    using (XmlReader reader = Objekty.VytvorXmlReader(inputFile))
    using (XmlWriter writer = Objekty.VytvorXmlWriter(outputFile))
    {
        writer.WriteStartDocument(true);

        while (reader.Read())
        {
            bool isStartTag = reader.NodeType == XmlNodeType.Element;
            bool isEndTag = reader.NodeType == XmlNodeType.EndElement;

            if (isEndTag && reader.Name == "entry")
            {
                // End tags of entries are never copied on their own.
                continue;
            }

            if (isStartTag && reader.Name == "entry")
            {
                XmlDocument entryDocument = new XmlDocument();
                XmlNode entryNode = entryDocument.ReadNode(reader);
                if (entryNode != null)
                {
                    entryDocument.AppendChild(entryNode);
                }

                // The counter is advanced only for entries that really reach ZkonsolidujEntry
                // (short-circuit evaluation keeps empty entries unnumbered).
                XmlElement entryRoot = entryDocument.DocumentElement;
                if (entryRoot != null
                    && !entryRoot.IsEmpty
                    && ZkonsolidujEntry(ref entryDocument, dictionaryName, ++entryCounter))
                {
                    entryDocument.WriteContentTo(writer);
                }

                // NOTE(review): ReadNode leaves the reader on the node that follows the entry,
                // and the loop's Read() then steps past that node without copying it - confirm
                // this is acceptable for the inputs produced upstream.
                continue;
            }

            if (isStartTag && reader.Name == "dictionary")
            {
                // Remember the dictionary name for the entries that follow.
                dictionaryName = reader.GetAttribute("name");
            }

            Transformace.SerializeNode(reader, writer);
        }
    }
}
/// <summary>
/// Copies the input XML to the output XML. For every <c>reg</c> element whose
/// <c>xml:compute-reg</c> attribute equals <c>"true"</c> the inner XML is run through the
/// embedded change-rule set and written back as text, and the marker attribute is dropped.
/// All other nodes are copied with <c>Transformace.SerializeNode</c>.
/// </summary>
/// <param name="inputFile">Path of the XML file to read.</param>
/// <param name="outputFile">Path of the XML file to create (indented output).</param>
/// <param name="filenameWithoutExtension">Not used by this implementation.</param>
public override void SeskupitHeslaPismene(string inputFile, string outputFile, string filenameWithoutExtension)
{
    var assembly = Assembly.GetExecutingAssembly();

    // The manifest resource stream is owned by this method; keep it open for the whole run
    // (in case the rule set reads lazily) and dispose it afterwards instead of leaking it.
    using (var ruleStream = assembly.GetManifestResourceStream(m_changeRuleSetFile))
    {
        var changeRuleSet = ChangeRuleSet.Load(ruleStream);
        var xws = new XmlWriterSettings { Indent = true };

        using (var xmlWriter = XmlWriter.Create(outputFile, xws))
        using (var xmlReader = XmlReader.Create(inputFile))
        {
            xmlWriter.WriteStartDocument();

            while (xmlReader.Read())
            {
                bool computeReg = xmlReader.NodeType == XmlNodeType.Element
                                  && xmlReader.Name == "reg"
                                  && xmlReader.GetAttribute("xml:compute-reg") == "true";

                if (!computeReg)
                {
                    // Everything else (including end tags) is copied unchanged.
                    Transformace.SerializeNode(xmlReader, xmlWriter);
                    continue;
                }

                xmlWriter.WriteStartElement(xmlReader.Name);
                while (xmlReader.MoveToNextAttribute())
                {
                    if (xmlReader.Name == "xml:compute-reg")
                    {
                        // The processing marker must not appear in the output.
                        continue;
                    }

                    xmlWriter.WriteAttributeString(xmlReader.Prefix, xmlReader.LocalName, xmlReader.NamespaceURI, xmlReader.Value);
                }

                // Step back from the last attribute to the owning element before reading its content.
                xmlReader.MoveToContent();
                xmlWriter.WriteString(changeRuleSet.Apply(xmlReader.ReadInnerXml()));
                xmlWriter.WriteEndElement();

                // NOTE(review): ReadInnerXml leaves the reader after the reg end tag, so the
                // following Read() steps past one node without copying it - confirm acceptable.
            }

            xmlWriter.WriteEndDocument();
        }
    }
}
/// <summary>
/// Rewrites a WordprocessingML document into a temporary file, merging each <c>w:r</c> run
/// into the previously buffered run when both have the same character style. Runs without a
/// <c>w:t</c> child flush the buffer and are written unchanged.
/// </summary>
/// <param name="document">Path or URI of the XML document to read.</param>
/// <returns>
/// A result whose <c>Output</c> is the temporary file path, <c>Success</c> is true and
/// <c>NumerOfChanges</c> counts the merged runs.
/// </returns>
public CleaningResult Clean(string document)
{
    CleaningResult result = new CleaningResult();
    string temp = Path.GetTempFileName();
    RunObject previous = null; // run buffered for a possible merge with its successor

    using (xmlReader = XmlReader.Create(document))
    {
        XmlNamespaceManager nsmgr = new XmlNamespaceManager(xmlReader.NameTable);
        nsmgr.AddNamespace("w", Pomucky.Dokument.RelWordprocessingRelationshipTypeW);

        using (xmlWriter = XmlWriter.Create(temp))
        {
            xmlWriter.WriteStartDocument();

            while (xmlReader.Read())
            {
                string nazev = xmlReader.Name;

                // Reading a run consumes it from the reader, so the node the reader lands on
                // afterwards is handled in the same iteration without another Read().
                // NOTE(review): as in the original code, 'nazev' is NOT refreshed after a run
                // has been consumed; an element directly following a run is therefore treated
                // as a run too, and a directly following </w:p> does not flush the buffer.
                // Confirm whether that is intended before changing it.
                while (nazev == "w:r" && xmlReader.NodeType == XmlNodeType.Element)
                {
                    RunObject actual = new RunObject();
                    actual.Xml = Objekty.ReadNodeAsXmlDocument(xmlReader);

                    XmlNode styleNode = actual.Xml.SelectSingleNode("/w:r/w:rPr[1]/w:rStyle/@w:val", nsmgr);
                    actual.Style = styleNode == null ? "Standardní písmo odstavce" : styleNode.Value;

                    XmlNode actualT = actual.Xml.SelectSingleNode("/w:r/w:t[1]", nsmgr);
                    if (actualT == null)
                    {
                        // Run without text (e.g. a picture in the document): flush the buffer
                        // and write the run as it is.
                        if (previous != null)
                        {
                            previous.Xml.Save(xmlWriter);
                        }

                        actual.Xml.Save(xmlWriter);
                        previous = null;
                    }
                    else if (previous == null)
                    {
                        previous = actual;
                    }
                    else if (previous.Style == actual.Style)
                    {
                        XmlNode prevT = previous.Xml.SelectSingleNode("/w:r/w:t[1]", nsmgr);
                        prevT.InnerText += actualT.InnerText;

                        // Trailing space needs xml:space="preserve". The length guard avoids an
                        // IndexOutOfRangeException when both runs carry empty text.
                        string mergedText = prevT.InnerText;
                        if (mergedText.Length > 0
                            && mergedText[mergedText.Length - 1] == ' '
                            && prevT.Attributes["xml:space"] == null)
                        {
                            XmlAttribute at = previous.Xml.CreateAttribute("xml", "space", Objekty.XmlNamespace);
                            at.Value = "preserve";
                            prevT.Attributes.Append(at);
                        }

                        result.NumerOfChanges++;
                    }
                    else
                    {
                        // Different style: write the buffered run and buffer the new one.
                        previous.Xml.Save(xmlWriter);
                        previous = actual;
                    }
                }

                if (xmlReader.NodeType == XmlNodeType.EndElement && nazev == "w:p")
                {
                    // End of paragraph: the buffered run must be written before the end tag.
                    if (previous != null)
                    {
                        previous.Xml.Save(xmlWriter);
                    }

                    previous = null;
                }

                Transformace.SerializeNode(xmlReader, xmlWriter);
            }

            xmlWriter.WriteEndDocument();
        }
    }

    result.Output = temp;
    result.Success = true;
    return result;
}
/// <summary>
/// Splits the headword list by letters of the alphabet. One letter = one file.
/// Within each entry (<c>heslovaStat</c>) all <c>hw</c> texts are joined with ", " into a
/// single <c>hw</c> element.
/// </summary>
/// <param name="strVstupniSoubor">Input file (the complete headword list in XML format).</param>
/// <param name="sVystupniAdresar">
/// Output directory in which the generated files are stored (one file per letter). The value is
/// concatenated directly with the letter id, so it has to end with a directory separator.
/// </param>
internal static void RozdelitPodlePismen(string strVstupniSoubor, string sVystupniAdresar)
{
    XmlReaderSettings xrs = new XmlReaderSettings();
    xrs.ValidationFlags = System.Xml.Schema.XmlSchemaValidationFlags.None;

    XmlTextWriter xwPismeno = null;        // writer of the letter file currently being produced
    StringBuilder sb = new StringBuilder(); // headwords collected for the current entry

    try
    {
        using (XmlTextReader treader = new XmlTextReader(strVstupniSoubor))
        using (XmlReader r = XmlReader.Create(treader, xrs))
        {
            while (r.Read())
            {
                if (r.NodeType == XmlNodeType.Element)
                {
                    switch (r.Name)
                    {
                        case "pismeno":
                            // A new letter starts: finish the previous letter file first.
                            if (xwPismeno != null)
                            {
                                xwPismeno.WriteEndDocument();
                                xwPismeno.Flush();
                                xwPismeno.Close();
                            }

                            string sID = r.GetAttribute("id");
                            xwPismeno = new XmlTextWriter(sVystupniAdresar + sID + ".xml", System.Text.Encoding.UTF8);
                            xwPismeno.Formatting = Formatting.Indented;
                            xwPismeno.Indentation = 2;
                            xwPismeno.WriteStartDocument(true);
                            Transformace.SerializeNode(r, xwPismeno);
                            break;

                        case "heslovaStat":
                            Transformace.SerializeNode(r, xwPismeno);
                            sb = new StringBuilder();
                            break;

                        case "hw":
                            sb.Append(r.ReadString() + ", ");
                            break;
                    }
                }

                // Deliberately not an 'else': ReadString above moves the reader forward, so the
                // node type has to be checked again in the same iteration.
                if (r.NodeType == XmlNodeType.EndElement)
                {
                    switch (r.Name)
                    {
                        case "heslovaStat":
                            // Drop the trailing ", "; an entry without any hw leaves the buffer empty.
                            if (sb.Length >= 2)
                            {
                                sb.Remove(sb.Length - 2, 2);
                            }

                            xwPismeno.WriteStartElement("hw");
                            xwPismeno.WriteString(sb.ToString());
                            xwPismeno.WriteEndElement(); // hw
                            xwPismeno.WriteEndElement(); // heslovaStat
                            break;

                        case "pismeno":
                            Transformace.SerializeNode(r, xwPismeno);
                            break;
                    }
                }
            }
        }

        // No writer exists when the input contained no 'pismeno' element at all.
        if (xwPismeno != null)
        {
            xwPismeno.WriteEndDocument();
        }
    }
    finally
    {
        // Close the last letter file on success and on failure alike.
        if (xwPismeno != null)
        {
            xwPismeno.Close();
        }
    }
}
/// <summary>
/// Builds the headword list (<c>heslar</c>) XML from a dictionary XML file:
/// <c>div1</c> becomes <c>pismeno</c>, <c>entry</c> becomes <c>heslovaStat</c>, and
/// <c>hw</c>/<c>hwo</c> elements are written with cleaned-up headword text.
/// </summary>
/// <param name="strVstupniSoubor">Input passed to <c>Objekty.VytvorXmlReader</c>.</param>
/// <param name="strVystupniSoubor">Output passed to <c>Objekty.VytvorXmlWriter</c>.</param>
public static void HeslarXml(string strVstupniSoubor, string strVystupniSoubor)
{
    char[] chIndexy = schIndexy;
    char[] chSeparatory = schSeparatory;

    using (XmlReader r = Objekty.VytvorXmlReader(strVstupniSoubor))
    using (XmlWriter xwHeslar = Objekty.VytvorXmlWriter(strVystupniSoubor))
    {
        xwHeslar.WriteStartDocument(true);
        xwHeslar.WriteStartElement("heslar");

        while (r.Read())
        {
            if (r.NodeType == XmlNodeType.Element)
            {
                switch (r.Name)
                {
                    case "dictionary":
                        string sSource = r.GetAttribute("name");
                        xwHeslar.WriteAttributeString("dictionary", sSource);
                        break;

                    case "div1":
                        xwHeslar.WriteStartElement("pismeno");
                        xwHeslar.WriteAttributeString("id", r.GetAttribute("id"));
                        xwHeslar.WriteAttributeString("text", r.GetAttribute("text"));
                        break;

                    case "entry":
                        xwHeslar.WriteStartElement("heslovaStat");
                        Transformace.SerializeAttributes(r, xwHeslar, false);
                        break;

                    case "hwo":
                    case "hw":
                        // TODO (from the original author): check whether the paragraph contains
                        // "nenáležitá podoba" (improper form) and then exclude the headword or
                        // mark it as internal - but that marker only follows the headword.
                        string sForma = r.GetAttribute("form");
                        string sHom = r.GetAttribute("hom");

                        // Attributes are added after this call, so it presumably writes only the
                        // start tag with the source attributes - TODO confirm.
                        Transformace.SerializeNode(r, xwHeslar);

                        string strHeslo = r.ReadString().Trim();

                        // Remove the first homonym index character found, if any.
                        for (int i = 0; i < chIndexy.Length; i++)
                        {
                            int indexPosition = strHeslo.IndexOf(chIndexy[i]);
                            if (indexPosition < 0)
                            {
                                continue;
                            }

                            strHeslo = strHeslo.Remove(indexPosition, 1);
                            if (sHom == null)
                            {
                                // Original author's open question: does this need to be written?
                                xwHeslar.WriteAttributeString("hom", chIndexy[i].ToString());
                            }

                            break;
                        }

                        // A headword starting or ending with a hyphen is a short form. Testing
                        // the first/last character avoids the false positive for an empty
                        // headword (-1 == Length - 1) and sees a trailing hyphen even when
                        // another hyphen occurs earlier.
                        bool hyphenated = strHeslo.Length > 0
                                          && (strHeslo[0] == '-' || strHeslo[strHeslo.Length - 1] == '-');
                        if (hyphenated && sForma == null)
                        {
                            xwHeslar.WriteAttributeString("form", "short");
                        }

                        // Question mark in parentheses after the headword.
                        strHeslo = strHeslo.Replace("(?)", "");
                        strHeslo = strHeslo.TrimEnd(chSeparatory).TrimEnd();

                        if (strHeslo.Length > 0)
                        {
                            // '\u02DF' was left out because it was displayed incorrectly in IE.
                            // Open question of the original author: should the prefix character
                            // also be removed from such a headword?
                            char chPismeno = strHeslo[0];
                            if (chPismeno == '*' || chPismeno == '\u02E3')
                            {
                                xwHeslar.WriteAttributeString("pref", strHeslo.Substring(0, 1));
                            }

                            xwHeslar.WriteString(strHeslo);
                        }

                        break;
                }
            }

            // Deliberately not an 'else': ReadString above moves the reader forward, so the node
            // type has to be checked again in the same iteration.
            if (r.NodeType == XmlNodeType.EndElement)
            {
                switch (r.Name)
                {
                    case "div1":
                    case "entry":
                    case "hwo":
                    case "hw":
                        xwHeslar.WriteEndElement();
                        break;
                }
            }
        }
    }
}
/// <summary>
/// Builds the headword list (<c>heslar</c>) XML from a dictionary XML file, letting the caller
/// override the processing of individual tags.
/// </summary>
/// <param name="strVstupniSoubor">Input passed to <c>Objekty.VytvorXmlReader</c>.</param>
/// <param name="strVystupniSoubor">Output passed to <c>Objekty.VytvorXmlWriter</c>.</param>
/// <param name="gztTagyZpracovani">
/// Optional map from tag name to a handler. When a handler is registered for the current tag it
/// is invoked for the start tag and for the end tag instead of the built-in processing.
/// May be null.
/// </param>
public static void HeslarXml(string strVstupniSoubor, string strVystupniSoubor, Dictionary <string, ZpracovatTagProHeslarXml> gztTagyZpracovani)
{
    char[] chIndexy = schIndexy;
    char[] chSeparatory = schSeparatory;

    using (XmlReader r = Objekty.VytvorXmlReader(strVstupniSoubor))
    using (XmlWriter xwHeslar = Objekty.VytvorXmlWriter(strVystupniSoubor))
    {
        xwHeslar.WriteStartDocument(true);
        xwHeslar.WriteStartElement("heslar");

        string strNazevTagu = null;
        HesloInfo hiHeslo = null; // never assigned here; handlers always receive null
        PismenoInfo piPismeno = null;
        HeslovaStatInfo hsiHeslovaStat = null;

        while (r.Read())
        {
            // Captured once per iteration; the end-tag lookup below uses this value even if the
            // reader has moved on in the meantime.
            strNazevTagu = r.Name;
            ZpracovatTagProHeslarXml zpracovatTag;

            if (r.NodeType == XmlNodeType.Element)
            {
                if (gztTagyZpracovani != null && gztTagyZpracovani.TryGetValue(strNazevTagu, out zpracovatTag))
                {
                    zpracovatTag(r, xwHeslar, piPismeno, hsiHeslovaStat, hiHeslo);
                }
                else
                {
                    switch (r.Name)
                    {
                        case "dictionary":
                            string sSource = r.GetAttribute("name");
                            xwHeslar.WriteAttributeString("dictionary", sSource);
                            break;

                        case "div1":
                            piPismeno = new PismenoInfo();
                            piPismeno.Id = r.GetAttribute("id");
                            piPismeno.Text = r.GetAttribute("text");
                            VypisZacatekPismene(xwHeslar, piPismeno);
                            break;

                        case "entry":
                            hsiHeslovaStat = new HeslovaStatInfo();
                            hsiHeslovaStat.Id = r.GetAttribute("id");
                            hsiHeslovaStat.Typ = r.GetAttribute("type");
                            xwHeslar.WriteStartElement("heslovaStat");
                            Transformace.SerializeAttributes(r, xwHeslar, false);
                            break;

                        case "hw":
                            // TODO (from the original author): check whether the paragraph
                            // contains "nenáležitá podoba" (improper form) and then exclude the
                            // headword or mark it as internal - but that marker only follows
                            // the headword.
                            string sForma = r.GetAttribute("form");

                            // Attributes are added after this call, so it presumably writes only
                            // the start tag with the source attributes - TODO confirm.
                            Transformace.SerializeNode(r, xwHeslar);

                            string strHeslo = r.ReadString().Trim();

                            // Remove the first homonym index character found, if any.
                            for (int i = 0; i < chIndexy.Length; i++)
                            {
                                int indexPosition = strHeslo.IndexOf(chIndexy[i]);
                                if (indexPosition >= 0)
                                {
                                    strHeslo = strHeslo.Remove(indexPosition, 1);
                                    break;
                                }
                            }

                            // A headword starting or ending with a hyphen is a short form.
                            // Testing the first/last character avoids the false positive for an
                            // empty headword (-1 == Length - 1) and sees a trailing hyphen even
                            // when another hyphen occurs earlier.
                            bool hyphenated = strHeslo.Length > 0
                                              && (strHeslo[0] == '-' || strHeslo[strHeslo.Length - 1] == '-');
                            if (hyphenated && sForma == null)
                            {
                                xwHeslar.WriteAttributeString("form", "short");
                            }

                            // Question mark in parentheses after the headword.
                            strHeslo = strHeslo.Replace("(?)", "");
                            strHeslo = strHeslo.TrimEnd(chSeparatory).TrimEnd();

                            if (strHeslo.Length > 0)
                            {
                                // '\u02DF' was left out because it was displayed incorrectly in IE.
                                // Open question of the original author: should the prefix
                                // character also be removed from such a headword?
                                char chPismeno = strHeslo[0];
                                if (chPismeno == '*' || chPismeno == '\u02E3')
                                {
                                    xwHeslar.WriteAttributeString("pref", strHeslo.Substring(0, 1));
                                }

                                xwHeslar.WriteString(strHeslo);
                            }

                            break;
                    }
                }
            }

            // Deliberately not an 'else': the processing above can move the reader onto an end
            // tag, which then has to be handled in the same iteration.
            if (r.NodeType == XmlNodeType.EndElement)
            {
                if (gztTagyZpracovani != null && gztTagyZpracovani.TryGetValue(strNazevTagu, out zpracovatTag))
                {
                    zpracovatTag(r, xwHeslar, piPismeno, hsiHeslovaStat, hiHeslo);
                }
                else
                {
                    switch (r.Name)
                    {
                        case "div1":
                        case "entry":
                        case "hw":
                            xwHeslar.WriteEndElement();
                            break;
                    }
                }
            }
        }
    }
}
/// <summary>
/// Reads <c>XmlFile</c> sequentially and starts a new output split at every page-break element
/// that has no edition attribute; other nodes are copied to the split that is currently open.
/// Block elements without an <c>xml:id</c> receive a generated one.
/// </summary>
/// <returns>
/// The <c>SplittingResult</c> created at the start of the call. <c>IsSplitted</c> is set when the
/// end condition is met or the whole file has been read; <c>Errors</c> receives the message of a
/// caught exception or a description of a start/end tag mismatch.
/// </returns>
public SplittingResult SplitOnPageBreak()
{
    _result = new SplittingResult(XmlFile, OutputDirectory);
    // Without a configured StartingElement, splitting counts as started right away.
    bool splittingStarted = (StartingElement == null);
    ElementInfo startElement = null;
    ElementInfos elementStack = new ElementInfos();
    FileInfo xmlFileInfo = new FileInfo(XmlFile);
    // Output name pattern: the source file name without its extension plus NumberedXmlPattern.
    string newFileFormat = xmlFileInfo.Name.Substring(0, xmlFileInfo.Name.Length - xmlFileInfo.Extension.Length) + NumberedXmlPattern;
    _outputManager = new OutputManager();
    _outputManager.OutputDirectory = OutputDirectory;
    _outputManager.FileNameFormat = newFileFormat;
    // Prefix for generated ids; set default value because xml:id^="." is invalid.
    string divId = StartingElement ?? "body";
    // Running number appended to generated ids; reset at the end of every div element.
    int paragraphId = 0;
    try
    {
        using (XmlReader reader = XmlReader.Create(XmlFile))
        {
            while (reader.Read())
            {
                switch (reader.NodeType)
                {
                    case XmlNodeType.Element:
                        ElementInfo element = ElementInfo.GetElementInfo(reader);
                        // The TEI root element supplies the source document id and version.
                        if (element.Name == TeiElementName)
                        {
                            string documentXmlId = element.Attributes.GetAttributeByLocalName(String.Empty, NAttributeName).Value;
                            string documentVersionId = element.Attributes.GetAttributeByLocalName(String.Empty, ChangeAttributeName).Value;
                            _sourceDocumentInfo = new SourceDocumentInfo(documentXmlId, documentVersionId);
                        }
                        // Skip elements until ShouldSplittingStart accepts one.
                        if (!splittingStarted)
                        {
                            if (!ShouldSplittingStart(element, startElement))
                            {
                                continue;
                            }
                        }
                        // First accepted element: remember it and open the first split info.
                        if (!splittingStarted)
                        {
                            startElement = element.Clone();
                            splittingStarted = true;
                            _currentSplitInfo = new PageBreakSplitInfo(_sourceDocumentInfo);
                        }
                        if (element.Name == PbElementName || element.Name == TeiPrefix + PbElementName)
                        {
                            // Page breaks carrying the edition attribute do not start a new split
                            // and are not written at all.
                            bool editionPagebreak = element.Attributes.AttributeExists(String.Empty, EdAttrubuteName);
                            // TODO: load the first pb element onto the stack even when it belongs to a different pagination
                            if (!editionPagebreak)
                            {
                                // A split info that has no number yet takes it from this page break.
                                if (_currentSplitInfo != null && _currentSplitInfo.Number == null)
                                {
                                    _currentSplitInfo.Id = element.Attributes.GetAttributeByLocalName(XmlNamespacePrefix, IdAttributeName).Value;
                                    _currentSplitInfo.Number = element.Attributes.GetAttributeByLocalName(String.Empty, NAttributeName).Value;
                                }
                                ElementInfos tempQueue = null;
                                // Close the running split only when a chunk has already been opened.
                                if (_outputManager.CurrentChunk > 0)
                                {
                                    tempQueue = CloseCurrentSplit(elementStack);
                                }
                                StartNewSplit(elementStack, tempQueue);
                                // NOTE(review): presumably StartNewSplit replaces _currentSplitInfo,
                                // which is why number and id are assigned again - confirm.
                                _currentSplitInfo.Number = element.Attributes.GetAttributeByLocalName(String.Empty, NAttributeName).Value;
                                _currentSplitInfo.Id = element.Attributes.GetAttributeByLocalName(XmlNamespacePrefix, IdAttributeName).Value;
                                Transformace.SerializeNode(reader, _currentWriter);
                            }
                        }
                        else // any element other than a page break
                        {
                            // A div element with an id attribute becomes the prefix for generated ids.
                            if (_divElementNames.Contains(element.Name))
                            {
                                foreach (AttributeInfo attribute in element.Attributes)
                                {
                                    if (attribute.LocalName == IdAttributeName)
                                    {
                                        divId = attribute.Value;
                                    }
                                }
                            }
                            // Block elements lacking an xml:id get "<divId>.<element name><counter>".
                            if (_blockElementNames.Contains(element.Name))
                            {
                                if (!element.Attributes.Exists(a => a.Prefix == XmlNamespacePrefix && a.LocalName == IdAttributeName))
                                {
                                    string id = divId + "." + element.Name + ++paragraphId;
                                    element.Attributes.Add(new AttributeInfo(XmlNamespacePrefix, IdAttributeName, XmlNamespace, id));
                                }
                            }
                            if (_currentWriter != null)
                            {
                                if (!element.IsEmpty)
                                {
                                    elementStack.Push(element);
                                }
                                // Original author's note: if the element is empty, reading it moves
                                // the XmlReader on to the next node (so it is not pushed on the stack).
                                WriteElementInfo(element, _currentWriter);
                            }
                        }
                        break;
                    case XmlNodeType.EndElement:
                        ElementInfo endElementInfo = ElementInfo.GetElementInfo(reader);
                        // NOTE(review): startElement is assigned only while splitting has not started,
                        // so with StartingElement == null it stays null and every end tag is skipped
                        // here - confirm this is intended.
                        if (!splittingStarted || startElement == null)
                        {
                            continue;
                        }
                        // NOTE(review): after a div ends, divId is null, so ids generated afterwards
                        // start with "." - the case the default value above is meant to avoid.
                        if (_divElementNames.Contains(reader.Name))
                        {
                            divId = null;
                            paragraphId = 0;
                        }
                        // End of the element that started the splitting: finish and return early.
                        if (ShouldSplittingEnd(endElementInfo, startElement))
                        {
                            CloseCurrentSplit(elementStack);
                            _result.IsSplitted = true;
                            return(_result);
                        }
                        if (elementStack.Count > 0)
                        {
                            ElementInfo elementPeak = elementStack.Peek();
                            if (elementPeak.Name != endElementInfo.Name)
                            {
                                // Mismatch between the open element and the end tag: record it and
                                // go on without writing the end tag or popping the stack.
                                _result.Errors = String.Format(ErrorInfoFormat, elementPeak.Name, endElementInfo.Name);
                            }
                            else
                            {
                                Transformace.SerializeNode(reader, _currentWriter);
                                elementStack.Pop();
                            }
                        }
                        break;
                    default:
                        // Text, whitespace, comments etc. are copied once a split is open.
                        if (splittingStarted && _currentWriter != null)
                        {
                            Transformace.SerializeNode(reader, _currentWriter);
                        }
                        break;
                }
            }
        }
        _result.IsSplitted = true;
    }
    catch (Exception exception)
    {
        // Failures are reported through the result instead of being rethrown.
        _result.Errors = exception.Message;
    }
    finally
    {
        if (_currentWriter != null)
        {
            _currentWriter.Close();
        }
    }
    return(_result);
}
/// <summary>
/// Reads <c>XmlFile</c> and writes every occurrence of <c>StartingElement</c>, with its content,
/// into a separate output file obtained from the output manager. Each file wraps the copied
/// element in a fragment root element in the TEI namespace.
/// </summary>
public void SplitOnStartingElement()
{
    bool splittingStarted = (StartingElement == null);
    bool writerOpen = false; // true while a writer opened by this call has not been closed yet
    ElementInfos elementStack = new ElementInfos();
    FileInfo xmlFileInfo = new FileInfo(XmlFile);
    // Output name pattern: the source file name without its extension plus NumberedXmlPattern.
    string newFileFormat = xmlFileInfo.Name.Substring(0, xmlFileInfo.Name.Length - xmlFileInfo.Extension.Length) + NumberedXmlPattern;
    _outputManager = new OutputManager();
    _outputManager.OutputDirectory = OutputDirectory;
    _outputManager.FileNameFormat = newFileFormat;

    try
    {
        using (XmlReader reader = XmlReader.Create(XmlFile))
        {
            // Position on the root element; the loop's first Read() then steps past it.
            reader.MoveToContent();
            while (reader.Read())
            {
                if (reader.NodeType == XmlNodeType.Element)
                {
                    // Captured before GetElementInfo, which may move the reader.
                    bool isEmpty = reader.IsEmptyElement;

                    if (!splittingStarted && reader.Name == StartingElement)
                    {
                        splittingStarted = true;
                        _currentWriter = _outputManager.GetXmlWriter();
                        writerOpen = true;
                        _currentWriter.WriteStartDocument();
                        _currentWriter.WriteStartElement(FragmentElementlName, TeiNamespace);
                    }

                    if (!splittingStarted)
                    {
                        continue;
                    }

                    ElementInfo element = ElementInfo.GetElementInfo(reader);
                    elementStack.Push(element);
                    WriteElementInfo(element, _currentWriter);
                    if (isEmpty)
                    {
                        // An empty element has no end tag that would pop it later.
                        elementStack.Pop();
                    }
                }
                else if (reader.NodeType == XmlNodeType.EndElement)
                {
                    if (!splittingStarted)
                    {
                        continue;
                    }

                    string name = reader.Name;
                    ElementInfo elementPeak = elementStack.Peek();
                    if (elementPeak.Name != name)
                    {
                        Console.WriteLine("Chyba element × reader ({0} × {1})", elementPeak.Name, name);
                    }
                    else
                    {
                        Transformace.SerializeNode(reader, _currentWriter);
                        elementStack.Pop();
                    }

                    if (name == StartingElement)
                    {
                        // NOTE(review): the result of CloneReverse is discarded; the call is kept
                        // only because its side effects cannot be seen from here.
                        elementStack.CloneReverse();

                        // Close whatever is still open, then the fragment root and the file.
                        while (elementStack.Count > 0)
                        {
                            _currentWriter.WriteEndElement();
                            elementStack.Pop();
                        }

                        _currentWriter.WriteFullEndElement();
                        _currentWriter.Close();
                        writerOpen = false;
                        splittingStarted = false;
                    }
                }
                else if (splittingStarted)
                {
                    Transformace.SerializeNode(reader, _currentWriter);
                }
            }
        }
    }
    finally
    {
        // Do not leave a half-written fragment file open when reading or writing fails.
        if (writerOpen && _currentWriter != null)
        {
            _currentWriter.Close();
        }
    }
}