/// <summary>
/// Builds a two-element author document, prints its inner text before and after
/// a line-break whitespace node is inserted behind the root's first child, then
/// saves the document to <c>filename</c> and hands that file to <c>ReadFile</c>.
/// </summary>
public Sample()
{
    var document = new XmlDocument();
    document.LoadXml("<author>" +
                     "<first-name>Eva</first-name>" +
                     "<last-name>Corets</last-name>" +
                     "</author>");

    Console.WriteLine("InnerText before...");
    Console.WriteLine(document.DocumentElement.InnerText);

    // Insert a CR/LF whitespace node directly after the first child of the root.
    currNode = document.DocumentElement;
    XmlWhitespace lineBreak = document.CreateWhitespace("\r\n");
    currNode.InsertAfter(lineBreak, currNode.FirstChild);

    Console.WriteLine();
    Console.WriteLine("InnerText after...");
    Console.WriteLine(document.DocumentElement.InnerText);

    // Persist the modified document and echo the saved file.
    document.Save(filename);
    Console.WriteLine();
    Console.WriteLine("Reading file...");
    ReadFile(filename);
}
// Verify the property values reported by a newly constructed whitespace node.
// "value" is the text the node was created with. When "failXml" is true,
// reading OuterXml is expected to throw ArgumentException; otherwise OuterXml
// must equal "value".
private void CheckProperties(String msg, XmlWhitespace white, String value, bool failXml)
{
    const string expectedName = "#whitespace";

    AssertEquals(msg + " [1]", expectedName, white.LocalName);
    AssertEquals(msg + " [2]", expectedName, white.Name);
    AssertEquals(msg + " [3]", String.Empty, white.Prefix);
    AssertEquals(msg + " [4]", String.Empty, white.NamespaceURI);
    AssertEquals(msg + " [5]", XmlNodeType.Whitespace, white.NodeType);
    AssertEquals(msg + " [6]", value, white.Data);
    AssertEquals(msg + " [7]", value, white.Value);
    AssertEquals(msg + " [8]", value, white.InnerText);
    AssertEquals(msg + " [9]", value.Length, white.Length);
    AssertEquals(msg + " [10]", String.Empty, white.InnerXml);

    if (!failXml)
    {
        AssertEquals(msg + " [12]", value, white.OuterXml);
        return;
    }

    try
    {
        // Only the getter call matters; the result itself is not inspected.
        String ignoredOuterXml = white.OuterXml;
        Fail(msg + " [11]");
    }
    catch (ArgumentException)
    {
        // Success
    }
}
/// <summary>
/// Recursively removes every comment node beneath <paramref name="element"/>.
/// When a comment sits between two whitespace nodes that both start with
/// <see cref="Environment.NewLine"/>, the whitespace node following the comment
/// is removed as well.
/// </summary>
/// <param name="element">Element whose direct and nested comments are removed.</param>
private void RemoveComments(XmlElement element)
{
    // Collect first so the child list is not modified while it is being enumerated.
    List<XmlComment> comments = new List<XmlComment>();
    foreach (XmlNode child in element.ChildNodes)
    {
        XmlComment found = child as XmlComment;
        if (found != null)
        {
            comments.Add(found);
        }
    }

    foreach (XmlComment comment in comments)
    {
        XmlWhitespace prev = comment.PreviousSibling as XmlWhitespace;
        XmlWhitespace next = comment.NextSibling as XmlWhitespace;

        // Every operand uses short-circuit '&&'. The original mixed in a single
        // '&', which evaluated prev.Value.StartsWith(...) even when prev.Value
        // was null and so defeated the null guard in front of it.
        bool prevStartsLine = prev != null
                              && prev.Value != null
                              && prev.Value.StartsWith(Environment.NewLine);
        bool nextStartsLine = next != null
                              && next.Value != null
                              && next.Value.StartsWith(Environment.NewLine);

        if (prevStartsLine && nextStartsLine)
        {
            element.RemoveChild(next);
        }

        element.RemoveChild(comment);
    }

    // Descend into child elements that still have content.
    foreach (XmlNode child in element.ChildNodes)
    {
        if (child is XmlElement && child.HasChildNodes)
        {
            RemoveComments((XmlElement)child);
        }
    }
}
/// <summary>
/// Adds formatting whitespace to this document: a line break after each
/// top-level XML declaration, indentation for every element child of the
/// schema element (via <c>indentChildren</c>), and a closing line break after
/// the last of those children. That closing break is doubled when
/// <c>config.emptyLineBeforeGlobal()</c> is true.
/// </summary>
public void indent()
{
    foreach (XmlNode topLevel in this.ChildNodes)
    {
        if (topLevel.NodeType != XmlNodeType.XmlDeclaration)
        {
            continue;
        }

        XmlWhitespace afterDeclaration = this.CreateWhitespace("\r\n");
        this.InsertAfter(afterDeclaration, topLevel);
    }

    XmlElement schema = getSchemaElement();
    XmlElement lastElement = null;
    foreach (XmlNode schemaChild in schema.ChildNodes)
    {
        if (schemaChild.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        indentChildren(schemaChild, config.cntIndentSpaces());
        lastElement = (XmlElement)schemaChild;
    }

    // Nothing to close off when the schema element has no element children.
    if (lastElement == null)
    {
        return;
    }

    string closing = config.emptyLineBeforeGlobal() ? "\r\n" + "\r\n" : "\r\n";
    XmlWhitespace closingWhitespace = this.CreateWhitespace(closing);
    schema.InsertAfter(closingWhitespace, lastElement);
}
/// <summary>
/// Inserts a line break plus <paramref name="cntSpaces"/> spaces in front of
/// <paramref name="elem"/>, recurses into its element children with one more
/// indent step, and places a matching line break plus indent after the last
/// element child.
/// </summary>
/// <param name="elem">Node to indent; it must have a parent node.</param>
/// <param name="cntSpaces">Number of spaces used for this nesting level.</param>
public void indentChildren(XmlNode elem, int cntSpaces)
{
    string indentation = string.Empty.PadRight(cntSpaces);
    XmlNode parent = elem.ParentNode;

    // At the first indent level an extra empty line is emitted when configured.
    if (cntSpaces == config.cntIndentSpaces() && config.emptyLineBeforeGlobal())
    {
        parent.InsertBefore(this.CreateWhitespace("\r\n"), elem);
    }

    parent.InsertBefore(this.CreateWhitespace("\r\n" + indentation), elem);

    XmlElement lastElementChild = null;
    foreach (XmlNode child in elem.ChildNodes)
    {
        if (child.NodeType == XmlNodeType.Element)
        {
            indentChildren(child, cntSpaces + config.cntIndentSpaces());
            lastElementChild = (XmlElement)child;
        }
    }

    if (elem.HasChildNodes && lastElementChild != null)
    {
        // Line the closing tag up with the opening tag.
        elem.InsertAfter(this.CreateWhitespace("\r\n" + indentation), lastElementChild);
    }
}
// Assert that a freshly created whitespace node reports the expected
// properties. Numbered suffixes ([1]..[12]) are appended to "msg" so a failure
// identifies the exact check. With "failXml" set, the OuterXml getter must
// throw ArgumentException; without it, OuterXml must equal "value".
private void CheckProperties(String msg, XmlWhitespace white, String value, bool failXml)
{
    const string whitespaceName = "#whitespace";

    // Identity of the node.
    AssertEquals(msg + " [1]", whitespaceName, white.LocalName);
    AssertEquals(msg + " [2]", whitespaceName, white.Name);
    AssertEquals(msg + " [3]", String.Empty, white.Prefix);
    AssertEquals(msg + " [4]", String.Empty, white.NamespaceURI);
    AssertEquals(msg + " [5]", XmlNodeType.Whitespace, white.NodeType);

    // Content of the node.
    AssertEquals(msg + " [6]", value, white.Data);
    AssertEquals(msg + " [7]", value, white.Value);
    AssertEquals(msg + " [8]", value, white.InnerText);
    AssertEquals(msg + " [9]", value.Length, white.Length);
    AssertEquals(msg + " [10]", String.Empty, white.InnerXml);

    if (failXml)
    {
        try
        {
            // The getter is invoked for its exception only.
            String discarded = white.OuterXml;
            Fail(msg + " [11]");
        }
        catch (ArgumentException)
        {
            // Success
        }
        return;
    }

    AssertEquals(msg + " [12]", value, white.OuterXml);
}
/// <summary>
/// Dispatches <paramref name="node"/> to the validation routine matching its
/// runtime type: elements, text, CDATA sections and whitespace are handled;
/// every other node type is ignored.
/// </summary>
void ValidateNode(XmlNode node)
{
    if (node is XmlElement)
    {
        ValidateElement((XmlElement)node);
    }
    else if (node is XmlText)
    {
        ValidateText((XmlText)node);
    }
    else if (node is XmlCDataSection)
    {
        // CDATA content goes through the same routine as plain text.
        ValidateText((XmlCDataSection)node);
    }
    else if (node is XmlWhitespace)
    {
        ValidateWhitespace((XmlWhitespace)node);
    }
}
/// <summary>
/// Appends a single space in the normal style when <c>myLevel</c> is greater
/// than zero, then defers to the base visitor.
/// </summary>
public override void VisitWhitespace(XmlWhitespace whitespace)
{
    bool insideNestedLevel = this.myLevel > 0;
    if (insideNestedLevel)
    {
        this.Append(" ", XmlDocRichTextPresenterEx.ourNormalStyle);
    }

    base.VisitWhitespace(whitespace);
}
/// <summary>
/// Writes the &lt;?Mapping?&gt; processing instructions (through
/// <c>_CreateMappingPIs</c>) and then appends a line break to the document.
/// </summary>
/// <param name="xmlDocument">Document that receives the generated nodes.</param>
protected override void CreateXmlCore(XmlDocument xmlDocument)
{
    _CreateMappingPIs(xmlDocument);

    // Terminate the mapping section with a CR/LF whitespace node.
    xmlDocument.AppendChild(xmlDocument.CreateWhitespace("\r\n"));
}
/// <summary>
/// Test fixture setup: loads a small document, creates a detached "foo"
/// element holding a CR/LF whitespace node (stored in <c>whitespace</c>), and
/// prepares a second, whitespace-preserving document in <c>doc2</c>.
/// </summary>
public void GetReady()
{
    document = new XmlDocument();
    document.LoadXml("<root><foo></foo></root>");

    // The host element is never attached to the document tree.
    XmlElement host = document.CreateElement("foo");
    whitespace = document.CreateWhitespace("\r\n");
    host.AppendChild(whitespace);

    doc2 = new XmlDocument();
    doc2.PreserveWhitespace = true;
}
/// <summary>
/// Copies the nodes held in <paramref name="storage"/> onto
/// <paramref name="parentNode"/>, creating the copies with
/// <paramref name="dataFile"/>. Attributes are copied by name and value.
/// Comments are copied unless they start with one of the two
/// "... by Artemis Mod Loader" stamps. Whitespace nodes are dropped. Any
/// other node becomes a new element with the same name and inner XML.
/// </summary>
static void AddStorageElements(XmlNode parentNode, IXmlStorage storage, XmlDocument dataFile)
{
    if (storage == null || storage.Storage == null)
    {
        return;
    }

    foreach (XmlNode nd in storage.Storage)
    {
        XmlAttribute sourceAttribute = nd as XmlAttribute;
        if (sourceAttribute != null)
        {
            XmlAttribute attributeCopy = dataFile.CreateAttribute(nd.Name);
            attributeCopy.Value = sourceAttribute.Value;
            parentNode.Attributes.Append(attributeCopy);
            continue;
        }

        XmlComment sourceComment = nd as XmlComment;
        if (sourceComment != null)
        {
            // Stamps written by the mod loader itself are not carried over.
            bool isLoaderStamp =
                sourceComment.Value.StartsWith("Created by Artemis Mod Loader", StringComparison.OrdinalIgnoreCase)
                || sourceComment.Value.StartsWith("Modified by Artemis Mod Loader", StringComparison.OrdinalIgnoreCase);
            if (!isLoaderStamp)
            {
                parentNode.AppendChild(dataFile.CreateComment(sourceComment.Data));
            }
            continue;
        }

        if (nd is XmlWhitespace)
        {
            // Whitespace is intentionally not copied.
            continue;
        }

        XmlElement elementCopy = dataFile.CreateElement(nd.Name);
        elementCopy.InnerXml = nd.InnerXml;
        parentNode.AppendChild(elementCopy);
    }
}
//</Snippet5>

//************************************************************************************
//
//  Add an element to the XML document at a specific location.
//  "position" describes where the new node goes: at the top or bottom of the
//  root element, or directly above or below "selectedBook". Any other value
//  appends the element to the root. Per the original note, the string comes
//  from a series of radio buttons in a UI.
//  The document that is modified is bookElement.OwnerDocument; selectedBook is
//  presumably a child of that document's root element (confirm against callers).
//  When "validateNode" is true the document is passed to validateXML together
//  with "generateSchema".
//
//************************************************************************************
//</Snipppet9>
public void InsertBookElement(XmlElement bookElement, string position, XmlNode selectedBook, bool validateNode, bool generateSchema)
{
    // The original also serialised bookElement.OuterXml into a local that was
    // never read; that dead work has been removed.
    XmlDocument doc = bookElement.OwnerDocument;
    XmlElement root = doc.DocumentElement;
    XmlSignificantWhitespace sigWhiteSpace;

    switch (position)
    {
        case Constants.positionTop:
            // Newline and space go first, then the book right behind them
            // (FirstChild is the whitespace node by the time of the second call).
            sigWhiteSpace = doc.CreateSignificantWhitespace("\n ");
            root.InsertBefore(sigWhiteSpace, root.FirstChild);
            root.InsertAfter(bookElement, root.FirstChild);
            break;

        case Constants.positionBottom:
            // A single space in front of the appended book, a newline behind it.
            XmlWhitespace whitespace = doc.CreateWhitespace(" ");
            XmlNode appendedNode = root.AppendChild(bookElement);
            root.InsertBefore(whitespace, appendedNode);
            sigWhiteSpace = doc.CreateSignificantWhitespace("\n");
            root.InsertAfter(sigWhiteSpace, appendedNode);
            break;

        case Constants.positionAbove:
            // Book goes before the selected book, followed by newline and space.
            XmlNode currNode = root.InsertBefore(bookElement, selectedBook);
            sigWhiteSpace = doc.CreateSignificantWhitespace("\n ");
            root.InsertAfter(sigWhiteSpace, currNode);
            break;

        case Constants.positionBelow:
            // Newline and space go after the selected book, then the new book.
            sigWhiteSpace = doc.CreateSignificantWhitespace("\n ");
            XmlNode whiteSpaceNode = root.InsertAfter(sigWhiteSpace, selectedBook);
            root.InsertAfter(bookElement, whiteSpaceNode);
            break;

        default:
            root.AppendChild(bookElement);
            break;
    }

    if (validateNode)
    {
        validateXML(generateSchema, doc);
    }
}
// Exercise the Value setter of a whitespace node: empty, whitespace-only and
// null values must be accepted, while non-whitespace text must be rejected
// with ArgumentException.
public void TestXmlWhitespaceSetValue()
{
    XmlWhitespace node = doc.CreateWhitespace(null);

    // Accepted values.
    node.Value = String.Empty;
    node.Value = " \f\t\r\n\v";
    node.Value = null;

    // Rejected value.
    try
    {
        node.Value = "abc";
        Fail("SetValue (1)");
    }
    catch (ArgumentException)
    {
        // Success
    }
}
/// <summary>
/// Prints the inner text of a small author document before and after a CR/LF
/// whitespace node is inserted behind the root's first child.
/// </summary>
public static void Main()
{
    var document = new XmlDocument();
    document.LoadXml("<author>" +
                     "<first-name>Eva</first-name>" +
                     "<last-name>Corets</last-name>" +
                     "</author>");

    Console.WriteLine("InnerText before...");
    Console.WriteLine(document.DocumentElement.InnerText);

    // Insert the whitespace node between the two child elements.
    XmlNode author = document.DocumentElement;
    XmlWhitespace lineBreak = document.CreateWhitespace("\r\n");
    author.InsertAfter(lineBreak, author.FirstChild);

    Console.WriteLine();
    Console.WriteLine("InnerText after...");
    Console.WriteLine(document.DocumentElement.InnerText);
}
/// <summary>
/// Returns the first child element found for <paramref name="propName"/>
/// (looked up through <c>GetNodes</c>), creating it when none exists, and sets
/// its inner text to <paramref name="propValue"/>.
/// </summary>
/// <param name="xmlNode">Element that owns (or will own) the child.</param>
/// <param name="propName">Name of the child element to find or create.</param>
/// <param name="propValue">Text assigned to the child element.</param>
/// <returns>The existing or newly created child element.</returns>
/// <remarks>
/// A new child is preceded by the whitespace node from
/// <c>CreateXmlWhitespace</c>. When the parent already ends in a whitespace
/// node, both are inserted in front of it; otherwise they are appended.
/// </remarks>
public static XmlElement EnsureXmlElement(this XmlElement xmlNode, string propName, string propValue)
{
    var result = (XmlElement)xmlNode.GetNodes(propName).FirstOrDefault();
    if (result == null)
    {
        result = xmlNode.OwnerDocument.CreateElement(propName);
        XmlWhitespace xmlSpace = CreateXmlWhitespace(xmlNode);

        // LastChild is null for an element without children; the original
        // dereferenced it unconditionally and threw NullReferenceException.
        XmlNode lastChild = xmlNode.LastChild;
        if (lastChild != null && lastChild.NodeType == XmlNodeType.Whitespace)
        {
            xmlNode.InsertBefore(xmlSpace, lastChild);
            xmlNode.InsertBefore(result, lastChild);
        }
        else
        {
            xmlNode.AppendChild(xmlSpace);
            xmlNode.AppendChild(result);
        }
    }

    result.InnerText = propValue;
    return result;
}
/// <summary>
/// Rewrites the first &lt;Servers&gt; element of config.xml (located in the
/// application base directory) from the entries in <c>serversL</c> and saves
/// the file. Nothing is changed or saved when the file has no Servers element.
/// </summary>
/// <remarks>
/// Each entry becomes a &lt;Server&gt; element with name, ip, mask, gateway and
/// dns children. Values are assigned as element text, so characters such as
/// '&amp;' or '&lt;' are escaped on save. The original concatenated the values
/// into an XML string and parsed it, which failed or injected markup for such
/// values.
/// </remarks>
public void saveServersXml()
{
    string configPath = AppContext.BaseDirectory + "\\config.xml";

    XmlDocument doc = new XmlDocument();
    doc.PreserveWhitespace = true;
    doc.Load(configPath);

    XmlNodeList serverLists = doc.GetElementsByTagName("Servers");
    if (serverLists.Count == 0)
    {
        return;
    }

    // Only the first Servers element is rewritten; RemoveAll clears its
    // children and attributes.
    XmlNode servers = serverLists.Item(0);
    servers.RemoveAll();

    string[] fieldNames = { "name", "ip", "mask", "gateway", "dns" };
    foreach (var item in serversL)
    {
        // "" + x keeps the original concatenation semantics (null becomes "").
        string[] fieldValues =
        {
            "" + item.name,
            "" + item.ip,
            "" + item.mask,
            "" + item.gateway,
            "" + item.dns
        };

        XmlElement server = doc.CreateElement("Server");
        for (int i = 0; i < fieldNames.Length; i++)
        {
            XmlElement field = doc.CreateElement(fieldNames[i]);
            field.InnerText = fieldValues[i];
            server.AppendChild(field);
        }

        // Each server goes on its own tab-indented line.
        servers.AppendChild(doc.CreateWhitespace("\r\n\t"));
        servers.AppendChild(server);
    }

    // Put the closing </Servers> tag on its own line.
    servers.AppendChild(doc.CreateWhitespace("\r\n"));
    doc.Save(configPath);
}
/// <summary>
/// Records a class separator for <see cref="XmlWhitespace"/> in <c>Data</c>.
/// The <paramref name="whitespace"/> instance itself is not inspected.
/// </summary>
private void Stream(XmlWhitespace whitespace)
{
    var separator = new ClassSeparatorData(typeof(XmlWhitespace));
    Data.Add(separator);
}
/// <summary>
/// Streams the Wikipedia XML dump at <c>s_wikipediaXmlFile</c> and stores one
/// <c>Document</c> per title text and one <c>DocumentText</c> per page text in
/// the session's databases, printing progress to the console.
/// </summary>
/// <remarks>
/// When the IndexRoot database can already be opened, summary information is
/// printed, the update is aborted and nothing is imported.
/// </remarks>
static void importEntireWikipedia()
{
    const ushort btreeNodeSize = 10000;
    Console.WriteLine(DateTime.Now.ToString() + ", start importing Wikipedia text");
    int docCount = 0;

    // Original note: page and object caching are turned off for this session.
    using (var session = new SessionNoServer(s_systemDir, 5000, false, false, CacheEnum.No))
    {
        Console.WriteLine($"Running with databases in directory: {session.SystemDirectory}");

        var documentPlacement = new Placement(Document.PlaceInDatabase, 1003, 1, 500, 1000, false, false, 1000, false);
        var contentPlacement = new Placement(Document.PlaceInDatabase, 1, 1, 500, UInt16.MaxValue, false, false, 1, false);

        // Node wrappers are constructed while reading but never read back here.
        XmlComment commentNode;
        XmlElement elementNode;
        XmlEntity entityNode;
        XmlText textNode;
        XmlWhitespace whitespaceNode;

        session.BeginUpdate();
        File.Copy(s_licenseDbFile, System.IO.Path.Combine(session.SystemDirectory, "4.odb"), true);

        // Register all database schema classes used by the application in
        // advance to avoid lock conflicts later in parallel indexing.
        session.RegisterClass(typeof(Repository));
        session.RegisterClass(typeof(IndexRoot));
        session.RegisterClass(typeof(Document));
        session.RegisterClass(typeof(Lexicon));
        session.RegisterClass(typeof(DocumentText));
        session.RegisterClass(typeof(Word));
        session.RegisterClass(typeof(WordGlobal));
        session.RegisterClass(typeof(WordHit));
        session.RegisterClass(typeof(BTreeSet<Document>));
        session.RegisterClass(typeof(OidShort));
        session.RegisterClass(typeof(BTreeMap<Word, WordHit>));
        session.RegisterClass(typeof(HashCodeComparer<Word>));
        session.RegisterClass(typeof(BTreeSetOidShort<Word>));
        session.RegisterClass(typeof(BTreeMapOidShort<Word, WordHit>));

        Database existingIndex = session.OpenDatabase(IndexRoot.PlaceInDatabase, false, false);
        if (existingIndex != null)
        {
            outputSomeInfo(session);
            session.Abort();
            return;
        }

        session.NewDatabase(IndexRoot.PlaceInDatabase, 0, "IndexRoot");
        session.NewDatabase(Lexicon.PlaceInDatabase, 0, "Lexicon");
        session.NewDatabase(Repository.PlaceInDatabase, 0, "Repository");

        // Pre-allocate Document databases 40 through 186, each created with size argument 512.
        for (UInt32 dbNumber = 40; dbNumber <= 186; dbNumber++)
        {
            session.NewDatabase(dbNumber, 512, "Document");
        }

        XmlDocument xmlDocument = new XmlDocument("enwiki-latest-pages-articles.xml");
        IndexRoot indexRoot = new IndexRoot(btreeNodeSize, session);
        indexRoot.Persist(session, indexRoot, true);

        Document doc = null;
        bool titleElement = false;
        bool pageText = false;
        UInt32 currentDocumentDatabaseNum = documentPlacement.StartDatabaseNumber;

        // For a .gz input, a GZipStream would be wrapped around the file stream here.
        using (var fs = new FileStream(s_wikipediaXmlFile, FileMode.Open))
        using (var textReader = new System.Xml.XmlTextReader(fs))
        {
            while (textReader.Read())
            {
                switch (textReader.NodeType)
                {
                    case System.Xml.XmlNodeType.Comment:
                        commentNode = new XmlComment(textReader.Value, xmlDocument);
                        break;

                    case System.Xml.XmlNodeType.Element:
                        elementNode = new XmlElement(textReader.Prefix, textReader.LocalName, textReader.NamespaceURI, xmlDocument);
                        if (textReader.LocalName == "title")
                        {
                            titleElement = true;
                        }
                        else if (textReader.LocalName == "text")
                        {
                            pageText = true;
                        }
                        break;

                    case System.Xml.XmlNodeType.EndElement:
                        // The flags are only cleared once a document exists.
                        if (doc != null)
                        {
                            if (textReader.LocalName == "title")
                            {
                                titleElement = false;
                            }
                            else if (textReader.LocalName == "text")
                            {
                                pageText = false;
                            }
                        }
                        break;

                    case System.Xml.XmlNodeType.Entity:
                        entityNode = new XmlEntity(textReader.LocalName, xmlDocument);
                        break;

                    case System.Xml.XmlNodeType.Text:
                        textNode = new XmlText(textReader.Value, xmlDocument);
                        if (titleElement)
                        {
                            doc = new Document(textReader.Value, indexRoot, session);
                            doc.Persist(documentPlacement, session, true);
                            if (doc.DatabaseNumber != currentDocumentDatabaseNum)
                            {
                                // The document landed in a different database: flush the one just left.
                                session.FlushUpdates(session.OpenDatabase(currentDocumentDatabaseNum));
                                Console.WriteLine($"Database: {currentDocumentDatabaseNum} is completed, done importing article {docCount} number of lines: {textReader.LineNumber}");
                                currentDocumentDatabaseNum = doc.DatabaseNumber;
                            }
                        }
                        else if (doc != null && pageText)
                        {
#if DEBUGx
                            Console.WriteLine(doc.Name + " line: " + textReader.LineNumber);
#endif
                            DocumentText content = new DocumentText(textReader.Value, doc);
                            if (doc.DatabaseNumber != contentPlacement.TryDatabaseNumber)
                            {
                                // Re-target the content placement at the document's database.
                                contentPlacement = new Placement(doc.DatabaseNumber, (ushort)contentPlacement.StartPageNumber, 1, contentPlacement.MaxObjectsPerPage, contentPlacement.MaxPagesPerDatabase, false, false, 1, false);
                            }
                            content.Persist(contentPlacement, session, false);
                            Debug.Assert(content.DatabaseNumber == doc.DatabaseNumber);
                            doc.Content = content;
                            indexRoot.repository.documentSet.AddFast(doc);
                            if (++docCount % 1000000 == 0)
                            {
                                Console.WriteLine($"Done importing article {docCount} number of lines: {textReader.LineNumber}");
                            }
                        }
                        break;

                    case System.Xml.XmlNodeType.Whitespace:
                        whitespaceNode = new XmlWhitespace(textReader.Value, xmlDocument);
                        break;

                    default:
                        // All remaining node types are skipped.
                        break;
                }
            }

            Console.WriteLine($"Finished importing article {docCount} number of lines: {textReader.LineNumber}");
        }

        session.Commit();
    }

    Console.WriteLine(DateTime.Now.ToString() + ", done importing Wikipedia text");
}
/// <summary>
/// Adds a class separator for <see cref="XmlWhitespace"/> to <c>MDataObjs</c>.
/// There is no data to display at this level, so <paramref name="wSpace"/>
/// is not inspected.
/// </summary>
private void Stream(XmlWhitespace wSpace)
{
    var separator = new ClassSeparator(typeof(XmlWhitespace));
    MDataObjs.Add(separator);
}
/// <summary>
/// Click handler that copies the current UI values into <c>confList</c>,
/// rewrites the first appSettings element of config.xml (in the application
/// base directory) with one "add" element per entry, saves the file, logs the
/// result and updates <c>timer3.Interval</c> from the "Cleanup" setting.
/// </summary>
private void button1_Click_1(object sender, EventArgs e)
{
    // Refresh every known key that is present in confList; unknown keys are left alone.
    for (int index = 0; index < confList.Count; index++)
    {
        string key = confList.ElementAt(index).Key;
        switch (key)
        {
            case "Server": confList["Server"] = textBox1.Text; break;
            case "Username": confList["Username"] = textBox2.Text; break;
            case "Password": confList["Password"] = textBox3.Text; break;
            case "ClientF": confList["ClientF"] = textBox4.Text; break;
            case "Service": confList["Service"] = checkBox2.Checked ? "1" : "0"; break;
            case "IntH": confList["IntH"] = textBox5.Text; break;
            case "Fpath": confList["Fpath"] = textBox9.Text; break;
            case "IntM": confList["IntM"] = textBox6.Text; break;
            case "IntS": confList["IntS"] = textBox7.Text; break;
            case "Cleanup": confList["Cleanup"] = textBox10.Text; break;
            case "Keep": confList["Keep"] = textBox11.Text; break;
            case "ClName": confList["ClName"] = textBox13.Text; break;
            case "Level": confList["Level"] = radioButton3.Checked ? "0" : "1"; break;
            default: break;
        }
    }

    string configPath = AppContext.BaseDirectory + "\\config.xml";
    XmlDocument doc = new XmlDocument();
    doc.PreserveWhitespace = true;
    doc.Load(configPath);

    // First appSettings element, or null when the file has none.
    XmlNode appSettings = doc.GetElementsByTagName("appSettings").Item(0);
    if (appSettings != null)
    {
        appSettings.RemoveAll();
    }

    foreach (var setting in confList)
    {
        XmlNode addNode = doc.CreateNode(XmlNodeType.Element, "add", null);

        XmlAttribute keyAttribute = doc.CreateAttribute("key");
        keyAttribute.Value = setting.Key;
        XmlAttribute valueAttribute = doc.CreateAttribute("value");
        valueAttribute.Value = setting.Value;

        addNode.Attributes.SetNamedItem(keyAttribute);
        addNode.Attributes.SetNamedItem(valueAttribute);

        // Each entry goes on its own tab-indented line.
        XmlWhitespace indent = doc.CreateWhitespace("\r\n\t");
        appSettings.AppendChild(indent);
        appSettings.AppendChild(addNode);
    }

    // Put the closing tag on its own line.
    XmlWhitespace closingBreak = doc.CreateWhitespace("\r\n");
    appSettings.AppendChild(closingBreak);
    doc.Save(configPath);

    logs = "All params were saved...";
    textBox8.Text += logs + Environment.NewLine;
    saveLog(logs);
    logs = "";
    button4.Enabled = true;

    // The "Cleanup" value is multiplied by 60000 to get the timer interval.
    timer3.Interval = Convert.ToInt32(confList["Cleanup"]) * 60000;
}
/// <summary>
/// Attempts to create a whitespace node from text that is not whitespace and
/// stores the result in <c>broken</c>.
/// </summary>
public void XmlWhitespaceBadConstructor()
{
    const string nonWhitespaceText = "black";
    broken = document.CreateWhitespace(nonWhitespaceText);
}
/// <summary>
/// Checks that a whitespace node has an empty InnerXml and that its OuterXml
/// equals the whitespace text it was created with.
/// </summary>
public void InnerAndOuterXml()
{
    const string mixedWhitespace = "\r\n\t ";

    whitespace = doc2.CreateWhitespace(mixedWhitespace);

    Assert.AreEqual(String.Empty, whitespace.InnerXml);
    Assert.AreEqual(mixedWhitespace, whitespace.OuterXml);
}
/// <summary>
/// Streams the Wikipedia XML dump at <c>s_wikipediaXmlFile</c>, persisting one
/// <c>Document</c> per title text and one <c>DocumentText</c> per page text
/// through the session, and prints progress to the console.
/// </summary>
/// <remarks>
/// When the IndexRoot database can already be opened, summary information is
/// printed, the update is aborted and nothing is imported.
/// </remarks>
static void ImportEntireWikipedia()
{
    const ushort btreeNodeSize = 10000;
    Console.WriteLine(DateTime.Now.ToString() + ", start importing Wikipedia text");
    int docCount = 0;

    // Original note: page and object caching are turned off for this session.
    using (var session = new SessionNoServer(s_systemDir, 5000, false, false, CacheEnum.No))
    {
        Console.WriteLine("Running with databases in directory: " + session.SystemDirectory);

        // Node wrappers are constructed while reading but never read back here.
        XmlComment commentNode;
        XmlElement elementNode;
        XmlEntity entityNode;
        XmlText textNode;
        XmlWhitespace whitespaceNode;

        session.BeginUpdate();

        Database existingIndex = session.OpenDatabase(IndexRoot.PlaceInDatabase, false, false);
        if (existingIndex != null)
        {
            outputSomeInfo(session);
            session.Abort();
            return;
        }

        XmlDocument xmlDocument = new XmlDocument("enwiki-latest-pages-articles.xml");
        IndexRoot indexRoot = new IndexRoot(btreeNodeSize, session);
        indexRoot.Persist(session, indexRoot, true);

        // 0 means "no document database seen yet".
        UInt32 currentDocumentDatabaseNum = 0;
        Document doc = null;
        bool titleElement = false;
        bool pageText = false;

        // For a .gz input, a GZipStream would be wrapped around the file stream here.
        using (var fs = new FileStream(s_wikipediaXmlFile, FileMode.Open))
        using (var textReader = new System.Xml.XmlTextReader(fs))
        {
            while (textReader.Read())
            {
                switch (textReader.NodeType)
                {
                    case System.Xml.XmlNodeType.Comment:
                        commentNode = new XmlComment(textReader.Value, xmlDocument);
                        break;

                    case System.Xml.XmlNodeType.Element:
                        elementNode = new XmlElement(textReader.Prefix, textReader.LocalName, textReader.NamespaceURI, xmlDocument);
                        if (textReader.LocalName == "title")
                        {
                            titleElement = true;
                        }
                        else if (textReader.LocalName == "text")
                        {
                            pageText = true;
                        }
                        break;

                    case System.Xml.XmlNodeType.EndElement:
                        // The flags are only cleared once a document exists.
                        if (doc != null)
                        {
                            if (textReader.LocalName == "title")
                            {
                                titleElement = false;
                            }
                            else if (textReader.LocalName == "text")
                            {
                                pageText = false;
                            }
                        }
                        break;

                    case System.Xml.XmlNodeType.Entity:
                        entityNode = new XmlEntity(textReader.LocalName, xmlDocument);
                        break;

                    case System.Xml.XmlNodeType.Text:
                        textNode = new XmlText(textReader.Value, xmlDocument);
                        if (titleElement)
                        {
                            doc = new Document(textReader.Value, indexRoot, session);
                            session.Persist(doc);
                            if (doc.DatabaseNumber != currentDocumentDatabaseNum)
                            {
                                // Flush only when a previous database was actually in use.
                                if (currentDocumentDatabaseNum > 0)
                                {
                                    session.FlushUpdates();
                                    Console.WriteLine($"Database: {currentDocumentDatabaseNum} is completed, done importing article {docCount} number of lines: {textReader.LineNumber}");
                                }
                                currentDocumentDatabaseNum = doc.DatabaseNumber;
                            }
                        }
                        else if (doc != null && pageText)
                        {
#if DEBUGx
                            Console.WriteLine(doc.Name + " line: " + textReader.LineNumber);
#endif
                            DocumentText content = new DocumentText(textReader.Value, doc);
                            session.Persist(content, 10000);
                            doc.Content = content;
                            indexRoot.Repository.DocumentSet.AddFast(doc);
                            if (++docCount % 1000000 == 0)
                            {
                                Console.WriteLine($"Done importing article {docCount} number of lines: {textReader.LineNumber}");
                            }
                        }
                        break;

                    case System.Xml.XmlNodeType.Whitespace:
                        whitespaceNode = new XmlWhitespace(textReader.Value, xmlDocument);
                        break;

                    default:
                        // All remaining node types are skipped.
                        break;
                }
            }

            Console.WriteLine($"Finished importing article {docCount} number of lines: {textReader.LineNumber}");
        }

        session.Commit();
    }

    Console.WriteLine(DateTime.Now.ToString() + ", done importing Wikipedia text");
}
/// <summary>
/// Re-indents <paramref name="element"/> and, recursively, its SvgElement
/// children: the element is placed on its own line, preceded by one tab per
/// <c>NodeDepth</c> level. Does nothing for null or for an element that is not
/// under its owner document's root element.
/// </summary>
/// <remarks>
/// The document's <c>AcceptChanges</c> flag is switched off while the
/// element's own whitespace is rewritten and restored before the children are
/// processed.
/// </remarks>
public static void FormatElement(SvgElement element)
{
    if (element == null)
    {
        return;
    }

    // Walk up to the topmost ancestor that is not the document itself.
    XmlNode topAncestor = element;
    while ((topAncestor.ParentNode != null) && !(topAncestor.ParentNode is XmlDocument))
    {
        topAncestor = topAncestor.ParentNode;
    }
    if (topAncestor != element.OwnerDocument.DocumentElement)
    {
        return;
    }

    SvgDocument document = element.OwnerDocument;
    bool previousAcceptChanges = document.AcceptChanges;
    document.AcceptChanges = false;

    int depth = element.NodeDepth;

    // Strip the non-element nodes directly in front of the element. Removal
    // stops at an SvgElement, or at an XmlText when the parent is a Text element.
    XmlNode cursor = element.PreviousSibling;
    while (((cursor != null) && !(cursor is SvgElement)) && (!(element.ParentNode is Text) || !(cursor is XmlText)))
    {
        XmlNode earlier = cursor.PreviousSibling;
        element.ParentNode.RemoveChild(cursor);
        cursor = earlier;
    }

    // Line break in front of the element, followed by one tab per depth level.
    cursor = element.PreviousSibling;
    XmlWhitespace latestWhitespace = document.CreateWhitespace("\n");
    if (cursor == null)
    {
        element.ParentNode.PrependChild(latestWhitespace);
    }
    else
    {
        element.ParentNode.InsertAfter(latestWhitespace, cursor);
    }
    cursor = latestWhitespace;
    for (int tab = 0; tab < depth; tab++)
    {
        latestWhitespace = document.CreateWhitespace("\t");
        element.ParentNode.InsertAfter(latestWhitespace, cursor);
        cursor = latestWhitespace;
    }

    // Look through the whitespace run after the element for an existing line break.
    bool lineBreakFollows = false;
    cursor = element.NextSibling;
    while (cursor is XmlWhitespace)
    {
        if (((XmlWhitespace)cursor).Value.IndexOf("\n") >= 0)
        {
            lineBreakFollows = true;
            break;
        }
        cursor = cursor.NextSibling;
    }

    if (!lineBreakFollows)
    {
        latestWhitespace = document.CreateWhitespace("\n");
        element.ParentNode.InsertAfter(latestWhitespace, element);
    }

    // When the most recently created whitespace ends the parent, add the
    // parent's own indentation behind it (this lines up the closing tag).
    if (latestWhitespace == element.ParentNode.LastChild)
    {
        depth = ((SvgElement)element.ParentNode).NodeDepth;
        cursor = element.ParentNode.LastChild;
        for (int tab = 0; tab < depth; tab++)
        {
            latestWhitespace = document.CreateWhitespace("\t");
            element.ParentNode.InsertAfter(latestWhitespace, cursor);
            cursor = latestWhitespace;
        }
    }

    document.AcceptChanges = previousAcceptChanges;

    foreach (XmlNode child in element.ChildNodes)
    {
        if (child is SvgElement)
        {
            CodeFunc.FormatElement((SvgElement)child);
        }
    }
}
/// <summary>
/// Reads the Wikipedia XML dump at <c>s_wikipediaXmlFile</c> node by node and
/// stores a <c>Document</c> for each title text and a <c>DocumentText</c> for
/// each page text in the session's databases, reporting progress on the
/// console.
/// </summary>
/// <remarks>
/// The import is skipped (after printing summary information and aborting the
/// update) when the IndexRoot database can already be opened.
/// </remarks>
static void importEntireWikipedia()
{
    const ushort btreeNodeSize = 10000;
    Console.WriteLine(DateTime.Now.ToString() + ", start importing Wikipedia text");
    int docCount = 0;

    // Original note: page and object caching are turned off for this session.
    using (var session = new SessionNoServer(s_systemDir, 5000, false, false, CacheEnum.No))
    {
        Console.WriteLine($"Running with databases in directory: {session.SystemDirectory}");

        var documentPlacement = new Placement(Document.PlaceInDatabase, 1003, 1, 500, 1000, false, false, 1000, false);
        var contentPlacement = new Placement(Document.PlaceInDatabase, 1, 1, 500, UInt16.MaxValue, false, false, 1, false);

        // Node wrappers are constructed while reading but never read back here.
        XmlComment commentNode;
        XmlElement elementNode;
        XmlEntity entityNode;
        XmlText textNode;
        XmlWhitespace whitespaceNode;

        session.BeginUpdate();
        File.Copy(s_licenseDbFile, System.IO.Path.Combine(session.SystemDirectory, "4.odb"), true);

        // Register all database schema classes used by the application in
        // advance to avoid lock conflicts later in parallel indexing.
        session.RegisterClass(typeof(Repository));
        session.RegisterClass(typeof(IndexRoot));
        session.RegisterClass(typeof(Document));
        session.RegisterClass(typeof(Lexicon));
        session.RegisterClass(typeof(DocumentText));
        session.RegisterClass(typeof(Word));
        session.RegisterClass(typeof(WordGlobal));
        session.RegisterClass(typeof(WordHit));
        session.RegisterClass(typeof(BTreeSet<Document>));
        session.RegisterClass(typeof(OidShort));
        session.RegisterClass(typeof(BTreeMap<Word, WordHit>));
        session.RegisterClass(typeof(HashCodeComparer<Word>));
        session.RegisterClass(typeof(BTreeSetOidShort<Word>));
        session.RegisterClass(typeof(BTreeMapOidShort<Word, WordHit>));

        Database existingIndex = session.OpenDatabase(IndexRoot.PlaceInDatabase, false, false);
        if (existingIndex != null)
        {
            outputSomeInfo(session);
            session.Abort();
            return;
        }

        session.NewDatabase(IndexRoot.PlaceInDatabase, 0, "IndexRoot");
        session.NewDatabase(Lexicon.PlaceInDatabase, 0, "Lexicon");
        session.NewDatabase(Repository.PlaceInDatabase, 0, "Repository");

        // Pre-allocate Document databases 40 through 186, each created with size argument 512.
        for (UInt32 dbNumber = 40; dbNumber <= 186; dbNumber++)
        {
            session.NewDatabase(dbNumber, 512, "Document");
        }

        XmlDocument xmlDocument = new XmlDocument("enwiki-latest-pages-articles.xml");
        IndexRoot indexRoot = new IndexRoot(btreeNodeSize, session);
        indexRoot.Persist(session, indexRoot, true);

        Document doc = null;
        bool insideTitle = false;
        bool insidePageText = false;
        UInt32 currentDocumentDatabaseNum = documentPlacement.StartDatabaseNumber;

        // For a .gz input, a GZipStream would be wrapped around the file stream here.
        using (var fs = new FileStream(s_wikipediaXmlFile, FileMode.Open))
        using (var textReader = new System.Xml.XmlTextReader(fs))
        {
            while (textReader.Read())
            {
                switch (textReader.NodeType)
                {
                    case System.Xml.XmlNodeType.Comment:
                        commentNode = new XmlComment(textReader.Value, xmlDocument);
                        break;

                    case System.Xml.XmlNodeType.Element:
                        elementNode = new XmlElement(textReader.Prefix, textReader.LocalName, textReader.NamespaceURI, xmlDocument);
                        if (textReader.LocalName == "title")
                        {
                            insideTitle = true;
                        }
                        else if (textReader.LocalName == "text")
                        {
                            insidePageText = true;
                        }
                        break;

                    case System.Xml.XmlNodeType.EndElement:
                        // The flags are only cleared once a document exists.
                        if (doc != null)
                        {
                            if (textReader.LocalName == "title")
                            {
                                insideTitle = false;
                            }
                            else if (textReader.LocalName == "text")
                            {
                                insidePageText = false;
                            }
                        }
                        break;

                    case System.Xml.XmlNodeType.Entity:
                        entityNode = new XmlEntity(textReader.LocalName, xmlDocument);
                        break;

                    case System.Xml.XmlNodeType.Text:
                        textNode = new XmlText(textReader.Value, xmlDocument);
                        if (insideTitle)
                        {
                            doc = new Document(textReader.Value, indexRoot, session);
                            doc.Persist(documentPlacement, session, true);
                            if (doc.DatabaseNumber != currentDocumentDatabaseNum)
                            {
                                // The document landed in a different database: flush the one just left.
                                session.FlushUpdates(session.OpenDatabase(currentDocumentDatabaseNum));
                                Console.WriteLine($"Database: {currentDocumentDatabaseNum} is completed, done importing article {docCount} number of lines: {textReader.LineNumber}");
                                currentDocumentDatabaseNum = doc.DatabaseNumber;
                            }
                        }
                        else if (doc != null && insidePageText)
                        {
#if DEBUGx
                            Console.WriteLine(doc.Name + " line: " + textReader.LineNumber);
#endif
                            DocumentText content = new DocumentText(textReader.Value, doc);
                            if (doc.DatabaseNumber != contentPlacement.TryDatabaseNumber)
                            {
                                // Re-target the content placement at the document's database.
                                contentPlacement = new Placement(doc.DatabaseNumber, (ushort)contentPlacement.StartPageNumber, 1, contentPlacement.MaxObjectsPerPage, contentPlacement.MaxPagesPerDatabase, false, false, 1, false);
                            }
                            content.Persist(contentPlacement, session, false);
                            Debug.Assert(content.DatabaseNumber == doc.DatabaseNumber);
                            doc.Content = content;
                            indexRoot.repository.documentSet.AddFast(doc);
                            if (++docCount % 1000000 == 0)
                            {
                                Console.WriteLine($"Done importing article {docCount} number of lines: {textReader.LineNumber}");
                            }
                        }
                        break;

                    case System.Xml.XmlNodeType.Whitespace:
                        whitespaceNode = new XmlWhitespace(textReader.Value, xmlDocument);
                        break;

                    default:
                        // All remaining node types are skipped.
                        break;
                }
            }

            Console.WriteLine($"Finished importing article {docCount} number of lines: {textReader.LineNumber}");
        }

        session.Commit();
    }

    Console.WriteLine(DateTime.Now.ToString() + ", done importing Wikipedia text");
}
/// <summary>
/// Makes <paramref name="w"/> the namespace resolver's current context and
/// passes the node's text to the validator's whitespace check.
/// </summary>
void ValidateWhitespace(XmlWhitespace w)
{
    // The resolver context is set before the text is read and validated.
    nsResolver.Context = w;

    string whitespaceText = w.InnerText;
    validator.ValidateWhitespace(whitespaceText);
}