public ElementDom(Interop.IHTMLDOMNode element, IHtmlEditor htmlEditor) { this.node = element; this.htmlEditor = htmlEditor; if (element.nodeType == 0) { nodeType = System.Xml.XmlNodeType.Attribute; } else { switch (element.nodeType) { case 1: nodeType = System.Xml.XmlNodeType.Element; break; case 3: nodeType = System.Xml.XmlNodeType.Text; break; default: nodeType = System.Xml.XmlNodeType.None; break; } } }
public ParserNode(string name, string value, System.Xml.XmlNodeType nodeType) { Name = name; Value = value; NodeType = nodeType; Attributes = new System.Collections.Generic.Dictionary <string, string>(); }
public static bool _CreateNode_System_Xml_XmlDocument_System_Xml_XmlNodeType_System_String_System_String_System_String( ) { //Parameters System.Xml.XmlNodeType type = null; System.String prefix = null; System.String name = null; System.String namespaceURI = null; //ReturnType/Value System.Xml.XmlNode returnVal_Real = null; System.Xml.XmlNode returnVal_Intercepted = null; //Exception Exception exception_Real = null; Exception exception_Intercepted = null; InterceptionMaintenance.disableInterception( ); try { returnValue_Real = System.Xml.XmlDocument.CreateNode(type, prefix, name, namespaceURI); } catch (Exception e) { exception_Real = e; } InterceptionMaintenance.enableInterception( ); try { returnValue_Intercepted = System.Xml.XmlDocument.CreateNode(type, prefix, name, namespaceURI); } catch (Exception e) { exception_Intercepted = e; } Return((exception_Real.Messsage == exception_Intercepted.Message) && (returnValue_Real == returnValue_Intercepted)); }
public static bool _MoveToContent_System_Xml_XmlReader( ) { //Parameters //ReturnType/Value System.Xml.XmlNodeType returnVal_Real = null; System.Xml.XmlNodeType returnVal_Intercepted = null; //Exception Exception exception_Real = null; Exception exception_Intercepted = null; InterceptionMaintenance.disableInterception( ); try { returnValue_Real = System.Xml.XmlReader.MoveToContent(); } catch (Exception e) { exception_Real = e; } InterceptionMaintenance.enableInterception( ); try { returnValue_Intercepted = System.Xml.XmlReader.MoveToContent(); } catch (Exception e) { exception_Intercepted = e; } Return((exception_Real.Messsage == exception_Intercepted.Message) && (returnValue_Real == returnValue_Intercepted)); }
public static bool _ctor_System_Xml_XmlTextReader_System_String_System_Xml_XmlNodeType_System_Xml_XmlParserContext( ) { //Parameters System.String xmlFragment = null; System.Xml.XmlNodeType fragType = null; System.Xml.XmlParserContext context = null; //Exception Exception exception_Real = null; Exception exception_Intercepted = null; InterceptionMaintenance.disableInterception( ); try { returnValue_Real = System.Xml.XmlTextReader.ctor(xmlFragment, fragType, context); } catch (Exception e) { exception_Real = e; } InterceptionMaintenance.enableInterception( ); try { returnValue_Intercepted = System.Xml.XmlTextReader.ctor(xmlFragment, fragType, context); } catch (Exception e) { exception_Intercepted = e; } }
protected bool IsTextNode(System.Xml.XmlNodeType nodeType) { return(default(bool)); }
public ParserNode(string name, System.Xml.XmlNodeType nodeType) : this(name, null, nodeType) { }
/// <summary> /// Protected constructor that initializes all the parameters. /// </summary> /// <param name="fullpath"></param> /// <param name="name"></param> /// <param name="datatype"></param> /// <param name="hasChildren"></param> /// <param name="children"></param> protected PDFDataItem(string fullpath, string relativepath, string name, string title, System.Xml.XmlNodeType nodeType, DataType datatype, PDFDataItemCollection children) { if (string.IsNullOrEmpty(fullpath)) { throw new ArgumentNullException("path"); } if (string.IsNullOrEmpty(name)) { throw new ArgumentNullException("name"); } this.FullPath = fullpath; this.RelativePath = relativepath; this.Name = name; this.Title = title; this._dataType = datatype; this.NodeType = nodeType; this.Children = children; }
// // .ctor // #region public PDFDataItem(string fullpath, string relativepath, string name, string title, System.Xml.XmlNodeType nodeType, Type datatype) /// <summary> /// Creates a new simple data item (with no children) /// </summary> /// <param name="fullpath"></param> /// <param name="name"></param> /// <param name="datatype"></param> public PDFDataItem(string fullpath, string relativepath, string name, string title, System.Xml.XmlNodeType nodeType, DataType datatype) : this(fullpath, relativepath, name, title, nodeType, datatype, null) { }
protected bool IsTextNode(System.Xml.XmlNodeType nodeType) { throw null; }
public virtual System.Xml.XmlNode CreateNode(System.Xml.XmlNodeType type, string prefix, string name, string namespaceURI) { throw null; }
public virtual System.Xml.XmlNode CreateNode(System.Xml.XmlNodeType type, string prefix, string name, string namespaceURI) { return(default(System.Xml.XmlNode)); }
static void importEntireWikipedia() { const ushort btreeNodeSize = 10000; Console.WriteLine(DateTime.Now.ToString() + ", start importing Wikipedia text"); //System.Xml.Schema.XmlSchema docSchema; //using (System.Xml.XmlTextReader schemaReader = new System.Xml.XmlTextReader("c:\\export-0_5.xsd")) //{ // docSchema = System.Xml.Schema.XmlSchema.Read(schemaReader, ValidationCallBack); // } int docCount = 0; using (SessionNoServer session = new SessionNoServer(s_systemDir, 5000, false, false, CacheEnum.No)) // turn of page and object caching { Console.WriteLine("Running with databases in directory: " + session.SystemDirectory); //GCSettings.LatencyMode = GCLatencyMode.Batch;// try to keep the WeakIOptimizedPersistableReference objects around longer Placement documentPlacement = new Placement(Document.PlaceInDatabase, 1003, 1, 500, 1000, false, false, 1000, false); Placement contentPlacement = new Placement(Document.PlaceInDatabase, 1, 1, 500, UInt16.MaxValue, false, false, 1, false); XmlComment xmlComment; XmlElement xmlElement; XmlEntity xmlEntity; XmlText xmlText; XmlWhitespace xmlWhitespace; session.BeginUpdate(); File.Copy(s_licenseDbFile, System.IO.Path.Combine(session.SystemDirectory, "4.odb"), true); // register all database schema classes used by the application in advance to avoid lock conflict later in parallell indexing session.RegisterClass(typeof(Repository)); session.RegisterClass(typeof(IndexRoot)); session.RegisterClass(typeof(Document)); session.RegisterClass(typeof(Lexicon)); session.RegisterClass(typeof(DocumentText)); session.RegisterClass(typeof(Word)); session.RegisterClass(typeof(WordGlobal)); session.RegisterClass(typeof(WordHit)); session.RegisterClass(typeof(BTreeSet <Document>)); session.RegisterClass(typeof(OidShort)); session.RegisterClass(typeof(BTreeMap <Word, WordHit>)); session.RegisterClass(typeof(HashCodeComparer <Word>)); session.RegisterClass(typeof(BTreeSetOidShort <Word>)); session.RegisterClass(typeof(BTreeMapOidShort <Word, WordHit>)); Database db = session.OpenDatabase(IndexRoot.PlaceInDatabase, false, false); if (db != null) { outputSomeInfo(session); session.Abort(); return; } session.NewDatabase(IndexRoot.PlaceInDatabase, 0, "IndexRoot"); session.NewDatabase(Lexicon.PlaceInDatabase, 0, "Lexicon"); session.NewDatabase(Repository.PlaceInDatabase, 0, "Repository"); for (UInt32 i = 40; i <= 186; i++) { session.NewDatabase(i, 512, "Document"); // pre allocate 146 Document databases presized to 512MB each } //session.SetTraceDbActivity(Lexicon.PlaceInDatabase); //session.SetTraceAllDbActivity(); XmlDocument xmlDocument = new XmlDocument("enwiki-latest-pages-articles.xml"); IndexRoot indexRoot = new IndexRoot(btreeNodeSize, session); indexRoot.Persist(session, indexRoot, true); Document doc = null; bool titleElement = false; bool pageText = false; UInt32 currentDocumentDatabaseNum = documentPlacement.StartDatabaseNumber; using (FileStream fs = new FileStream(s_wikipediaXmlFile, FileMode.Open)) { //using (GZipStream zipStream = new GZipStream(fs, CompressionMode.Decompress)) // if input was a .gz file { using (System.Xml.XmlTextReader textReader = new System.Xml.XmlTextReader(fs)) { while (textReader.Read()) { System.Xml.XmlNodeType nodeType = textReader.NodeType; switch (nodeType) { case System.Xml.XmlNodeType.Attribute: break; case System.Xml.XmlNodeType.CDATA: break; case System.Xml.XmlNodeType.Comment: xmlComment = new XmlComment(textReader.Value, xmlDocument); break; case System.Xml.XmlNodeType.Document: break; case System.Xml.XmlNodeType.DocumentFragment: break; case System.Xml.XmlNodeType.DocumentType: break; case System.Xml.XmlNodeType.Element: xmlElement = new XmlElement(textReader.Prefix, textReader.LocalName, textReader.NamespaceURI, xmlDocument); if (textReader.LocalName == "title") { titleElement = true; } else if (textReader.LocalName == "text") { pageText = true; } break; case System.Xml.XmlNodeType.EndElement: if (textReader.LocalName == "title" && doc != null) { titleElement = false; } else if (textReader.LocalName == "text" && doc != null) { pageText = false; } break; case System.Xml.XmlNodeType.EndEntity: break; case System.Xml.XmlNodeType.Entity: xmlEntity = new XmlEntity(textReader.LocalName, xmlDocument); break; case System.Xml.XmlNodeType.EntityReference: break; case System.Xml.XmlNodeType.None: break; case System.Xml.XmlNodeType.Notation: break; case System.Xml.XmlNodeType.ProcessingInstruction: break; case System.Xml.XmlNodeType.SignificantWhitespace: break; case System.Xml.XmlNodeType.Text: xmlText = new XmlText(textReader.Value, xmlDocument); if (titleElement) { doc = new Document(textReader.Value, indexRoot, session); doc.Persist(documentPlacement, session, true); if (doc.DatabaseNumber != currentDocumentDatabaseNum) { session.FlushUpdates(session.OpenDatabase(currentDocumentDatabaseNum)); Console.WriteLine("Database: " + currentDocumentDatabaseNum + " is completed, done importing article " + docCount + " number of lines: " + textReader.LineNumber); currentDocumentDatabaseNum = doc.DatabaseNumber; } //doc.Page.Database.Name = doc.Name; } else if (doc != null && pageText) { #if DEBUGx Console.WriteLine(doc.Name + " line: " + textReader.LineNumber); #endif //if (textReader.LineNumber > 1000000) //{ // session.Commit(); // return; //} DocumentText content = new DocumentText(textReader.Value, doc); if (doc.DatabaseNumber != contentPlacement.TryDatabaseNumber) { contentPlacement = new Placement(doc.DatabaseNumber, (ushort)contentPlacement.StartPageNumber, 1, contentPlacement.MaxObjectsPerPage, contentPlacement.MaxPagesPerDatabase, false, false, 1, false); } content.Persist(contentPlacement, session, false); Debug.Assert(content.DatabaseNumber == doc.DatabaseNumber); doc.Content = content; indexRoot.repository.documentSet.AddFast(doc); if (++docCount % 1000000 == 0) { //session.Commit(false); // skip recovery check, we do it in BeginUpdate which is enough Console.WriteLine("Done importing article " + docCount + " number of lines: " + textReader.LineNumber); //session.BeginUpdate(); } } break; case System.Xml.XmlNodeType.Whitespace: xmlWhitespace = new XmlWhitespace(textReader.Value, xmlDocument); break; case System.Xml.XmlNodeType.XmlDeclaration: break; } ; } Console.WriteLine("Finished importing article " + docCount + " number of lines: " + textReader.LineNumber); } } } session.Commit(); } Console.WriteLine(DateTime.Now.ToString() + ", done importing Wikipedia text"); }
public virtual System.Xml.XmlNode CreateNode(System.Xml.XmlNodeType type, string name, string namespaceURI) { }
static void ImportEntireWikipedia() { const ushort btreeNodeSize = 10000; Console.WriteLine(DateTime.Now.ToString() + ", start importing Wikipedia text"); //System.Xml.Schema.XmlSchema docSchema; //using (System.Xml.XmlTextReader schemaReader = new System.Xml.XmlTextReader("c:\\export-0_5.xsd")) //{ // docSchema = System.Xml.Schema.XmlSchema.Read(schemaReader, ValidationCallBack); // } int docCount = 0; using (SessionNoServer session = new SessionNoServer(s_systemDir, 5000, false, false, CacheEnum.No)) // turn of page and object caching { Console.WriteLine($"Running with databases in directory: {session.SystemDirectory}"); //GCSettings.LatencyMode = GCLatencyMode.Batch;// try to keep the WeakIOptimizedPersistableReference objects around longer XmlComment xmlComment; XmlElement xmlElement; XmlEntity xmlEntity; XmlText xmlText; XmlWhitespace xmlWhitespace; session.BeginUpdate(); // register all database schema classes used by the application in advance to avoid lock conflict later in parallel indexing Database db = session.OpenDatabase(IndexRoot.PlaceInDatabase, false, false); if (db != null) { outputSomeInfo(session); session.Abort(); return; } //session.SetTraceDbActivity(Lexicon.PlaceInDatabase); //session.SetTraceAllDbActivity(); XmlDocument xmlDocument = new XmlDocument("enwiki-latest-pages-articles.xml"); IndexRoot indexRoot = new IndexRoot(btreeNodeSize, session); indexRoot.Persist(session, indexRoot, true); UInt32 currentDocumentDatabaseNum = 0; Document doc = null; bool titleElement = false; bool pageText = false; using (FileStream fs = new FileStream(s_wikipediaXmlFile, FileMode.Open)) { //using (GZipStream zipStream = new GZipStream(fs, CompressionMode.Decompress)) // if input was a .gz file { using (System.Xml.XmlTextReader textReader = new System.Xml.XmlTextReader(fs)) { while (textReader.Read()) { System.Xml.XmlNodeType nodeType = textReader.NodeType; switch (nodeType) { case System.Xml.XmlNodeType.Attribute: break; case System.Xml.XmlNodeType.CDATA: break; case System.Xml.XmlNodeType.Comment: xmlComment = new XmlComment(textReader.Value, xmlDocument); break; case System.Xml.XmlNodeType.Document: break; case System.Xml.XmlNodeType.DocumentFragment: break; case System.Xml.XmlNodeType.DocumentType: break; case System.Xml.XmlNodeType.Element: xmlElement = new XmlElement(textReader.Prefix, textReader.LocalName, textReader.NamespaceURI, xmlDocument); if (textReader.LocalName == "title") { titleElement = true; } else if (textReader.LocalName == "text") { pageText = true; } break; case System.Xml.XmlNodeType.EndElement: if (textReader.LocalName == "title" && doc != null) { titleElement = false; } else if (textReader.LocalName == "text" && doc != null) { pageText = false; } break; case System.Xml.XmlNodeType.EndEntity: break; case System.Xml.XmlNodeType.Entity: xmlEntity = new XmlEntity(textReader.LocalName, xmlDocument); break; case System.Xml.XmlNodeType.EntityReference: break; case System.Xml.XmlNodeType.None: break; case System.Xml.XmlNodeType.Notation: break; case System.Xml.XmlNodeType.ProcessingInstruction: break; case System.Xml.XmlNodeType.SignificantWhitespace: break; case System.Xml.XmlNodeType.Text: xmlText = new XmlText(textReader.Value, xmlDocument); if (titleElement) { doc = new Document(textReader.Value, indexRoot, session); session.Persist(doc); if (doc.DatabaseNumber != currentDocumentDatabaseNum) { if (currentDocumentDatabaseNum > 0) { session.FlushUpdates(); Console.WriteLine("Database: " + currentDocumentDatabaseNum + " is completed, done importing article " + docCount + " number of lines: " + textReader.LineNumber); } currentDocumentDatabaseNum = doc.DatabaseNumber; } //doc.Page.Database.Name = doc.Name; } else if (doc != null && pageText) { #if DEBUGx Console.WriteLine(doc.Name + " line: " + textReader.LineNumber); #endif //if (textReader.LineNumber > 1000000) //{ // session.Commit(); // return; //} DocumentText content = new DocumentText(textReader.Value, doc); session.Persist(content, 10000); doc.Content = content; indexRoot.Repository.DocumentSet.AddFast(doc); if (++docCount % 1000000 == 0) { //session.Commit(false); // skip recovery check, we do it in BeginUpdate which is enough Console.WriteLine("Done importing article " + docCount + " number of lines: " + textReader.LineNumber); //session.BeginUpdate(); } } break; case System.Xml.XmlNodeType.Whitespace: xmlWhitespace = new XmlWhitespace(textReader.Value, xmlDocument); break; case System.Xml.XmlNodeType.XmlDeclaration: break; } ; } Console.WriteLine("Finished importing article " + docCount + " number of lines: " + textReader.LineNumber); } } } session.Commit(); } Console.WriteLine(DateTime.Now.ToString() + ", done importing Wikipedia text"); }
public static ThirdPartyDescriptor DescriptorFromClientSideExtrnalCampaign(string content) { string start = "<external_campaign"; string end = "</external_campaign>"; int startIndex = content.IndexOf(start); int endIndex = content.IndexOf(end, startIndex); if ((startIndex == -1) || (endIndex == -1)) { return(null); } ThirdPartyDescriptor thirdPartyDescriptor = new ThirdPartyDescriptor(); int length = endIndex - startIndex + end.Length; content = content.Substring(startIndex, length); System.IO.StringReader stringReader = new System.IO.StringReader(content); System.Xml.XmlReaderSettings readerSettings = new System.Xml.XmlReaderSettings(); readerSettings.IgnoreWhitespace = true; readerSettings.IgnoreComments = true; readerSettings.IgnoreProcessingInstructions = true; System.Xml.XmlReader reader = System.Xml.XmlReader.Create(stringReader, readerSettings); Stack <string> elementStack = new Stack <string>(); Dictionary <string, string> elementAttributes = new Dictionary <string, string>(); string elementContent = null; reader.ReadToDescendant("external_campaign"); while (reader.Read()) { System.Xml.XmlNodeType nodeType = reader.NodeType; switch (nodeType) { case System.Xml.XmlNodeType.Element: elementStack.Push(reader.Name); elementAttributes.Clear(); elementContent = null; if (reader.HasAttributes) { for (int i = 0, c = reader.AttributeCount; i < c; ++i) { reader.MoveToAttribute(i); string attributeName = reader.Name; if (reader.ReadAttributeValue()) { string attributeValue = reader.Value; if (attributeValue != null) { elementAttributes[attributeName] = attributeValue; } } } } break; case System.Xml.XmlNodeType.EndElement: if (elementStack.Count == 0) { break; } string key = elementStack.Pop(); if (elementContent != null) { if (key == "param") { if (elementAttributes.TryGetValue("name", out key)) { thirdPartyDescriptor.campaignParams[key] = elementContent; } } else { thirdPartyDescriptor.properties[key] = elementContent; } } elementAttributes.Clear(); elementContent = null; break; case System.Xml.XmlNodeType.Text: case System.Xml.XmlNodeType.CDATA: elementContent = reader.Value; break; } } return(thirdPartyDescriptor); }