コード例 #1
0
ファイル: Tokenizer.cs プロジェクト: antrampa/HtmlParserSharp
		/// <summary>
		/// Copies the tokenizer state held by <paramref name="other"/> into this instance.
		/// </summary>
		/// <remarks>
		/// Each character buffer is replaced by a larger array when the current one is too
		/// small, and the used prefix of the source buffer is then copied into it. The
		/// in-progress tag name, attribute name and attribute holder are copied through their
		/// <c>Clone*</c> methods; every other field is assigned directly.
		/// <c>shouldSuspend</c> is always set to <c>false</c>.
		/// </remarks>
		/// <param name="other">The tokenizer whose state is copied.</param>
		public void LoadState(Tokenizer other)
		{
			// Short buffer: grow if needed, then copy the used prefix.
			strBufLen = other.strBufLen;
			if (strBuf.Length < strBufLen)
			{
				strBuf = new char[strBufLen];
			}
			// Buffer.BlockCopy takes a byte count, hence the char count shifted left by one.
			Buffer.BlockCopy(other.strBuf, 0, strBuf, 0, strBufLen << 1);

			// Long buffer: same treatment.
			longStrBufLen = other.longStrBufLen;
			if (longStrBuf.Length < longStrBufLen)
			{
				longStrBuf = new char[longStrBufLen];
			}
			Buffer.BlockCopy(other.longStrBuf, 0, longStrBuf, 0, longStrBufLen << 1);

			// State machine position and end-tag expectation.
			stateSave = other.stateSave;
			returnStateSave = other.returnStateSave;
			endTagExpectation = other.endTagExpectation;
			endTagExpectationAsArray = other.endTagExpectationAsArray;
			// line = 1; XXX line numbers

			// Scalar bookkeeping, copied verbatim.
			lastCR = other.lastCR;
			index = other.index;
			forceQuirks = other.forceQuirks;
			additional = other.additional;
			entCol = other.entCol;
			firstCharKey = other.firstCharKey;
			lo = other.lo;
			hi = other.hi;
			candidate = other.candidate;
			strBufMark = other.strBufMark;
			prevValue = other.prevValue;
			value = other.value;
			seenDigits = other.seenDigits;
			endTag = other.endTag;
			shouldSuspend = false;

			// Doctype pieces are taken over as-is (null stays null).
			doctypeName = other.doctypeName;
			systemIdentifier = other.systemIdentifier;
			publicIdentifier = other.publicIdentifier;

			// The in-progress token parts are cloned when present.
			tagName = other.tagName == null
				? null
				: other.tagName.CloneElementName();
			attributeName = other.attributeName == null
				? null
				: other.attributeName.CloneAttributeName();
			attributes = other.attributes == null
				? null
				: other.attributes.CloneAttributes();
		}
コード例 #2
0
ファイル: Tokenizer.cs プロジェクト: antrampa/HtmlParserSharp
		/// <summary>
		/// Ends tokenization: drops the character buffers and the in-progress token fields,
		/// notifies the token handler, and finally clears and releases the attribute holder
		/// when one exists.
		/// </summary>
		public void End()
		{
			// Release both character buffers.
			longStrBuf = null;
			strBuf = null;

			// Release the doctype strings and the in-progress tag/attribute names.
			publicIdentifier = null;
			systemIdentifier = null;
			doctypeName = null;
			attributeName = null;
			tagName = null;

			TokenHandler.EndTokenization();

			// The attribute holder is dealt with only after the handler has been notified.
			if (attributes == null)
			{
				return;
			}
			attributes.Clear(mappingLangToXmlLang);
			attributes = null;
		}
コード例 #3
0
ファイル: Tokenizer.cs プロジェクト: antrampa/HtmlParserSharp
		/// <summary>
		/// Resets the tokenizer's bookkeeping fields and sets the saved state to
		/// <c>TokenizerState.DATA</c>.
		/// </summary>
		/// <remarks>
		/// <c>shouldSuspend</c> is deliberately not touched here (see the note in the body).
		/// The attribute holder is released only when <c>newAttributesEachTime</c> is set.
		/// </remarks>
		public void ResetToDataState()
		{
			stateSave = TokenizerState.DATA;

			// Buffer lengths and mark go back to zero; the buffer arrays are not touched here.
			strBufLen = 0;
			longStrBufLen = 0;
			strBufMark = 0;

			// Input position tracking.
			// line = 1; XXX line numbers
			index = 0;
			lastCR = false;

			// Flags and single-character scratch state.
			endTag = false;
			forceQuirks = false;
			seenDigits = false;
			additional = '\u0000';

			// Lookup/accumulator scratch fields.
			entCol = -1;
			firstCharKey = -1;
			candidate = -1;
			lo = 0;
			hi = 0; // will always be overwritten before use anyway
			prevValue = -1;
			value = 0;

			// Removed J. Treworgy 12/7/2012 - shouldSuspend should remain true so the
			// parser can choose to abort, so it is intentionally not reset to false here.

			InitDoctypeFields();

			// Assigning null unconditionally has the same effect as checking for non-null first.
			tagName = null;
			attributeName = null;

			// [NOCPP[
			if (newAttributesEachTime)
			{
				// ]NOCPP]
				attributes = null;
				// [NOCPP[
			}
			// ]NOCPP]
		}
コード例 #4
0
ファイル: Tokenizer.cs プロジェクト: antrampa/HtmlParserSharp
		/// <summary>
		/// Creates a tokenizer that reports to <paramref name="tokenHandler"/>.
		/// </summary>
		/// <param name="tokenHandler">The handler for receiving tokens.</param>
		/// <param name="newAttributesEachTime">Stored in the field of the same name.</param>
		public Tokenizer(ITokenHandler tokenHandler, bool newAttributesEachTime)
		{
			TokenHandler = tokenHandler;
			this.newAttributesEachTime = newAttributesEachTime;

			// Fixed-size scratch arrays of one and two chars
			// (presumably a single BMP code unit and a surrogate pair, going by the names).
			bmpChar = new char[1];
			astralChar = new char[2];

			// A fresh tokenizer has no token or doctype in progress.
			attributes = null;
			attributeName = null;
			tagName = null;
			systemIdentifier = null;
			publicIdentifier = null;
			doctypeName = null;
		}
コード例 #5
0
ファイル: Tokenizer.cs プロジェクト: antrampa/HtmlParserSharp
		// ]NOCPP]

		/// <summary>
		/// Creates a tokenizer that reports to <paramref name="tokenHandler"/>, with
		/// <c>newAttributesEachTime</c> switched off.
		/// </summary>
		/// <param name="tokenHandler">The handler for receiving tokens.</param>
		public Tokenizer(ITokenHandler tokenHandler)
		{
			TokenHandler = tokenHandler;
			// [NOCPP[
			newAttributesEachTime = false;
			// ]NOCPP]

			// Fixed-size scratch arrays of one and two chars
			// (presumably a single BMP code unit and a surrogate pair, going by the names).
			bmpChar = new char[1];
			astralChar = new char[2];

			// A fresh tokenizer has no token or doctype in progress.
			attributes = null;
			attributeName = null;
			tagName = null;
			systemIdentifier = null;
			publicIdentifier = null;
			doctypeName = null;
		}
コード例 #6
0
ファイル: Tokenizer.cs プロジェクト: antrampa/HtmlParserSharp
		/// <summary>
		/// Adds the pending attribute, with the value currently held in the long string
		/// buffer, to <c>attributes</c> and then clears <c>attributeName</c>.
		/// </summary>
		/// <remarks>
		/// Does nothing beyond the late-charset check when <c>attributeName</c> is
		/// <c>null</c>.
		/// </remarks>
		private void AddAttributeWithValue()
		{
			// [NOCPP[
			bool lateMetaCharset = metaBoundaryPassed
					&& ElementName.META == tagName
					&& AttributeName.CHARSET == attributeName;
			if (lateMetaCharset)
			{
				Err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
			}
			// ]NOCPP]

			if (attributeName == null)
			{
				// Nothing pending, so there is nothing to add.
				return;
			}

			// Ownership of the string is transferred to HtmlAttributes below.
			string attributeValue = LongStrBufToString();

			// [NOCPP[
			bool lowerCaseValue = !endTag
					&& html4
					&& html4ModeCompatibleWithXhtml1Schemata
					&& attributeName.IsCaseFolded;
			if (lowerCaseValue)
			{
				attributeValue = NewAsciiLowerCaseStringFromString(attributeValue);
			}
			// ]NOCPP]

			attributes.AddAttribute(attributeName, attributeValue, xmlnsPolicy);

			// attributeName has been adopted by the |attributes| object.
			attributeName = null;
		}
コード例 #7
0
ファイル: Tokenizer.cs プロジェクト: antrampa/HtmlParserSharp
		/// <summary>
		/// Adds the pending attribute, which was written without a value, to
		/// <c>attributes</c> and then clears <c>attributeName</c>.
		/// </summary>
		/// <remarks>
		/// Outside HTML4 mode the attribute is added with an empty value, after a warning for
		/// <c>src</c> and <c>href</c>. In HTML4 mode a boolean attribute is added with either
		/// its own local name or the empty string as value; any other attribute except
		/// <c>border</c> is reported as an error and added with an empty value.
		/// Does nothing beyond the notification and the late-charset check when
		/// <c>attributeName</c> is <c>null</c>.
		/// </remarks>
		private void AddAttributeWithoutValue()
		{
			NoteAttributeWithoutValue();

			// [NOCPP[
			if (metaBoundaryPassed && AttributeName.CHARSET == attributeName
					&& ElementName.META == tagName)
			{
				Err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
			}
			// ]NOCPP]
			if (attributeName == null)
			{
				// Nothing pending, so there is nothing to add.
				return;
			}

			// [NOCPP[
			if (html4)
			{
				if (attributeName.IsBoolean)
				{
					// Boolean attribute: the value is the attribute's own local name when
					// XHTML1-schema compatibility is on, otherwise the empty string.
					if (html4ModeCompatibleWithXhtml1Schemata)
					{
						attributes.AddAttribute(attributeName,
								attributeName.GetLocal(AttributeName.HTML),
								xmlnsPolicy);
					}
					else
					{
						attributes.AddAttribute(attributeName, "", xmlnsPolicy);
					}
				}
				else
				{
					// NOTE(review): a valueless "border" attribute is neither reported nor
					// added in HTML4 mode - confirm this is intended.
					if (AttributeName.BORDER != attributeName)
					{
						// The message previously said "non-bool", which is not the HTML term.
						Err("Attribute value omitted for a non-boolean attribute. (HTML4-only error.)");
						attributes.AddAttribute(attributeName, "", xmlnsPolicy);
					}
				}
			}
			else
			{
				if (AttributeName.SRC == attributeName
						|| AttributeName.HREF == attributeName)
				{
					Warn("Attribute \u201C"
							+ attributeName.GetLocal(AttributeName.HTML)
							+ "\u201D without an explicit value seen. The attribute may be dropped by IE7.");
				}
				// ]NOCPP]
				attributes.AddAttribute(attributeName,
						String.Empty
					// [NOCPP[
						, xmlnsPolicy
					// ]NOCPP]
				);
				// [NOCPP[
			}
			// ]NOCPP]

			// attributeName has been adopted by the |attributes| object.
			attributeName = null;
		}
コード例 #8
0
ファイル: Tokenizer.cs プロジェクト: antrampa/HtmlParserSharp
		/// <summary>
		/// Resolves the attribute name accumulated in <c>strBuf</c>, creates the attribute
		/// holder if none exists yet, and drops the name again when the holder already
		/// contains it.
		/// </summary>
		private void AttributeNameComplete()
		{
			// [NOCPP[
			bool checkName = namePolicy != XmlViolationPolicy.Allow;
			// ]NOCPP]
			attributeName = AttributeName.NameByBuffer(strBuf, 0, strBufLen, checkName);

			if (attributes == null)
			{
				attributes = new HtmlAttributes(mappingLangToXmlLang);
			}

			/*
			 * Per the HTML tokenization rules: on leaving the attribute name state the
			 * complete name is compared with the attributes already on the token. A
			 * repeated name is a parse error, and the new attribute (together with any
			 * value later associated with it) must be dropped.
			 */
			if (!attributes.Contains(attributeName))
			{
				return;
			}

			ErrDuplicateAttribute();
			// Clearing the name is what causes the duplicate to be dropped.
			attributeName = null;
		}
コード例 #9
0
 /// <summary>
 /// Compares this attribute name with <paramref name="another"/> by the local name
 /// each one reports for <c>AttributeName.HTML</c>.
 /// </summary>
 /// <param name="another">The attribute name to compare against.</param>
 /// <returns><c>true</c> when both local names compare equal with <c>==</c>.</returns>
 internal bool EqualsAnother(AttributeName another)
 {
     var ownLocal = GetLocal(AttributeName.HTML);
     var otherLocal = another.GetLocal(AttributeName.HTML);
     return ownLocal == otherLocal;
 }
コード例 #10
0
ファイル: Tokenizer.cs プロジェクト: prepare/WebParser
 /// <summary>
 /// Ends tokenization: releases the buffers and the in-progress token fields, notifies
 /// the token listener, and clears and releases the attribute holder when one exists.
 /// </summary>
 /// <remarks>
 /// The long buffer is emptied and released only when it is non-null, so calling this
 /// method while the buffer is already released no longer throws
 /// <see cref="System.NullReferenceException"/> before the remaining cleanup and the
 /// listener notification have run.
 /// </remarks>
 public void End()
 {
     this.strBuffer = null;

     // Guard the dereference: the field is null after a previous End().
     if (this.longStrBuffer != null)
     {
         this.longStrBuffer.Length = 0;
         this.longStrBuffer = null;
     }

     doctypeName = null;
     systemIdentifier = null;
     publicIdentifier = null;
     tagName = null;
     attributeName = null;

     TokenListener.EndTokenization();

     if (attributes != null)
     {
         attributes.Clear(mappingLangToXmlLang);
         attributes = null;
     }
 }