/// <summary>
/// Reads the file at <paramref name="filePath"/>, skips everything up to and including
/// the first tag that contains <c>&lt;body</c>, and then repeatedly constructs
/// <see cref="ParseDataItem"/> instances from the remaining stream, adding each one
/// that reports <c>HasData</c> via <c>AddItem</c>.
/// </summary>
/// <param name="filePath">
/// Path of the file to load. Its file name and full path are stored in
/// <c>FileName</c> and <c>FilePath</c> before the file is opened.
/// </param>
public void Load(string filePath)
{
    FileName = Path.GetFileName(filePath);
    FilePath = Path.GetFullPath(filePath);

    // The using statements dispose the reader and both streams on every exit path,
    // so no explicit Dispose() calls are needed.
    using (FileStream fs = File.Open(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    using (BufferedStream bs = new BufferedStream(fs))
    using (StreamReader sr = new StreamReader(bs))
    {
        bool insideBody = false;

        while (sr.Peek() >= 0)
        {
            if (!insideBody)
            {
                // Scan tag by tag for the opening body tag. The end-of-stream check matters:
                // without it a file that has no body tag would never leave this scan.
                // NOTE(review): ReadUntil is defined elsewhere; its behaviour at end of
                // stream is not visible here, which is why the loop checks Peek() itself.
                while (!insideBody && sr.Peek() >= 0)
                {
                    string tag = StreamReaderExtensions.ReadUntil(sr, '>');
                    insideBody = tag.ToLowerInvariant().Contains("<body");
                }

                if (!insideBody)
                {
                    // Reached the end of the file without finding a body: nothing to parse.
                    break;
                }
            }

            ParseDataItem dataItem = new ParseDataItem(sr, this, null);
            if (dataItem.HasData)
            {
                AddItem(dataItem);
            }
        }
    }
}
/// <summary>
/// Builds a data item by consuming markup from <paramref name="sr"/>. Tags listed in
/// <c>tagsToProcess</c> are tracked on a stack; text found while the top of the stack is
/// <c>HEADER_TAG</c> becomes <c>HTMLDecodedHeader</c>, text found while it is
/// <c>DATA_TAG</c> is added with <c>AddValue</c>, and a <c>TABLE_TAG</c> opened inside a
/// data tag is parsed recursively as a child item. Parsing stops when the tag stack
/// empties after a tag has been handled, or when the reader runs out of characters.
/// </summary>
/// <param name="sr">Reader positioned where parsing should start; it is advanced as markup is consumed.</param>
/// <param name="parentDocument">Stored in <c>ParentDocument</c> and passed on to child items.</param>
/// <param name="parentDataItem">Stored in <c>ParentDataItem</c>; <c>null</c> is passed by <c>ParseDocument.Load</c> for top-level items.</param>
public ParseDataItem(StreamReader sr, ParseDocument parentDocument, ParseDataItem parentDataItem)
{
    ParentDocument = parentDocument;
    ParentDataItem = parentDataItem;

    // Tag text that was read ahead while scanning cell text; handled on the next pass.
    string nextTag = string.Empty;
    // Number of child items attached since the last value was recorded.
    int associatedChildCount = 0;
    Stack<string> tagStack = new Stack<string>();

    do
    {
        // Peek() returns -1 at end of stream; ">= 0" keeps a NUL character (code 0)
        // from being mistaken for end of input. It is then skipped as a control character.
        while (sr.Peek() >= 0)
        {
            char firstChar;
            if (string.IsNullOrEmpty(nextTag))
            {
                firstChar = (char)sr.Read();
                while (sr.Peek() >= 0 && char.IsControl(firstChar))
                {
                    firstChar = (char)sr.Read();
                }
            }
            else
            {
                firstChar = nextTag[0];
            }

            if (StartsTag(firstChar))
            {
                string tagNode;
                if (string.IsNullOrEmpty(nextTag))
                {
                    tagNode = StreamReaderExtensions.ReadUntil(sr, '>');
                }
                else if (nextTag.StartsWith("<", StringComparison.Ordinal))
                {
                    tagNode = nextTag.Substring(1);
                }
                else
                {
                    tagNode = nextTag;
                }

                nextTag = string.Empty;

                bool isOpenNode = !(firstChar == '/' || tagNode.StartsWith("/", StringComparison.Ordinal));
                string tag = TagNameOf(tagNode);

                bool insideDataTag = tagStack.Count > 0
                    && tagStack.Peek().Trim().ToLowerInvariant() == DATA_TAG;

                if (insideDataTag && tag.ToLowerInvariant() == TABLE_TAG)
                {
                    // A table nested inside a data tag is parsed as a child item from the
                    // same reader. The closing table tag is deliberately ignored here.
                    if (isOpenNode)
                    {
                        ParseDataItem newChild = new ParseDataItem(sr, ParentDocument, this);
                        if (newChild.HasData)
                        {
                            AddChild(newChild);
                            associatedChildCount++;
                        }
                    }
                }
                else
                {
                    if (tagsToProcess.Contains(tag))
                    {
                        if (isOpenNode)
                        {
                            tagStack.Push(tag);
                        }
                        else
                        {
                            // Guard against a stray closing tag arriving with nothing open;
                            // Peek() on an empty stack would throw.
                            if (tagStack.Count > 0 && tagStack.Peek().Equals(tag))
                            {
                                tagStack.Pop();
                            }

                            if (tag.Equals(DATA_TAG))
                            {
                                FlushChildCount();
                            }
                        }
                    }

                    if (tagStack.Count == 0)
                    {
                        break;
                    }
                }
            }
            else if (tagStack.Count > 0)
            {
                // A run of exactly one character followed by '<' is treated as noise unless
                // that character is an ASCII digit or X/Y (either case). firstChar does not
                // change inside the loop below, so this is computed once.
                bool singleCharIsData = (firstChar >= '0' && firstChar <= '9')
                    || firstChar == 'x' || firstChar == 'X'
                    || firstChar == 'y' || firstChar == 'Y';
                string control = singleCharIsData ? string.Empty : firstChar + "<";

                bool done = false;
                int dataLoopCount = 0;
                do
                {
                    string text = StreamReaderExtensions.ReadUntil(sr, '<');
                    if (dataLoopCount == 0)
                    {
                        // firstChar was already consumed, so put it back in front.
                        text = firstChar + text;
                    }

                    if (!text.Equals(control)
                        && !text.StartsWith("br /", StringComparison.Ordinal)
                        && !text.Equals("<"))
                    {
                        // NOTE(review): ReadUntil appears to include the delimiter in its
                        // result (see the comparisons against "<" above) - confirm. Only a
                        // trailing '<' is stripped, so empty or truncated text at end of
                        // stream no longer throws or loses its last character.
                        string content = text.EndsWith("<", StringComparison.Ordinal)
                            ? text.Substring(0, text.Length - 1)
                            : text;
                        content = new string(content.Where(ch => !char.IsControl(ch)).ToArray());

                        if (!string.IsNullOrEmpty(content))
                        {
                            string top = tagStack.Peek().Trim().ToLowerInvariant();
                            if (top == HEADER_TAG)
                            {
                                HTMLDecodedHeader = content;
                            }
                            else if (top == DATA_TAG)
                            {
                                FlushChildCount();
                                AddValue(content);
                            }
                        }

                        // A line break inside the cell means more text follows: keep reading.
                        nextTag = StreamReaderExtensions.ReadUntil(sr, '>');
                        if (!nextTag.StartsWith("br /", StringComparison.Ordinal)
                            && !nextTag.StartsWith("br/", StringComparison.Ordinal))
                        {
                            done = true;
                        }

                        dataLoopCount++;
                    }
                    else
                    {
                        nextTag = StreamReaderExtensions.ReadUntil(sr, '>');
                        string tempTag = nextTag.Replace(">", "");
                        if (tagsToProcess.Contains(tempTag))
                        {
                            // Re-mark a processed opening tag so the next pass treats it as a tag.
                            nextTag = "<" + nextTag;
                        }

                        done = true;
                    }
                } while (!done);
            }
        }
    } while (tagStack.Any() && sr.Peek() >= 0);

    // Decides whether the current character begins a tag that should be handled now.
    // The original five-clause condition reduces to exactly this: '<' always starts a tag;
    // '/' starts one unless the innermost open tag is DATA_TAG, no data has been collected
    // yet, and the current header is a reserved header. With no open tag, '/' is plain text.
    bool StartsTag(char candidate)
    {
        if (candidate == '<')
        {
            return true;
        }

        if (candidate != '/' || tagStack.Count == 0)
        {
            return false;
        }

        if (tagStack.Peek() != DATA_TAG || HasData)
        {
            return true;
        }

        // NOTE(review): the null guard assumes HTMLDecodedHeader may be unset before the
        // first header text is read - confirm against the property's declaration.
        string header = (HTMLDecodedHeader ?? string.Empty).ToUpperInvariant().Trim();
        return !reservedHeaders.Contains(header);
    }

    // Returns the tag name from raw tag text such as "td class=x>" or "/td>": the text
    // between an optional leading '/' and the first space, or the first '>' when there is
    // no space, or the end of the text when neither is present.
    string TagNameOf(string node)
    {
        int start = node.StartsWith("/", StringComparison.Ordinal) ? 1 : 0;
        int end = node.IndexOf(' ');
        if (end < 0)
        {
            end = node.IndexOf('>');
        }

        if (end < 0)
        {
            end = node.Length;
        }

        return node.Substring(start, end - start).Trim();
    }

    // Stores the running child count on the most recent value entry (when values exist)
    // and restarts the count.
    void FlushChildCount()
    {
        if (HTMLDecodedValues != null && HTMLDecodedValues.Any())
        {
            ValueCounts.Last().AssociatedChildCount = associatedChildCount;
        }

        associatedChildCount = 0;
    }
}