Пример #1
0
        /// <summary>
        /// Loads the file at <paramref name="filePath"/>: records its name and full path, skips
        /// everything up to the first tag containing <c>&lt;body</c>, then repeatedly constructs
        /// <see cref="ParseDataItem"/> instances from the rest of the stream and adds each one
        /// that reports <c>HasData</c>.
        /// </summary>
        /// <param name="filePath">
        /// Path of the file to read. It is opened read-only with <see cref="FileShare.ReadWrite"/>.
        /// </param>
        /// <remarks>
        /// If the stream ends before a <c>&lt;body</c> tag is seen, the method returns without adding
        /// any items instead of looping on an exhausted reader.
        /// </remarks>
        public void Load(string filePath)
        {
            FileName = Path.GetFileName(filePath);
            FilePath = Path.GetFullPath(filePath);

            // The using statements dispose the reader and both streams (in reverse order) on exit,
            // including on exceptions, so no explicit Dispose() calls are needed.
            using (FileStream fs = File.Open(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            using (BufferedStream bs = new BufferedStream(fs))
            using (StreamReader sr = new StreamReader(bs))
            {
                bool insideBody = false;
                while (sr.Peek() >= 0)
                {
                    // Consume tags until the opening body tag, but stop at end of stream so a
                    // document without a body cannot trap us in this loop.
                    while (!insideBody && sr.Peek() >= 0)
                    {
                        string tag = StreamReaderExtensions.ReadUntil(sr, '>');
                        if (tag.IndexOf("<body", StringComparison.OrdinalIgnoreCase) >= 0)
                        {
                            insideBody = true;
                        }
                    }

                    if (!insideBody)
                    {
                        // Reached end of stream without ever entering the body: nothing to parse.
                        break;
                    }

                    ParseDataItem dataItem = new ParseDataItem(sr, this, null);
                    if (dataItem.HasData)
                    {
                        AddItem(dataItem);
                    }
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Builds a data item by consuming markup from <paramref name="sr"/> until the stack of
        /// tracked open tags becomes empty or the stream ends.
        /// </summary>
        /// <param name="sr">Reader positioned where parsing should start; it is advanced as markup is consumed.</param>
        /// <param name="parentDocument">Document this item belongs to; stored in <c>ParentDocument</c>.</param>
        /// <param name="parentDataItem">Enclosing item, or <c>null</c> for a top-level item; stored in <c>ParentDataItem</c>.</param>
        /// <remarks>
        /// Tags listed in <c>tagsToProcess</c> are pushed on open and popped on a matching close.
        /// Text read while the innermost tracked tag is <c>HEADER_TAG</c> is stored in
        /// <c>HTMLDecodedHeader</c>; text read under <c>DATA_TAG</c> is passed to <c>AddValue</c>.
        /// An opening <c>TABLE_TAG</c> directly inside <c>DATA_TAG</c> is parsed recursively and,
        /// if it has data, added through <c>AddChild</c>.
        /// NOTE(review): <c>StreamReaderExtensions.ReadUntil</c> is assumed to return the text up to
        /// and including the delimiter (the code strips a trailing character and searches for
        /// <c>&gt;</c> in its result) - confirm against its implementation.
        /// </remarks>
        public ParseDataItem(StreamReader sr, ParseDocument parentDocument, ParseDataItem parentDataItem)
        {
            ParentDocument = parentDocument;
            ParentDataItem = parentDataItem;

            string        nextTag              = string.Empty;
            int           associatedChildCount = 0;
            Stack<string> tagStack             = new Stack<string>();

            // Peek() returns -1 only at end of stream. Testing ">= 0" (rather than "> 0") keeps a
            // NUL character from being mistaken for EOF and left unconsumed in the reader.
            // The loop ends when the stream is exhausted or when the tag stack empties (break below).
            while (sr.Peek() >= 0)
            {
                char firstChar;
                if (string.IsNullOrEmpty(nextTag))
                {
                    // No pending tag: take the next character, skipping control characters.
                    firstChar = (char)sr.Read();
                    while (sr.Peek() >= 0 && char.IsControl(firstChar))
                    {
                        firstChar = (char)sr.Read();
                    }
                }
                else
                {
                    // A tag was already read while scanning text; resume from it.
                    firstChar = nextTag[0];
                }

                bool startsWithSlash = firstChar.Equals('/');

                // Name of the pending closing tag, if there is one.
                string toTest = startsWithSlash && !string.IsNullOrEmpty(nextTag)
                    ? ExtractTagName(nextTag)
                    : string.Empty;

                // '<' always starts a tag. '/' is treated as a tag only when a tracked tag is open;
                // with an empty stack it is handled like any other stray character (previously
                // Peek() threw InvalidOperationException here).
                bool treatAsTag = firstChar.Equals('<');
                if (!treatAsTag && startsWithSlash && tagStack.Any())
                {
                    treatAsTag = tagStack.Peek() != DATA_TAG ||
                                 (HasData && !string.IsNullOrEmpty(toTest) && !tagsToProcess.Contains(toTest)) ||
                                 (HasData && reservedHeaders.Contains(HTMLDecodedHeader.ToUpper().Trim())) ||
                                 !reservedHeaders.Contains(HTMLDecodedHeader.ToUpper().Trim());
                }

                if (treatAsTag)
                {
                    string tagNode;
                    if (string.IsNullOrEmpty(nextTag))
                    {
                        tagNode = StreamReaderExtensions.ReadUntil(sr, '>');
                    }
                    else if (nextTag.StartsWith("<", StringComparison.Ordinal))
                    {
                        tagNode = nextTag.Substring(1);
                    }
                    else
                    {
                        tagNode = nextTag;
                    }
                    nextTag = string.Empty;

                    bool   isOpenNode = !(startsWithSlash || tagNode.StartsWith("/", StringComparison.Ordinal));
                    string tag        = ExtractTagName(tagNode);

                    if (tagStack.Any() && tagStack.Peek().Trim().ToLower() == DATA_TAG && tag.Trim().ToLower() == TABLE_TAG)
                    {
                        // A table nested in a data cell becomes a child item parsed from the same reader.
                        if (isOpenNode)
                        {
                            ParseDataItem newChild = new ParseDataItem(sr, ParentDocument, this);
                            if (newChild.HasData)
                            {
                                AddChild(newChild);
                                associatedChildCount++;
                            }
                        }
                    }
                    else
                    {
                        if (tagsToProcess.Contains(tag))
                        {
                            if (isOpenNode)
                            {
                                tagStack.Push(tag);
                            }
                            else
                            {
                                // Guard against a closing tag arriving before anything was opened.
                                if (tagStack.Any() && tagStack.Peek().Equals(tag))
                                {
                                    tagStack.Pop();
                                }
                                if (tag.Equals(DATA_TAG))
                                {
                                    if (HTMLDecodedValues != null && HTMLDecodedValues.Any())
                                    {
                                        ValueCounts.Last().AssociatedChildCount = associatedChildCount;
                                    }
                                    associatedChildCount = 0;
                                }
                            }
                        }
                        if (!tagStack.Any())
                        {
                            // Nothing tracked remains open: this item is finished.
                            break;
                        }
                    }
                }
                else if (tagStack.Count > 0)
                {
                    // Text content inside a tracked tag. "control" is what the first read returns
                    // when there is no real text, only firstChar followed by the next '<'.
                    // It is left empty for digits and for 'x'/'y' so those single characters are kept as text.
                    string control = Int32.TryParse(firstChar.ToString(), out _) || firstChar.ToString().ToUpper().Equals("X") ||
                                     firstChar.ToString().ToUpper().Equals("Y") ? string.Empty : firstChar + "<";

                    bool done          = false;
                    int  dataLoopCount = 0;
                    do
                    {
                        string text = dataLoopCount == 0 ? firstChar + StreamReaderExtensions.ReadUntil(sr, '<') : StreamReaderExtensions.ReadUntil(sr, '<');
                        if (!text.Equals(control) && !text.StartsWith("br /", StringComparison.Ordinal) && !text.Equals("<"))
                        {
                            // Drop the trailing '<' delimiter (nothing to drop if the read came back empty),
                            // then strip control characters.
                            string withoutDelimiter = text.Length > 0 ? text.Substring(0, text.Length - 1) : text;
                            text = new string(withoutDelimiter.Where(c => !char.IsControl(c)).ToArray());
                            if (!string.IsNullOrEmpty(text))
                            {
                                if (tagStack.Peek().Trim().ToLower() == HEADER_TAG)
                                {
                                    HTMLDecodedHeader = text;
                                }
                                else if (tagStack.Peek().Trim().ToLower() == DATA_TAG)
                                {
                                    if (HTMLDecodedValues != null && HTMLDecodedValues.Any())
                                    {
                                        ValueCounts.Last().AssociatedChildCount = associatedChildCount;
                                    }
                                    associatedChildCount = 0;
                                    AddValue(text);
                                }
                            }
                            nextTag = StreamReaderExtensions.ReadUntil(sr, '>');

                            // A line break inside the text means more text follows; keep reading.
                            if (!nextTag.StartsWith("br /", StringComparison.Ordinal) && !nextTag.StartsWith("br/", StringComparison.Ordinal))
                            {
                                done = true;
                            }
                            dataLoopCount++;
                        }
                        else
                        {
                            nextTag = StreamReaderExtensions.ReadUntil(sr, '>');

                            // Re-prefix tracked tags with '<' so the next iteration handles them as tags.
                            string tempTag = nextTag.Replace(">", "");
                            if (tagsToProcess.Contains(tempTag))
                            {
                                nextTag = "<" + nextTag;
                            }
                            done = true;
                        }
                    } while (!done);
                }
            }
        }

        /// <summary>
        /// Extracts the element name from raw tag text such as <c>td class="x"&gt;</c> or <c>/td&gt;</c>:
        /// a leading '/' is skipped and the name ends at the first space, or at '&gt;' when there is
        /// no space, or at the end of the text when neither is present.
        /// </summary>
        /// <param name="tagText">Tag text without the leading '&lt;'.</param>
        /// <returns>The trimmed tag name; may be empty.</returns>
        private static string ExtractTagName(string tagText)
        {
            int start = tagText.StartsWith("/", StringComparison.Ordinal) ? 1 : 0;

            int end = tagText.IndexOf(' ');
            if (end < 0)
            {
                end = tagText.IndexOf('>');
            }
            if (end < 0)
            {
                // Neither delimiter present (e.g. truncated input): use the whole remainder
                // instead of passing a negative length to Substring.
                end = tagText.Length;
            }

            return tagText.Substring(start, end - start).Trim();
        }