C# (CSharp) Majestic12 HTMLparser.LoadFromFile примеры использования

Язык программирования: C# (CSharp)

Пространство имен/Пакет: Majestic12

Класс/Тип: HTMLparser

Метод/Функция: LoadFromFile

Примеров на hotexamples.com: 3

C# (CSharp) Majestic12 HTMLparser.LoadFromFile - 3 примера найдено. Это лучшие примеры C# (CSharp) кода для Majestic12.HTMLparser.LoadFromFile, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

ParseNext(5)

SetChunkHashMode(3)

Close(2)

Init(2)

InitMiniEntities(2)

LoadFromFile(2)

Reset(2)

SetEncoding(2)

CleanUp(1)

HandleMetaEncoding(1)

ParseNextTag(1)

SetRawHTML(1)

LoadFromFile() публичный Метод

Loads HTML from file

public void LoadFromFile(string sFileName)
sFileName	string	Full filename
Результат	void

Документация по классу HTMLparser

Пример #1

Показать файл

        /// <summary>
        /// Loads "tests/majestic12.html" (relative to the current directory) into an
        /// <c>HTMLparser</c>, parses it the requested number of times and prints timing
        /// statistics to the console.
        /// </summary>
        /// <param name="iParseTimes">Number of times to parse document (useful for benchmarking).
        /// When greater than 1 the benchmark path is used, otherwise the document is parsed and printed.</param>
        void Start(int iParseTimes)
        {
            string sFileName = Path.Combine(Directory.GetCurrentDirectory(), "tests" + Path.DirectorySeparatorChar + "majestic12.html");

            // Nothing to do without the input file - report where it was expected and bail out.
            if (!File.Exists(sFileName))
            {
                Console.WriteLine("Could not find file in current directory to parse - expected it to be here: " + sFileName);
                return;
            }

            HTMLparser oP = new HTMLparser();

            try
            {
                // This is optional, but if you want high performance then you may
                // want to set chunk hash mode to FALSE. This would result in tag params
                // being added to string arrays in HTMLchunk object called sParams and sValues, with number
                // of actual params being in iParams.
                //
                // When TRUE (and its default) tag params will be added to hashtable HTMLchunk (object).oParams
                oP.SetChunkHashMode(false);

                // if you set this to true then original parsed HTML for given chunk will be kept -
                // this will reduce performance somewhat, but may be desirable in some cases where
                // reconstruction of HTML may be necessary
                oP.bKeepRawHTML = false;

                // load HTML from file
                oP.LoadFromFile(sFileName);

                // Stopwatch is monotonic, so the measurement is not skewed by system clock adjustments.
                System.Diagnostics.Stopwatch oTimer = System.Diagnostics.Stopwatch.StartNew();

                for (int i = 0; i < iParseTimes; i++)
                {
                    if (iParseTimes > 1)
                    {
                        BenchMarkParse(oP);
                    }
                    else
                    {
                        ParseAndPrint(oP);
                    }

                    // rewind the parser so the next iteration parses the same document again
                    oP.Reset();
                }

                oTimer.Stop();

                // number of milliseconds we were parsing
                long iMSecs = oTimer.ElapsedMilliseconds;

                if (iMSecs > 0 && iParseTimes > 0)
                {
                    // Floating-point division: integer division would report 0 secs for any run shorter than a second.
                    Console.WriteLine("Parsed {0} time(s), total time {1:0.00} secs, approximately {2:0.00} ms per full parse.", iParseTimes, iMSecs / 1000.0, (double)iMSecs / iParseTimes);
                }
            }
            finally
            {
                // Close the parser even when loading or parsing throws.
                oP.Close();
            }
        }

Пример #2

Показать файл

Файл: Main.cs Проект: stimpy77/Majestic12.HTMLparser

        /// <summary>
        /// Configures an <c>HTMLparser</c>, loads the given file into it, parses it the requested
        /// number of times and writes timing statistics to standard error.
        /// </summary>
        /// <param name="iParseTimes">Number of times to parse document (useful for benchmarking).
        /// When greater than 1 the benchmark path is used, otherwise the document is parsed and printed.</param>
        /// <param name="sFileName">File to parse. If it does not exist as given, it is looked up
        /// in the "tests" subdirectory of the current directory.</param>
        void Start(int iParseTimes, string sFileName)
        {
            if (!File.Exists(sFileName))
            {
                // fall back to the "tests" folder under the current directory
                sFileName = Path.Combine(Directory.GetCurrentDirectory(), "tests" + Path.DirectorySeparatorChar + sFileName);

                if (!File.Exists(sFileName))
                {
                    Console.WriteLine("Could not find file in current directory to parse - expected it to be here: " + sFileName);
                    return;
                }
            }

            HTMLparser oP = new HTMLparser();

            try
            {
                // This is optional, but if you want high performance then you may
                // want to set chunk hash mode to FALSE. This would result in tag params
                // being added to string arrays in HTMLchunk object called sParams and sValues, with number
                // of actual params being in iParams.
                //
                // When TRUE (and its default) tag params will be added to hashtable HTMLchunk (object).oParams
                oP.SetChunkHashMode(false);

                // if you set this to true then original parsed HTML for given chunk will be kept -
                // this will reduce performance somewhat, but may be desirable in some cases where
                // reconstruction of HTML may be necessary
                oP.bKeepRawHTML = false;

                // if set to true (it is false by default), then entities will be decoded: this is essential
                // if you want to get strings that contain final representation of the data in HTML, however
                // you should be aware that if you want to use such strings into output HTML string then you will
                // need to do Entity encoding or same string may fail later
                oP.bDecodeEntities = true;

                // we have option to keep most entities as is - only replace stuff like &nbsp;
                // this is called Mini Entities mode - it is handy when HTML will need
                // to be re-created after it was parsed, though in this case really
                // entities should not be parsed at all
                oP.bDecodeMiniEntities = true;

                // Mini entities are only initialised when full entity decoding is off.
                // NOTE(review): with both flags set to true above this branch presumably never runs - confirm intent.
                if (!oP.bDecodeEntities && oP.bDecodeMiniEntities)
                {
                    oP.InitMiniEntities();
                }

                // if set to true, then in case of Comments and SCRIPT tags the data set to oHTML will be
                // extracted BETWEEN those tags, rather than include complete RAW HTML that includes tags too
                // this only works if auto extraction is enabled
                oP.bAutoExtractBetweenTagsOnly = true;

                // if true then comments will be extracted automatically
                oP.bAutoKeepComments = true;

                // if true then scripts will be extracted automatically:
                oP.bAutoKeepScripts = true;

                // if this option is true then whitespace before start of tag will be compressed to single
                // space character in string: " ", if false then full whitespace before tag will be returned (slower)
                // you may only want to set it to false if you want exact whitespace between tags, otherwise it is just
                // a waste of CPU cycles
                oP.bCompressWhiteSpaceBeforeTag = true;

                // if true (default) then tags with attributes marked as CLOSED (/ at the end) will be automatically
                // forced to be considered as open tags - this is no good for XML parsing, but I keep it for backwards
                // compatibility for my stuff as it makes it easier to avoid checking for same tag which is both closed
                // or open
                oP.bAutoMarkClosedTagsWithParamsAsOpen = false;

                // load HTML from file
                oP.LoadFromFile(sFileName);

                // alternatively you can set HTML to be parsed as follows (bHTML is byte[] array containing data):
                // oP.Init(bHTML);

                // Stopwatch is monotonic, so the measurement is not skewed by system clock adjustments.
                System.Diagnostics.Stopwatch oTimer = System.Diagnostics.Stopwatch.StartNew();

                for (int i = 0; i < iParseTimes; i++)
                {
                    if (iParseTimes > 1)
                    {
                        BenchMarkParse(oP);
                    }
                    else
                    {
                        ParseAndPrint(oP);
                    }

                    // rewind the parser so the next iteration parses the same document again
                    oP.Reset();
                }

                oTimer.Stop();

                // number of milliseconds we were parsing
                long iMSecs = oTimer.ElapsedMilliseconds;

                if (iMSecs > 0 && iParseTimes > 0)
                {
                    Console.Error.WriteLine("Parsed {0} time(s), total time {1:0.00} secs, ~{2:0.00} ms per full parse.", iParseTimes, iMSecs * 1.0 / 1000, iMSecs * 1.0 / iParseTimes);
                }
            }
            finally
            {
                // Close the parser even when loading or parsing throws.
                oP.Close();
            }
        }

Пример #3

Показать файл

Файл: Main.cs Проект: ariesy/HTMLParser

        /// <summary>
        /// Configures an <c>HTMLparser</c>, loads the given file into it, parses it the requested
        /// number of times and writes timing statistics to standard error.
        /// </summary>
        /// <param name="iParseTimes">Number of times to parse document (useful for benchmarking).
        /// When greater than 1 the benchmark path is used, otherwise the document is parsed and printed.</param>
        /// <param name="sFileName">File to parse. If it does not exist as given, it is looked up
        /// in the "tests" subdirectory of the current directory.</param>
        void Start(int iParseTimes,string sFileName)
        {
            if (!File.Exists(sFileName))
            {
                // fall back to the "tests" folder under the current directory
                sFileName = Path.Combine(Directory.GetCurrentDirectory(), "tests" + Path.DirectorySeparatorChar + sFileName);

                if (!File.Exists(sFileName))
                {
                    Console.WriteLine("Could not find file in current directory to parse - expected it to be here: " + sFileName);
                    return;
                }
            }

            HTMLparser oP = new HTMLparser();

            try
            {
                // This is optional, but if you want high performance then you may
                // want to set chunk hash mode to FALSE. This would result in tag params
                // being added to string arrays in HTMLchunk object called sParams and sValues, with number
                // of actual params being in iParams.
                //
                // When TRUE (and its default) tag params will be added to hashtable HTMLchunk (object).oParams
                oP.SetChunkHashMode(false);

                // if you set this to true then original parsed HTML for given chunk will be kept -
                // this will reduce performance somewhat, but may be desirable in some cases where
                // reconstruction of HTML may be necessary
                oP.bKeepRawHTML = false;

                // if set to true (it is false by default), then entities will be decoded: this is essential
                // if you want to get strings that contain final representation of the data in HTML, however
                // you should be aware that if you want to use such strings into output HTML string then you will
                // need to do Entity encoding or same string may fail later
                oP.bDecodeEntities = true;

                // we have option to keep most entities as is - only replace stuff like &nbsp;
                // this is called Mini Entities mode - it is handy when HTML will need
                // to be re-created after it was parsed, though in this case really
                // entities should not be parsed at all
                oP.bDecodeMiniEntities = true;

                // Mini entities are only initialised when full entity decoding is off.
                // NOTE(review): with both flags set to true above this branch presumably never runs - confirm intent.
                if (!oP.bDecodeEntities && oP.bDecodeMiniEntities)
                {
                    oP.InitMiniEntities();
                }

                // if set to true, then in case of Comments and SCRIPT tags the data set to oHTML will be
                // extracted BETWEEN those tags, rather than include complete RAW HTML that includes tags too
                // this only works if auto extraction is enabled
                oP.bAutoExtractBetweenTagsOnly = true;

                // if true then comments will be extracted automatically
                oP.bAutoKeepComments = true;

                // if true then scripts will be extracted automatically:
                oP.bAutoKeepScripts = true;

                // if this option is true then whitespace before start of tag will be compressed to single
                // space character in string: " ", if false then full whitespace before tag will be returned (slower)
                // you may only want to set it to false if you want exact whitespace between tags, otherwise it is just
                // a waste of CPU cycles
                oP.bCompressWhiteSpaceBeforeTag = true;

                // if true (default) then tags with attributes marked as CLOSED (/ at the end) will be automatically
                // forced to be considered as open tags - this is no good for XML parsing, but I keep it for backwards
                // compatibility for my stuff as it makes it easier to avoid checking for same tag which is both closed
                // or open
                oP.bAutoMarkClosedTagsWithParamsAsOpen = false;

                // load HTML from file
                oP.LoadFromFile(sFileName);

                // alternatively you can set HTML to be parsed as follows (bHTML is byte[] array containing data):
                // oP.Init(bHTML);

                // Stopwatch is monotonic, so the measurement is not skewed by system clock adjustments.
                System.Diagnostics.Stopwatch oTimer = System.Diagnostics.Stopwatch.StartNew();

                for (int i = 0; i < iParseTimes; i++)
                {
                    if (iParseTimes > 1)
                    {
                        BenchMarkParse(oP);
                    }
                    else
                    {
                        ParseAndPrint(oP);
                    }

                    // rewind the parser so the next iteration parses the same document again
                    oP.Reset();
                }

                oTimer.Stop();

                // number of milliseconds we were parsing
                long iMSecs = oTimer.ElapsedMilliseconds;

                if (iMSecs > 0 && iParseTimes > 0)
                {
                    Console.Error.WriteLine("Parsed {0} time(s), total time {1:0.00} secs, ~{2:0.00} ms per full parse.", iParseTimes, iMSecs * 1.0 / 1000, iMSecs * 1.0 / iParseTimes);
                }
            }
            finally
            {
                // Close the parser even when loading or parsing throws.
                oP.Close();
            }
        }