/// <summary>
/// Stores the supplied collaborators and builds the download address for the given place.
/// </summary>
/// <param name="downloader">Kept in the <c>downloader</c> field.</param>
/// <param name="htmlParser">Kept in the <c>htmlParser</c> field.</param>
/// <param name="ort">Appended to <c>Url</c> to form <c>downloadUrl</c>.</param>
/// <param name="entryAuditor">Kept in the <c>entryAuditor</c> field.</param>
public TankstellenUsecaseController(Downloader downloader, HTMLParser htmlParser, string ort, EntryAuditor entryAuditor)
 {
     // Collaborators are stored as given; none of these assignments depends on another.
     this.entryAuditor = entryAuditor;
     this.htmlParser = htmlParser;
     this.downloader = downloader;

     // The target address is the base Url followed directly by the place.
     downloadUrl = Url + ort;
 }
Beispiel #2
0
 /// <summary>
 /// Feeds standard input through the lexer, token buffer and parser and runs the parser's
 /// <c>document</c> rule. Any exception is written to standard error and the process exits with code 1.
 /// </summary>
 /// <param name="args">Command-line arguments (not used).</param>
 public static void Main(string[] args) {
   try {
     // stdin bytes -> lexer -> token buffer -> parser
     HTMLLexer stdinLexer = new HTMLLexer(new ByteBuffer(Console.OpenStandardInput()));
     HTMLParser documentParser = new HTMLParser(new TokenBuffer(stdinLexer));
     documentParser.document();
   } catch(Exception failure) {
     Console.Error.WriteLine("exception: "+failure);
     Environment.Exit(1);
   }
 }
Beispiel #3
0
            /// <summary>
            /// Handles a link tag: marks that a link is open and records its <c>href</c>,
            /// <c>id</c> and whether a <c>feedurl</c> attribute is present.
            /// </summary>
            /// <param name="instance">Parser whose <c>ParseAttributes</c> decodes <paramref name="tag"/>.</param>
            /// <param name="tag">Tag text passed to <c>ParseAttributes</c>.</param>
            private void OnLinkTag(HTMLParser instance, string tag)
            {
                _inLink = true;
                HashMap attributes = instance.ParseAttributes(tag);

                // Absent attributes are stored as empty strings, never as null.
                _url = (string)attributes["href"] ?? string.Empty;
                _id = (string)attributes["id"] ?? string.Empty;

                // Only the presence of "feedurl" matters, not its value.
                _isFeed = attributes["feedurl"] != null;
            }
        /// <summary>
        /// Checks that nested bold/italic markup is split into runs that carry the combined formatting.
        /// </summary>
        public void TestHTMLParserMultiEmphasis()
        {
            var res = HTMLParser.Parse("<b><i>Hello</i></b> World").ToList();

            // Expected value goes first so a failure message reports expected/actual the right way round.
            Assert.AreEqual(2, res.Count);
            Assert.IsTrue(CheckRun((Run)res[0], "Hello", FontWeights.Bold, FontStyle.Italic, TextDecorations.None));
            Assert.IsTrue(CheckRun((Run)res[1], " World", FontWeights.Normal, FontStyle.Normal, TextDecorations.None));

            // Italic ends inside the bold span, and a separate italic span follows the plain space.
            res = HTMLParser.Parse("<b><i>Hell</i>o</b> <i>World</i>").ToList();
            Assert.AreEqual(4, res.Count);
            Assert.IsTrue(CheckRun((Run)res[0], "Hell", FontWeights.Bold, FontStyle.Italic, TextDecorations.None));
            Assert.IsTrue(CheckRun((Run)res[1], "o", FontWeights.Bold, FontStyle.Normal, TextDecorations.None));
            Assert.IsTrue(CheckRun((Run)res[2], " ", FontWeights.Normal, FontStyle.Normal, TextDecorations.None));
            Assert.IsTrue(CheckRun((Run)res[3], "World", FontWeights.Normal, FontStyle.Italic, TextDecorations.None));
        }
Beispiel #5
0
        /// <summary>
        /// Fills <c>ListResponse</c> with one <c>GPB</c> per <c>div</c> of class
        /// <c>procedure__data</c> found in the parsed <c>lastAnswer</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when at least one matching tag was found. <c>false</c> when the base
        /// implementation returns <c>false</c>, or when nothing matched; in the latter case
        /// <c>lastError</c> is set and <c>ListResponse</c> is an empty list.
        /// </returns>
        protected override bool FillListResponse()
        {
            if (!base.FillListResponse())
            {
                return false;
            }

            List<Tag> matches = new List<Tag>();
            List<GPB> entries = new List<GPB>();

            // Collect the result blocks from every non-proto root tag.
            foreach (Tag root in HTMLParser.Parse(lastAnswer))
            {
                if (root.IsProto)
                {
                    continue;
                }

                matches.AddRange(root.LookForChildTag("div", true, new KeyValuePair<string, string>("class", "procedure__data")));
            }

            if (matches.Count >= 1)
            {
                foreach (Tag match in matches)
                {
                    entries.Add(new GPB(match, MyRequest));
                }

                this.ListResponse = entries;
                return true;
            }

            // Nothing matched: the error text depends on whether the page contains the "emptyResultsBlock" marker.
            lastError = lastAnswer.Contains("emptyResultsBlock")
                ? new Exception("Поиск не дал результатов")
                : new Exception("Ответ сервера не содержит данных (ожидались результаты с тегом \"div\" и классом \"procedure__data\"):" + Environment.NewLine + lastAnswer);
            this.ListResponse = entries;
            return false;
        }
Beispiel #6
0
        /// <summary>
        /// Parses <paramref name="html"/> (first run through a Markdown transform when <c>Format</c>
        /// is <c>HtmlFormatType.Markdown</c>) and attaches the parsed components to
        /// <paramref name="container"/>.
        /// </summary>
        /// <param name="source">Written to <c>LoadedSource</c> of every top-level component that is an <c>IPDFLoadableComponent</c>.</param>
        /// <param name="html">Markup (or Markdown) text to parse.</param>
        /// <param name="container">Component whose <c>Content</c> list receives the top-level results.</param>
        /// <param name="insertIndex">Position in the content list of the first top-level component; later ones follow it.</param>
        protected virtual void ParseHtmlContents(string source, string html, IPDFContainerComponent container, int insertIndex)
        {
            HTMLParserSettings parserSettings = GetParserSettings();

            if (this.Format == HtmlFormatType.Markdown)
            {
                html = new Markdown().Transform(html);
            }

            HTMLParser parser = new HTMLParser(html, parserSettings);
            IPDFComponentList topLevel = container.Content;

            // Components that have been started but not yet ended; the top of the stack is the current parent.
            Stack<IPDFComponent> open = new Stack<IPDFComponent>();

            foreach (Scryber.Html.Parsing.HTMLParserResult result in parser)
            {
                if (!result.Valid || null == result.Parsed)
                {
                    continue;
                }

                IPDFComponent component = result.Parsed;

                if (result.IsEnd)
                {
                    // An end result closes the innermost open component.
                    open.Pop();
                    continue;
                }

                if (open.Count > 0)
                {
                    // Nested component: append it to the content of its parent.
                    IPDFContainerComponent parent = (IPDFContainerComponent)open.Peek();
                    ((IPDFComponentList)parent.Content).Add(component);
                }
                else
                {
                    // Top-level component: track it and insert it at the running index.
                    _added.Add(component);
                    topLevel.Insert(insertIndex, component);
                    insertIndex++;

                    IPDFLoadableComponent loadable = component as IPDFLoadableComponent;
                    if (loadable != null)
                    {
                        loadable.LoadedSource = source;
                    }
                }

                open.Push(result.Parsed);
            }
        }
Beispiel #7
0
        /// <summary>
        /// With word breaking off, the body text must come back as one fragment and the charset
        /// from the mixed-case META tag must be reported as <c>windows-1251</c>.
        /// </summary>
        public void CharsetNoWordBreak()
        {
            const string markup = "<HTML><meTa httP-eQuIv=\"Content-Type\" content=\"text/html; cHaRseT=WinDowS-1251\"><BODY>1st frag</BODY></HTML>";
            byte[] raw = Encoding.Default.GetBytes(markup);

            using (HTMLParser parser = new HTMLParser(new StreamReader(new MemoryStream(raw))))
            {
                parser.BreakWords = false;

                Assert.IsFalse(parser.Finished);
                Assert.AreEqual("1st frag", parser.ReadNextFragment());

                // One more (empty) read is needed before the parser reports completion.
                Assert.IsFalse(parser.Finished);
                Assert.AreEqual("", parser.ReadNextFragment());
                Assert.IsTrue(parser.Finished);

                Assert.AreEqual("windows-1251", parser.CharSet, "Invalid charset!");
            }
        }
Beispiel #8
0
        /// <summary>
        /// With word breaking off, the text inside BODY is the only non-empty fragment;
        /// the text placed after the closing BODY tag is not returned.
        /// </summary>
        public void SimpleBodyNoWordBreak()
        {
            const string markup = "<HTML><HEAD><BODY>text in body</BODY>text to be ignored</HEAD></HTML>";
            byte[] raw = Encoding.Default.GetBytes(markup);

            using (HTMLParser parser = new HTMLParser(new StreamReader(new MemoryStream(raw))))
            {
                parser.BreakWords = false;

                Assert.IsFalse(parser.Finished);
                Assert.AreEqual("text in body", parser.ReadNextFragment());

                // The trailing read yields an empty fragment and completes the parser.
                Assert.IsFalse(parser.Finished);
                Assert.AreEqual("", parser.ReadNextFragment());
                Assert.IsTrue(parser.Finished);
            }
        }
        /// <summary>
        /// Console entry point: asks for a start URL, an output path and a depth, then runs
        /// <c>HTMLParser.ParseSite</c> and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        /// <remarks>
        /// The values that were previously forced onto <c>url</c> and <c>path</c> after reading them
        /// are now only used as defaults when the user enters nothing.
        /// </remarks>
        static void Main(string[] args)
        {
            const string DefaultUrl = "https://translate.google.com/";
            const string DefaultPath = "c:\\Temp\\Links";

            Console.WriteLine("Enter url");
            string url = Console.ReadLine();
            if (string.IsNullOrWhiteSpace(url))
            {
                url = DefaultUrl;
            }

            Console.WriteLine("Enter path");
            string path = Console.ReadLine();
            if (string.IsNullOrWhiteSpace(path))
            {
                path = DefaultPath;
            }

            // The depth was read without any prompt before, and invalid input crashed with FormatException.
            Console.WriteLine("Enter depth");
            int depth;
            if (!int.TryParse(Console.ReadLine(), out depth))
            {
                Console.Error.WriteLine("Depth must be a whole number.");
                return;
            }

            HTMLParser loader = new HTMLParser(depth);

            loader.ParseSite(url, path);
            Console.ReadKey();
        }
Beispiel #10
0
        /// <summary>
        /// Lexes and parses the stream as an HTML document, walks the resulting tree with an
        /// <c>AspNetParseTree</c> listener and returns that listener's <c>Results</c>.
        /// </summary>
        /// <param name="stream">Input wrapped in an <c>AntlrInputStream</c>.</param>
        /// <returns>The <c>Results</c> of the listener after the walk.</returns>
        public static HtmlParseResults InvokeParse(Stream stream)
        {
            // characters -> lexer -> token stream -> parser
            var htmlLexer = new HTMLLexer(new AntlrInputStream(stream));
            var htmlParser = new HTMLParser(new CommonTokenStream(htmlLexer));
            var documentTree = htmlParser.htmlDocument();

            var treeWalker = new ParseTreeWalker();
            var collector = new AspNetParseTree();
            treeWalker.Walk(collector, documentTree);

            return collector.Results;
        }
Beispiel #11
0
        /// <summary>
        /// With word breaking off, a tag carrying quoted attribute values still separates the
        /// surrounding text into two fragments, and the second keeps its surrounding spaces.
        /// </summary>
        public void QuotesInTagNoWordBreak()
        {
            const string markup = "<HTML><HEAD><BODY>1st frag<P a=\"aaaa\" b=\"bbbb\"> 2nd frag </BODY></HEAD></HTML>";
            byte[] raw = Encoding.Default.GetBytes(markup);

            using (HTMLParser parser = new HTMLParser(new StreamReader(new MemoryStream(raw))))
            {
                parser.BreakWords = false;

                Assert.IsFalse(parser.Finished);
                Assert.AreEqual("1st frag", parser.ReadNextFragment());

                Assert.IsFalse(parser.Finished);
                Assert.AreEqual(" 2nd frag ", parser.ReadNextFragment());

                // The trailing read yields an empty fragment and completes the parser.
                Assert.IsFalse(parser.Finished);
                Assert.AreEqual("", parser.ReadNextFragment());
                Assert.IsTrue(parser.Finished);
            }
        }
Beispiel #12
0
        /// <summary>
        /// Checks that the parser reaches its finished state on input with surplus and malformed
        /// closing tags instead of looping forever.
        /// </summary>
        public void FinishingOnOverclosed()
        {
            // Upper bound on reads; reaching it without Finished being set counts as a hang.
            const int MaxReads = 0x1000;

            string HTML = "<HTML><HEAD><Title>The title</</</</</</</a></a></html></head></title>";

            using (HTMLParser parser = new HTMLParser(
                       new StreamReader(new MemoryStream(Encoding.Default.GetBytes(HTML)))))
            {
                for (int reads = 0; (reads < MaxReads) && (!parser.Finished); reads++)
                {
                    parser.ReadNextFragment();
                }

                // The old check compared the counter with decimal 1000 although the loop runs up to
                // 0x1000 (4096), so a parser finishing in between was reported as failing.
                if (!parser.Finished)
                {
                    Assert.Fail("The parser has failed to finish.");
                }
            }
        }
        /// <summary>
        /// Parses adverts from the URL built from <paramref name="model"/> and stores every advert
        /// whose <c>Url</c> is not yet present in <c>_context.Adverts</c>.
        /// </summary>
        /// <param name="model">Filter model; its <c>ToURLString()</c> result is the page to parse.</param>
        /// <returns>An <c>Ok</c> result carrying the URL string of the model.</returns>
        public async Task <IActionResult> AddAvitoRentAdverts([FromBody] RentURLFilterModel model)
        {
            var adverts = await HTMLParser.ParseAdvertsFromURL(model.ToURLString());

            foreach (AdvertModel advert in adverts)
            {
                // Only existence matters here. SingleOrDefault would throw as soon as the same URL
                // is stored more than once; Any simply reports that it is already there.
                if (_context.Adverts.Any(row => row.Url == advert.Url))
                {
                    continue;
                }

                _context.Adverts.Add(advert);

                // Saved per advert so that a repeated URL later in the same batch is caught by the check above.
                await _context.SaveChangesAsync();
            }

            return(Ok(model.ToURLString()));
        }
Beispiel #14
0
        /// <summary>
        /// With word breaking off, script contents must not show up as fragments: only the title
        /// and the two body texts are returned.
        /// </summary>
        public void ScriptsNoWordBreak()
        {
            const string markup = "<HTML><HEAD><Title>The title</tITLe><script>i = 0</script></HEAD><BODY>1st frag<P a=\"aaaa\" b=\"bbbb\"><script>i = 0</script> 2nd frag </BODY></HTML>";
            byte[] raw = Encoding.Default.GetBytes(markup);

            using (HTMLParser parser = new HTMLParser(new StreamReader(new MemoryStream(raw))))
            {
                parser.BreakWords = false;

                Assert.IsFalse(parser.Finished);
                Assert.AreEqual("The title", parser.ReadNextFragment());

                Assert.IsFalse(parser.Finished);
                Assert.AreEqual("1st frag", parser.ReadNextFragment());

                Assert.IsFalse(parser.Finished);
                Assert.AreEqual(" 2nd frag ", parser.ReadNextFragment());

                // The trailing read yields an empty fragment and completes the parser.
                Assert.IsFalse(parser.Finished);
                Assert.AreEqual("", parser.ReadNextFragment());
                Assert.IsTrue(parser.Finished);
            }
        }
Beispiel #15
0
        /// <summary>
        /// With word breaking off, numeric (decimal and hex, either case) and named character
        /// references must be decoded inside the returned fragments.
        /// </summary>
        public void CharEntityReferencesNoWordBreak()
        {
            const string markup = "<body><p>&#x69;&#X6E;&#x63;lude &lt;&#X6C;ist&gt;<p>inclu&#100;&#101; &quot;omniamea.h&quot;<p>#include &laquo;Kama&mdash;Sutra&raquo;</p></body>";
            byte[] raw = Encoding.Default.GetBytes(markup);

            using (HTMLParser parser = new HTMLParser(new StreamReader(new MemoryStream(raw))))
            {
                parser.BreakWords = false;

                Assert.IsFalse(parser.Finished);
                Assert.AreEqual("include <list>", parser.ReadNextFragment());

                Assert.IsFalse(parser.Finished);
                Assert.AreEqual("include \"omniamea.h\"", parser.ReadNextFragment());

                Assert.IsFalse(parser.Finished);
                Assert.AreEqual("#include «Kama—Sutra»", parser.ReadNextFragment());

                // The trailing read yields an empty fragment and completes the parser.
                Assert.IsFalse(parser.Finished);
                Assert.AreEqual("", parser.ReadNextFragment());
                Assert.IsTrue(parser.Finished);
            }
        }
Beispiel #16
0
        /// <summary>
        /// Checks that, with word breaking off, the parser reaches its finished state on input
        /// that ends inside unclosed tags instead of looping forever.
        /// </summary>
        public void FinishingOnUnclosedNoWordBreak()
        {
            // Upper bound on reads; reaching it without Finished being set counts as a hang.
            const int MaxReads = 0x1000;

            string HTML = "<HTML><HEAD><Title>The title";

            using (HTMLParser parser = new HTMLParser(
                       new StreamReader(new MemoryStream(Encoding.Default.GetBytes(HTML)))))
            {
                parser.BreakWords = false;

                for (int reads = 0; (reads < MaxReads) && (!parser.Finished); reads++)
                {
                    parser.ReadNextFragment();
                }

                // The old check compared the counter with decimal 1000 although the loop runs up to
                // 0x1000 (4096), so a parser finishing in between was reported as failing.
                if (!parser.Finished)
                {
                    Assert.Fail("The parser has failed to finish.");
                }
            }
        }
        /// <summary>
        /// Checks that a single bold, italic, underline or strikethrough element yields exactly one
        /// run with the matching formatting and with its whitespace preserved.
        /// </summary>
        public void TestHTMLParserSimpleEmphasis()
        {
            var res = HTMLParser.Parse("<b>Hello</b>").ToList();

            // Expected value goes first so a failure message reports expected/actual the right way round.
            Assert.AreEqual(1, res.Count);
            Assert.IsTrue(CheckRun(res.First(), "Hello", FontWeights.Bold, FontStyle.Normal, TextDecorations.None));

            res = HTMLParser.Parse("<i>  Hello</i>").ToList();
            Assert.AreEqual(1, res.Count);
            Assert.IsTrue(CheckRun(res.First(), "  Hello", FontWeights.Normal, FontStyle.Italic, TextDecorations.None));

            res = HTMLParser.Parse("<u>Hello  </u>").ToList();
            Assert.AreEqual(1, res.Count);
            Assert.IsTrue(CheckRun(res.First(), "Hello  ", FontWeights.Normal, FontStyle.Normal, TextDecorations.Underline));

            res = HTMLParser.Parse("<s>  Hello World </s>").ToList();
            Assert.AreEqual(1, res.Count);
            Assert.IsTrue(CheckRun(res.First(), "  Hello World ", FontWeights.Normal, FontStyle.Normal, TextDecorations.Strikethrough));
        }
Beispiel #18
0
        /*
         * protected override string CreateTableForMailing(bool html = true)
         * {
         *
         *  return base.CreateTableForMailing(html);
         * }
         */

        /// <summary>
        /// Fills <c>ListResponse</c> with one <c>LotOnlineSales</c> per <c>span</c> of class
        /// <c>teaser teaser-product</c> found in the parsed <c>lastAnswer</c>, and sets the
        /// three-column <c>tableHead</c>.
        /// </summary>
        /// <returns><c>false</c> when the base implementation returns <c>false</c>; otherwise <c>true</c>.</returns>
        protected override bool FillListResponse()
        {
            if (!base.FillListResponse())
            {
                return false;
            }

            List<Tag> teasers = new List<Tag>();

            // Collect the product teasers from every non-proto root tag.
            foreach (Tag root in HTMLParser.Parse(lastAnswer))
            {
                if (root.IsProto)
                {
                    continue;
                }

                teasers.AddRange(root.LookForChildTag("span", true, new KeyValuePair<string, string>("class", "teaser teaser-product")));
            }

            List<LotOnlineSales> lots = new List<LotOnlineSales>();

            foreach (Tag teaser in teasers)
            {
                lots.Add(new LotOnlineSales(teaser, MyRequest));
            }

            this.ListResponse = lots;

            tableHead = new string[] { "№", "Наименование", "Цена" };

            return true;
        }
Beispiel #19
0
        /// <summary>
        /// Builds the outline from an HTML document: every <c>tr</c> child of the node with
        /// <c>id="work_outline"</c> adds one entry to <c>data</c>, keyed by the text of the
        /// row's first child with the text of its last child as value.
        /// </summary>
        /// <param name="docs">HTML text handed to <c>HTMLParser.GetByHTML</c>.</param>
        public RJOutline(String docs)
        {
            HTMLParser parser = HTMLParser.GetByHTML(docs);
            NodeList rows = parser.GetFirstNode("id", "work_outline").Children;

            // Only table rows are of interest.
            rows.KeepAllNodesThatMatch(new TagNameFilter("tr"));

            for (int index = 0; index < rows.Count; index++)
            {
                INode row = rows.ElementAt(index);
                if (row == null)
                {
                    continue;
                }

                row.Children.RemoveMeaninglessNodes();

                // First child supplies the key, last child the value.
                var label = row.FirstChild.ToPlainTextStringEx().Trim();
                var content = row.LastChild.ToDividedTextString(" ").TrimAll();
                this.data.Add(label, content);
            }
        }
        /// <summary>
        /// Checks that formatted and plain text side by side are returned as separate runs, in
        /// document order, each with its own formatting and whitespace.
        /// </summary>
        public void TestHTMLParserMultiNode()
        {
            var res = HTMLParser.Parse("<b>Hello</b> World").ToList();

            // Expected value goes first so a failure message reports expected/actual the right way round.
            Assert.AreEqual(2, res.Count);
            Assert.IsTrue(CheckRun((Run)res[0], "Hello", FontWeights.Bold, FontStyle.Normal, TextDecorations.None));
            Assert.IsTrue(CheckRun((Run)res[1], " World", FontWeights.Normal, FontStyle.Normal, TextDecorations.None));

            res = HTMLParser.Parse("<b>Hello</b> World <i>!!!</i>").ToList();
            Assert.AreEqual(3, res.Count);
            Assert.IsTrue(CheckRun((Run)res[0], "Hello", FontWeights.Bold, FontStyle.Normal, TextDecorations.None));
            Assert.IsTrue(CheckRun((Run)res[1], " World ", FontWeights.Normal, FontStyle.Normal, TextDecorations.None));
            Assert.IsTrue(CheckRun((Run)res[2], "!!!", FontWeights.Normal, FontStyle.Italic, TextDecorations.None));

            res = HTMLParser.Parse("Welcome <b>Hello</b> World").ToList();
            Assert.AreEqual(3, res.Count);
            Assert.IsTrue(CheckRun((Run)res[0], "Welcome ", FontWeights.Normal, FontStyle.Normal, TextDecorations.None));
            Assert.IsTrue(CheckRun((Run)res[1], "Hello", FontWeights.Bold, FontStyle.Normal, TextDecorations.None));
            Assert.IsTrue(CheckRun((Run)res[2], " World", FontWeights.Normal, FontStyle.Normal, TextDecorations.None));
        }
        /// <summary>
        /// Refreshes the attributes of <c>SelectedElement</c> from
        /// <c>SelectedElementAttributesString</c> and rebuilds <c>Document</c> as a DOCTYPE line
        /// followed by the text of every element in <c>Elements</c>.
        /// </summary>
        public void BuildDocument()
        {
            if (this.SelectedElementAttributesString != string.Empty)
            {
                List <string> properties = HTMLParser.GetElementAttributes(this.SelectedElementAttributesString);
                this.SelectedElement.Attributes = new List <string>(properties);
            }
            else
            {
                this.SelectedElement.Attributes = new List <string>();
            }

            // A builder avoids re-copying the whole accumulated markup for every element, which the
            // previous string-in-a-loop concatenation did.
            var markup = new System.Text.StringBuilder("<!DOCTYPE html>");

            foreach (ElementModel element in this.Elements)
            {
                // Appends element.ToString(), the same text the interpolation produced.
                markup.Append((object)element);
            }
            this.Document = markup.ToString();
        }
Beispiel #22
0
        /// <summary>
        /// Downloads the song page, resolves the media URL from it and starts playback, switching
        /// the play button and progress ring while the work is in flight.
        /// </summary>
        /// <param name="web_url">Page address passed to <c>SaavnPageRequest.MakeRequest</c>.</param>
        /// <param name="pid">Identifier passed, together with the page HTML, to <c>HTMLParser.GetEncryptedURL</c>.</param>
        /// <remarks>
        /// NOTE(review): the method is <c>async void</c>, so an exception thrown after the first
        /// await cannot be observed by the caller — confirm it is only used as a UI action.
        /// </remarks>
        public async void PlayAsync(string web_url, string pid)
        {
            // Show the busy state while the page is being fetched.
            btnPlay.Content             = "Pause";
            btnPlay.Visibility          = Visibility.Hidden;
            PlayProgressRing.Visibility = Visibility.Visible;
            PlayerSlider.Visibility     = Visibility.Visible;

            SaavnPageRequest pageRequest = new SaavnPageRequest();

            System.Diagnostics.Debug.Write("Fetching HTML : " + web_url);
            string html = await pageRequest.MakeRequest(web_url);

            string enc_media_url = HTMLParser.GetEncryptedURL(html, pid);
            string mediaUrl      = Decrypto.GetDESDecryptedUrl(enc_media_url);

            if (mediaUrl == null)
            {
                // No playable URL: leave the button visible but disabled.
                btnPlay.Visibility          = Visibility.Visible;
                PlayProgressRing.Visibility = Visibility.Hidden;
                btnPlay.Content             = "Unavailable";
                btnPlay.IsEnabled           = false;
                return;
            }

            if (songPlayer == null)
            {
                songPlayer             = new MediaPlayer();
                songPlayer.MediaEnded += songPlayer_MediaEnded;
            }

            // The player is reused across calls. Removing the handler before adding it keeps exactly
            // one subscription; previously every call added another one.
            songPlayer.MediaOpened -= songPlayer_MediaOpened;
            songPlayer.MediaOpened += songPlayer_MediaOpened;

            Uri uri = new Uri(mediaUrl);

            songPlayer.Open(uri);
            songPlayer.Play();
            songPlayer.Volume = 1;

            btnPlay.Visibility          = Visibility.Visible;
            PlayProgressRing.Visibility = Visibility.Hidden;
        }
Beispiel #23
0
        /// <summary>
        /// Activates a Sterling engine, registers and purges the weapon database under
        /// <c>weaponDB/</c>, saves every weapon produced by <c>ProcessArticle</c> for the battle
        /// rifle list page, then queries the stored weapons.
        /// </summary>
        public static void test()
        {
            SterlingEngine sterling = new SterlingEngine();
            sterling.Activate();

            var database = sterling.SterlingDatabase.RegisterDatabase<WeaponDBInstance>(
                new FileSystemDriver("weaponDB/"));
            database.Purge();

            HTMLParser articleParser = new HTMLParser(@"/wiki/List_of_battle_rifles", @"https://en.wikipedia.org");
            var weapons = articleParser.ProcessArticle().ToList();

            foreach (var weapon in weapons)
            {
                database.Save<Weapon>(weapon);
            }

            // The query result is not used further here.
            var stored = database.Query<Weapon, string>();
            //Weapon weapon = dbInstance.Load<Weapon>("AK-72");
        }
        /// <summary>
        /// Lets the user pick an HTML file, parses it into <c>Elements</c>, selects the first
        /// element, rebuilds the document and records the file path.
        /// Nothing happens when the dialog does not return <c>true</c>.
        /// </summary>
        public void Open()
        {
            OpenFileDialog dialog = new OpenFileDialog();
            dialog.Filter = "HTML Files|*.html";

            if (dialog.ShowDialog() != true)
            {
                return;
            }

            string contents;
            using (StreamReader reader = new StreamReader(dialog.FileName))
            {
                contents = reader.ReadToEnd();
            }

            this.Elements        = new ObservableCollection<ElementModel>(HTMLParser.Parse(contents));
            this.SelectedElement = this.Elements[0];
            this.BuildDocument();

            // Path and browser availability are only updated once the document has been rebuilt.
            this.FilePath = dialog.FileName;
            this.OpenInBrowserAvailable = true;
        }
        /// <summary>
        /// Echoes a small HTML sample to the console, parses it and runs an <c>HTMLVisitor</c>
        /// over the resulting document context. Any exception is written to the console.
        /// </summary>
        /// <remarks>
        /// NOTE(review): because exceptions are caught and only printed, a failure does not
        /// propagate to the caller — confirm that is intended if this is run as a test.
        /// </remarks>
        public void VisitorTest()
        {
            try
            {
                string input = "<html><title>blahg blah blah</title><p>para graph</p><p>second para</p></html>";

                Console.WriteLine(input);

                // The sample is passed to the input stream directly; the former StringBuilder copy
                // only reproduced the same string.
                var inputStream       = new AntlrInputStream(input);
                var htmlLexer         = new HTMLLexer(inputStream);
                var commonTokenStream = new CommonTokenStream(htmlLexer);
                var htmlParser        = new HTMLParser(commonTokenStream);
                var htmlContext       = htmlParser.htmlDocument();
                var visitor           = new HTMLVisitor();
                visitor.Visit(htmlContext);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Error: " + ex);
            }
        }
Beispiel #26
0
        /// <summary>
        /// Reads fragments from the parser until it reports <c>Finished</c> and returns them
        /// concatenated in reading order, then verifies that one further read throws
        /// <see cref="EndOfStreamException"/>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the previous comment said the fragments are separated by spaces with a
        /// trailing space, but the code appends them without any separator — confirm which is intended.
        /// </remarks>
        /// <param name="parser">Parser to drain.</param>
        /// <returns>All fragments read before the parser finished, joined together.</returns>
        /// <exception cref="InvalidOperationException">
        /// The read after completion returned a fragment instead of throwing.
        /// </exception>
        private string ReadAllFragments(HTMLParser parser)
        {
            StringBuilder collected = new StringBuilder();

            while (!parser.Finished)
            {
                string fragment = parser.ReadNextFragment();
                collected.Append(fragment);
            }

            try
            {
                // Reading past the end must throw; a returned value of any kind is an error,
                // and only the message depends on whether it was empty.
                string extra = parser.ReadNextFragment();
                string complaint = extra.Length != 0
                    ? "Parser must return an empty fragment having read the whole text (if there's a tag after the last returned meaningful string)."
                    : "Parser must throw an exception if reading beyond end of stream.";
                throw new InvalidOperationException(complaint);
            }
            catch (EndOfStreamException)
            {
                // Expected outcome.
            }

            return collected.ToString();
        }
Beispiel #27
0
        /// <summary>
        /// Fills <c>ListResponse</c> with one <c>TorgASV</c> per <c>li</c> of class
        /// <c>component-list__item lot-catalog__list-item</c> found in the parsed <c>lastAnswer</c>.
        /// </summary>
        /// <returns><c>false</c> when the base implementation returns <c>false</c>; otherwise <c>true</c>.</returns>
        protected override bool FillListResponse()
        {
            if (!base.FillListResponse())
            {
                return false;
            }

            List<Tag> listItems = new List<Tag>();

            // Collect the catalogue items from every non-proto root tag.
            foreach (Tag root in HTMLParser.Parse(lastAnswer))
            {
                if (root.IsProto)
                {
                    continue;
                }

                listItems.AddRange(root.LookForChildTag("li", true, new KeyValuePair<string, string>("class", "component-list__item lot-catalog__list-item")));
            }

            List<TorgASV> lots = new List<TorgASV>();

            foreach (Tag listItem in listItems)
            {
                lots.Add(new TorgASV(listItem, MyRequest));
            }

            this.ListResponse = lots;

            return true;
        }
Beispiel #28
0
 /// <summary>
 /// Restarts reading of <c>bookmarks.html</c>: creates a new parser over the first file
 /// returned by <c>IOTools.GetFiles</c>, registers the tag handlers and clears the nesting
 /// state. When no file is returned, <c>_parser</c> is set to <c>null</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): a previously assigned <c>_parser</c> is replaced without being closed here —
 /// confirm whether it needs disposing.
 /// </remarks>
 public void Reset()
 {
     FileInfo[] candidates = IOTools.GetFiles(_path, "bookmarks.html");
     bool hasFile = candidates != null && candidates.Length != 0;

     if (!hasFile)
     {
         _parser = null;
         return;
     }

     _parser            = new HTMLParser(new StreamReader(candidates[0].FullName), true);
     _parser.BreakWords = false;

     // Opening and closing handlers for the bookmark structure tags.
     _parser.AddTagHandler("dl", new HTMLParser.TagHandler(OnDLTag));
     _parser.AddTagHandler("/dl", new HTMLParser.TagHandler(OnDLClosedTag));
     _parser.AddTagHandler("h3", new HTMLParser.TagHandler(OnHeaderTag));
     _parser.AddTagHandler("/h3", new HTMLParser.TagHandler(OnHeaderClosedTag));
     _parser.AddTagHandler("a", new HTMLParser.TagHandler(OnLinkTag));
     _parser.AddTagHandler("/a", new HTMLParser.TagHandler(OnLinkClosedTag));
     _parser.AddTagHandler("dd", new HTMLParser.TagHandler(OnDescriptionTag));

     // Start outside of any folder, header, link or description.
     _level         = 0;
     _inDescription = false;
     _inLink        = false;
     _inHeader      = false;
 }
Beispiel #29
0
        /// <summary>
        /// Timer callback for the ICE reconciliation. When the downloader switch is on, the two
        /// downloaded ICE files are parsed, merged into one table and stored in the
        /// "ICE Raw Collated" queue. The timer is then stopped and, when the recon switch is on,
        /// every row of the "Comparisons" queue is processed and all queues are written to CSV.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void reconSchedulerMethod(object sender, EventArgs e)
        {
            if (ICEDownloaderSwitch.Value == 1)
            {
                var a = new HTMLParser(@"C:\Users\Akhand\Documents\Visual Studio 2015\Projects\ExchangeRecon\ExchangeRecon\AppFiles\Download Data\ICE_CBNA.xls").Process().ToList();
                var b = new HTMLParser(@"C:\Users\Akhand\Documents\Visual Studio 2015\Projects\ExchangeRecon\ExchangeRecon\AppFiles\Download Data\ICE_CGML.xls").Process().ToList();

                // Tables from the second file are appended to those of the first.
                foreach (DataTable t in b)
                {
                    a.Add(t);
                }
                Console.WriteLine("Num of tables extracted : " + a.Count);

                var data = new DataTable("ICE Data");
                MergeTables(a, data);
                AddPKtoICE(data);
                ICEqueues["ICE Raw Collated"].QData = data;
                ICEqueues["ICE Raw Collated"].ToCSV();
            }
            Console.WriteLine("Chal gaya hu");
            ICEReconTimer.Stop();
            if (ReconSwitch.Value == 1)
            {
                ICEReconStatus.Content = "Running";
                // Add the Recon Method
                ReconQueue comparisons = ICEqueues["Comparisons"];
                foreach (DataRow comp in comparisons.QData.Rows)
                {
                    queryTrials(comp, ICEqueues);
                }
                Console.WriteLine("Analysis done");
                foreach (KeyValuePair <string, ReconQueue> q in ICEqueues)
                {
                    q.Value.ToCSV();
                }
                DateTime now = TimeZoneInfo.ConvertTimeFromUtc(System.DateTime.UtcNow, TimeZoneInfo.Local);

                // "MMM" is the abbreviated month (lower-case "m" means minutes) and "HH" is the
                // 24-hour clock, which is unambiguous without an AM/PM designator.
                ICEReconStatus.Content = "Last executed at " + now.ToString("dd-MMM-yy HH:mm:ss");
            }
        }
Beispiel #30
0
        /// <summary>
        /// Downloads the page behind <c>source.Url</c> and parses it into products, using the HTML
        /// parser when <c>source.ParserId</c> is 0 and the JSON parser otherwise.
        /// </summary>
        /// <param name="source">Supplies the URL, the parser id and the market for the parser input.</param>
        /// <returns>The products returned by the selected parser.</returns>
        private static async Task<IEnumerable<Product>> ParseAsync(ParserSource source)
        {
            // Fetch the raw response first; both parsers are constructed from it.
            string address = source.Url;
            var downloader = new HttpDownloader(address, null, null);
            string body = await downloader.GetPageAsync();

            IClassParser<ParserInput, Product> selected;

            if (source.ParserId != 0)
            {
                selected = new JsonParser<ParserInput, Product>(body);
            }
            else
            {
                selected = new HTMLParser<ParserInput, Product>(body);
            }

            ParserInput parserInput = new ParserInput(source, source.Market);

            return selected.Parse(parserInput);
        }
Beispiel #31
0
        /// <summary>
        /// Parses the stream as an HTML document with lexer and parser error listeners removed,
        /// then walks the tree with an <c>HTMLKeywordParserListener</c> that forwards every
        /// emitted string to <paramref name="callback"/>.
        /// </summary>
        /// <param name="stream">Input wrapped in an <c>AntlrInputStream</c>.</param>
        /// <param name="callback">Invoked with each string the listener emits.</param>
        public void Parse(Stream stream, Action<string> callback)
        {
            var htmlLexer = new HTMLLexer(new AntlrInputStream(stream));
            htmlLexer.RemoveErrorListeners();

            var htmlParser = new HTMLParser(new CommonTokenStream(htmlLexer));
            htmlParser.RemoveErrorListeners();

            var document = htmlParser.htmlDocument();

            // The listener takes its limits and inclusion flags from this instance.
            var keywordListener = new HTMLKeywordParserListener()
            {
                Emit            = (keyword) => callback(keyword),
                MinimumLength   = MinimumLength,
                MaximumLength   = MaximumLength,
                IncludeChardata = IncludeChardata,
                IncludeComments = IncludeComments
            };

            var treeWalker = new ParseTreeWalker();
            treeWalker.Walk(keywordListener, document);
        }
Beispiel #32
0
        /// <summary>
        /// Fills <c>ListResponse</c> with one <c>ZakupkiGov</c> per <c>div</c> of class
        /// <c>row no-gutters registry-entry__form mr-0</c> found in the parsed <c>lastAnswer</c>.
        /// </summary>
        /// <returns><c>false</c> when the base implementation returns <c>false</c>; otherwise <c>true</c>.</returns>
        protected override bool FillListResponse()
        {
            if (!base.FillListResponse())
            {
                return false;
            }

            List<Tag> registryEntries = new List<Tag>();

            // Collect the registry entries from every non-proto root tag.
            foreach (Tag root in HTMLParser.Parse(lastAnswer))
            {
                if (root.IsProto)
                {
                    continue;
                }

                registryEntries.AddRange(root.LookForChildTag("div", true, new KeyValuePair<string, string>("class", "row no-gutters registry-entry__form mr-0")));
            }

            List<ZakupkiGov> purchases = new List<ZakupkiGov>();

            foreach (Tag registryEntry in registryEntries)
            {
                purchases.Add(new ZakupkiGov(registryEntry, MyRequest));
            }

            this.ListResponse = purchases;

            return true;
        }
Beispiel #33
0
        /// <summary>
        /// Stores the text <c>name=value</c> for an assigned attribute. For values longer than two
        /// characters, the first and last character are kept as they are and the text between them
        /// is XML-escaped.
        /// </summary>
        /// <param name="ctx">Attribute context. Nothing is stored when its name or value node, or the text recorded for either, is missing or empty.</param>
        public override void ExitAssignedAttr(HTMLParser.AssignedAttrContext ctx)
        {
            var nameNode = ctx.htmlAttributeName();
            if (nameNode == null)
            {
                return;
            }

            var valueNode = ctx.htmlAttributeValue();
            if (valueNode == null)
            {
                return;
            }

            var name = MyTreeProperty.Get(nameNode);
            if (string.IsNullOrEmpty(name))
            {
                return;
            }

            var value = MyTreeProperty.Get(valueNode);
            if (string.IsNullOrEmpty(value))
            {
                return;
            }

            //TODO swap attributes name & values here
            if (value.Length > 2)
            {
                // Keep the outer characters untouched and escape only what lies between them.
                var leading = value.Substring(0, 1);
                var trailing = value.Substring(value.Length - 1, 1);
                var escapedInner = value.Substring(1, value.Length - 2).EscapeString(EscapeStringType.XML);
                FilteredPut(ctx, name + "=" + leading + escapedInner + trailing);
            }
            else
            {
                // Too short to have anything between two outer characters; stored verbatim.
                FilteredPut(ctx, name + "=" + value);
            }
        }
Beispiel #34
0
        /// <summary>
        /// Stores the combined text of a content node: first the recorded text of all child
        /// elements, then the recorded text of all character-data children.
        /// </summary>
        /// <param name="ctx">Content context whose children are combined.</param>
        public override void ExitHtmlContent(HTMLParser.HtmlContentContext ctx)
        {
            var combined = new StringBuilder();

            // Element texts come first; null or empty ones are skipped.
            foreach (var element in ctx.htmlElement())
            {
                var elementText = MyTreeProperty.Get(element);
                if (!string.IsNullOrEmpty(elementText))
                {
                    combined.Append(elementText);
                }
            }

            // Character data follows; whitespace-only pieces are skipped as well.
            foreach (var charData in ctx.htmlChardata())
            {
                var charDataText = MyTreeProperty.Get(charData);
                if (!string.IsNullOrWhiteSpace(charDataText))
                {
                    combined.Append(charDataText);
                }
            }

            FilteredPut(ctx, combined.ToString());
        }
Beispiel #35
0
        /// <summary>
        /// Records the body of a style block, with its closing terminator removed and surrounding
        /// whitespace trimmed, in <c>_results.StyleBodies</c>.
        /// </summary>
        /// <remarks>
        /// The text comes from the STYLE_BODY token, or from STYLE_SHORT_BODY when the former is
        /// absent. Nothing is recorded when the token is missing or its text is null or whitespace.
        /// </remarks>
        /// <param name="ctx">The style context being exited.</param>
        public override void ExitStyle(HTMLParser.StyleContext ctx)
        {
            const string SHORT_BODY = "</>";
            const string BODY = "</style>";
            var styleBody = ctx.STYLE_BODY() ?? ctx.STYLE_SHORT_BODY();
            var styleBodyText = styleBody?.GetText();
            if (string.IsNullOrWhiteSpace(styleBodyText))
                return;

            // Ordinal comparison: the terminators are literal markup, and a culture-sensitive
            // match may succeed on text whose matched suffix is not exactly the literal's length,
            // which would make the Substring below cut the wrong characters.
            if (styleBodyText.EndsWith(SHORT_BODY, System.StringComparison.Ordinal))
                styleBodyText = styleBodyText.Substring(0, styleBodyText.Length - SHORT_BODY.Length);
            if (styleBodyText.EndsWith(BODY, System.StringComparison.Ordinal))
                styleBodyText = styleBodyText.Substring(0, styleBodyText.Length - BODY.Length);

            styleBodyText = styleBodyText.Trim();
            _results.StyleBodies.Add(styleBodyText);
        }
Beispiel #36
0
        /// <summary>
        /// Handles an attribute that has a name but no value: records the name once in
        /// <c>_results.EmptyAttrs</c> and emits it as <c>name='true'</c> via <c>FilteredPut</c>.
        /// </summary>
        /// <remarks>
        /// Nothing happens when the name context is missing or has no stored text.
        /// </remarks>
        /// <param name="ctx">The empty-attribute context being exited.</param>
        public override void ExitEmptyAttr(HTMLParser.EmptyAttrContext ctx)
        {
            var nameCtx = ctx.htmlAttributeName();
            if (nameCtx == null)
                return;

            var name = MyTreeProperty.Get(nameCtx);
            if (string.IsNullOrEmpty(name))
                return;

            //TODO swap attributes name with name-value pair here
            var alreadyRecorded = _results.EmptyAttrs.Any(known => known == name);
            if (!alreadyRecorded)
            {
                _results.EmptyAttrs.Add(name);
            }

            FilteredPut(ctx, name + "='true'");
        }
Beispiel #37
0
        /// <summary>
        /// Passes the raw text of the ATTVALUE_VALUE token to <c>FilteredPut</c> for this
        /// context; does nothing when the token is absent.
        /// </summary>
        /// <param name="ctx">The attribute-value context being exited.</param>
        public override void ExitHtmlAttributeValue(HTMLParser.HtmlAttributeValueContext ctx)
        {
            var valueToken = ctx.ATTVALUE_VALUE();
            if (valueToken != null)
            {
                FilteredPut(ctx, valueToken.GetText());
            }
        }
Beispiel #38
0
        /// <summary>
        /// Passes the raw text of the TAG_NAME token to <c>FilteredPut</c> for this context;
        /// does nothing when the token is absent.
        /// </summary>
        /// <param name="ctx">The attribute-name context being exited.</param>
        public override void ExitHtmlAttributeName(HTMLParser.HtmlAttributeNameContext ctx)
        {
            var nameToken = ctx.TAG_NAME();
            if (nameToken != null)
            {
                FilteredPut(ctx, nameToken.GetText());
            }
        }
Beispiel #39
0
 /// <summary>
 /// Adds the text of the SCRIPTLET token to <c>_results.ScriptLets</c>, unless the token is
 /// missing or its text is null or whitespace.
 /// </summary>
 /// <param name="ctx">The scriptlet context being exited.</param>
 public override void ExitScriptlet(HTMLParser.ScriptletContext ctx)
 {
     var text = ctx.SCRIPTLET()?.GetText();
     if (!string.IsNullOrWhiteSpace(text))
     {
         _results.ScriptLets.Add(text);
     }
 }
Beispiel #40
0
 /// <summary>
 /// Trims and XML-escapes the text of a character-data node, records it in
 /// <c>_results.CharData</c> and passes it to <c>FilteredPut</c> for this context.
 /// </summary>
 /// <remarks>
 /// Text that is null or whitespace is ignored entirely.
 /// </remarks>
 /// <param name="context">The character-data context being exited.</param>
 public override void ExitHtmlChardata(HTMLParser.HtmlChardataContext context)
 {
     var rawText = context.GetText();
     if (string.IsNullOrWhiteSpace(rawText))
     {
         return;
     }

     var escapedText = rawText.Trim().EscapeString(EscapeStringType.XML);
     _results.CharData.Add(escapedText);
     FilteredPut(context, escapedText);
 }
Beispiel #41
0
 /// <summary>
 /// Adds the text of the HTML_COMMENT token to <c>_results.HtmlComments</c>, unless the token
 /// is missing or its text is null or whitespace.
 /// </summary>
 /// <param name="context">The comment context being exited.</param>
 public override void ExitHtmlComment(HTMLParser.HtmlCommentContext context)
 {
     var commentText = context.HTML_COMMENT()?.GetText();
     if (!string.IsNullOrWhiteSpace(commentText))
     {
         _results.HtmlComments.Add(commentText);
     }
 }
Beispiel #42
0
        /// <summary>
        /// Records the body of a script block, with its closing terminator removed and
        /// surrounding whitespace trimmed, in <c>_results.ScriptBodies</c>.
        /// </summary>
        /// <remarks>
        /// The text comes from the SCRIPT_BODY token, or from SCRIPT_SHORT_BODY when the former is
        /// absent. Nothing is recorded when the token is missing or its text is null or whitespace.
        /// </remarks>
        /// <param name="ctx">The script context being exited.</param>
        public override void ExitScript(HTMLParser.ScriptContext ctx)
        {
            const string SHORT_BODY = "</>";
            const string BODY = "</script>";
            var scriptBodyNode = ctx.SCRIPT_BODY() ?? ctx.SCRIPT_SHORT_BODY();
            var scriptBodyText = scriptBodyNode?.GetText();
            if (string.IsNullOrWhiteSpace(scriptBodyText))
                return;

            // Ordinal comparison: the terminators are literal markup, and a culture-sensitive
            // match may succeed on text whose matched suffix is not exactly the literal's length,
            // which would make the Substring below cut the wrong characters.
            if (scriptBodyText.EndsWith(SHORT_BODY, System.StringComparison.Ordinal))
                scriptBodyText = scriptBodyText.Substring(0, scriptBodyText.Length - SHORT_BODY.Length);
            if (scriptBodyText.EndsWith(BODY, System.StringComparison.Ordinal))
                scriptBodyText = scriptBodyText.Substring(0, scriptBodyText.Length - BODY.Length);

            scriptBodyText = scriptBodyText.Trim();
            _results.ScriptBodies.Add(scriptBodyText);
        }
Beispiel #43
0
        /// <summary>
        /// Rebuilds the markup of an element that has an opening and a closing tag, in the form
        /// <c>&lt;name attr...&gt;content&lt;/name&gt;</c> followed by a newline, and passes it
        /// to <c>FilteredPut</c> for this context.
        /// </summary>
        /// <remarks>
        /// The tag name is taken from the first tag-name child; when it is missing or empty
        /// nothing is emitted. As a side effect the tag name and each distinct attribute text
        /// seen for it are registered in <c>_results.Tags2Attrs</c>.
        /// </remarks>
        /// <param name="ctx">The pair-element context being exited.</param>
        public override void ExitPairElement(HTMLParser.PairElementContext ctx)
        {
            var tagNameText = ctx.htmlTagName(0)?.TAG_NAME()?.GetText();
            if (string.IsNullOrEmpty(tagNameText))
            {
                return;
            }

            // Make sure the tag has an entry before its attributes are collected.
            if (!_results.Tags2Attrs.ContainsKey(tagNameText))
            {
                _results.Tags2Attrs.Add(tagNameText, new List<string>());
            }
            var knownAttrs = _results.Tags2Attrs[tagNameText];

            // The opening tag is the tag name followed by every non-empty attribute text.
            var openingTagParts = new List<string> { tagNameText };
            foreach (var attrCtx in ctx.htmlAttribute())
            {
                var attrText = MyTreeProperty.Get(attrCtx);
                if (string.IsNullOrEmpty(attrText))
                {
                    continue;
                }

                // Register the attribute text for this tag only the first time it is seen.
                if (!knownAttrs.Any(a => a == attrText))
                {
                    knownAttrs.Add(attrText);
                }
                openingTagParts.Add(attrText);
            }

            var markup = new StringBuilder();
            markup.Append("<").Append(string.Join(" ", openingTagParts)).Append(">");

            var contentCtx = ctx.htmlContent();
            var contentsText = contentCtx == null ? null : MyTreeProperty.Get(contentCtx);
            if (!string.IsNullOrEmpty(contentsText))
            {
                markup.Append(contentsText);
            }

            markup.Append("</").Append(tagNameText).Append(">").Append("\n");

            FilteredPut(ctx, markup.ToString());
        }
Beispiel #44
0
        /// <summary>
        /// Concatenates the stored markup of every top-level elements node, skipping those with
        /// no stored text, and assigns the result to <c>_results.HtmlOnly</c>.
        /// </summary>
        /// <param name="ctx">The document context being exited.</param>
        public override void ExitHtmlDocument(HTMLParser.HtmlDocumentContext ctx)
        {
            var markupPieces = ctx.htmlElements()
                .Select(elemsCtx => MyTreeProperty.Get(elemsCtx))
                .Where(markup => !string.IsNullOrEmpty(markup));

            _results.HtmlOnly = string.Concat(markupPieces);
        }
Beispiel #45
0
 /// <summary>
 /// Forwards the text stored for the contained element node to <c>FilteredPut</c> for this
 /// elements context.
 /// </summary>
 /// <param name="ctx">The elements context being exited.</param>
 public override void ExitHtmlElements(HTMLParser.HtmlElementsContext ctx)
 {
     FilteredPut(ctx, MyTreeProperty.Get(ctx.htmlElement()));
 }
Beispiel #46
0
 /// <summary>
 /// Adds the text of the DTD token to <c>_results.DtdNodes</c>, unless the token is missing
 /// or its text is null or whitespace.
 /// </summary>
 /// <param name="context">The DTD context being exited.</param>
 public override void ExitDtd(HTMLParser.DtdContext context)
 {
     var dtdText = context.DTD()?.GetText();
     if (!string.IsNullOrWhiteSpace(dtdText))
     {
         _results.DtdNodes.Add(dtdText);
     }
 }
Beispiel #47
-1
        /// <summary>
        /// Parses the given stream as an HTML document and returns the results collected by an
        /// <c>AspNetParseTree</c> listener while walking the resulting parse tree.
        /// </summary>
        /// <param name="stream">The stream to read the markup from.</param>
        /// <returns>The <c>Results</c> of the listener after the walk has completed.</returns>
        public static HtmlParseResults InvokeParse(Stream stream)
        {
            // Lexer -> token stream -> parser pipeline over the input stream.
            var charStream = new AntlrInputStream(stream);
            var tokenStream = new CommonTokenStream(new HTMLLexer(charStream));
            var htmlParser = new HTMLParser(tokenStream);

            var documentTree = htmlParser.htmlDocument();

            var treeWalker = new ParseTreeWalker();
            var listener = new AspNetParseTree();
            treeWalker.Walk(listener, documentTree);

            return listener.Results;
        }