Пример #1
0
        /// <summary>
        /// Crawl, query, and parse data from a SQL database.
        /// </summary>
        /// <param name="dbSettings">Database settings.</param>
        /// <param name="query">Query to execute.</param>
        /// <returns>Parse result.</returns>
        public ParseResult ParseFromQuery(DbSettings dbSettings, string query)
        {
            if (dbSettings == null)
            {
                throw new ArgumentNullException(nameof(dbSettings));
            }
            if (String.IsNullOrEmpty(query))
            {
                throw new ArgumentNullException(nameof(query));
            }

            ParseResult ret = new ParseResult();

            ret.Sql = new ParseResult.SqlParseResult();

            SqlCrawler  crawler = new SqlCrawler(dbSettings, query);
            CrawlResult cr      = crawler.Get();

            if (!cr.Success)
            {
                ret.Time.End = DateTime.UtcNow;
                return(ret);
            }

            return(ProcessSourceContent(cr.DataTable));
        }
Пример #2
0
        static void SqlCrawler()
        {
            string query = Common.InputString("Query:", null, true);

            if (String.IsNullOrEmpty(query))
            {
                return;
            }

            DbSettings db = new DbSettings(
                (DbType)(Enum.Parse(typeof(DbType), Common.InputString("DB type:", "Mysql", false))),
                Common.InputString("Hostname:", "localhost", false),
                Common.InputInteger("Port:", 3306, true, false),
                Common.InputString("Username:"******"root", false),
                Common.InputString("Password:"******"password", false),
                Common.InputString("Instance:", null, true),
                Common.InputString("Database name:", "dbname", false));

            SqlCrawler  sc = new SqlCrawler(db, query);
            CrawlResult cr = sc.Get();

            Console.WriteLine("Success    : " + cr.Success);
            Console.WriteLine("Start time : " + cr.Time.Start.ToString());
            Console.WriteLine("End time   : " + cr.Time.End.ToString());
            Console.WriteLine("Total ms   : " + cr.Time.TotalMs.ToString() + "ms");
            Console.WriteLine("Data       : ");
            if (cr.DataTable != null)
            {
                Console.WriteLine(Common.SerializeJson(Common.DataTableToListDynamic(cr.DataTable), true));
            }
            else
            {
                Console.WriteLine("  (null)");
            }
        }
Пример #3
0
        static void SqlCrawler()
        {
            string query = Common.InputString("Query:", null, true);

            if (String.IsNullOrEmpty(query))
            {
                return;
            }

            DbSettings db = new DbSettings(
                (DbType)(Enum.Parse(typeof(DbType), Common.InputString("DB type:", "Mysql", false))),
                Common.InputString("Hostname:", "localhost", false),
                Common.InputInteger("Port:", 3306, true, false),
                Common.InputString("Username:"******"root", false),
                Common.InputString("Password:"******"password", false),
                Common.InputString("Instance:", null, true),
                Common.InputString("Database name:", "dbname", false));

            SqlCrawler  sc = new SqlCrawler(db, query);
            CrawlResult cr = sc.Get();

            if (_OutputType.Equals("console"))
            {
                EnumerateCrawlResult(cr);
            }
            ParseCrawlResult(cr);
        }
Пример #4
0
        private static async Task PostParse(RequestMetadata md)
        {
            string header = "[Komodo.Server] " + md.Http.Request.Source.IpAddress + ":" + md.Http.Request.Source.Port + " PostParse ";

            if (String.IsNullOrEmpty(md.Params.Type))
            {
                _Logging.Warn(header + "no document type supplied");
                md.Http.Response.StatusCode  = 400;
                md.Http.Response.ContentType = "application/json";
                await md.Http.Response.Send(new ErrorResponse(400, "Supply 'type' [json/xml/html/sql/text] in querystring.", null, null).ToJson(true));

                return;
            }

            byte[]      data        = null;
            CrawlResult crawlResult = null;
            ParseResult parseResult = null;

            HttpCrawler httpCrawler = null;
            FileCrawler fileCrawler = null;

            if (!String.IsNullOrEmpty(md.Params.Url))
            {
                #region Crawl-URL

                switch (md.Params.Type.ToLower())
                {
                case "html":
                    httpCrawler = new HttpCrawler(md.Params.Url);
                    crawlResult = httpCrawler.Get();
                    if (!crawlResult.Success)
                    {
                        _Logging.Warn(header + "failed to crawl URL " + md.Params.Url);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to crawl supplied URL.", null, crawlResult).ToJson(true));

                        return;
                    }

                    data = crawlResult.Data;
                    HtmlParser htmlParser = new HtmlParser();
                    parseResult = htmlParser.ParseBytes(data);
                    if (!parseResult.Success)
                    {
                        _Logging.Warn(header + "failed to parse data from URL " + md.Params.Url);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to parse data from supplied URL.", null, parseResult).ToJson(true));

                        return;
                    }
                    break;

                case "json":
                    httpCrawler = new HttpCrawler(md.Params.Url);
                    crawlResult = httpCrawler.Get();
                    if (!crawlResult.Success)
                    {
                        _Logging.Warn(header + "failed to crawl URL " + md.Params.Url);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to crawl supplied URL.", null, crawlResult).ToJson(true));

                        return;
                    }

                    data = crawlResult.Data;
                    JsonParser jsonParser = new JsonParser();
                    parseResult = jsonParser.ParseBytes(data);
                    if (!parseResult.Success)
                    {
                        _Logging.Warn(header + "failed to parse data from URL " + md.Params.Url);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to parse data from supplied URL.", null, parseResult).ToJson(true));

                        return;
                    }
                    break;

                case "text":
                    httpCrawler = new HttpCrawler(md.Params.Url);
                    crawlResult = httpCrawler.Get();
                    if (!crawlResult.Success)
                    {
                        _Logging.Warn(header + "failed to crawl URL " + md.Params.Url);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to crawl supplied URL.", null, crawlResult).ToJson(true));

                        return;
                    }

                    data = crawlResult.Data;
                    TextParser textParser = new TextParser();
                    parseResult = textParser.ParseBytes(data);
                    if (!parseResult.Success)
                    {
                        _Logging.Warn(header + "failed to parse data from URL " + md.Params.Url);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to parse data from supplied URL.", null, parseResult).ToJson(true));

                        return;
                    }
                    break;

                case "xml":
                    httpCrawler = new HttpCrawler(md.Params.Url);
                    crawlResult = httpCrawler.Get();
                    if (!crawlResult.Success)
                    {
                        _Logging.Warn(header + "failed to crawl URL " + md.Params.Url);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to crawl supplied URL.", null, crawlResult).ToJson(true));

                        return;
                    }

                    data = crawlResult.Data;
                    XmlParser xmlParser = new XmlParser();
                    parseResult = xmlParser.ParseBytes(data);
                    if (!parseResult.Success)
                    {
                        _Logging.Warn(header + "failed to parse data from URL " + md.Params.Url);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to parse data from supplied URL.", null, parseResult).ToJson(true));

                        return;
                    }
                    break;

                default:
                    _Logging.Warn(header + "invalid document type for processing via URL " + md.Params.Url);
                    md.Http.Response.StatusCode  = 400;
                    md.Http.Response.ContentType = "application/json";
                    await md.Http.Response.Send(new ErrorResponse(400, "Invalid document type.", null, null).ToJson(true));

                    return;
                }

                md.Http.Response.StatusCode  = 200;
                md.Http.Response.ContentType = "application/json";
                await md.Http.Response.Send(Common.SerializeJson(parseResult, md.Params.Pretty));

                return;

                #endregion
            }
            else if (!String.IsNullOrEmpty(md.Params.Filename))
            {
                #region Filename

                switch (md.Params.Type.ToLower())
                {
                case "html":
                    fileCrawler = new FileCrawler(md.Params.Filename);
                    crawlResult = fileCrawler.Get();
                    if (!crawlResult.Success)
                    {
                        _Logging.Warn(header + "failed to crawl filename " + md.Params.Filename);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to crawl supplied filename.", null, crawlResult).ToJson(true));

                        return;
                    }

                    data = crawlResult.Data;
                    HtmlParser htmlParser = new HtmlParser();
                    parseResult = htmlParser.ParseBytes(data);
                    if (!parseResult.Success)
                    {
                        _Logging.Warn(header + "failed to parse data from file " + md.Params.Filename);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to parse data from supplied filename.", null, parseResult).ToJson(true));

                        return;
                    }
                    break;

                case "json":
                    fileCrawler = new FileCrawler(md.Params.Filename);
                    crawlResult = fileCrawler.Get();
                    if (!crawlResult.Success)
                    {
                        _Logging.Warn(header + "failed to crawl filename " + md.Params.Filename);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to crawl supplied filename.", null, crawlResult).ToJson(true));

                        return;
                    }

                    data = crawlResult.Data;
                    JsonParser jsonParser = new JsonParser();
                    parseResult = jsonParser.ParseBytes(data);
                    if (!parseResult.Success)
                    {
                        _Logging.Warn(header + "failed to parse data from file " + md.Params.Filename);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to parse data from supplied filename.", null, parseResult).ToJson(true));

                        return;
                    }
                    break;

                case "text":
                    fileCrawler = new FileCrawler(md.Params.Filename);
                    crawlResult = fileCrawler.Get();
                    if (!crawlResult.Success)
                    {
                        _Logging.Warn(header + "failed to crawl filename " + md.Params.Filename);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to crawl supplied filename.", null, crawlResult).ToJson(true));

                        return;
                    }

                    data = crawlResult.Data;
                    TextParser textParser = new TextParser();
                    parseResult = textParser.ParseBytes(data);
                    if (!parseResult.Success)
                    {
                        _Logging.Warn(header + "failed to parse data from file " + md.Params.Filename);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to parse data from supplied filename.", null, parseResult).ToJson(true));

                        return;
                    }
                    break;

                case "xml":
                    fileCrawler = new FileCrawler(md.Params.Filename);
                    crawlResult = fileCrawler.Get();
                    if (!crawlResult.Success)
                    {
                        _Logging.Warn(header + "failed to crawl filename " + md.Params.Filename);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to crawl supplied filename.", null, crawlResult).ToJson(true));

                        return;
                    }

                    data = crawlResult.Data;
                    XmlParser xmlParser = new XmlParser();
                    parseResult = xmlParser.ParseBytes(data);
                    if (!parseResult.Success)
                    {
                        _Logging.Warn(header + "failed to parse data from file " + md.Params.Filename);
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to parse data from supplied filename.", null, parseResult).ToJson(true));

                        return;
                    }
                    break;

                default:
                    _Logging.Warn(header + "invalid document type for processing via filename " + md.Params.Filename);
                    md.Http.Response.StatusCode  = 400;
                    md.Http.Response.ContentType = "application/json";
                    await md.Http.Response.Send(new ErrorResponse(400, "Invalid document type.", null, null).ToJson(true));

                    return;
                }

                md.Http.Response.StatusCode  = 200;
                md.Http.Response.ContentType = "application/json";
                await md.Http.Response.Send(Common.SerializeJson(parseResult, md.Params.Pretty));

                return;

                #endregion
            }
            else if (md.Params.Type.ToLower().Equals("sql"))
            {
                #region Query

                if (md.Http.Request.Data == null || md.Http.Request.ContentLength < 1)
                {
                    _Logging.Warn(header + "no query found in payload");
                    md.Http.Response.StatusCode  = 400;
                    md.Http.Response.ContentType = "application/json";
                    await md.Http.Response.Send(new ErrorResponse(400, "No SQL query in request payload.", null, null).ToJson(true));

                    return;
                }

                DbSettings dbSettings = new DbSettings(md.Params.DbType, md.Params.DbServer, md.Params.DbPort, md.Params.DbUser, md.Params.DbPass, md.Params.DbInstance, md.Params.DbName);
                SqlCrawler sqlCrawler = new SqlCrawler(dbSettings, Encoding.UTF8.GetString(Common.StreamToBytes(md.Http.Request.Data)));
                crawlResult = sqlCrawler.Get();
                if (!crawlResult.Success)
                {
                    _Logging.Warn(header + "failed to crawl database " + md.Params.DbName);
                    md.Http.Response.StatusCode  = 500;
                    md.Http.Response.ContentType = "application/json";
                    await md.Http.Response.Send(new ErrorResponse(400, "Failed to crawl specified database.", null, crawlResult).ToJson(true));

                    return;
                }

                SqlParser sqlParser = new SqlParser();
                parseResult = sqlParser.Parse(crawlResult.DataTable);
                if (!parseResult.Success)
                {
                    _Logging.Warn(header + "failed to parse data from database " + md.Params.DbName);
                    md.Http.Response.StatusCode  = 500;
                    md.Http.Response.ContentType = "application/json";
                    await md.Http.Response.Send(new ErrorResponse(400, "Failed to parse data from specified database.", null, parseResult).ToJson(true));

                    return;
                }

                md.Http.Response.StatusCode  = 200;
                md.Http.Response.ContentType = "application/json";
                await md.Http.Response.Send(Common.SerializeJson(parseResult, md.Params.Pretty));

                return;

                #endregion
            }
            else if (md.Http.Request.Data != null && md.Http.Request.ContentLength > 0)
            {
                #region Supplied-Data

                data = Common.StreamToBytes(md.Http.Request.Data);

                switch (md.Params.Type.ToLower())
                {
                case "html":
                    HtmlParser htmlParser = new HtmlParser();
                    parseResult = htmlParser.ParseBytes(data);
                    if (!parseResult.Success)
                    {
                        _Logging.Warn(header + "failed to parse data from supplied data");
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to parse data from supplied data.", null, parseResult).ToJson(true));

                        return;
                    }
                    break;

                case "json":
                    JsonParser jsonParser = new JsonParser();
                    parseResult = jsonParser.ParseBytes(data);
                    if (!parseResult.Success)
                    {
                        _Logging.Warn(header + "failed to parse data from supplied data");
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to parse data from supplied data.", null, parseResult).ToJson(true));

                        return;
                    }
                    break;

                case "text":
                    TextParser textParser = new TextParser();
                    parseResult = textParser.ParseBytes(data);
                    if (!parseResult.Success)
                    {
                        _Logging.Warn(header + "failed to parse data from supplied data");
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to parse data from supplied data.", null, parseResult).ToJson(true));

                        return;
                    }
                    break;

                case "xml":
                    XmlParser xmlParser = new XmlParser();
                    parseResult = xmlParser.ParseBytes(data);
                    if (!parseResult.Success)
                    {
                        _Logging.Warn(header + "failed to parse data from supplied data");
                        md.Http.Response.StatusCode  = 500;
                        md.Http.Response.ContentType = "application/json";
                        await md.Http.Response.Send(new ErrorResponse(400, "Failed to parse data from supplied data.", null, parseResult).ToJson(true));

                        return;
                    }
                    break;

                default:
                    _Logging.Warn(header + "invalid document type for processing via data");
                    md.Http.Response.StatusCode  = 400;
                    md.Http.Response.ContentType = "application/json";
                    await md.Http.Response.Send(new ErrorResponse(400, "Invalid document type supplied.", null, null).ToJson(true));

                    return;
                }

                md.Http.Response.StatusCode  = 200;
                md.Http.Response.ContentType = "application/json";
                await md.Http.Response.Send(Common.SerializeJson(parseResult, md.Params.Pretty));

                return;

                #endregion
            }
            else
            {
                #region Unknown

                _Logging.Warn(header + "unable to derive data source from request");
                md.Http.Response.StatusCode  = 400;
                md.Http.Response.ContentType = "application/json";
                await md.Http.Response.Send(new ErrorResponse(400, "Unable to derive data source from request.", null, null).ToJson(true));

                return;

                #endregion
            }
        }