/// <summary>
/// Crawl and parse data from a file.
/// </summary>
/// <param name="filename">Path and filename.</param>
/// <returns>Parse result; on crawl failure a result with Success unset and Time.End stamped.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="filename"/> is null or empty.</exception>
public ParseResult ParseFromFile(string filename)
{
    if (String.IsNullOrEmpty(filename)) throw new ArgumentNullException(nameof(filename));

    FileCrawler crawler = new FileCrawler(filename);
    CrawlResult cr = crawler.Get();

    if (!cr.Success)
    {
        // PERF/CLARITY FIX: the failure result is only built on the failure path now.
        // Previously it was allocated up front and then discarded whenever the crawl
        // succeeded, because the success path returns ProcessSourceContent's result.
        ParseResult ret = new ParseResult();
        ret.Json = new ParseResult.JsonParseResult();
        ret.Time.End = DateTime.UtcNow;
        return ret;
    }

    // Crawled bytes are decoded as UTF-8 text before parsing.
    string sourceContent = Encoding.UTF8.GetString(cr.Data);
    return ProcessSourceContent(sourceContent);
}
/// <summary>
/// Refreshes the crawler records of every enabled parameter combination for the
/// given crawler and date: records missing from storage are inserted, existing
/// ones get their Url/FileName/Stamp refreshed, and each is queued for download.
/// </summary>
/// <param name="fileCrawler">Crawler whose records are synchronized.</param>
/// <param name="date">Base date the records are generated for.</param>
public async Task UpdateAsync(FileCrawler fileCrawler, DateTime date)
{
    List<ParameterCombination> combinations = await ParameterCombinationRepository.GetListAsync(fileCrawler.Id, true);

    foreach (ParameterCombination combination in combinations)
    {
        List<FileCrawlerRecord> stored = await FileCrawlerRecordRepository.GetListAsync(combination.Id, date);

        foreach (FileCrawlerRecord expected in CreateFileCrawlerRecordList(fileCrawler, date, combination))
        {
            FileCrawlerRecord target = stored.FirstOrDefault(x => x.SourceTime == expected.SourceTime && x.TargetTime == expected.TargetTime);

            if (target == null)
            {
                // No record for this source/target time pair yet: persist the new one.
                target = expected;
                await FileCrawlerRecordRepository.InsertAsync(target);
            }
            else
            {
                // Pair already stored: refresh only the mutable fields.
                target.Url = expected.Url;
                target.FileName = expected.FileName;
                target.Stamp = expected.Stamp;
                await FileCrawlerRecordRepository.UpdateAsync(target);
            }

            // Queue the (inserted or updated) record for the actual download.
            await EnqueueAsync(target.Id, target.SourceTime, fileCrawler.DelaySeconds);
        }
    }
}
/// <summary>
/// Entry point: gathers every PNG under .\Sprites\ below the working directory,
/// feeds them to FileProcessor.CreateFile("ImageData", ...), reports success in
/// green, and waits for a key press before exiting.
/// </summary>
static void Main(string[] args)
{
    Log("Starting Processing Images...\n");

    string spritesDirectory = System.IO.Directory.GetCurrentDirectory() + @"\Sprites\";
    string[] spriteFiles = FileCrawler.GetAllFilesByExt("png", spritesDirectory);

    bool created = FileProcessor.CreateFile("ImageData", spritesDirectory, spriteFiles);
    if (created)
    {
        Log(" Processing Images Complete!\n", System.ConsoleColor.Green);
    }

    // Keep the console window open until the user presses a key.
    System.Console.ReadKey(true);
}
/// <summary>
/// Crawl the specified file from the filesystem.
/// </summary>
/// <param name="filename">Filename.</param>
/// <returns>Result.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="filename"/> is null or empty.</exception>
public CrawlResult CrawlFile(string filename)
{
    if (String.IsNullOrEmpty(filename))
    {
        throw new ArgumentNullException(nameof(filename));
    }

    // A crawler instance is single-use; delegate the read and hand back its result.
    return new FileCrawler(filename).Get();
}
/// <summary>
/// Creates and stores the crawler records for every enabled parameter combination
/// of the given crawler on the given date, then queues each record for download.
/// </summary>
/// <param name="fileCrawler">Crawler whose records are created.</param>
/// <param name="date">Base date the records are generated for.</param>
public async Task InsertAsync(FileCrawler fileCrawler, DateTime date)
{
    List<ParameterCombination> parameterCombinations = await ParameterCombinationRepository.GetListAsync(fileCrawler.Id, true);

    foreach (ParameterCombination parameterCombination in parameterCombinations)
    {
        List<FileCrawlerRecord> list = CreateFileCrawlerRecordList(fileCrawler, date, parameterCombination);
        await FileCrawlerRecordRepository.InsertManyAsync(list);

        // BUG FIX: the original used list.ForEach(async x => await EnqueueAsync(...)),
        // which compiles each call to an async-void lambda: ForEach returns before the
        // enqueues complete and any exception is unobservable. A plain foreach awaits
        // each enqueue and surfaces failures to the caller.
        foreach (FileCrawlerRecord record in list)
        {
            await EnqueueAsync(record.Id, record.SourceTime, fileCrawler.DelaySeconds);
        }
    }
}
/// <summary>
/// Filters a set of solutions: drops null entries, strips test files from each
/// remaining solution, then removes documents matching the blacklist file.
/// </summary>
/// <param name="solutions">Solutions to filter.</param>
/// <param name="pathBlackList">Path to the blacklist definition; empty for none.</param>
/// <returns>Lazily evaluated solutions without test files or blacklisted documents.</returns>
private static IEnumerable<Solution> FilterSolutions(IEnumerable<Solution> solutions, string pathBlackList = "")
{
    var crawler = new FileCrawler();
    var handler = new WorkspaceHandler();

    // Materialize once so null entries can be dropped up front.
    var nonNullSolutions = solutions.Where(solution => solution != null).ToList();

    // The ignore list is resolved eagerly; the two projections below stay deferred.
    var ignoredFiles = crawler.GetIgnoredFiles(pathBlackList);

    return nonNullSolutions
           .Select(solution => handler.RemoveTestFiles(solution))
           .Select(solution => handler.RemoveBlackListedDocuments(solution, ignoredFiles));
}
/// <summary>
/// Runs a file crawl over the given sub-directory and writes results to a CSV
/// file. Returns without side effects when parameter validation fails.
/// </summary>
/// <param name="rootDir">Root directory of the crawl.</param>
/// <param name="crawlSubDir">Sub-directory to crawl.</param>
/// <param name="csvPath">Destination CSV path.</param>
/// <param name="extensions">File extensions to include.</param>
public static void Crawl(string rootDir, string crawlSubDir, string csvPath, string extensions)
{
    var crawler = new FileCrawler();
    crawler.Initialize(rootDir, crawlSubDir, csvPath, extensions);

    // Bail out early on an invalid configuration.
    if (!crawler.TestParameters())
    {
        return;
    }

    crawler.Crawl();
}
/// <summary>
/// Expands the crawler's JSON parameter definition into every concrete
/// combination of parameter values (cartesian product across all parameters).
/// </summary>
/// <param name="fileCrawler">Crawler whose Parameters JSON is expanded.</param>
/// <returns>One dictionary per combination; a single empty dictionary when the definition has no parameters.</returns>
protected List<Dictionary<string, string>> GetParameterCombinations(FileCrawler fileCrawler)
{
    // Seed with one empty combination so the product starts from the identity.
    List<Dictionary<string, string>> combinations = new List<Dictionary<string, string>>
    {
        new Dictionary<string, string>()
    };

    List<FileCrawlerParameter> parameters = JsonSerializer.Deserialize<List<FileCrawlerParameter>>(fileCrawler.Parameters);

    // Fold each parameter's value set into the running cartesian product.
    foreach (FileCrawlerParameter parameter in parameters)
    {
        combinations = CartesianProduct(combinations, GetParameterCombinations(parameter));
    }

    return combinations;
}
/// <summary>
/// Streams line-count statistics for every source file the crawler finds under
/// the given directory.
/// </summary>
/// <param name="directory">Root directory to scan.</param>
/// <returns>One <see cref="LinesOfCodeStat"/> per discovered source file (lazily produced).</returns>
public IEnumerable<LinesOfCodeStat> GetLinesOfCodeStats(string directory)
{
    var crawler = new FileCrawler(new DirectoryReader());
    var reader = new FileReader();
    var counter = new LineCounter();

    foreach (string path in crawler.FindSourceFileNames(directory))
    {
        string[] content = reader.ReadLinesFromFile(path);

        // Total lines vs. lines that actually contain code.
        yield return new LinesOfCodeStat(
            path,
            counter.CountLines(content),
            counter.CountLinesOfCode(content));
    }
}
/// <summary>
/// Returns the crawler records matching the crawler code, parameter set, and
/// date in <paramref name="input"/>; an empty list when no matching crawler or
/// parameter combination exists.
/// </summary>
/// <param name="input">Lookup criteria (crawler code, parameters, date).</param>
/// <returns>Mapped record DTOs, possibly empty.</returns>
public async Task<List<FileCrawlerRecordDto>> GetListAsync(GetFileCrawlerRecordListDto input)
{
    // Guard: unknown crawler code.
    FileCrawler fileCrawler = await FileCrawlerRepository.FindAsync(x => x.Code == input.FileCrawlerCode);
    if (fileCrawler == null)
    {
        return new List<FileCrawlerRecordDto>();
    }

    // Guard: no stored combination matches the requested parameter set.
    List<ParameterCombination> combinations = await ParameterCombinationRepository.GetListAsync(fileCrawler.Id);
    ParameterCombination match = combinations.FirstOrDefault(x => Equal(x.Parameters, input.Parameters));
    if (match == null)
    {
        return new List<FileCrawlerRecordDto>();
    }

    List<FileCrawlerRecord> records = await FileCrawlerRecordRepository.GetListAsync(match.Id, input.Date);
    return ObjectMapper.Map<List<FileCrawlerRecord>, List<FileCrawlerRecordDto>>(records);
}
/// <summary>
/// Interactive console command: prompts for a filename, crawls it, enumerates
/// the crawl result when console output is configured, then parses the result.
/// Returns silently when no filename is entered.
/// </summary>
static void FileCrawler()
{
    string filename = Common.InputString("Filename:", null, true);
    if (String.IsNullOrEmpty(filename))
    {
        return;
    }

    FileCrawler fc = new FileCrawler(filename);
    CrawlResult cr = fc.Get();

    // BUG FIX: the instance call _OutputType.Equals("console") throws a
    // NullReferenceException when _OutputType has not been set; the static,
    // null-safe String.Equals keeps the same ordinal comparison otherwise.
    if (String.Equals(_OutputType, "console"))
    {
        EnumerateCrawlResult(cr);
    }

    ParseCrawlResult(cr);
}
/// <summary>
/// Replaces the specified parameters.
/// </summary>
/// <param name="parameters">The parameters.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="parameters"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when <paramref name="parameters"/>.FileCrawlerParameters is null.</exception>
public void Replace(ReplaceParameters parameters)
{
    // Validate:
    if (parameters == null)
    {
        // IDIOM: nameof instead of a magic string, so renames stay refactor-safe.
        throw new ArgumentNullException(nameof(parameters));
    }
    if (parameters.FileCrawlerParameters == null)
    {
        throw new ArgumentException("parameters.FileCrawlerParameters is null");
    }
    if (parameters.FileCrawlerParameters.PathInfoList.Count == 0)
    {
        this.NotifyMessage(null, "Error: No files to crawl.");
        return;
    }
    if (parameters.ReplacePatterns.Count == 0)
    {
        this.NotifyMessage(null, "Error: No replace patterns specified.");
        return;
    }

    // Reset tags:
    AllAvailableSmartTags.Reset();

    // Wrap patterns:
    this.wrappedPatterns = parameters.ReplacePatterns
        .Select(p => new ReplacePatternWrapper(p))
        .ToList();

    // Process. ROBUSTNESS FIX: the observer registration and the pattern cache
    // are now undone in a finally block, so a crawl that throws cannot leave
    // this instance subscribed to the crawler or holding stale patterns.
    this.crawler = new FileCrawler();
    this.crawler.AddObserver(this);
    try
    {
        this.crawler.Crawl(parameters.FileCrawlerParameters);
    }
    finally
    {
        this.crawler.RemoveObserver(this);

        // Clean up:
        this.wrappedPatterns = null;
    }
}
/// <summary>
/// Interactive console command: prompts for a filename, crawls it, and dumps
/// the crawl result (status, timings, metadata, and raw data) to the console.
/// Returns silently when no filename is entered.
/// </summary>
static void FileCrawler()
{
    string filename = Common.InputString("Filename:", null, true);
    if (String.IsNullOrEmpty(filename))
    {
        return;
    }

    CrawlResult result = new FileCrawler(filename).Get();

    Console.WriteLine("Success : " + result.Success);
    Console.WriteLine("Start time : " + result.Time.Start.ToString());
    Console.WriteLine("End time : " + result.Time.End.ToString());
    Console.WriteLine("Total ms : " + result.Time.TotalMs.ToString() + "ms");
    Console.WriteLine("Content length : " + result.ContentLength + " bytes");
    Console.WriteLine("Metadata : " + Common.SerializeJson(result.Metadata, false));
    // The crawled payload is assumed printable UTF-8 here.
    Console.WriteLine("Data :" + Environment.NewLine + Encoding.UTF8.GetString(result.Data));
}
/// <summary>
/// FindSourceFileNames should recurse through sub-directories and return every
/// *.cs file found in the tree.
/// </summary>
public void Should_Find_Source_File_Names()
{
    // Arrange: one .cs file in the root and one in a single sub-directory.
    FileCrawler crawler = CreateFileCrawler();
    _subDirectoryReader.GetFiles("TestData", "*.cs").Returns(new string[] { @"TestData\TestData.cs" });
    _subDirectoryReader.GetDirectories("TestData").Returns(new string[] { @"TestData\TestDataSubDirectory" });
    _subDirectoryReader.GetFiles(@"TestData\TestDataSubDirectory", "*.cs").Returns(new string[] { @"TestData\TestDataSubDirectory\SubTestData.cs" });
    _subDirectoryReader.GetDirectories(@"TestData\TestDataSubDirectory").Returns(Array.Empty<string>());

    // Act.
    IEnumerable<string> actual = crawler.FindSourceFileNames("TestData");

    // Assert: both files are reported.
    var expected = new List<string>()
    {
        @"TestData\TestData.cs",
        @"TestData\TestDataSubDirectory\SubTestData.cs"
    };
    actual.Should().BeEquivalentTo(expected);
}
/// <summary>
/// Expands the crawler's parameter definition into all concrete combinations
/// and persists one <see cref="ParameterCombination"/> per combination. A
/// combination-specific "Periods" entry overrides the crawler's default periods
/// and is stripped before the remaining parameters are serialized.
/// </summary>
/// <param name="fileCrawler">Crawler whose parameter combinations are stored.</param>
public async Task InsertAsync(FileCrawler fileCrawler)
{
    List<Dictionary<string, string>> list = GetParameterCombinations(fileCrawler);

    foreach (Dictionary<string, string> dic in list)
    {
        ParameterCombination parameterCombination = new ParameterCombination(GuidGenerator.Create(), fileCrawler.Id);

        // IDIOM: Remove(key, out value) does the lookup, fetch, and removal in a
        // single pass instead of ContainsKey + indexer + Remove (three lookups).
        if (dic.Remove("Periods", out string periods))
        {
            parameterCombination.Periods = periods;
        }
        else
        {
            parameterCombination.Periods = fileCrawler.Periods;
        }

        // "Periods" is gone by now, so only real parameters end up serialized.
        parameterCombination.Parameters = JsonSerializer.Serialize(dic);
        parameterCombination.Enabled = true;

        await ParameterCombinationRepository.InsertAsync(parameterCombination);
    }
}
/// <summary>Initializes a new instance of the <see cref="WebInterface" /> class.</summary>
/// <param name="mdb">The MusicDataBase instance.</param>
/// <param name="authTables">The authentication tables.</param>
/// <param name="player">The player.</param>
/// <exception cref="ArgumentNullException">mdb, authTables or player is null.</exception>
public WebInterface(MusicDataBase mdb, AuthTables authTables, IPlayer player)
{
    // BUG FIX: validate arguments before using them. Previously mdb was passed
    // to the FileCrawler/ArtCrawler constructors before its null check ran, so a
    // null mdb could surface as a NullReferenceException instead of the
    // documented ArgumentNullException.
    this.mdb = mdb ?? throw new ArgumentNullException(nameof(mdb));
    this.authTables = authTables ?? throw new ArgumentNullException(nameof(authTables));
    this.player = player ?? throw new ArgumentNullException(nameof(player));

    FileCrawler = new FileCrawler(mdb);
    ArtCrawler = new ArtCrawler(mdb);

    EmptyImage = WebImage.FromFile(FileSystem.Combine(mdb.WebFolder, "images", "empty.png"), mdb.CacheFolder);

    // Album covers fall back to a generic no-image picture when no cd-??.png exists.
    this.LogInfo("Loading album replacement images...");
    ReplaceAlbumImages = Directory.GetFiles(FileSystem.Combine(mdb.WebFolder, "images"), "cd-??.png").Select(f => WebImage.FromFile(f, mdb.CacheFolder)).ToArray();
    if (ReplaceAlbumImages.Length == 0)
    {
        ReplaceAlbumImages = new WebImage[] { WebImage.FromFile(FileSystem.Combine(mdb.WebFolder, "images", "no-image.png"), mdb.CacheFolder) };
    }

    // Artist portraits fall back likewise when no artist-??.png exists.
    this.LogInfo("Loading artist replacement images...");
    ReplaceArtistImages = Directory.GetFiles(FileSystem.Combine(mdb.WebFolder, "images"), "artist-??.png").Select(f => WebImage.FromFile(f, mdb.CacheFolder)).ToArray();
    if (ReplaceArtistImages.Length == 0)
    {
        ReplaceArtistImages = new WebImage[] { WebImage.FromFile(FileSystem.Combine(mdb.WebFolder, "images", "no-image.png"), mdb.CacheFolder) };
    }
}
/// <summary>
/// Synchronizes the stored parameter combinations with the crawler's current
/// parameter definition: combinations no longer produced stay disabled, known
/// combinations are re-enabled with refreshed periods, and new combinations are
/// inserted.
/// </summary>
/// <param name="fileCrawler">Crawler whose combinations are synchronized.</param>
public async Task UpdateAsync(FileCrawler fileCrawler)
{
    List<ParameterCombination> parameterCombinationList = await ParameterCombinationRepository.GetListAsync(fileCrawler.Id);

    // Disable everything first; combinations still produced below get re-enabled.
    parameterCombinationList.ForEach(x => x.Enabled = false);

    List<Dictionary<string, string>> list = GetParameterCombinations(fileCrawler);
    foreach (Dictionary<string, string> dic in list)
    {
        // IDIOM: Remove(key, out value) replaces the original ContainsKey +
        // indexer + Remove sequence (three dictionary lookups) with one.
        if (!dic.Remove("Periods", out string periods))
        {
            periods = fileCrawler.Periods;
        }

        ParameterCombination parameterCombination = parameterCombinationList.FirstOrDefault(x => Equal(x, dic));
        if (parameterCombination == null)
        {
            // Unknown combination: create and persist it immediately.
            parameterCombination = new ParameterCombination(GuidGenerator.Create(), fileCrawler.Id);
            parameterCombination.Periods = periods;
            parameterCombination.Parameters = JsonSerializer.Serialize(dic);
            parameterCombination.Enabled = true;
            await ParameterCombinationRepository.InsertAsync(parameterCombination);
        }
        else
        {
            // Known combination: refresh its periods and re-enable it.
            parameterCombination.Periods = periods;
            parameterCombination.Enabled = true;
        }
    }

    // Persist the enable/disable and period changes of the pre-existing rows.
    await ParameterCombinationRepository.UpdateManyAsync(parameterCombinationList);
}
/// <summary>
/// Seeds the built-in file crawler definitions (surface weather charts and
/// several forecast products from image.nmc.cn) together with their parameter
/// combinations. Each crawler is only inserted when no crawler with the same
/// code exists yet, so re-running the seeder is a no-op for existing data.
/// </summary>
/// <param name="context">Data seed context supplied by the seeding framework (not read here).</param>
public async Task SeedAsync(DataSeedContext context)
{
    #region 000000 天气图
    // Crawler "000000": archived weather analysis charts ("天气图" = weather chart).
    if (!await FileCrawlerRepository.AnyAsync(x => x.Code == "000000"))
    {
        FileCrawler fileCrawler = new FileCrawler(GuidGenerator.Create(), "000000", "天气图", FileCrawlerType.Archive)
        {
            // 5400 s = 90 min; presumably the crawl delay relative to the product time — confirm.
            DelaySeconds = 5400,
            UrlDateTimeKind = DateTimeKind.Utc,
            // Placeholders {0}-{3} are filled from the parameter-combination values;
            // see CreateFileCrawlerRecordList for how the argument array is built.
            UrlFormat = "http://image.nmc.cn/product/{3:yyyy/MM/dd}/WESA/SEVP_NMC_WESA_SFER_{1}_{0}_{2}_P9_{3:yyyyMMddHH}0000000.jpg",
            FileNameFormat = "SEVP_NMC_WESA_SFER_{1}_{0}_{2}_P9_{3:yyyyMMddHH}0000000.jpg",
            StampFormat = "{3:MM/dd HH:00}",
            // start=8, end=20, interval=12 — presumably hours of day (08 and 20); confirm against FileCrawlerPeriod.
            Periods = "[{\"type\":2,\"start\":8,\"end\":20,\"interval\":12}]",
            // Parameter tree: Area (China / Asia-Europe / Northern Hemisphere) x chart Type x pressure Level.
            Parameters = "[{\"code\":\"Area\",\"name\":\"区域\",\"parameterValues\":[{\"code\":\"ACWP\",\"name\":\"中国\",\"parameters\":[{\"code\":\"Type\",\"name\":\"类型\",\"parameterValues\":[{\"code\":\"EGH\",\"name\":\"基本天气分析\",\"parameters\":[],\"periods\":[]},{\"code\":\"ESPCT\",\"name\":\"叠加卫星云图\",\"parameters\":[],\"periods\":[]},{\"code\":\"ESPBT\",\"name\":\"叠加雷达拼图\",\"parameters\":[],\"periods\":[]}]}],\"periods\":[]},{\"code\":\"ACHN\",\"name\":\"亚欧\",\"parameters\":[{\"code\":\"Type\",\"name\":\"类型\",\"parameterValues\":[{\"code\":\"EGH\",\"name\":\"基本天气分析\",\"parameters\":[],\"periods\":[]},{\"code\":\"ESPCT\",\"name\":\"叠加卫星云图\",\"parameters\":[],\"periods\":[]}]}],\"periods\":[]},{\"code\":\"ANHE\",\"name\":\"北半球\",\"parameters\":[{\"code\":\"Type\",\"name\":\"类型\",\"parameterValues\":[{\"code\":\"EGH\",\"name\":\"基本天气分析\",\"parameters\":[],\"periods\":[]}]}],\"periods\":[]}]},{\"code\":\"Level\",\"name\":\"层次\",\"parameterValues\":[{\"code\":\"L00\",\"name\":\"地面\",\"parameters\":[],\"periods\":[{\"type\":2,\"start\":2,\"end\":23,\"interval\":3}]},{\"code\":\"L92\",\"name\":\"925hPa\",\"parameters\":[],\"periods\":[]},{\"code\":\"L85\",\"name\":\"850hPa\",\"parameters\":[],\"periods\":[]},{\"code\":\"L70\",\"name\":\"700hPa\",\"parameters\":[],\"periods\":[]},{\"code\":\"L50\",\"name\":\"500hPa\",\"parameters\":[],\"periods\":[]},{\"code\":\"L20\",\"name\":\"200hPa\",\"parameters\":[],\"periods\":[]},{\"code\":\"L10\",\"name\":\"100hPa\",\"parameters\":[],\"periods\":[]}]}]"
        };
        await FileCrawlerRepository.InsertAsync(fileCrawler);
        // Pre-compute and store the parameter combinations for the new crawler.
        await ParameterCombinationDomainService.InsertAsync(fileCrawler);
    }
    #endregion
    #region 000100 降水量预报
    // Crawler "000100": 24h-step precipitation forecasts ("降水量预报" = precipitation forecast).
    if (!await FileCrawlerRepository.AnyAsync(x => x.Code == "000100"))
    {
        FileCrawler fileCrawler = new FileCrawler(GuidGenerator.Create(), "000100", "降水量预报", FileCrawlerType.Forecast)
        {
            // Negative delay (-3000 s = -50 min) — presumably crawls ahead of the nominal time; confirm.
            DelaySeconds = -3000,
            UrlDateTimeKind = DateTimeKind.Utc,
            UrlFormat = "http://image.nmc.cn/product/{1:yyyy/MM/dd}/STFC/SEVP_NMC_STFC_SFER_ER24_ACHN_L88_P9_{1:yyyyMMddHH}00{6:D3}00.JPG",
            FileNameFormat = "SEVP_NMC_STFC_SFER_ER24_ACHN_L88_P9_{1:yyyyMMddHH}00{6:D3}00.JPG",
            // "{6}小时" = "{6} hours" (forecast lead time label).
            StampFormat = "{6}小时",
            // Lead times 24..168 h in 24 h steps.
            Periods = "[{\"type\":2,\"start\":24,\"end\":168,\"interval\":24}]",
            // Single parameter: TimeOffset ("起报时间" = forecast base time) 08 or 20.
            Parameters = "[{\"code\":\"TimeOffset\",\"name\":\"起报时间\",\"parameterValues\":[{\"code\":\"8\",\"name\":\"08\",\"parameters\":[],\"periods\":[]},{\"code\":\"20\",\"name\":\"20\",\"parameters\":[],\"periods\":[]}]}]"
        };
        await FileCrawlerRepository.InsertAsync(fileCrawler);
        await ParameterCombinationDomainService.InsertAsync(fileCrawler);
    }
    #endregion
    #region 000101 6小时降水量预报
    // Crawler "000101": 6-hourly precipitation forecasts ("6小时降水量预报").
    if (!await FileCrawlerRepository.AnyAsync(x => x.Code == "000101"))
    {
        FileCrawler fileCrawler = new FileCrawler(GuidGenerator.Create(), "000101", "6小时降水量预报", FileCrawlerType.Forecast)
        {
            DelaySeconds = -3000,
            UrlDateTimeKind = DateTimeKind.Utc,
            UrlFormat = "http://image.nmc.cn/product/{1:yyyy/MM/dd}/STFC/SEVP_NMC_STFC_SFER_ER6T{6:D2}_ACHN_L88_P9_{1:yyyyMMddHH}00{6:D3}06.JPG",
            FileNameFormat = "SEVP_NMC_STFC_SFER_ER6T{6:D2}_ACHN_L88_P9_{1:yyyyMMddHH}00{6:D3}06.JPG",
            StampFormat = "{6}小时",
            // Lead times 6..24 h in 6 h steps.
            Periods = "[{\"type\":2,\"start\":6,\"end\":24,\"interval\":6}]",
            Parameters = "[{\"code\":\"TimeOffset\",\"name\":\"起报时间\",\"parameterValues\":[{\"code\":\"8\",\"name\":\"08\",\"parameters\":[],\"periods\":[]},{\"code\":\"20\",\"name\":\"20\",\"parameters\":[],\"periods\":[]}]}]"
        };
        await FileCrawlerRepository.InsertAsync(fileCrawler);
        await ParameterCombinationDomainService.InsertAsync(fileCrawler);
    }
    #endregion
    #region 000102 气温预报
    // Crawler "000102": temperature forecasts ("气温预报"); note Local (not UTC) URL time kind.
    if (!await FileCrawlerRepository.AnyAsync(x => x.Code == "000102"))
    {
        FileCrawler fileCrawler = new FileCrawler(GuidGenerator.Create(), "000102", "气温预报", FileCrawlerType.Forecast)
        {
            DelaySeconds = -3000,
            UrlDateTimeKind = DateTimeKind.Local,
            UrlFormat = "http://image.nmc.cn/product/{2:yyyy/MM/dd}/RFFC/SEVP_NMC_RFFC_SNWFD_{0}_ACHN_L88_P9_{2:yyyyMMddHH}00{7:D3}12.jpg",
            FileNameFormat = "SEVP_NMC_RFFC_SNWFD_{0}_ACHN_L88_P9_{2:yyyyMMddHH}00{7:D3}12.jpg",
            StampFormat = "{7}小时",
            Periods = "[{\"type\":2,\"start\":24,\"end\":168,\"interval\":24}]",
            // Parameters: Type (max "最高气温" / min "最低气温" temperature) x TimeOffset (08 / 20).
            Parameters = "[{\"code\":\"Type\",\"name\":\"类型\",\"parameterValues\":[{\"code\":\"ETM\",\"name\":\"最高气温\",\"parameters\":[],\"periods\":[]},{\"code\":\"ETN\",\"name\":\"最低气温\",\"parameters\":[],\"periods\":[]}]},{\"code\":\"TimeOffset\",\"name\":\"起报时间\",\"parameterValues\":[{\"code\":\"8\",\"name\":\"08\",\"parameters\":[],\"periods\":[]},{\"code\":\"20\",\"name\":\"20\",\"parameters\":[],\"periods\":[]}]}]"
        };
        await FileCrawlerRepository.InsertAsync(fileCrawler);
        await ParameterCombinationDomainService.InsertAsync(fileCrawler);
    }
    #endregion
}
/// <summary>
/// Builds the crawl records for one parameter combination on a given date.
/// Archive crawlers produce one record per period step (source time == target
/// time); forecast crawlers produce one record per forecast lead time around a
/// single base ("source") time derived from the "TimeOffset" parameter.
/// NOTE(review): the format arguments passed to string.Format come from
/// dic.Values.ToArray(), so this relies on Dictionary preserving insertion
/// order — an implementation detail of the current runtime; confirm intended.
/// </summary>
/// <param name="fileCrawler">Crawler supplying type, formats, and URL time kind.</param>
/// <param name="date">Base date the records are generated for.</param>
/// <param name="parameterCombination">Combination supplying parameter values and periods (both JSON).</param>
/// <returns>The generated records (not persisted here).</returns>
protected List<FileCrawlerRecord> CreateFileCrawlerRecordList(FileCrawler fileCrawler, DateTime date, ParameterCombination parameterCombination)
{
    List<FileCrawlerRecord> list = new List<FileCrawlerRecord>();
    // Parameter values and period definitions are stored as JSON on the combination.
    Dictionary<string, object> dic = JsonSerializer.Deserialize<Dictionary<string, object>>(parameterCombination.Parameters);
    List<FileCrawlerPeriod> periods = JsonSerializer.Deserialize<List<FileCrawlerPeriod>>(parameterCombination.Periods);
    if (fileCrawler.Type == FileCrawlerType.Archive)
    {
        // Archive: each period step yields a record whose source and target times are equal.
        foreach (FileCrawlerPeriod period in periods)
        {
            Func<DateTime, int, DateTime> getTimeFunc = GetGetTimeFunc(period.Type);
            Func<TimeSpan, int> getOffsetFunc = GetGetOffsetFunc(period.Type);
            for (int i = period.Start; i <= period.End; i += period.Interval)
            {
                DateTime sourceTime = getTimeFunc(date, i);
                // These three entries are appended to the args used for FileName/Stamp formats.
                dic["SourceTime"] = sourceTime;
                dic["SourceTimeSpan"] = sourceTime - date;
                dic["SourceTimeOffset"] = i;
                object[] args = dic.Values.ToArray();
                object[] urlArgs;
                if (fileCrawler.UrlDateTimeKind == DateTimeKind.Utc)
                {
                    // URL formats use UTC-converted values; file name/stamp keep the local args above.
                    DateTime sourceTimeUtc = sourceTime.ToUniversalTime();
                    dic["SourceTime"] = sourceTimeUtc;
                    dic["SourceTimeSpan"] = sourceTimeUtc - sourceTimeUtc.Date;
                    dic["SourceTimeOffset"] = getOffsetFunc(sourceTimeUtc - sourceTimeUtc.Date);
                    urlArgs = dic.Values.ToArray();
                }
                else
                {
                    urlArgs = args;
                }
                FileCrawlerRecord record = new FileCrawlerRecord(GuidGenerator.Create(), parameterCombination.Id, sourceTime, sourceTime)
                {
                    Url = string.Format(fileCrawler.UrlFormat, urlArgs),
                    // Windows-style path separator is hard-coded here.
                    DirectoryName = string.Format("{0}\\{1:yyyyMMdd}", Configuration["Settings:Crawlers.Files.RootDirectory"], sourceTime),
                    FileName = string.Format(fileCrawler.FileNameFormat, args),
                    Stamp = string.Format(fileCrawler.StampFormat, args)
                };
                list.Add(record);
            }
        }
    }
    else
    {
        // Forecast: one base ("source") time from the TimeOffset parameter (hours after 'date').
        int sourceTimeOffset = int.Parse(dic["TimeOffset"] as string);
        DateTime sourceTime = date.AddHours(sourceTimeOffset);
        dic["SourceTime"] = sourceTime;
        dic["SourceTimeSpan"] = sourceTime - date;
        dic["SourceTimeOffset"] = sourceTimeOffset;
        foreach (FileCrawlerPeriod period in periods)
        {
            Func<DateTime, int, DateTime> getTimeFunc = GetGetTimeFunc(period.Type);
            for (int i = period.Start; i <= period.End; i += period.Interval)
            {
                DateTime targetTime = getTimeFunc(sourceTime, i);
                dic["TargetTime"] = targetTime;
                dic["TargetTimeSpan"] = targetTime - sourceTime;
                dic["TargetTimeOffset"] = i;
                object[] args = dic.Values.ToArray();
                object[] urlArgs;
                if (fileCrawler.UrlDateTimeKind == DateTimeKind.Utc)
                {
                    DateTime sourceTimeUtc = sourceTime.ToUniversalTime();
                    DateTime targetTimeUtc = targetTime.ToUniversalTime();
                    dic["SourceTime"] = sourceTimeUtc;
                    dic["SourceTimeSpan"] = sourceTimeUtc - sourceTimeUtc.Date;
                    dic["SourceTimeOffset"] = (sourceTimeUtc - sourceTimeUtc.Date).Hours;
                    // NOTE(review): unlike the archive branch, TargetTimeSpan and
                    // TargetTimeOffset are NOT recomputed for UTC here — confirm
                    // whether that is intentional or an oversight.
                    dic["TargetTime"] = targetTimeUtc;
                    urlArgs = dic.Values.ToArray();
                }
                else
                {
                    urlArgs = args;
                }
                FileCrawlerRecord record = new FileCrawlerRecord(GuidGenerator.Create(), parameterCombination.Id, sourceTime, targetTime)
                {
                    Url = string.Format(fileCrawler.UrlFormat, urlArgs),
                    DirectoryName = string.Format("{0}\\{1:yyyyMMdd}", Configuration["Settings:Crawlers.Files.RootDirectory"], sourceTime),
                    FileName = string.Format(fileCrawler.FileNameFormat, args),
                    Stamp = string.Format(fileCrawler.StampFormat, args)
                };
                list.Add(record);
            }
        }
    }
    return (list);
}
/// <summary>
/// Handles POST /parse: crawls a document from a URL, a filename, a SQL query,
/// or the raw request body (selected in that order), parses it according to the
/// 'type' querystring value, and returns the parse result as JSON.
/// DECOMPOSITION: the original repeated the same crawl/parse/error-response
/// sequence eight times; the per-type parser choice and the JSON response
/// plumbing are factored into the private helpers below. All log lines, error
/// messages, status codes, and evaluation order are preserved.
/// </summary>
/// <param name="md">Request metadata (params, HTTP request/response).</param>
private static async Task PostParse(RequestMetadata md)
{
    string header = "[Komodo.Server] " + md.Http.Request.Source.IpAddress + ":" + md.Http.Request.Source.Port + " PostParse ";

    if (String.IsNullOrEmpty(md.Params.Type))
    {
        _Logging.Warn(header + "no document type supplied");
        await SendJsonAsync(md, 400, new ErrorResponse(400, "Supply 'type' [json/xml/html/sql/text] in querystring.", null, null).ToJson(true));
        return;
    }

    string docType = md.Params.Type.ToLower();
    ParseResult parseResult;

    if (!String.IsNullOrEmpty(md.Params.Url))
    {
        #region Crawl-URL
        if (!IsByteParseableType(docType))
        {
            _Logging.Warn(header + "invalid document type for processing via URL " + md.Params.Url);
            await SendJsonAsync(md, 400, new ErrorResponse(400, "Invalid document type.", null, null).ToJson(true));
            return;
        }

        HttpCrawler httpCrawler = new HttpCrawler(md.Params.Url);
        CrawlResult crawlResult = httpCrawler.Get();
        if (!crawlResult.Success)
        {
            _Logging.Warn(header + "failed to crawl URL " + md.Params.Url);
            await SendJsonAsync(md, 500, new ErrorResponse(400, "Failed to crawl supplied URL.", null, crawlResult).ToJson(true));
            return;
        }

        parseResult = ParseBytesByType(docType, crawlResult.Data);
        if (!parseResult.Success)
        {
            _Logging.Warn(header + "failed to parse data from URL " + md.Params.Url);
            await SendJsonAsync(md, 500, new ErrorResponse(400, "Failed to parse data from supplied URL.", null, parseResult).ToJson(true));
            return;
        }
        #endregion
    }
    else if (!String.IsNullOrEmpty(md.Params.Filename))
    {
        #region Filename
        if (!IsByteParseableType(docType))
        {
            _Logging.Warn(header + "invalid document type for processing via filename " + md.Params.Filename);
            await SendJsonAsync(md, 400, new ErrorResponse(400, "Invalid document type.", null, null).ToJson(true));
            return;
        }

        FileCrawler fileCrawler = new FileCrawler(md.Params.Filename);
        CrawlResult crawlResult = fileCrawler.Get();
        if (!crawlResult.Success)
        {
            _Logging.Warn(header + "failed to crawl filename " + md.Params.Filename);
            await SendJsonAsync(md, 500, new ErrorResponse(400, "Failed to crawl supplied filename.", null, crawlResult).ToJson(true));
            return;
        }

        parseResult = ParseBytesByType(docType, crawlResult.Data);
        if (!parseResult.Success)
        {
            _Logging.Warn(header + "failed to parse data from file " + md.Params.Filename);
            await SendJsonAsync(md, 500, new ErrorResponse(400, "Failed to parse data from supplied filename.", null, parseResult).ToJson(true));
            return;
        }
        #endregion
    }
    else if (docType.Equals("sql"))
    {
        #region Query
        if (md.Http.Request.Data == null || md.Http.Request.ContentLength < 1)
        {
            _Logging.Warn(header + "no query found in payload");
            await SendJsonAsync(md, 400, new ErrorResponse(400, "No SQL query in request payload.", null, null).ToJson(true));
            return;
        }

        DbSettings dbSettings = new DbSettings(md.Params.DbType, md.Params.DbServer, md.Params.DbPort, md.Params.DbUser, md.Params.DbPass, md.Params.DbInstance, md.Params.DbName);
        SqlCrawler sqlCrawler = new SqlCrawler(dbSettings, Encoding.UTF8.GetString(Common.StreamToBytes(md.Http.Request.Data)));
        CrawlResult crawlResult = sqlCrawler.Get();
        if (!crawlResult.Success)
        {
            _Logging.Warn(header + "failed to crawl database " + md.Params.DbName);
            await SendJsonAsync(md, 500, new ErrorResponse(400, "Failed to crawl specified database.", null, crawlResult).ToJson(true));
            return;
        }

        SqlParser sqlParser = new SqlParser();
        parseResult = sqlParser.Parse(crawlResult.DataTable);
        if (!parseResult.Success)
        {
            _Logging.Warn(header + "failed to parse data from database " + md.Params.DbName);
            await SendJsonAsync(md, 500, new ErrorResponse(400, "Failed to parse data from specified database.", null, parseResult).ToJson(true));
            return;
        }
        #endregion
    }
    else if (md.Http.Request.Data != null && md.Http.Request.ContentLength > 0)
    {
        #region Supplied-Data
        // The body is read before the type check, mirroring the original order
        // (the stream was always consumed first).
        byte[] data = Common.StreamToBytes(md.Http.Request.Data);

        if (!IsByteParseableType(docType))
        {
            _Logging.Warn(header + "invalid document type for processing via data");
            await SendJsonAsync(md, 400, new ErrorResponse(400, "Invalid document type supplied.", null, null).ToJson(true));
            return;
        }

        parseResult = ParseBytesByType(docType, data);
        if (!parseResult.Success)
        {
            _Logging.Warn(header + "failed to parse data from supplied data");
            await SendJsonAsync(md, 500, new ErrorResponse(400, "Failed to parse data from supplied data.", null, parseResult).ToJson(true));
            return;
        }
        #endregion
    }
    else
    {
        #region Unknown
        _Logging.Warn(header + "unable to derive data source from request");
        await SendJsonAsync(md, 400, new ErrorResponse(400, "Unable to derive data source from request.", null, null).ToJson(true));
        return;
        #endregion
    }

    // All successful branches share the same response shape.
    await SendJsonAsync(md, 200, Common.SerializeJson(parseResult, md.Params.Pretty));
}

/// <summary>
/// Sets the response status code and JSON content type, then sends the body.
/// </summary>
private static async Task SendJsonAsync(RequestMetadata md, int statusCode, string json)
{
    md.Http.Response.StatusCode = statusCode;
    md.Http.Response.ContentType = "application/json";
    await md.Http.Response.Send(json);
}

/// <summary>
/// True when the (lower-cased) document type has a byte-oriented parser.
/// </summary>
private static bool IsByteParseableType(string docType)
{
    switch (docType)
    {
        case "html":
        case "json":
        case "text":
        case "xml":
            return true;
        default:
            return false;
    }
}

/// <summary>
/// Parses raw bytes with the parser matching the (lower-cased) document type.
/// Callers must check <see cref="IsByteParseableType"/> first; "xml" is the
/// final accepted type, so it doubles as the default arm.
/// </summary>
private static ParseResult ParseBytesByType(string docType, byte[] data)
{
    switch (docType)
    {
        case "html":
            return new HtmlParser().ParseBytes(data);
        case "json":
            return new JsonParser().ParseBytes(data);
        case "text":
            return new TextParser().ParseBytes(data);
        default:
            return new XmlParser().ParseBytes(data);
    }
}
/// <summary>
/// HTTP handler for POST /{indexName}[/{sourceGuid}]: stores a document in an index.
/// The document payload comes from a crawled URL ('url' querystring), a crawled
/// filename ('filename' querystring), or the raw request body, and is staged in a
/// temp file that is always deleted on exit.  Indexing runs synchronously unless
/// 'async' is specified, in which case a stub result is returned immediately and
/// indexing continues unawaited in the background.
/// </summary>
/// <param name="md">Request metadata: HTTP context, parsed querystring parameters, and the authenticated user.</param>
private static async Task PostIndexDocument(RequestMetadata md)
{
    #region Variables

    string header = "[Komodo.Server] " + md.Http.Request.Source.IpAddress + ":" + md.Http.Request.Source.Port + " PostIndexDocument ";
    string tempFile = _Settings.TempStorage.Disk.Directory + Guid.NewGuid().ToString();
    string indexName = md.Http.Request.Url.Elements[0];
    string sourceGuid = null;

    if (md.Http.Request.Url.Elements.Length == 2)
    {
        sourceGuid = md.Http.Request.Url.Elements[1];
    }

    #endregion

    #region Check-Index-Existence

    Index index = _Daemon.GetIndex(indexName);
    if (index == null)
    {
        _Logging.Warn(header + "index " + indexName + " does not exist");
        md.Http.Response.StatusCode = 404;
        md.Http.Response.ContentType = "application/json";
        await md.Http.Response.Send(new ErrorResponse(404, "Unknown index.", null, null).ToJson(true));
        return;
    }

    #endregion

    #region Check-Supplied-GUID

    // A caller-supplied GUID must not collide with an existing source document.
    if (!String.IsNullOrEmpty(sourceGuid))
    {
        if (_Daemon.SourceDocumentExists(indexName, sourceGuid))
        {
            _Logging.Warn(header + "document " + indexName + "/" + sourceGuid + " already exists");
            md.Http.Response.StatusCode = 409;
            md.Http.Response.ContentType = "application/json";
            await md.Http.Response.Send(new ErrorResponse(409, "Requested GUID already exists.", null, null).ToJson(true));
            return;
        }
    }

    #endregion

    #region Retrieve-DocType-from-QS

    if (String.IsNullOrEmpty(md.Params.Type))
    {
        _Logging.Warn(header + "no 'type' value found in querystring");
        md.Http.Response.StatusCode = 400;
        md.Http.Response.ContentType = "application/json";
        await md.Http.Response.Send(new ErrorResponse(400, "Supply 'type' [json/xml/html/sql/text] in querystring.", null, null).ToJson(true));
        return;
    }

    DocType docType = DocType.Json;

    // ToLower() for consistency with the other handlers in this file, which
    // accept the 'type' value case-insensitively; previously mixed-case values
    // such as "JSON" were rejected here but accepted elsewhere.
    switch (md.Params.Type.ToLower())
    {
        case "json":
            docType = DocType.Json;
            break;
        case "xml":
            docType = DocType.Xml;
            break;
        case "html":
            docType = DocType.Html;
            break;
        case "sql":
            docType = DocType.Sql;
            break;
        case "text":
            docType = DocType.Text;
            break;
        case "unknown":
            docType = DocType.Unknown;
            break;
        default:
            _Logging.Warn(header + "invalid 'type' value found in querystring: " + md.Params.Type);
            md.Http.Response.StatusCode = 400;
            md.Http.Response.ContentType = "application/json";
            await md.Http.Response.Send(new ErrorResponse(400, "Supply 'type' [json/xml/html/sql/text] in querystring.", null, null).ToJson(true));
            return;
    }

    #endregion

    try
    {
        #region Write-Temp-File

        long contentLength = 0;
        string md5 = null;
        CrawlResult crawlResult = null;

        if (!String.IsNullOrEmpty(md.Params.Url) || !String.IsNullOrEmpty(md.Params.Filename))
        {
            #region Crawl

            if (!String.IsNullOrEmpty(md.Params.Url))
            {
                HttpCrawler httpCrawler = new HttpCrawler(md.Params.Url);
                crawlResult = httpCrawler.Download(tempFile);
                if (!crawlResult.Success)
                {
                    _Logging.Warn(header + "failed to crawl URL " + md.Params.Url);
                    md.Http.Response.StatusCode = 500;
                    md.Http.Response.ContentType = "application/json";
                    // NOTE(review): ErrorResponse carries 400 while the HTTP status is 500;
                    // preserved as-is since the other handlers in this file do the same.
                    await md.Http.Response.Send(new ErrorResponse(400, "Failed to crawl supplied URL.", null, crawlResult).ToJson(true));
                    return;
                }

                contentLength = crawlResult.ContentLength;
                md5 = Common.Md5File(tempFile);
            }
            else
            {
                FileCrawler fileCrawler = new FileCrawler(md.Params.Filename);
                crawlResult = fileCrawler.Download(tempFile);
                if (!crawlResult.Success)
                {
                    _Logging.Warn(header + "failed to crawl filename " + md.Params.Filename);
                    md.Http.Response.StatusCode = 500;
                    md.Http.Response.ContentType = "application/json";
                    await md.Http.Response.Send(new ErrorResponse(400, "Failed to crawl supplied filename.", null, crawlResult).ToJson(true));
                    return;
                }

                contentLength = crawlResult.ContentLength;

                // BUGFIX: was Common.Md5(tempFile), inconsistent with the URL and
                // request-body branches which use Common.Md5File(tempFile) to hash
                // the staged file's contents; Common.Md5 presumably hashed the
                // path string itself, yielding a wrong digest for this branch.
                md5 = Common.Md5File(tempFile);
            }

            #endregion
        }
        else
        {
            // No URL or filename supplied: stage the raw request body in the temp file.
            using (FileStream fs = new FileStream(tempFile, FileMode.Create, FileAccess.ReadWrite))
            {
                await md.Http.Request.Data.CopyToAsync(fs);
            }

            contentLength = md.Http.Request.ContentLength;
            md5 = Common.Md5File(tempFile);
        }

        #endregion

        #region Build-Source-Document

        string sourceUrl = null;
        if (!String.IsNullOrEmpty(md.Params.Url))
        {
            sourceUrl = md.Params.Url;
        }
        else if (!String.IsNullOrEmpty(md.Params.Filename))
        {
            sourceUrl = md.Params.Filename;
        }

        List<string> tags = null;
        if (!String.IsNullOrEmpty(md.Params.Tags))
        {
            tags = Common.CsvToStringList(md.Params.Tags);
        }

        SourceDocument src = new SourceDocument(
            sourceGuid,
            md.User.GUID,
            index.GUID,
            md.Params.Name,
            md.Params.Title,
            tags,
            docType,
            sourceUrl,
            md.Http.Request.ContentType,
            contentLength,
            md5);

        #endregion

        if (!md.Params.Async)
        {
            #region Sync

            IndexResult result = await _Daemon.AddDocument(
                indexName,
                src,
                Common.ReadBinaryFile(tempFile),
                new ParseOptions(),
                !md.Params.Bypass);

            if (!result.Success)
            {
                _Logging.Warn(header + "unable to store document in index " + indexName);
                md.Http.Response.StatusCode = 500;
                md.Http.Response.ContentType = "application/json";
                await md.Http.Response.Send(new ErrorResponse(500, "Unable to store document in index '" + indexName + "'.", null, result).ToJson(true));
                return;
            }

            md.Http.Response.StatusCode = 200;
            md.Http.Response.ContentType = "application/json";
            await md.Http.Response.Send(Common.SerializeJson(result, md.Params.Pretty));
            return;

            #endregion
        }
        else
        {
            #region Async

            // Fire-and-forget: respond immediately with a stub result while
            // indexing continues unawaited on the daemon.
            IndexResult result = new IndexResult();
            result.Success = true;
            result.GUID = src.GUID;
            result.Type = docType;
            result.ParseResult = null;
            result.Time = null;

            Task unawaited = _Daemon.AddDocument(
                index.Name,
                src,
                Common.ReadBinaryFile(tempFile),
                new ParseOptions(),
                !md.Params.Bypass);

            md.Http.Response.StatusCode = 200;
            md.Http.Response.ContentType = "application/json";
            await md.Http.Response.Send(Common.SerializeJson(result, md.Params.Pretty));
            return;

            #endregion
        }
    }
    finally
    {
        // Always remove the staged temp file, on success and failure alike.
        if (File.Exists(tempFile))
        {
            File.Delete(tempFile);
        }
    }
}
/// <summary>
/// Enumerate the solution file paths that a <see cref="FileCrawler"/> discovers
/// beneath the supplied directory.
/// </summary>
/// <param name="directory">Root directory to scan for solutions.</param>
/// <returns>The solution paths reported by <see cref="FileCrawler.GetSolutionsFromDirectory"/>.</returns>
private static IEnumerable<string> GetSolutionsFilePaths(string directory) =>
    new FileCrawler().GetSolutionsFromDirectory(directory);