/// <summary>
/// Sends the given data set summary to the manifest collection as an update,
/// using the summary's <c>_id</c> as the query key.
/// </summary>
/// <param name="dataSet">Summary to serialize and hand to the update call.</param>
public void UpdateManifest(DataSetSummary dataSet)
{
    MongoAgent agent = MongoAgent.GetInstance(Config);

    // Query document that selects the record by its _id.
    BsonDocument filter = new BsonDocument("_id", dataSet._id);

    // Build the BSON payload by round-tripping the summary through JSON,
    // then set _id explicitly from the object's own value.
    string json = JsonConvert.SerializeObject(dataSet);
    BsonDocument payload = BsonDocument.Parse(json);
    BsonElement idElement = new BsonElement("_id", dataSet._id);
    payload.SetElement(idElement);

    // NOTE(review): the result of the async call is not awaited or observed here.
    agent.UpdateCollectionAsync(Config.ManifestCollection, filter, payload);
}
/// <summary>
/// Reads the locally stored manifest file and returns the data sets that need processing:
/// those whose identifier has no record in the manifest collection, and those whose
/// <c>last_updated</c> value is later than the one on the first stored record.
/// </summary>
/// <param name="conn">Agent used to query and insert into the manifest collection.</param>
/// <returns>
/// The data sets selected for processing; an empty list if any exception occurs
/// (the exception message is logged).
/// </returns>
public List<DataSetSummary> ParsingData(MongoAgent conn)
{
    Logger.Info("Start parsing today's manifest");
    List<DataSetSummary> summary = new List<DataSetSummary>();
    try
    {
        // Read the whole manifest and release the file handle right away;
        // the reader was previously never disposed.
        string manifestText;
        using (StreamReader sr = new StreamReader(Path.Combine(Config.LocalFolder, LocalFileName)))
        {
            manifestText = sr.ReadToEnd();
        }

        JObject jsonStr = JObject.Parse(manifestText);
        foreach (JToken token in jsonStr.SelectToken("dataset").Children())
        {
            // Deserialize the token from JSON to object.
            string tokenJson = token.First.ToString();
            DataSetSummary dataSetSum = JsonConvert.DeserializeObject<DataSetSummary>(tokenJson);
            dataSetSum.token = token.Path;

            // Compare with the stored record to decide if it needs to be downloaded today.
            BsonDocument query = new BsonDocument("identifier", dataSetSum.identifier);
            List<BsonDocument> list = conn.ReadCollection(Config.ManifestCollection, query);
            if (list.Count == 0)
            {
                // No record of this identifier: treat it as new, queue it and insert it into the database.
                summary.Add(dataSetSum);
                conn.InsertCollectionAsync(Config.ManifestCollection, BsonDocument.Parse(tokenJson));
            }
            else
            {
                // Only the first returned record is checked to decide whether a download is needed.
                DataSetSummary old = BsonSerializer.Deserialize<DataSetSummary>(list[0]);
                if (DateTime.Parse(dataSetSum.last_updated) > DateTime.Parse(old.last_updated))
                {
                    // Carry over the stored _id. The stored record itself is not updated here.
                    dataSetSum._id = old._id;
                    summary.Add(dataSetSum);
                }
            }
        }

        Logger.Info("Finish parsing today's manifest.");
        Logger.Info("There are " + summary.Count + " datasets need to update.");
    }
    catch (Exception e)
    {
        Logger.Error(e.Message);
        return new List<DataSetSummary>();
    }

    return summary;
}
/// <summary>
/// Not implemented; calling this overload always throws.
/// </summary>
/// <param name="conn">Unused.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public List<DataSetSummary> ParsingData(MongoAgent conn)
{
    throw new NotImplementedException();
}
/// <summary>
/// Reads <paramref name="DataFile"/> line by line, parses each line as a BSON document and
/// inserts the documents in batches into the collection named by <paramref name="Identifier"/>.
/// The data file is deleted afterwards, whether or not parsing succeeded.
/// </summary>
/// <param name="DataFile">Path of the file to parse; one JSON document per line.</param>
/// <param name="Identifier">Collection name; also used in log messages.</param>
/// <param name="DebugMode">When true, the file is deleted and nothing is parsed or inserted.</param>
/// <remarks>
/// Exceptions raised while reading, parsing or inserting propagate to the caller
/// with their original stack trace.
/// </remarks>
public void ParsingData(string DataFile, string Identifier, bool DebugMode = false)
{
    if (DebugMode)
    {
        File.Delete(DataFile);
        Logger.Info("[DebugMode] is on, no data will be parsed of " + Identifier);
        return;
    }

    Logger.Info("Start parsing data of " + Identifier);

    // "EBA" uses a smaller batch than every other identifier.
    int BatchSize = Identifier.Equals("EBA") ? 100 : 1000;
    int Count = 0;
    List<BsonDocument> documents = new List<BsonDocument>();
    StreamReader reader = new StreamReader(DataFile);
    try
    {
        MongoAgent conn = MongoAgent.GetInstance(Config);
        while (!reader.EndOfStream)
        {
            string str = reader.ReadLine();
            if (str.Contains("\0"))
            {
                Logger.Warn(string.Concat("File ", Identifier, ",Line ", (Count + 1).ToString(), " has invalid char."));
                str = str.Replace("\0", "");

                // A line that held nothing but NUL characters / whitespace is skipped.
                if (string.IsNullOrWhiteSpace(str))
                {
                    continue;
                }
            }

            // The result is unused; the call is kept so a line Json.NET rejects still fails here.
            JObject obj = (JObject)JsonConvert.DeserializeObject(str);
            BsonDocument bdoc = BsonDocument.Parse(str);
            documents.Add(bdoc);

            // Flush a full batch.
            if (documents.Count == BatchSize)
            {
                conn.InsertCollection(Identifier, documents);
                documents.Clear();
            }

            Count++;
        }

        // Flush whatever is left over from the last partial batch.
        if (documents.Count > 0)
        {
            conn.InsertCollection(Identifier, documents);
        }
    }
    finally
    {
        // No catch block: the previous "throw e;" only reset the stack trace.
        reader.Dispose();
        File.Delete(DataFile);
    }

    Logger.Info("Finish parsing data of " + Identifier + "(" + Count.ToString() + ")");
}
/// <summary>
/// Entry point. Loads the configuration file named by the first argument, downloads and
/// parses the manifest, then processes each selected data set on its own task with at most
/// <c>config.ConcurrentThread</c> tasks running at once. A data set's manifest record is
/// updated only when its processing reports success.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the configuration file path.</param>
static void Main(string[] args)
{
    if (args == null || args.Length == 0)
    {
        // No configuration available, so fall back to a fixed log location.
        GlobalContext.Properties["LogPath"] = @"C:\EIA_Updater\Logs\";
        logger = LogManager.GetLogger(typeof(EIAUpdater));
        logger.Error("No Configuration file found.");
        return;
    }

    try
    {
        Configurations config = FileHandler.ReadJsontoObject<Configurations>(args[0]);
        GlobalContext.Properties["LogPath"] = Path.Combine(config.LogPath, "");
        logger = LogManager.GetLogger(typeof(EIAUpdater));
        logger.Info("Start updating EIA's data for today");
        logger.Info("Read configuration of " + args[0]);

        ManifestHandler manifest = new ManifestHandler(config);
        if (!manifest.Download())
        {
            // The manifest download failed; nothing to parse.
            logger.Error("Loading manifest failed of day " + DateTime.UtcNow.ToString("yyyyMMdd"));
            return;
        }

        // The manifest file was downloaded successfully; continue with parsing.
        List<DataSetSummary> dataList = manifest.ParsingData(MongoAgent.GetInstance(config));

        // "running" drives the throttle; "started" keeps every task so the final
        // WaitAll still observes tasks that faulted and were dropped from "running".
        List<Task> running = new List<Task>();
        List<Task> started = new List<Task>();

        foreach (DataSetSummary dataset in dataList)
        {
            try
            {
                if (running.Count >= config.ConcurrentThread)
                {
                    Task.WaitAny(running.ToArray());

                    // Remove every finished task, not only successful ones. A faulted task left
                    // in the list makes WaitAny return immediately and disables the throttle.
                    running.RemoveAll(t => t.IsCompleted);
                }

                // Local copy so the lambda captures this iteration's value on any compiler version.
                DataSetSummary current = dataset;
                Task process = Task.Factory.StartNew(() =>
                {
                    DataProcessor processor = new DataProcessor(config);
                    bool flag = processor.ProcessingData(current).Result;
                    if (flag)
                    {
                        manifest.UpdateManifest(current);
                    }
                });
                running.Add(process);
                started.Add(process);
            }
            catch (Exception e)
            {
                logger.Error(e.Message);
            }
        }

        // Throws AggregateException if any task faulted; handled below.
        Task.WaitAll(started.ToArray());
        logger.Info("All updated data had been processed for today.");
    }
    catch (Exception error)
    {
        // Unwrap task failures so the real causes are reported, not the generic wrapper message.
        List<Exception> failures = new List<Exception>();
        AggregateException aggregate = error as AggregateException;
        if (aggregate != null)
        {
            failures.AddRange(aggregate.Flatten().InnerExceptions);
        }
        else
        {
            failures.Add(error);
        }

        foreach (Exception failure in failures)
        {
            // The logger is still unassigned if reading the configuration itself failed.
            if (logger != null)
            {
                logger.Error(failure.Message);
            }
            else
            {
                Console.Error.WriteLine(failure.Message);
            }
        }
    }
}