Ejemplo n.º 1
0
        /// <summary>
        /// Sends the given data set summary to the manifest collection as the
        /// replacement for the record that has the same <c>_id</c>.
        /// </summary>
        /// <param name="dataSet">Summary whose serialized form is sent as the update document.</param>
        /// <remarks>
        /// The update is started through <c>UpdateCollectionAsync</c>; whatever that call
        /// returns is not observed here.
        /// </remarks>
        public void UpdateManifest(DataSetSummary dataSet)
        {
            MongoAgent agent = MongoAgent.GetInstance(Config);

            // Match the existing record by its identifier.
            BsonDocument filter = new BsonDocument("_id", dataSet._id);

            // Round-trip through JSON to obtain a BSON document, then set "_id"
            // explicitly from the summary's own value.
            string       json        = JsonConvert.SerializeObject(dataSet);
            BsonDocument replacement = BsonDocument.Parse(json);
            replacement.SetElement(new BsonElement("_id", dataSet._id));

            agent.UpdateCollectionAsync(Config.ManifestCollection, filter, replacement);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads the locally stored manifest file and works out which data sets
        /// have to be downloaded, by comparing each entry with the record stored
        /// in the manifest collection.
        /// </summary>
        /// <param name="conn">Mongo agent used to read and insert manifest records.</param>
        /// <returns>
        /// The data sets that are new or have a later <c>last_updated</c> value than
        /// the stored record. An empty list is returned when the manifest cannot be
        /// read or parsed (the error is logged).
        /// </returns>
        public List <DataSetSummary> ParsingData(MongoAgent conn)
        {
            Logger.Info("Start parsing today's manifest");
            List <DataSetSummary> summary = new List <DataSetSummary>();

            try
            {
                JObject manifest;

                // Dispose the reader as soon as the file has been read so the
                // file handle is not left open.
                using (StreamReader sr = new StreamReader(Path.Combine(Config.LocalFolder, LocalFileName)))
                {
                    manifest = JObject.Parse(sr.ReadToEnd());
                }

                JToken dataSets = manifest.SelectToken("dataset");
                if (dataSets == null)
                {
                    // Same outcome as any other parse failure, but with a message
                    // that says what is actually wrong.
                    Logger.Error("Manifest does not contain a 'dataset' node.");
                    return(new List <DataSetSummary>());
                }

                foreach (JToken token in dataSets.Children())
                {
                    // Deserialize the token from JSON to object.
                    string         tokenJson  = token.First.ToString();
                    DataSetSummary dataSetSum = JsonConvert.DeserializeObject <DataSetSummary>(tokenJson);
                    dataSetSum.token = token.Path;

                    // Compare with the last record to decide if it needs to be downloaded today.
                    BsonDocument        query = new BsonDocument("identifier", dataSetSum.identifier);
                    List <BsonDocument> list  = conn.ReadCollection(Config.ManifestCollection, query);
                    if (list.Count == 0)
                    {
                        // No record for this identifier: it is a new data set. It needs
                        // to be downloaded and is inserted into the manifest collection.
                        summary.Add(dataSetSum);
                        conn.InsertCollectionAsync(Config.ManifestCollection, BsonDocument.Parse(tokenJson));
                    }
                    else
                    {
                        // Only the first returned record is checked to decide whether a download is needed.
                        DataSetSummary old = BsonSerializer.Deserialize <DataSetSummary>(list[0]);

                        if (DateTime.Parse(dataSetSum.last_updated) > DateTime.Parse(old.last_updated))
                        {
                            // Carry over the stored _id so a later update can target the
                            // existing record; the record itself is not updated here.
                            dataSetSum._id = old._id;
                            summary.Add(dataSetSum);
                        }
                    }
                }

                Logger.Info("Finish parsing today's manifest.");
                Logger.Info("There are " + summary.Count + " datasets need to update.");
            }
            catch (Exception e)
            {
                // Best effort: a broken manifest results in "nothing to update".
                Logger.Error(e.Message);
                return(new List <DataSetSummary>());
            }
            return(summary);
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Placeholder: manifest parsing is not implemented in this type.
 /// </summary>
 /// <param name="conn">Unused.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public List<DataSetSummary> ParsingData(MongoAgent conn)
 {
     // Intentionally unimplemented.
     throw new NotImplementedException();
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Reads a data file line by line, parses every line as a BSON document
        /// and inserts the documents in batches into the collection named by
        /// <paramref name="Identifier"/>. The data file is deleted afterwards,
        /// whether or not parsing succeeded.
        /// </summary>
        /// <param name="DataFile">Path of the file to parse; it is deleted by this method.</param>
        /// <param name="Identifier">Data set identifier, used as the target collection name and in log messages.</param>
        /// <param name="DebugMode">When true, the file is deleted and nothing is parsed.</param>
        /// <remarks>
        /// Exceptions raised while reading, parsing or inserting propagate to the
        /// caller with their original stack trace.
        /// </remarks>
        public void ParsingData(string DataFile, string Identifier, bool DebugMode = false)
        {
            if (DebugMode)
            {
                File.Delete(DataFile);
                Logger.Info("[DebugMode] is on, no data will be parsed of " + Identifier);
                return;
            }

            Logger.Info("Start parsing data of " + Identifier);

            // "EBA" is inserted in smaller batches than every other data set.
            int batchSize  = Identifier.Equals("EBA") ? 100 : 1000;
            int count      = 0; // number of documents parsed
            int lineNumber = 0; // physical line in the file, used for diagnostics
            List <BsonDocument> documents = new List <BsonDocument>();
            StreamReader        reader    = new StreamReader(DataFile);

            try
            {
                MongoAgent conn = MongoAgent.GetInstance(Config);
                while (!reader.EndOfStream)
                {
                    string line = reader.ReadLine();
                    lineNumber++;

                    if (line.Contains("\0"))
                    {
                        // Report the real line number; the document counter falls
                        // behind as soon as a line has been skipped.
                        Logger.Warn(string.Concat("File ", Identifier, ",Line ", lineNumber.ToString(), " has invalid char."));
                        line = line.Replace("\0", "");
                        if (string.IsNullOrWhiteSpace(line))
                        {
                            // Nothing left once the NUL characters are stripped.
                            continue;
                        }
                    }

                    documents.Add(BsonDocument.Parse(line));

                    if (documents.Count == batchSize)
                    {
                        conn.InsertCollection(Identifier, documents);
                        documents.Clear();
                    }

                    count++;
                }

                // Flush the final, partially filled batch.
                if (documents.Count > 0)
                {
                    conn.InsertCollection(Identifier, documents);
                }
            }
            finally
            {
                // No catch block: a "throw e;" here would reset the stack trace.
                reader.Dispose();
                File.Delete(DataFile);
            }

            Logger.Info(string.Concat("Finish parsing data of ", Identifier, "(", count.ToString(), ")"));
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Entry point. Reads the configuration file named by the first argument,
        /// downloads and parses the manifest, then processes every data set that
        /// needs updating, running at most <c>config.ConcurrentThread</c> tasks at
        /// a time. A data set's manifest record is updated only when its
        /// processing reports success.
        /// </summary>
        /// <param name="args">Command line arguments; <c>args[0]</c> is the configuration file path.</param>
        static void Main(string[] args)
        {
            if (args == null || args.Length == 0)
            {
                // No configuration available yet, so fall back to a fixed log location.
                GlobalContext.Properties["LogPath"] = @"C:\EIA_Updater\Logs\";
                logger = LogManager.GetLogger(typeof(EIAUpdater));
                logger.Error("No Configuration file found.");
                return;
            }

            try
            {
                Configurations config = FileHandler.ReadJsontoObject <Configurations>(args[0]);
                GlobalContext.Properties["LogPath"] = Path.Combine(config.LogPath, "");
                logger = LogManager.GetLogger(typeof(EIAUpdater));
                logger.Info("Start updating EIA's data for today");
                logger.Info("Read configuration of " + args[0]);
                ManifestHandler manifest = new ManifestHandler(config);

                if (!manifest.Download())
                {
                    // The manifest could not be downloaded, so there is nothing to parse.
                    logger.Error("Loading manifest failed of day " + DateTime.UtcNow.ToString("yyyyMMdd"));
                    return;
                }

                List <DataSetSummary> dataList    = manifest.ParsingData(MongoAgent.GetInstance(config));
                List <Task>           processList = new List <Task>();
                int failures = 0;

                foreach (DataSetSummary dataset in dataList)
                {
                    try
                    {
                        if (processList.Count >= config.ConcurrentThread)
                        {
                            Task.WaitAny(processList.ToArray());

                            // Drop every finished task, not only the successful ones.
                            // A faulted task left in the list would make WaitAny return
                            // immediately from then on and disable the throttle.
                            failures += RemoveFinishedTasks(processList);
                        }

                        // Local copy so the lambda captures this iteration's value
                        // regardless of compiler version.
                        DataSetSummary current = dataset;
                        Task process = Task.Factory.StartNew(() =>
                        {
                            DataProcessor processor = new DataProcessor(config);
                            bool flag = processor.ProcessingData(current).Result;
                            if (flag)
                            {
                                manifest.UpdateManifest(current);
                            }
                        });
                        processList.Add(process);
                    }
                    catch (Exception e)
                    {
                        logger.Error(e.Message, e);
                    }
                }

                try
                {
                    Task.WaitAll(processList.ToArray());
                }
                catch (AggregateException)
                {
                    // WaitAll throws only after every task has finished; the
                    // individual faults are logged by RemoveFinishedTasks below.
                }
                failures += RemoveFinishedTasks(processList);

                if (failures == 0)
                {
                    logger.Info("All updated data had been processed for today.");
                }
                else
                {
                    logger.Error(failures + " dataset task(s) failed while processing today's data.");
                }
            }
            catch (Exception error)
            {
                logger.Error(error.Message, error);
            }
        }

        /// <summary>
        /// Removes every completed task from <paramref name="running"/> and logs
        /// the exceptions of those that faulted.
        /// </summary>
        /// <param name="running">List of started tasks; modified in place.</param>
        /// <returns>The number of removed tasks that had faulted.</returns>
        private static int RemoveFinishedTasks(List <Task> running)
        {
            int faulted = 0;

            // Iterate backwards so RemoveAt does not shift unvisited items.
            for (int i = running.Count - 1; i >= 0; i--)
            {
                Task task = running[i];
                if (!task.IsCompleted)
                {
                    continue;
                }

                if (task.Exception != null)
                {
                    faulted++;
                    foreach (Exception inner in task.Exception.Flatten().InnerExceptions)
                    {
                        logger.Error(inner.Message, inner);
                    }
                }
                running.RemoveAt(i);
            }
            return faulted;
        }