void blsStateBackWorker_DoWork(object sender, DoWorkEventArgs e)
{
    // Parses a BLS state-industry export. Series titles follow a
    // "Seasonally Adjusted" marker line (state on the 1st line after it,
    // industry on the 4th); data rows are 4-column CSV. Writes one combined
    // CSV (OutputFileName) plus one CSV per industry, whose path is derived
    // by inserting the industry name before the file extension.
    Dictionary<string, Dictionary<DateTime, BLSValue>> bulkDataHolder =
        new Dictionary<string, Dictionary<DateTime, BLSValue>>();
    HashSet<DateTime> possibleDates = new HashSet<DateTime>(); // set: O(1) dedup vs. List.Contains
    Dictionary<string, StreamWriter> outputFiles = new Dictionary<string, StreamWriter>();
    Dictionary<string, string> industryOutputs = new Dictionary<string, string>();

    StreamWriter outputFile = new StreamWriter(OutputFileName, true);
    try
    {
        using (Stream stream = File.Open(InputFileName, FileMode.Open))
        using (StreamReader reader = new StreamReader(stream))
        {
            bool nextIsTitle = false;
            int titleCount = 0;
            string currentSeries = "";
            string raw;
            // Fix: read-then-null-check — the old do/while dereferenced a null
            // line when the input file was empty.
            while ((raw = reader.ReadLine()) != null)
            {
                string[] values = raw.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

                // Find out what metric we're looking at.
                // This will need to be tweaked on a case by case basis.
                if (nextIsTitle)
                {
                    #region for state industry data
                    if (titleCount == 3)
                    {
                        // Fourth line after the marker names the industry.
                        string[] titleValues = raw.Split(new char[] { ':' }, StringSplitOptions.RemoveEmptyEntries);
                        if (titleValues.Length == 2)
                        {
                            string industry = titleValues[1].Trim().Replace(',', '_');
                            if (!outputFiles.ContainsKey(industry))
                            {
                                // NOTE(review): assumes OutputFileName ends in a
                                // 4-character extension such as ".csv" — confirm.
                                string newFilePath = OutputFileName.Insert(OutputFileName.Length - 4, industry);
                                outputFiles.Add(industry, new StreamWriter(newFilePath, true));
                            }
                            currentSeries = currentSeries + " - " + industry;
                            if (!bulkDataHolder.ContainsKey(currentSeries))
                            {
                                bulkDataHolder.Add(currentSeries, new Dictionary<DateTime, BLSValue>());
                            }
                        }
                        nextIsTitle = false;
                    }
                    else if (titleCount == 0)
                    {
                        // First line after the marker names the series (state).
                        string[] titleValues = raw.Split(new char[] { ':' }, StringSplitOptions.RemoveEmptyEntries);
                        if (titleValues.Length == 2)
                        {
                            currentSeries = titleValues[1].Replace(',', '/');
                        }
                        titleCount++;
                    }
                    else
                    {
                        titleCount++;
                    }
                    #endregion
                }

                // Whatever is under "Seasonally Adjusted" starts a title block.
                if (raw.StartsWith("Seasonally Adjusted"))
                {
                    nextIsTitle = true;
                    titleCount = 0;
                }

                // Now we can look at the data: rows have exactly 4 columns and
                // are not the header row.
                if (values.Length == 4 && values[0] != "Series id" && values[0] != "Series Id")
                {
                    BLSValue blsVal = new BLSValue(currentSeries, values[0], values[1], values[2], values[3]);
                    Dictionary<DateTime, BLSValue> seriesData;
                    if (!bulkDataHolder.TryGetValue(currentSeries, out seriesData))
                    {
                        // Fix: a data row arriving before any series title used to
                        // throw KeyNotFoundException here.
                        seriesData = new Dictionary<DateTime, BLSValue>();
                        bulkDataHolder.Add(currentSeries, seriesData);
                    }
                    // Fix: indexer instead of Add() — a duplicate date for the
                    // same series no longer throws; last value wins.
                    seriesData[blsVal.ValueDate] = blsVal;
                    possibleDates.Add(blsVal.ValueDate);
                }
            }
        }

        // Now we should have all the data, so let's start writing it out.
        // First we need headers for the big file; seed per-industry headers too.
        StringBuilder headerData = new StringBuilder("Date");
        foreach (KeyValuePair<string, Dictionary<DateTime, BLSValue>> kv in bulkDataHolder)
        {
            headerData.Append(',').Append(kv.Key.Replace(',', ' '));
            string[] indParse = kv.Key.Split(new string[] { " - " }, StringSplitOptions.RemoveEmptyEntries);
            if (indParse.Length < 2)
            {
                // Fix: a key without a " - industry" suffix used to throw
                // IndexOutOfRangeException; skip its industry output instead.
                continue;
            }
            if (!industryOutputs.ContainsKey(indParse[1]))
            {
                industryOutputs.Add(indParse[1], "Year," + indParse[0].Replace(',', ' '));
            }
            else
            {
                industryOutputs[indParse[1]] = industryOutputs[indParse[1]] + "," + indParse[0];
            }
        }
        outputFile.WriteLine(headerData.ToString());
        foreach (KeyValuePair<string, string> kv in industryOutputs)
        {
            StreamWriter writer;
            if (outputFiles.TryGetValue(kv.Key, out writer))
            {
                writer.WriteLine(kv.Value);
            }
        }

        // Then we need to sort our dates (just in case) and emit one row per date.
        foreach (DateTime d in possibleDates.OrderBy(date => date))
        {
            string dateText = d.Month + "/" + d.Day + "/" + d.Year;
            StringBuilder writeDateLine = new StringBuilder(dateText);
            industryOutputs = new Dictionary<string, string>();
            foreach (KeyValuePair<string, Dictionary<DateTime, BLSValue>> kv in bulkDataHolder)
            {
                BLSValue val;
                bool hasValue = kv.Value.TryGetValue(d, out val); // single lookup vs. ContainsKey + indexer
                writeDateLine.Append(',');
                if (hasValue)
                {
                    writeDateLine.Append(val.Value);
                }
                string[] indParse = kv.Key.Split(new string[] { " - " }, StringSplitOptions.RemoveEmptyEntries);
                if (indParse.Length < 2)
                {
                    continue; // malformed key (no industry suffix) — same skip as the header loop
                }
                string industryLine = hasValue ? "," + val.Value : ",";
                if (industryOutputs.ContainsKey(indParse[1]))
                {
                    industryOutputs[indParse[1]] = industryOutputs[indParse[1]] + industryLine;
                }
                else
                {
                    industryOutputs.Add(indParse[1], dateText + industryLine);
                }
            }
            foreach (KeyValuePair<string, string> kvp in industryOutputs)
            {
                StreamWriter writer;
                if (outputFiles.TryGetValue(kvp.Key, out writer))
                {
                    writer.WriteLine(kvp.Value);
                }
            }
            outputFile.WriteLine(writeDateLine.ToString());
        }
    }
    finally
    {
        // Fix: writers used to leak whenever an exception fired before the
        // trailing Close() calls; dispose them unconditionally.
        foreach (KeyValuePair<string, StreamWriter> kvp in outputFiles)
        {
            kvp.Value.Dispose();
        }
        outputFile.Dispose();
    }
}
void blsBackWorker_DoWork(object sender, DoWorkEventArgs e)
{
    // Parses a BLS export that may contain several table families — A tables
    // (household data), B tables (national industry data), state industry,
    // state employment. The family is detected from marker lines in the file
    // and determines how many lines after "Seasonally Adjusted" the series
    // title sits. Writes one combined CSV (OutputFileName).
    Dictionary<string, Dictionary<DateTime, BLSValue>> bulkDataHolder =
        new Dictionary<string, Dictionary<DateTime, BLSValue>>();
    HashSet<DateTime> possibleDates = new HashSet<DateTime>(); // set: O(1) dedup vs. List.Contains
    string dataType = "";

    // Fix: the output writer used to leak if any exception fired before Close().
    using (StreamWriter outputFile = new StreamWriter(OutputFileName, true))
    using (Stream stream = File.Open(InputFileName, FileMode.Open))
    using (StreamReader reader = new StreamReader(stream))
    {
        bool nextIsTitle = false;
        int titleCount = 0;
        string currentSeries = "";
        string raw;
        // Fix: read-then-null-check — the old do/while dereferenced a null
        // line when the input file was empty.
        while ((raw = reader.ReadLine()) != null)
        {
            string[] values = raw.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

            // Find out what metric we're looking at.
            // This will need to be tweaked on a case by case basis.
            if (nextIsTitle)
            {
                #region for employment Data
                if (dataType == "ATables" && titleCount == 0)
                {
                    // Title is on the first line after the marker.
                    string[] titleValues = raw.Split(new char[] { ':' }, StringSplitOptions.RemoveEmptyEntries);
                    if (titleValues.Length == 2)
                    {
                        currentSeries = titleValues[1].Trim();
                        if (!bulkDataHolder.ContainsKey(currentSeries))
                        {
                            // Fix: the unconditional Add used to throw
                            // ArgumentException when a title repeated.
                            bulkDataHolder.Add(currentSeries, new Dictionary<DateTime, BLSValue>());
                        }
                    }
                    nextIsTitle = false;
                }
                #endregion
                #region For Industry Data
                if (dataType == "BTables")
                {
                    if (titleCount == 1)
                    {
                        // Second line after the marker refines the series name.
                        string[] titleValues = raw.Split(new char[] { ':' }, StringSplitOptions.RemoveEmptyEntries);
                        if (titleValues.Length == 2)
                        {
                            currentSeries = currentSeries + " - " + titleValues[1].Trim().Replace(',', '/').Trim();
                            if (!bulkDataHolder.ContainsKey(currentSeries))
                            {
                                bulkDataHolder.Add(currentSeries, new Dictionary<DateTime, BLSValue>());
                            }
                        }
                        nextIsTitle = false;
                        titleCount++; // kept from original; reset by the next marker anyway
                    }
                    else if (titleCount == 0)
                    {
                        string[] titleValues = raw.Split(new char[] { ':' }, StringSplitOptions.RemoveEmptyEntries);
                        if (titleValues.Length == 2)
                        {
                            currentSeries = titleValues[1].Replace(',', '/').Trim();
                        }
                        titleCount++;
                    }
                }
                #endregion
                #region for state industry data
                if (dataType == "stateIndustry")
                {
                    if (titleCount == 3) // Fix: dropped redundant re-check of dataType
                    {
                        // Fourth line after the marker names the industry.
                        string[] titleValues = raw.Split(new char[] { ':' }, StringSplitOptions.RemoveEmptyEntries);
                        if (titleValues.Length == 2)
                        {
                            currentSeries = currentSeries + " - " + titleValues[1].Trim().Replace(',', '/');
                            if (!bulkDataHolder.ContainsKey(currentSeries))
                            {
                                bulkDataHolder.Add(currentSeries, new Dictionary<DateTime, BLSValue>());
                            }
                        }
                        nextIsTitle = false;
                    }
                    else if (titleCount == 0)
                    {
                        string[] titleValues = raw.Split(new char[] { ':' }, StringSplitOptions.RemoveEmptyEntries);
                        if (titleValues.Length == 2)
                        {
                            currentSeries = titleValues[1].Replace(',', '/');
                        }
                        titleCount++;
                    }
                    else
                    {
                        titleCount++;
                    }
                }
                #endregion
                #region for state employment data
                if (dataType == "stateEmployment")
                {
                    if (titleCount == 2)
                    {
                        // Third line after the marker refines the series name.
                        string[] titleValues = raw.Split(new char[] { ':' }, StringSplitOptions.RemoveEmptyEntries);
                        if (titleValues.Length == 2)
                        {
                            currentSeries = currentSeries + " - " + titleValues[1].Trim().Replace(',', '/');
                            if (!bulkDataHolder.ContainsKey(currentSeries))
                            {
                                bulkDataHolder.Add(currentSeries, new Dictionary<DateTime, BLSValue>());
                            }
                        }
                        nextIsTitle = false;
                    }
                    else if (titleCount == 0)
                    {
                        string[] titleValues = raw.Split(new char[] { ':' }, StringSplitOptions.RemoveEmptyEntries);
                        if (titleValues.Length == 2)
                        {
                            currentSeries = titleValues[1].Replace(',', '/').Trim();
                        }
                        titleCount++;
                    }
                    else
                    {
                        titleCount++;
                    }
                }
                #endregion
            }

            // Find what kind of data we're looking at.
            if (raw.Contains("Force Statistics from the Current Population Survey"))
            {
                dataType = "ATables"; // A Tables (household data)
            }
            else if (raw.Contains("from the Current Employment Statistics survey (National)"))
            {
                dataType = "BTables"; // B Tables (industry data)
            }
            else if (raw.Contains("Area Employment, Hours, and Earnings"))
            {
                dataType = "stateIndustry";
            }
            else if (raw.Contains("Area Unemployment Statistics"))
            {
                dataType = "stateEmployment";
            }

            // Whatever is under "Seasonally Adjusted" starts a title block.
            // Fix: the "|| raw == ..." clause was redundant with Contains().
            if (raw.Contains("Seasonally Adjusted"))
            {
                nextIsTitle = true;
                titleCount = 0;
            }

            // Now we can look at the data: rows have exactly 4 columns and are
            // not the header row. Annual summary rows are skipped.
            if (values.Length == 4 && values[0] != "Series id" && values[0] != "Series Id")
            {
                currentSeries = currentSeries.Trim();
                BLSValue blsVal = new BLSValue(currentSeries, values[0], values[1], values[2], values[3]);
                if (!blsVal.IsAnualData)
                {
                    Dictionary<DateTime, BLSValue> seriesData;
                    if (!bulkDataHolder.TryGetValue(currentSeries, out seriesData))
                    {
                        seriesData = new Dictionary<DateTime, BLSValue>();
                        bulkDataHolder.Add(currentSeries, seriesData);
                    }
                    // Fix: indexer instead of Add() — a duplicate date for the
                    // same series no longer throws; last value wins.
                    seriesData[blsVal.ValueDate] = blsVal;
                    possibleDates.Add(blsVal.ValueDate);
                }
            }
        }

        // Now we should have all the data, so let's start writing it out.
        // First we need headers.
        StringBuilder headerData = new StringBuilder("Date");
        foreach (string seriesKey in bulkDataHolder.Keys)
        {
            headerData.Append(',').Append(seriesKey.Replace(',', ' '));
        }
        outputFile.WriteLine(headerData.ToString());

        // Then we need to sort our dates (just in case) and emit one row per date.
        foreach (DateTime d in possibleDates.OrderBy(date => date))
        {
            StringBuilder writeDateLine = new StringBuilder(d.Month + "/" + d.Day + "/" + d.Year);
            foreach (Dictionary<DateTime, BLSValue> seriesData in bulkDataHolder.Values)
            {
                writeDateLine.Append(',');
                BLSValue val;
                if (seriesData.TryGetValue(d, out val)) // single lookup vs. ContainsKey + indexer
                {
                    writeDateLine.Append(val.Value);
                }
            }
            outputFile.WriteLine(writeDateLine.ToString());
        }
    }
}