/// <summary>
/// Gets data from files and maintains the order of values in files using a Dictionary.
/// </summary>
/// <param name="files"></param>
/// <returns></returns>
private List<OutputRow> GetOrderedFileDataDictionary(List<string> files)
{
    var keyValues = new Dictionary<DateTime, List<double?>>();

    foreach (var file in files)
    {
        OutputRow.Headers.Add(file);
        var values = new DbfReader().ReadValues(file);

        foreach (var value in values)
        {
            var item = new OutputRow
            {
                Timestamp = value.Timestamp,
                Values = new List<double?>(new double?[files.Count])
            };

            // The current file's column index is the position of the last header added.
            if (keyValues.ContainsKey(item.Timestamp))
            {
                keyValues[item.Timestamp][OutputRow.Headers.Count - 1] = value.Value;
            }
            else
            {
                item.Values[OutputRow.Headers.Count - 1] = value.Value;
                keyValues.Add(value.Timestamp, item.Values);
            }
        }
    }

    // Dictionary keys are unique, so each group holds exactly one entry;
    // the GroupBy only wraps the key/value pair before ordering by timestamp.
    return keyValues.GroupBy(x => x.Key)
        .OrderBy(x => x.Key)
        .Select(x => new OutputRow
        {
            Timestamp = x.Key,
            Values = x.SelectMany(y => y.Value).ToList()
        })
        .ToList();
}
/// <summary>
/// Gets data from files and maintains the order of values in files using BinarySearch.
/// </summary>
/// <param name="files"></param>
/// <returns></returns>
private List<OutputRow> GetOrderedFileDataBinarySearch(List<string> files)
{
    var outputs = new List<OutputRow>();

    foreach (var file in files)
    {
        OutputRow.Headers.Add(file);
        var values = new DbfReader().ReadValues(file);

        foreach (var value in values)
        {
            var item = new OutputRow
            {
                Timestamp = value.Timestamp,
                Values = new List<double?>(new double?[files.Count])
            };

            // BinarySearch returns the bitwise complement of the insertion index
            // when the timestamp is not found, so the list stays sorted as we insert.
            var index = outputs.BinarySearch(item);
            if (index < 0)
            {
                item.Values[OutputRow.Headers.Count - 1] = value.Value;
                outputs.Insert(~index, item);
            }
            else
            {
                outputs[index].Values[OutputRow.Headers.Count - 1] = value.Value;
            }
        }
    }

    return outputs.OrderBy(x => x.Timestamp).ToList();
}
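The parameterless BinarySearch call above relies on the default comparer, so this variant only works if OutputRow implements IComparable<OutputRow>. That implementation is not part of the snippet; a minimal sketch, assuming rows are compared by Timestamp alone, could look like this (AsTextLine, used by the test task below, is omitted):

// Sketch only: assumes OutputRow is ordered by Timestamp, which is what the
// BinarySearch-based merge above requires.
public class OutputRow : IComparable<OutputRow>
{
    public static List<string> Headers { get; } = new List<string>();

    public DateTime Timestamp { get; set; }
    public List<double?> Values { get; set; }

    public int CompareTo(OutputRow other)
    {
        return other == null ? 1 : Timestamp.CompareTo(other.Timestamp);
    }
}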
public void TestTask()
{
    const string RootDir = @".\Data";
    const string RelevantFileName = "128.dbf";

    // TODO read all RelevantFileName files recursively from RootDir (will be copied on build)
    // use DbfReader to read them and extract all DataValues
    // here an example call for one file:
    var reader = new DbfReader();
    var values = reader.ReadValues(@".\Data\ELEKTRO\E01\E600DI01\128.dbf");

    // put all DataValues into ONE ordered (by timestamp) list of OutputRow
    // (each timestamp shall exist only once, each file should be like a column)
    // the OutputRow has 2 lists: 1 static one for the headers (directory path of file)
    // and one for the values (values of all files (same timestamp) must be merged into one OutputRow)
    var outputs = new List<OutputRow>();

    // if there is time left, improve example where you think it isn't good enough

    // the following asserts should pass
    Assert.AreEqual(25790, outputs.Count);
    Assert.AreEqual(27, OutputRow.Headers.Count);
    Assert.AreEqual(27, outputs[0].Values.Count);
    Assert.AreEqual(27, outputs[11110].Values.Count);
    Assert.AreEqual(27, outputs[25789].Values.Count);
    Assert.AreEqual(633036852000000000, outputs.Min(o => o.Timestamp).Ticks);
    Assert.AreEqual(634756887000000000, outputs.Max(o => o.Timestamp).Ticks);
    Assert.AreEqual(633036852000000000, outputs[0].Timestamp.Ticks);
    Assert.AreEqual(634756887000000000, outputs.Last().Timestamp.Ticks);

    // write into file that we can compare results later on (you don't have to do something)
    string content = "Time\t" + string.Join("\t", OutputRow.Headers) + Environment.NewLine +
                     string.Join(Environment.NewLine, outputs.Select(o => o.AsTextLine()));
    File.WriteAllText(@".\output.txt", content);
}
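One possible way to fill outputs (a sketch, not the reference solution): enumerate every RelevantFileName file under RootDir and hand the list to one of the merge helpers shown earlier, e.g. the BinarySearch variant, assuming it is available on the test class.

// Sketch: recursively collect the relevant files, then merge them by timestamp.
var files = Directory
    .EnumerateFiles(RootDir, RelevantFileName, SearchOption.AllDirectories)
    .ToList();
var outputs = GetOrderedFileDataBinarySearch(files);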
internal bool Load(string root, string searchPattern, List<OutputRow> outputs, List<string> warnings, ref string errorMessage)
{
    var d = new DirectoryInfo(root);
    var files = d.GetFiles(searchPattern);
    var dbfReader = new DbfReader();

    foreach (FileInfo file in files)
    {
        if (!ProcessFile(file, outputs, dbfReader, warnings, ref errorMessage))
        {
            return false;
        }
    }

    // Recursively load files from sub-directories
    var directories = d.GetDirectories();
    foreach (var directory in directories)
    {
        if (!Load(directory.FullName, searchPattern, outputs, warnings, ref errorMessage))
        {
            return false;
        }
    }

    return true;
}
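A possible call site for the recursive loader; the root directory and file pattern are taken from the test task above, the rest is illustrative.

// Sketch: run the recursive load and surface any failure or warnings.
var outputs = new List<OutputRow>();
var warnings = new List<string>();
string errorMessage = null;

if (!Load(@".\Data", "128.dbf", outputs, warnings, ref errorMessage))
{
    Console.WriteLine("Load failed: " + errorMessage);
}
warnings.ForEach(Console.WriteLine);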
public IList<OutputRow> MergeFiles(string sourceDirectory, string fileName)
{
    var dbfFiles = Directory.EnumerateFiles(sourceDirectory, fileName, SearchOption.AllDirectories);
    var sortedOutputRows = new SortedList<DateTime, OutputRow>();
    var numberOfFiles = dbfFiles.Count();
    var currentFile = 0;
    var dbfReader = new DbfReader();

    foreach (var dbfFile in dbfFiles)
    {
        OutputRow.Headers.Add(Path.GetDirectoryName(dbfFile));

        foreach (var valueRow in dbfReader.ReadValues(dbfFile))
        {
            if (sortedOutputRows.TryGetValue(valueRow.Timestamp, out var outputRow))
            {
                outputRow.Values[currentFile] = valueRow.Value;
            }
            else
            {
                sortedOutputRows.Add(valueRow.Timestamp, new OutputRow
                {
                    Timestamp = valueRow.Timestamp,
                    Values = new List<double?>(new double?[numberOfFiles]) { [currentFile] = valueRow.Value }
                });
            }
        }

        currentFile++;
    }

    return sortedOutputRows.Values;
}
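MergeFiles leans on SortedList<DateTime, OutputRow> to keep rows ordered by timestamp as they are inserted, so the returned collection needs no further sorting. A usage sketch; the hosting class name DbfMerger is an assumption:

// Sketch: merge all 128.dbf files found under .\Data into one row per timestamp.
IList<OutputRow> rows = new DbfMerger().MergeFiles(@".\Data", "128.dbf");
// rows is already ordered by Timestamp because SortedList orders by key.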
private bool ProcessFile(FileInfo file, List<OutputRow> outputs, DbfReader dbfReader, List<string> warnings, ref string errorMessage)
{
    List<DbfReader.ValueRow> valueRows = null;
    try
    {
        valueRows = dbfReader.ReadValues(file.FullName);
    }
    catch (Exception ex)
    {
        errorMessage = "Error reading values: " + ex.Message;
        return false;
    }

    OutputRow.Headers.Add(file.DirectoryName);
    AddNullValuesAtEnd(outputs);

    foreach (var row in valueRows)
    {
        var newRow = new OutputRow()
        {
            Timestamp = row.Timestamp,
        };

        var index = outputs.BinarySearch(newRow, new CompareRowTimestamp());
        if (index < 0) // Not found
        {
            outputs.Insert(~index, newRow);
            SetValuesList(newRow, row.Value);
        }
        else
        {
            var outputRow = outputs[index];
            if (outputRow.Values.Count < OutputRow.Headers.Count)
            {
                outputRow.Values.Add(row.Value);
            }
            else
            {
                if (row.Value != outputRow.Values[outputRow.Values.Count - 1])
                {
                    /* It could be the case that the same timestamp was duplicated in the file.
                     * Here we are creating a list of warnings to deal with this situation, but we might as well throw an exception
                     * or return false and stop the recursive processing - it was a judgement call.
                     * If we knew for sure that the timestamp is a db primary key that is never duplicated, we wouldn't need this verification. */
                    if (outputRow.Values[outputRow.Values.Count - 1] != null)
                    {
                        warnings.Add("Value overwritten. Path=" + file.Directory + "; Timestamp=" + row.Timestamp +
                                     "; old value=" + outputRow.Values[outputRow.Values.Count - 1] + "; new value=" + row.Value);
                    }
                    outputRow.Values[outputRow.Values.Count - 1] = row.Value;
                }
            }
        }
    }

    return true;
}
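ProcessFile references a comparer and two helpers that are not shown in this snippet. A minimal sketch of what they might look like, inferred from how ProcessFile uses them (the actual bodies are assumptions):

// Sketch only: orders rows by timestamp for the BinarySearch call in ProcessFile.
private class CompareRowTimestamp : IComparer<OutputRow>
{
    public int Compare(OutputRow x, OutputRow y)
    {
        return x.Timestamp.CompareTo(y.Timestamp);
    }
}

// Sketch only: pads every existing row with one null so earlier rows stay
// aligned with the column just added to OutputRow.Headers.
private void AddNullValuesAtEnd(List<OutputRow> outputs)
{
    foreach (var output in outputs)
    {
        output.Values.Add(null);
    }
}

// Sketch only: creates the Values list for a newly inserted row, with one slot
// per header read so far and only the current (last) column filled in.
private void SetValuesList(OutputRow newRow, double? value)
{
    newRow.Values = new List<double?>(new double?[OutputRow.Headers.Count]);
    newRow.Values[OutputRow.Headers.Count - 1] = value;
}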