/
FileProcessor.cs
133 lines (120 loc) · 5.04 KB
/
FileProcessor.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using JBA.Exceptions;
using JBA.Model;
namespace JBA
{
internal class FileProcessor
{
//[Years=1991-2000]
private static readonly Regex regexYear =
new Regex(@"\[Years=(?<yearStart>(19|20)\d{2})-(?<yearFinish>(19|20)\d{2})\]");
//Grid-ref= 1, 148
private static readonly Regex regexGridRef = new Regex(@"^Grid-ref=\D*(?<Xref>\d+),\D*(?<Yref>\d+)");
private readonly string filename;
private readonly List<PrecipitationRecord> recordsToAdd = new List<PrecipitationRecord>();
private int startYear, endYear;
public FileProcessor(string filename)
{
this.filename = filename;
}
public List<PrecipitationRecord> Process()
{
if (!File.Exists(filename)) throw new FileNotFoundException(filename);
//Clear database if it already exists
using (JBADbContext db = new JBADbContext(filename))
{
db.Database.EnsureDeleted();
db.Database.EnsureCreated();
db.SaveChanges();
}
using (StreamReader file =
new StreamReader(filename))
{
if (TryReadHeader(file))
{
ParseRecords(file);
return recordsToAdd;
}
throw new ParseException("Unable to read file header.");
}
}
private void ParseRecords(StreamReader file)
{
string line;
while ((line = file.ReadLine()) != null)
{
//check if we found new record chunk start
Match match = regexGridRef.Match(line);
if (match.Success)
{
//parse cord values
int Xref = int.Parse(match.Groups["Xref"].Value);
int Yref = int.Parse(match.Groups["Yref"].Value);
//should contain a line for each year
for (int i = 0; i <= endYear - startYear; i++)
{
line = file.ReadLine();
if (line != null)
{
//originally used regex but due to lines like 7586, 7960 etc. had to switch to splitting string
//into fixed sized chunks
List<string> substrings = Enumerable.Range(0, line.Length / 5)
.Select(s => line.Substring(s * 5, 5)).ToList();
//should contain value for each month
if (substrings.Count == 12)
{
int monthCounter = 1;
foreach (string valueString in substrings)
{
//create records
recordsToAdd.Add(new PrecipitationRecord
{
Xref = Xref,
Yref = Yref,
Date = new DateTime(startYear + i, monthCounter, 1),
Value = int.Parse(valueString.Trim())
});
monthCounter++;
if (recordsToAdd.Count % 10000 == 0)
Console.WriteLine($"Read {recordsToAdd.Count} records from file {filename}");
}
}
else
{
throw new ParseException(
$"Grid-ref line \"{line}\" is not followed by the expected amount of data");
}
}
else
{
throw new ParseException("Unexpected end of file");
}
}
}
}
Console.WriteLine($"Read {recordsToAdd.Count} records from file {filename}");
}
private bool TryReadHeader(StreamReader file)
{
//read until we find year header line
string line;
while ((line = file.ReadLine()) != null)
{
Match match = regexYear.Match(line);
if (match.Success)
{
startYear = int.Parse(match.Groups["yearStart"].Value);
endYear = int.Parse(match.Groups["yearFinish"].Value);
//year range is not valid, abort
if (startYear > endYear) throw new ParseException("Invalid year range parsed in the file header.");
return true;
}
}
return false;
}
}
}