/// <summary>
/// Reads every row of the <c>[Sheet1$]</c> table in <paramref name="fileName"/> and stages
/// one <see cref="StagedPayment"/> per payment row, grouped under <see cref="StagedPerson"/> rows.
/// </summary>
/// <param name="fileName">Path of the file passed to <c>DB.OpenFile</c>.</param>
/// <param name="uiThread">Context to which the matched <c>Person</c> assignment is posted.</param>
/// <param name="progress">
/// Receives caption, maximum and per-row progress updates; its <c>WasCanceled</c> flag is
/// checked once per row, and the import returns early (keeping rows already staged) when set.
/// </param>
public void Import(string fileName, SynchronizationContext uiThread, IProgressReporter progress) {
    // Every known payment method maps to itself (case-insensitively); the card brands
    // below are all recorded as "Credit Card".
    var methodMap = Names.PaymentMethods.ToDictionary(k => k, StringComparer.CurrentCultureIgnoreCase);
    methodMap.Add("American Express", "Credit Card");
    methodMap.Add("MasterCard", "Credit Card");
    methodMap.Add("Visa", "Credit Card");

    progress.Caption = "Reading " + Path.GetFileName(fileName);

    var genderizer = Genderizer.Create();
    // NOTE(review): connector is never disposed here; confirm whether DB.OpenFile's result needs it.
    var connector = DB.OpenFile(fileName);

    progress.Maximum = connector.ExecuteScalar<int>("SELECT COUNT(*) FROM [Sheet1$]");
    using (var reader = connector.ExecuteReader("SELECT * FROM [Sheet1$]")) {
        StagedPerson person = new StagedPerson();
        string company = null;

        // The source file is denormalized, and contains one row per payment,
        // with columns describing the person as part of the payment.  People
        // are separated by rows with values in the second column.
        while (reader.Read()) {
            if (progress.WasCanceled) {
                return;
            }
            progress.Progress++;

            // If we're at a boundary between people, skip the row, and start
            // a new person.  The second row in the boundary will noop.
            if (!reader.IsDBNull(1)) {
                person = new StagedPerson();
                continue;
            }

            // Stores columns that have been used for actual properties.  All
            // other non-empty columns will be added to Comments.
            var usedValues = new Dictionary<int, string>();

            // Gets the ordinal of a named column, and suppresses that column
            // from being listed in the Comments field.
            Func<string, int> GetField = (string name) => {
                Int32 ordinal = reader.GetOrdinal(name);
                usedValues[ordinal] = reader[ordinal].ToString();
                return ordinal;
            };

            var fullName = reader.GetNullableString(GetField("Name"));
            if (fullName == null) {
                continue; // Bad data; ignore.
            }
            progress.Caption = "Reading payments for " + fullName;

            // Names without a comma, and "..., LLC" company names, are kept whole;
            // otherwise the text is treated as "Last, First".  The suffix is a fixed
            // literal, so compare ordinally rather than with the current culture.
            int comma = fullName.IndexOf(',');
            if (comma < 0 || fullName.EndsWith(", LLC", StringComparison.Ordinal)) {
                person.LastName = person.FullName = fullName;
            } else {
                genderizer.SetFirstName(fullName.Substring(comma + 1).Trim(), person);
                person.LastName = fullName.Remove(comma).Trim();
                person.FullName = (person.HisName ?? person.HerName) + " " + person.LastName;
            }
            // A non-null "Ship To Address 1" value overrides the derived full name.
            person.FullName = reader.GetNullableString(GetField("Ship To Address 1")) ?? person.FullName;
            SetAddress(person,
                reader.GetNullableString(GetField("Name Address")),
                reader.GetNullableString(GetField("Name Street1")),
                reader.GetNullableString(GetField("Name Street2")),
                ref company
            );

            // If these values exist discretely in other columns (Ship To),
            // don't include them in Comments.  Negative keys cannot collide
            // with real column ordinals.
            usedValues.Add(-1, person.City);
            usedValues.Add(-2, person.State);
            usedValues.Add(-3, person.Zip);

            // Only add the person to the table if we actually have a payment
            // too (as opposed to the second boundary row).
            if (person.Table == null) {
                AppFramework.Table<StagedPerson>().Rows.Add(person);

                // Do the CPU-intensive part on separate threads so it can utilize all cores.
                // But only set the result on the UI thread to avoid threading bugs, both for
                // change events in the grid (after the caller re-enables them) and since the
                // child collection is not thread-safe.
                // Fire-and-forget: Import does not wait for the match to complete.
                async void SetPerson(StagedPerson thisPerson) {
                    Person inferredTarget = await Task.Run(() => Matcher.FindBestMatch(thisPerson));
                    if (inferredTarget != null) {
                        uiThread.Post(_ => thisPerson.Person = inferredTarget, null);
                    }
                }
                SetPerson(person);
            }

            // TODO: Warn on bad zip

            GetField("Date");   // Exclude Date from Comments, even if we don't fetch it below.
            StagedPayment payment = new StagedPayment {
                Date = reader.GetNullableDateTime(GetField("Date of Check")) ?? reader.GetDateTime(GetField("Date")),
                Method = reader.GetNullableString(GetField("Pay Meth")) ?? "Donation",
                Amount = (decimal)reader.GetDouble(GetField("Amount")),
                CheckNumber = reader.GetNullableString(GetField("Check #")),
                Account = Names.DefaultAccount,
                ExternalId = reader.GetNullableString(GetField("Num")) ?? Guid.NewGuid().ToString(),
                StagedPerson = person,
                Company = company,
                // Must stay last: it relies on usedValues having been filled by the
                // GetField calls above.  Lists every non-null column that was neither
                // consumed by name nor duplicates an already-used value.
                Comments = Enumerable
                    .Range(0, reader.FieldCount)
                    .Where(i => !usedValues.ContainsKey(i) && !reader.IsDBNull(i) && !usedValues.ContainsValue(reader[i].ToString()))
                    .Select(i => reader.GetName(i) + ": " + reader[i])
                    .Join(Environment.NewLine)
            };
            // Normalize the method name through the map; unknown names are kept verbatim.
            payment.Method = methodMap.GetOrNull(payment.Method) ?? payment.Method;
            AppFramework.Table<StagedPayment>().Rows.Add(payment);
        }
    }
}
/// <summary>
/// Reads every row of the <c>[Sheet1$]</c> table in <paramref name="fileName"/> and stages
/// one <see cref="StagedPayment"/> per payment row, grouped under <see cref="StagedPerson"/> rows.
/// </summary>
/// <param name="fileName">Path of the file passed to <c>DB.OpenFile</c>.</param>
/// <param name="progress">
/// Receives caption, maximum and per-row progress updates; its <c>WasCanceled</c> flag is
/// checked once per row, and the import returns early (keeping rows already staged) when set.
/// </param>
public void Import(string fileName, IProgressReporter progress) {
    // Every known payment method maps to itself (case-insensitively); the card brands
    // below are all recorded as "Credit Card".
    var methodMap = Names.PaymentMethods.ToDictionary(k => k, StringComparer.CurrentCultureIgnoreCase);
    methodMap.Add("American Express", "Credit Card");
    methodMap.Add("MasterCard", "Credit Card");
    methodMap.Add("Visa", "Credit Card");

    progress.Caption = "Reading " + Path.GetFileName(fileName);

    var genderizer = Genderizer.Create();
    // NOTE(review): connector is never disposed here; confirm whether DB.OpenFile's result needs it.
    var connector = DB.OpenFile(fileName);

    progress.Maximum = connector.ExecuteScalar<int>("SELECT COUNT(*) FROM [Sheet1$]");
    using (var reader = connector.ExecuteReader("SELECT * FROM [Sheet1$]")) {
        StagedPerson person = new StagedPerson();
        string company = null;

        // The source file is denormalized, and contains one row per payment,
        // with columns describing the person as part of the payment.  People
        // are separated by rows with values in the second column.
        while (reader.Read()) {
            if (progress.WasCanceled) return;
            progress.Progress++;

            // If we're at a boundary between people, skip the row, and start
            // a new person.  The second row in the boundary will noop.
            if (!reader.IsDBNull(1)) {
                person = new StagedPerson();
                continue;
            }

            // Stores columns that have been used for actual properties.  All
            // other non-empty columns will be added to Comments.
            var usedValues = new Dictionary<int, string>();

            // Gets the ordinal of a named column, and suppresses that column
            // from being listed in the Comments field.
            Func<string, int> GetField = (string name) => {
                Int32 ordinal = reader.GetOrdinal(name);
                usedValues[ordinal] = reader[ordinal].ToString();
                return ordinal;
            };

            var fullName = reader.GetNullableString(GetField("Name"));
            if (fullName == null)
                continue;   // Bad data; ignore.
            progress.Caption = "Reading payments for " + fullName;

            // Names without a comma, and "..., LLC" company names, are kept whole;
            // otherwise the text is treated as "Last, First".  The suffix is a fixed
            // literal, so compare ordinally rather than with the current culture.
            int comma = fullName.IndexOf(',');
            if (comma < 0 || fullName.EndsWith(", LLC", StringComparison.Ordinal))
                person.LastName = person.FullName = fullName;
            else {
                genderizer.SetFirstName(fullName.Substring(comma + 1).Trim(), person);
                person.LastName = fullName.Remove(comma).Trim();
                person.FullName = (person.HisName ?? person.HerName) + " " + person.LastName;
            }
            SetAddress(person,
                reader.GetNullableString(GetField("Name Address")),
                reader.GetNullableString(GetField("Name Street1")),
                reader.GetNullableString(GetField("Name Street2")),
                ref company
            );

            // Only add the person to the table if we actually have a payment
            // too (as opposed to the second boundary row).
            if (person.Table == null) {
                AppFramework.Table<StagedPerson>().Rows.Add(person);
                // Copy the reference: the person variable is reassigned at the next boundary row.
                var thisPerson = person;
                // Do the CPU-intensive part on separate threads so it can utilize all cores.
                // NOTE(review): the assignment to Person also happens on the thread-pool
                // thread; confirm StagedPerson tolerates being updated off the UI thread.
                ThreadPool.QueueUserWorkItem(_ => thisPerson.Person = Matcher.FindBestMatch(thisPerson));
            }

            // TODO: Warn on bad zip

            GetField("Date");   // Exclude Date from Comments, even if we don't fetch it below.
            StagedPayment payment = new StagedPayment {
                Date = reader.GetNullableDateTime(GetField("Date of Check")) ?? reader.GetDateTime(GetField("Date")),
                Method = reader.GetNullableString(GetField("Pay Meth")) ?? "Donation",
                Amount = (decimal)reader.GetDouble(GetField("Amount")),
                CheckNumber = reader.GetNullableString(GetField("Check #")),
                Account = Names.DefaultAccount,
                ExternalId = reader.GetNullableString(GetField("Num")) ?? Guid.NewGuid().ToString(),
                StagedPerson = person,
                Company = company,
                // Must stay last: it relies on usedValues having been filled by the
                // GetField calls above.  Lists every non-null column that was neither
                // consumed by name nor duplicates an already-used value.
                Comments = Enumerable
                    .Range(0, reader.FieldCount)
                    .Where(i => !usedValues.ContainsKey(i) && !reader.IsDBNull(i) && !usedValues.ContainsValue(reader[i].ToString()))
                    .Select(i => reader.GetName(i) + ": " + reader[i])
                    .Join("\n")
            };
            // Normalize the method name through the map; unknown names are kept verbatim.
            payment.Method = methodMap.GetOrNull(payment.Method) ?? payment.Method;
            AppFramework.Table<StagedPayment>().Rows.Add(payment);
        }
    }
}