/// <summary>
/// Creates a <see cref="Data"/> holding only the raw input lines and the street-suffix tables.
/// </summary>
/// <returns>
/// A <see cref="Data"/> whose <c>FinalDataSet</c> and <c>Suffixes</c> members are populated;
/// no other member is assigned here.
/// </returns>
public static Data LoadJustSuffixes()
{
    Data loaded = new Data();

    // Raw data: skip the first line (presumably a header row - confirm against the input file)
    // and discard lines equal to the all-commas placeholder.
    loaded.FinalDataSet = FileLibrary.GetLines()
        .Skip(1)
        .Where(rawLine => rawLine != ",,,,,,,,,,,,,,,,,,")
        .ToArray();

    // Suffixes: every line of the suffix file is "<long>,<short>" (column 0 / column 1).
    loaded.Suffixes = new AddressSuffixes();
    string[] suffixRows = File.ReadAllLines(StreetSuffixesPath);

    loaded.Suffixes.ShortSuffixes =
        (from suffixRow in suffixRows
         select suffixRow.Split(',')[1]).ToArray();

    loaded.Suffixes.LongSuffixes =
        (from suffixRow in suffixRows
         select suffixRow.Split(',')[0]).ToArray();

    return loaded;
}
/// <summary>
/// Program entry point: loads and parses the input rows, cleans them, runs the matcher and
/// writes the resulting closed row sets to "submission.csv".
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Alternates-file path, currently disabled:
    //HailMary hailMary = new HailMary();
    //List<string> alternates = hailMary.LoadFromAlternatesFile("D:/alternates.csv", 16028369);
    //Row[] alternateAllData = alternates.Select(n => FileLibrary.ParseRow(n)).ToArray();

    // Load Data: skip the first line, drop all-commas placeholder lines, parse the rest.
    var rawLines = FileLibrary.GetLines();
    Row[] parsedRows = rawLines
        .Skip(1)
        .Where(rawLine => rawLine != ",,,,,,,,,,,,,,,,,,")
        .Select(rawLine => FileLibrary.ParseRow(rawLine))
        .ToArray();

    // Working list of rows; the alternates would be appended here if they were enabled.
    List<Row> toProcess = new List<Row>(parsedRows);
    //toProcess.AddRange(alternateAllData);
    Row[] allData = toProcess.ToArray();

    // Rows at or above the EnterpriseID threshold, ordered by MRN.
    Row[] realData =
        (from row in toProcess
         where row.EnterpriseID >= 15374761
         orderby row.MRN
         select row).ToArray();

    // Clean Data
    Console.WriteLine("Cleaning Rows");
    DataCleaningManager.CleanData(ref allData, realData);
    Console.WriteLine("Done Cleaning Rows");

    // Load the original matches twice: the first result is not read again in this method
    // (the call is kept as-is), the second is the copy that matching edits.
    ClosedSets originalMatches = FileLibrary.LoadOriginalMatches(allData);
    ClosedSets newMatches = FileLibrary.LoadOriginalMatches(allData);

    // Match Data
    MatchingManager matcher = new MatchingManager(_printErrors, _printActuals, _printLargeGroupValues);
    matcher.FindAllMatches(allData, ref newMatches);

    List<List<int>> finalSubmission = newMatches.ClosedRowSets(); //hailMary.Collapse(newMatches.ClosedRowSets());
    FileLibrary.SaveFinalSubmission(finalSubmission, @"submission.csv");

    // Wait for a line on standard input before returning.
    Console.ReadLine();
}
/// <summary>
/// Builds the complete <see cref="Data"/> reference set: the raw input lines plus every lookup
/// table read from disk (suffixes, unknown/homeless addresses, abbreviations, suffix replacement
/// key, known centers, alternate suffix list, street data, BK-trees and known cities).
/// </summary>
/// <param name="regenerateBKTree">
/// When <c>true</c> the street-name BK-tree is rebuilt from <c>data.NYStateStreets</c>;
/// otherwise it is deserialized from "bkTree.dat".
/// </param>
/// <returns>The populated <see cref="Data"/> instance.</returns>
public static Data LoadData(bool regenerateBKTree)
{
    Data data = new Data();

    // RawData: skip the first line (presumably a header row - confirm against the input file)
    // and discard lines equal to the all-commas placeholder.
    data.FinalDataSet = FileLibrary.GetLines().Skip(1).Where(l => l != ",,,,,,,,,,,,,,,,,,").ToArray();

    // Suffixes: each line is "<long>,<short>". Split every line once and project both columns.
    data.Suffixes = new AddressSuffixes();
    string[][] suffixColumns = File.ReadAllLines(StreetSuffixesPath).Select(n => n.Split(',')).ToArray();
    data.Suffixes.ShortSuffixes = suffixColumns.Select(columns => columns[1]).ToArray();
    data.Suffixes.LongSuffixes = suffixColumns.Select(columns => columns[0]).ToArray();

    // Unknown and Homeless
    data.UnknownAddresses = File.ReadAllLines("UnknownAddresses.csv");
    data.HomelessAddresses = File.ReadAllLines("HomelessAddresses.csv");

    // Abbreviations: a duplicate key in either file throws (Dictionary.Add).
    data.Abbreviations = LoadNameValuePairs("Abbreviations.txt", false);
    data.AbbreviationsShortened = LoadNameValuePairs("AbbreviationsShortened.txt", false);

    // SuffixReplacementKey: duplicates are tolerated, the first occurrence of a key wins.
    data.SuffixReplacementKey = LoadNameValuePairs("SuffixReplacementKey.txt", true);

    // KnownCenters
    data.KnownCenters = LoadKnownCenters("KnownCenters.txt");

    // AlternateSuffixList
    data.AlternateSuffixList = LoadAlternateSuffixList("streetToSuffixTable.txt");

    // SECURITY NOTE(review): BinaryFormatter.Deserialize is unsafe on untrusted input and the
    // type is obsolete/removed in current .NET. Left in place because "streetNames.dat" is in
    // this format; only load files you produced yourself and plan a migration to a safe format.
    BinaryFormatter bf = new BinaryFormatter();
    using (FileStream fin = File.OpenRead("streetNames.dat"))
    {
        data.StreetData = (List<StreetName>)bf.Deserialize(fin);
    }

    // Both loaders receive the partially populated data object, so they stay after the
    // assignments above.
    data.NYCityStreets = LoadNYCityAddresses(data);
    data.NYStateStreets = LoadNYStateStreets(data);

    // BKTree
    if (regenerateBKTree)
    {
        // The rebuilt tree is not written back to disk (the serialization call is disabled).
        data.StreetNameBKTree = BKTreeEngine.CreateBKTree(data.NYStateStreets.ToList());
        //BKTreeSerializer.SerializeTo(data.BKTree, "bkTree.dat");
    }
    else
    {
        data.StreetNameBKTree = BKTreeSerializer.DeserializeFrom("bkTree.dat");
    }

    data.CityNameBKTree = BKTreeSerializer.DeserializeFrom("citiesBKTree.dat");
    data.KnownCities = new List<string>(File.ReadAllLines("knownCities.csv"));

    return data;
}

// Reads a "name,value" file into a dictionary, trimming both parts of every line.
// When keepFirstOnDuplicate is false a repeated key throws (Dictionary.Add); when true the
// first value seen for a key is kept and later ones are ignored.
private static Dictionary<string, string> LoadNameValuePairs(string path, bool keepFirstOnDuplicate)
{
    Dictionary<string, string> pairs = new Dictionary<string, string>();
    foreach (string nameValuePair in File.ReadAllLines(path))
    {
        string[] bits = nameValuePair.Split(',').Select(n => n.Trim()).ToArray();
        if (keepFirstOnDuplicate && pairs.ContainsKey(bits[0]))
        {
            continue;
        }

        pairs.Add(bits[0], bits[1]);
    }

    return pairs;
}

// Reads the known-centers file: each line is "<key>;<center>,<number>,<street>,<city>,<state>,<zip>"
// with every part trimmed. The zip is parsed only when non-empty.
private static Dictionary<string, Address> LoadKnownCenters(string path)
{
    Dictionary<string, Address> centers = new Dictionary<string, Address>();
    foreach (string nameValuePair in File.ReadAllLines(path))
    {
        string[] bits = nameValuePair.Split(';').Select(n => n.Trim()).ToArray();
        string[] rhsAddressParts = bits[1].Split(',').Select(n => n.Trim()).ToArray();

        Address address = new Address
        {
            CenterName = rhsAddressParts[0],
            StreetNumber = rhsAddressParts[1],
            StreetName = rhsAddressParts[2],
            City = rhsAddressParts[3],
            State = rhsAddressParts[4],
        };

        if (rhsAddressParts[5].Length > 0)
        {
            // Machine-readable data: parse independently of the current thread culture.
            address.Zip = int.Parse(rhsAddressParts[5], System.Globalization.CultureInfo.InvariantCulture);
        }

        // Prefix the street number only when there is one.
        address.FullStreetName = (address.StreetNumber != ""
            ? $"{address.StreetNumber} {address.StreetName}"
            : address.StreetName);

        centers.Add(bits[0], address);
    }

    return centers;
}

// Reads the street-to-suffix table: each line is "<street>:<suffix>,<suffix>,...".
// Values are stored exactly as written (no trimming).
private static Dictionary<string, List<string>> LoadAlternateSuffixList(string path)
{
    Dictionary<string, List<string>> table = new Dictionary<string, List<string>>();
    foreach (string line in File.ReadAllLines(path))
    {
        string[] halves = line.Split(':');
        List<string> alternates = halves[1].Split(',').ToList();
        table.Add(halves[0], alternates);
    }

    return table;
}