示例#1
0
        /// <summary>
        /// Builds a <see cref="Data"/> object that carries only the raw data set and the
        /// street-suffix tables; no other reference data is loaded.
        /// </summary>
        /// <returns>A <see cref="Data"/> with <c>FinalDataSet</c> and <c>Suffixes</c> populated.</returns>
        public static Data LoadJustSuffixes()
        {
            Data result = new Data();

            // Raw data: skip the first line (presumably a header) and drop all-empty records.
            result.FinalDataSet = FileLibrary.GetLines()
                                             .Skip(1)
                                             .Where(record => record != ",,,,,,,,,,,,,,,,,,")
                                             .ToArray();

            // Suffix table: every line is comma separated; column 0 feeds LongSuffixes,
            // column 1 feeds ShortSuffixes. Each line is split once and both columns are
            // projected from the same split result.
            AddressSuffixes suffixes = new AddressSuffixes();
            result.Suffixes = suffixes;

            string[][] suffixColumns = File.ReadAllLines(StreetSuffixesPath)
                                           .Select(row => row.Split(','))
                                           .ToArray();

            suffixes.ShortSuffixes = suffixColumns.Select(columns => columns[1]).ToArray();
            suffixes.LongSuffixes  = suffixColumns.Select(columns => columns[0]).ToArray();

            return result;
        }
示例#2
0
        /// <summary>
        /// Entry point: loads and parses the rows, cleans them, runs the matcher starting from
        /// the original match sets and writes the resulting groups to <c>submission.csv</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Lower bound on EnterpriseID used to pick the rows handed to the cleaner as "realData".
            const int realDataMinEnterpriseId = 15374761;

            // A record made up of separators only; such records are dropped before parsing.
            const string emptyRecord = ",,,,,,,,,,,,,,,,,,";

            // Load: skip the first line, drop empty records, parse what is left.
            Row[] parsedRows = FileLibrary.GetLines()
                                          .Skip(1)
                                          .Where(line => line != emptyRecord)
                                          .Select(line => FileLibrary.ParseRow(line))
                                          .ToArray();

            // The work list is where additional sources would be merged in. Merging rows from an
            // alternates file (HailMary.LoadFromAlternatesFile) is currently disabled.
            List<Row> workList = new List<Row>(parsedRows);
            Row[] allData = workList.ToArray();

            Row[] realData = workList.Where(row => row.EnterpriseID >= realDataMinEnterpriseId)
                                     .OrderBy(row => row.MRN)
                                     .ToArray();

            // Clean
            Console.WriteLine("Cleaning Rows");
            DataCleaningManager.CleanData(ref allData, realData);
            Console.WriteLine("Done Cleaning Rows");

            // The match sets are loaded twice on purpose: the second instance is the working
            // copy that the matcher edits.
            ClosedSets originalMatches = FileLibrary.LoadOriginalMatches(allData);
            ClosedSets workingMatches  = FileLibrary.LoadOriginalMatches(allData);

            // Match
            MatchingManager matcher = new MatchingManager(_printErrors, _printActuals, _printLargeGroupValues);
            matcher.FindAllMatches(allData, ref workingMatches);

            // Save
            List<List<int>> finalSubmission = workingMatches.ClosedRowSets();
            FileLibrary.SaveFinalSubmission(finalSubmission, @"submission.csv");

            // Block until a line is read from the console before exiting.
            Console.ReadLine();
        }
示例#3
0
        /// <summary>
        /// Builds a fully populated <see cref="Data"/> object from the raw data set and the
        /// reference files: street suffixes, unknown/homeless addresses, abbreviations, suffix
        /// replacements, known centers, alternate suffixes, street data, BK-trees and known cities.
        /// </summary>
        /// <param name="regenerateBKTree">
        /// When <c>true</c>, the street-name BK-tree is rebuilt from <c>NYStateStreets</c>;
        /// otherwise it is deserialized from <c>bkTree.dat</c>.
        /// </param>
        /// <returns>The populated <see cref="Data"/> object.</returns>
        /// <exception cref="IndexOutOfRangeException">
        /// A line in one of the delimited reference files has fewer fields than are read from it.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// A key is repeated in a file whose entries are added without a duplicate check
        /// (abbreviations, known centers, alternate suffixes).
        /// </exception>
        public static Data LoadData(bool regenerateBKTree)
        {
            Data data = new Data();

            // RawData: skip the first line (presumably a header) and drop all-empty records.
            data.FinalDataSet = FileLibrary.GetLines().Skip(1).Where(l => l != ",,,,,,,,,,,,,,,,,,").ToArray();

            // Suffixes: column 0 feeds LongSuffixes, column 1 feeds ShortSuffixes.
            // Each line is split once and both columns are projected from the same result.
            data.Suffixes = new AddressSuffixes();
            string[][] suffixColumns = File.ReadAllLines(StreetSuffixesPath).Select(n => n.Split(',')).ToArray();
            data.Suffixes.ShortSuffixes = suffixColumns.Select(columns => columns[1]).ToArray();
            data.Suffixes.LongSuffixes  = suffixColumns.Select(columns => columns[0]).ToArray();

            // Unknown and Homeless
            data.UnknownAddresses  = File.ReadAllLines("UnknownAddresses.csv");
            data.HomelessAddresses = File.ReadAllLines("HomelessAddresses.csv");

            // Abbreviations: duplicate keys are an error in these two files.
            data.Abbreviations          = ReadCommaSeparatedPairs("Abbreviations.txt", false);
            data.AbbreviationsShortened = ReadCommaSeparatedPairs("AbbreviationsShortened.txt", false);

            // SuffixReplacementKey: the first occurrence of a key wins, later ones are ignored.
            data.SuffixReplacementKey = ReadCommaSeparatedPairs("SuffixReplacementKey.txt", true);

            // KnownCenters
            data.KnownCenters = ReadKnownCenters("KnownCenters.txt");

            // AlternateSuffixList
            data.AlternateSuffixList = ReadAlternateSuffixes("streetToSuffixTable.txt");

            // SECURITY NOTE(review): BinaryFormatter can execute arbitrary code while
            // deserializing untrusted input and is removed in recent .NET versions.
            // streetNames.dat must only ever come from a trusted source; consider
            // migrating this file to a safer format.
            BinaryFormatter bf = new BinaryFormatter();

            using (FileStream fin = File.OpenRead("streetNames.dat"))
            {
                data.StreetData = (List<StreetName>)bf.Deserialize(fin);
            }

            // These loaders receive the partially built object, so they stay after the
            // assignments above in case they read any of those members.
            data.NYCityStreets  = LoadNYCityAddresses(data);
            data.NYStateStreets = LoadNYStateStreets(data);

            // BKTree
            if (regenerateBKTree)
            {
                // The regenerated tree is only kept in memory; bkTree.dat is not rewritten.
                data.StreetNameBKTree = BKTreeEngine.CreateBKTree(data.NYStateStreets.ToList());
            }
            else
            {
                data.StreetNameBKTree = BKTreeSerializer.DeserializeFrom("bkTree.dat");
            }

            data.CityNameBKTree = BKTreeSerializer.DeserializeFrom("citiesBKTree.dat");

            data.KnownCities = new List<string>(File.ReadAllLines("knownCities.csv"));

            return data;
        }

        /// <summary>
        /// Reads a file of comma-separated "key,value" lines into a dictionary, trimming both fields.
        /// </summary>
        /// <param name="path">File to read.</param>
        /// <param name="keepFirstOnDuplicateKey">
        /// When <c>true</c>, a line whose key is already present is skipped; when <c>false</c>,
        /// a repeated key makes <see cref="Dictionary{TKey,TValue}.Add"/> throw.
        /// </param>
        private static Dictionary<string, string> ReadCommaSeparatedPairs(string path, bool keepFirstOnDuplicateKey)
        {
            Dictionary<string, string> pairs = new Dictionary<string, string>();

            foreach (string nameValuePair in File.ReadAllLines(path))
            {
                string[] bits = nameValuePair.Split(',').Select(n => n.Trim()).ToArray();

                // Checked before bits[1] is touched, so a skipped duplicate never needs a value field.
                if (keepFirstOnDuplicateKey && pairs.ContainsKey(bits[0]))
                {
                    continue;
                }

                pairs.Add(bits[0], bits[1]);
            }

            return pairs;
        }

        /// <summary>
        /// Reads "key; name, number, street, city, state, zip" lines into a dictionary of addresses.
        /// </summary>
        /// <param name="path">File to read.</param>
        private static Dictionary<string, Address> ReadKnownCenters(string path)
        {
            Dictionary<string, Address> centers = new Dictionary<string, Address>();

            foreach (string line in File.ReadAllLines(path))
            {
                string[] bits            = line.Split(';').Select(n => n.Trim()).ToArray();
                string[] rhsAddressParts = bits[1].Split(',').Select(n => n.Trim()).ToArray();

                Address address = new Address
                {
                    CenterName   = rhsAddressParts[0],
                    StreetNumber = rhsAddressParts[1],
                    StreetName   = rhsAddressParts[2],
                    City         = rhsAddressParts[3],
                    State        = rhsAddressParts[4],
                };

                // The zip field may be empty, in which case Zip keeps its default value.
                if (rhsAddressParts[5].Length > 0)
                {
                    // Machine-readable file: parse independently of the current culture.
                    address.Zip = int.Parse(rhsAddressParts[5], System.Globalization.CultureInfo.InvariantCulture);
                }

                // Only prefix the street number when there is one, to avoid a leading space.
                address.FullStreetName = (address.StreetNumber != "" ? $"{address.StreetNumber} {address.StreetName}" : address.StreetName);
                centers.Add(bits[0], address);
            }

            return centers;
        }

        /// <summary>
        /// Reads "street:alt1,alt2,..." lines into a dictionary mapping the left-hand side to its alternates.
        /// </summary>
        /// <param name="path">File to read.</param>
        private static Dictionary<string, List<string>> ReadAlternateSuffixes(string path)
        {
            Dictionary<string, List<string>> table = new Dictionary<string, List<string>>();

            foreach (string line in File.ReadAllLines(path))
            {
                string[]     halves     = line.Split(':');
                List<string> alternates = halves[1].Split(',').ToList();
                table.Add(halves[0], alternates);
            }

            return table;
        }