コード例 #1
0
        /// <summary>
        /// Extracts data from census.gov and imports it into a database
        /// </summary>
        /// <param name="toDatabase">The ISQL implementation representing the database to import the data</param>
        /// <param name="dataType">The Data Type to be extracted from census.gov and imported</param>
        /// <param name="downloadPath">The location where the ZIP files should be downloaded to</param>
        /// <param name="year">The census year data to download</param>
        /// <param name="recordsPerBatch">The number of records to insert in a single batch</param>
        /// <returns>
        /// A DetailedReturn carrying the record counts (imported, already in DB, in file) summed over
        /// every processed file, plus all errors collected along the way
        /// </returns>
        public static async Task<DetailedReturn> ExtractData(ISQL toDatabase, DataTypes dataType, string downloadPath, int year, int recordsPerBatch = 1500)
        {
            DetailedReturn ret = new DetailedReturn();

            // Every run unzips into its own GUID-named sub folder below downloadPath
            string unzipFolder = @"\unzipTemp\" + Guid.NewGuid() + @"\";

            await Task.Run(async () =>
            {
                Downloader d = new Downloader();

                toDatabase.CreateImportDetailTable();

                // Get all links from the corresponding data page on census.gov
                List<string> addr = await d.GetLinks(year, dataType);

                int filecount = addr.Count;
                int curfile   = 0;

                // Get the list of IMPORTDETAIL records that indicate the corresponding file has already completed its import
                List<ImportDetail> alreadyDone = toDatabase.GetCompletedImportDetailList(dataType);

                // As many completed records as links: treat the whole data type as already imported
                if (addr.Count == alreadyDone.Count)
                {
                    ret.TotalRecordsAlreadyInDB = alreadyDone.Select(x => x.LastRecordNum).Sum();
                    return;
                }

                // Index the completed URLs once so the per-link check below is a hash lookup
                // instead of a scan over the whole completed list
                HashSet<string> completedUrls = new HashSet<string>(alreadyDone.Select(x => x.URL));

                // Loop through each of the links
                foreach (string link in addr)
                {
                    curfile++;

                    // Proceed only if this link is not contained in the list of completed files
                    if (completedUrls.Contains(link))
                    {
                        continue;
                    }

                    string outHeader = "[" + dataType.ToString("g") + ": " + curfile + "/" + filecount + "]: ";
                    Console.WriteLine(DateTime.Now.ToLogFormat() + outHeader + link);

                    // Attempt to download the file
                    string dl = await d.DownloadFile(link, downloadPath);

                    // Only proceed if the file was successfully downloaded...
                    if (string.IsNullOrEmpty(dl))
                    {
                        toDatabase.InsertImportDetails(link, "UNABLE TO DOWNLOAD", dataType);
                        ret.Errors.Add(new ErrorDetail(ErrorTypes.CouldNotDownloadFile, dataType, "Failed to download", link, ""));
                        continue;
                    }

                    // Attempt to unzip the downloaded file, and get the name of the "DBF" file from the extract
                    string fileToImport = d.Unzip(dl, downloadPath + unzipFolder)
                                           .FirstOrDefault(x => x.IndexOf(".dbf", StringComparison.OrdinalIgnoreCase) >= 0);

                    // FirstOrDefault yields null (not "") when the archive holds no DBF entry,
                    // so both null and empty have to be treated as "nothing to import"
                    if (string.IsNullOrEmpty(fileToImport))
                    {
                        toDatabase.InsertImportDetails(link, "BAD FILE", dataType);
                        ret.Errors.Add(new ErrorDetail(ErrorTypes.BadOrMissingLocalFile, dataType, "No DB file found to import", link, ""));
                        continue;
                    }

                    try
                    {
                        // Attempt to process the file
                        DetailedReturn thisFile      = ImportTigerData(toDatabase, dataType, fileToImport, recordsPerBatch, link, dl);
                        ret.TotalRecordsImported    += thisFile.TotalRecordsImported;
                        ret.TotalRecordsAlreadyInDB += thisFile.TotalRecordsAlreadyInDB;
                        ret.TotalRecordsInFile      += thisFile.TotalRecordsInFile;
                        ret.Errors.AddRange(thisFile.Errors);

                        // NOTE(review): the trailing "+ ret" prints DetailedReturn.ToString(); confirm that type
                        // overrides ToString() to produce the intended running total.
                        Console.WriteLine(DateTime.Now.ToLogFormat() + outHeader + thisFile.TotalRecordsImported + " records imported this session. Total records in DB now at: " + ret);
                    }
                    finally
                    {
                        // Remove the extracted files and corresponding temporary directory,
                        // even when the import itself threw
                        d.CleanFolder(downloadPath + unzipFolder);
                    }
                }
            });

            return ret;
        }
コード例 #2
0
        /// <summary>
        /// Imports the records of a single DBF file into the table named after <paramref name="dataType"/>,
        /// inserting them in multi-row batches and checkpointing progress in the IMPORTDETAIL table so that an
        /// interrupted import can be resumed.
        /// </summary>
        /// <param name="toDatabase">The ISQL implementation representing the target database</param>
        /// <param name="dataType">The data type being imported; its enum name is used as the table name</param>
        /// <param name="fileToImport">Path of the DBF file to read</param>
        /// <param name="recordsPerBatch">
        /// Number of rows collected before an INSERT is executed. Values below 1 cause every row to be
        /// inserted on its own.
        /// </param>
        /// <param name="referenceURL">The URL the file was downloaded from; identifies the IMPORTDETAIL record</param>
        /// <param name="referenceZipFile">The local ZIP file the DBF was extracted from</param>
        /// <returns>
        /// A DetailedReturn where TotalRecordsInFile is the record count from the DBF header,
        /// TotalRecordsAlreadyInDB is the checkpoint found before this run, TotalRecordsImported is the
        /// number of rows successfully inserted by this run, and Errors lists every problem encountered.
        /// </returns>
        private static DetailedReturn ImportTigerData(ISQL toDatabase, DataTypes dataType, string fileToImport, int recordsPerBatch, string referenceURL, string referenceZipFile)
        {
            DetailedReturn ret = new DetailedReturn();

            // CREATE the IMPORTDETAIL table in the Database if it doesnt already exist
            toDatabase.CreateImportDetailTable();

            // Make sure the file exists that we wish to import...
            if (!File.Exists(fileToImport))
            {
                ret.Errors.Add(new ErrorDetail(ErrorTypes.BadOrMissingLocalFile, dataType, "File not found", referenceURL, fileToImport));
                return ret;
            }

            // Read the dbfFile contents into a DbfTable object
            using (var dbfTable = new DbfTable(fileToImport, Encoding.UTF8))
            {
                var recordCount = dbfTable.Header.RecordCount;

                ret.TotalRecordsInFile = recordCount;

                // Get the IMPORTDETAIL record that matches the URL/File we are going to import
                ImportDetail importDetail = toDatabase.GetImportDetail(referenceURL);
                if (importDetail == null)
                {
                    // If no matching IMPORTDETAIL record was found, create one and read it back
                    toDatabase.InsertImportDetails(referenceURL, referenceZipFile, dataType);
                    importDetail = toDatabase.GetImportDetail(referenceURL);
                }

                if (importDetail == null)
                {
                    // Without the record there is neither a checkpoint nor an ID to tag the rows with
                    ret.Errors.Add(new ErrorDetail(ErrorTypes.Exception, dataType, "Unable to create or read the import detail record", referenceURL, fileToImport));
                    return ret;
                }

                // Number of leading records of this file that earlier runs already handled
                var resumeFrom = importDetail.LastRecordNum;
                ret.TotalRecordsAlreadyInDB = resumeFrom;

                // If we already appear to have imported all the records in this file, just mark it complete
                if (recordCount <= resumeFrom)
                {
                    toDatabase.UpdateImportDetails(referenceURL, recordCount, true);
                    return ret;
                }

                // The DataTypes enum name is going to be our TABLE name
                string tableName = dataType.ToString("g");

                // Generate a CREATE TABLE script representing the DbfTable
                string tableCreate = toDatabase.GetCreateTableScript(dbfTable, tableName);
                Debug.WriteLine(tableCreate);

                // CREATE IF NOT EXISTS our TABLE in the DB
                toDatabase.ExecuteNonQuery(tableCreate);

                // Create the first part of our INSERT statement
                string insertHeader = toDatabase.GetInsertHeader(dbfTable, tableName);

                // Reusable record buffer for reading the DbfTable
                var dbfRecord = new DbfRecord(dbfTable);

                // We are going to INSERT multiple records with each DB Command to dramatically speed things up
                StringBuilder multiInsert = new StringBuilder();
                multiInsert.Append(insertHeader);

                int  position     = 0;      // records read from the file so far, including skipped ones
                int  checkpoint   = 0;      // last position written to IMPORTDETAIL by this run
                int  pendingRows  = 0;      // rows appended to multiInsert but not yet executed
                bool batchFailed  = false;
                bool hasRecord    = true;

                while (hasRecord && !batchFailed)
                {
                    hasRecord = dbfTable.Read(dbfRecord);

                    if (hasRecord)
                    {
                        // The position advances for every record read (also deleted or unreadable ones)
                        // so the stored checkpoint always maps to the same place in the file
                        position++;

                        // Only records beyond the previous checkpoint that are not marked deleted get inserted
                        if (position > resumeFrom && !dbfRecord.IsDeleted)
                        {
                            try
                            {
                                int           col     = -1;
                                StringBuilder rowPart = new StringBuilder();

                                // Loop through each of the values in our record
                                foreach (var dbfValue in dbfRecord.Values)
                                {
                                    col++;
                                    // Get the column type of this value
                                    DbfColumn c = dbfTable.Columns[col];
                                    // Format the value properly for our INSERT statement
                                    rowPart.Append(toDatabase.FormatValueForInsert(c.ColumnType, dbfValue.ToString()) + ",");
                                }
                                // Add the Import Detail ID for our last column value
                                rowPart.Append(importDetail.ID);

                                // The row is appended only once it is complete, so a failure above
                                // cannot leave a half-built row in the statement
                                multiInsert.Append("(" + rowPart.ToString() + "),");
                                pendingRows++;
                            }
                            catch (Exception ex)
                            {
                                ret.Errors.Add(new ErrorDetail(ErrorTypes.Exception, dataType, ex.Message, referenceURL, fileToImport));
                            }
                        }
                    }

                    // Execute the INSERT when the batch is full, or at end of file when rows are still pending
                    bool batchReady = pendingRows > 0 && (!hasRecord || pendingRows >= recordsPerBatch);
                    if (batchReady)
                    {
                        string failure = null;
                        try
                        {
                            int rowsAffected = toDatabase.ExecuteNonQuery(multiInsert.ToString().TrimEnd(','));
                            if (rowsAffected > 0)
                            {
                                ret.TotalRecordsImported += pendingRows;

                                // Checkpoint the absolute file position; the file is complete once every record was read
                                toDatabase.UpdateImportDetails(referenceURL, position, position >= recordCount);
                                checkpoint = position;
                            }
                            else
                            {
                                failure = "Batch INSERT affected no rows";
                            }
                        }
                        catch (Exception ex)
                        {
                            failure = ex.Message;
                        }

                        if (failure == null)
                        {
                            // Prepare the next mass INSERT statement
                            multiInsert.Clear();
                            multiInsert.Append(insertHeader);
                            pendingRows = 0;
                        }
                        else
                        {
                            // Stop here: continuing would move the checkpoint past rows that never reached
                            // the database. The stored checkpoint lets a later run retry from this batch.
                            batchFailed = true;
                            ret.Errors.Add(new ErrorDetail(ErrorTypes.Exception, dataType, failure, referenceURL, fileToImport));
                        }
                    }
                }

                // Records read after the last executed batch (e.g. trailing deleted records) still have to
                // move the checkpoint forward, otherwise the file would never be marked complete
                if (!batchFailed && position > resumeFrom && position > checkpoint)
                {
                    toDatabase.UpdateImportDetails(referenceURL, position, position >= recordCount);
                }
            }

            return ret;
        }