예제 #1
0
        /// <summary>
        /// Try importing a CSV file, checking its headers and values against the template's DataLabels and data types,
        /// and then update the matching rows in the database from the CSV contents.
        /// CSV rows that share the same RelativePath/File are collected and handed as a group to UpdateDuplicatesInDatabase;
        /// all other rows are batched and written with FileDatabase.UpdateFiles.
        /// Error reporting is limited to only gross mismatches. Note that:
        /// - rows in the CSV file that are not in the .ddb file are ignored (not reported - maybe it should be?)
        /// - rows in the .ddb file that are not in the CSV file are ignored
        /// - if there are more duplicate rows for an image in the .csv file than there are in the .ddb file, those extra duplicates are ignored (not reported - maybe it should be?)
        /// - if there are more duplicate rows for an image in the .ddb file than there are in the .csv file, those extra duplicates are ignored (not reported - maybe it should be?)
        /// </summary>
        /// <param name="filePath">Path to the CSV file to import.</param>
        /// <param name="fileDatabase">The database that is checked against, and updated from, the CSV file.</param>
        /// <returns>
        /// A tuple whose first item is false if reading the CSV file, verifying its headers, or verifying its column data failed
        /// (in which case this method returns before issuing any update), and true otherwise.
        /// The second item is the list of collected error messages; it can be non-empty even when the first item is true.
        /// </returns>
        public static async Task<Tuple<bool, List<string>>> TryImportFromCsv(string filePath, FileDatabase fileDatabase)
        {
            // Set up a progress handler that forwards each progress report to the busy/cancel indicator's progress bar
            Progress<ProgressBarArguments> progressHandler = new Progress<ProgressBarArguments>(value =>
            {
                CsvReaderWriter.UpdateProgressBar(GlobalReferences.BusyCancelIndicator, value.PercentDone, value.Message, value.IsCancelEnabled, value.IsIndeterminate);
            });
            IProgress<ProgressBarArguments> progress = progressHandler;

            List<string> importErrors = new List<string>();

            // The whole import runs via Task.Run so the caller is not blocked while the file is read and the database is updated
            return await Task.Run(() =>
            {
                const int bulkFilesToHandle = 2000; // number of pending row updates that triggers a database write
                int totalFilesProcessed = 0;        // counts only rows that went through the regular (non-duplicate) path
                int dateTimeErrors = 0;             // rows (of the above) for which no date/time could be extracted
                progress.Report(new ProgressBarArguments(0, "Reading the CSV file. Please wait", false, true));

                // Part 1. Read in the CSV file. Return false if there is a problem in reading the CSV file or if the CSV file is empty
                if (false == TryReadingCSVFile(filePath, out List<List<string>> parsedFile, importErrors))
                {
                    return new Tuple<bool, List<string>>(false, importErrors);
                }

                // Now that we have a parsed file, get its headers (the first row), which we will use as DataLabels.
                // Blank and repeated headers are dropped.
                List<string> dataLabelsFromCSV = parsedFile[0].Where(s => !string.IsNullOrWhiteSpace(s)).Distinct().ToList();

                // Part 2. Abort if required CSV columns are missing or there is a problem matching the CSV file headers against the DB headers.
                if (false == VerifyCSVHeaders(fileDatabase, dataLabelsFromCSV, importErrors))
                {
                    return new Tuple<bool, List<string>>(false, importErrors);
                }

                // Part 3. Create a list of all data rows, where each row is a dictionary mapping each header to that row's value for the header
                List<Dictionary<string, string>> rowDictionaryList = GetAllDataRows(dataLabelsFromCSV, parsedFile);

                // Part 4. For every row, validate each column's data against its type. Abort if the type does not match
                if (false == VerifyDataInColumns(fileDatabase, dataLabelsFromCSV, rowDictionaryList, importErrors))
                {
                    return new Tuple<bool, List<string>>(false, importErrors);
                }

                //
                // Part 5. Check and manage duplicates
                //
                // Get the duplicates in the database, i.e. rows with both the same RelativePath and File.
                // A set is used because membership is tested once per singleton CSV row.
                HashSet<string> databaseDuplicates = new HashSet<string>(fileDatabase.GetDistinctRelativePathFileCombinationsDuplicates());

                // Sort the rows so that duplicates in the CSV file (with the same RelativePath / File name) are in order, one after the other.
                // The sort uses the same column constants and the same (ordinal) notion of equality as the path comparisons below,
                // so rows whose paths compare equal are guaranteed to be adjacent. OrderBy is a stable sort, so duplicates keep their CSV order.
                List<Dictionary<string, string>> sortedRowDictionaryList = rowDictionaryList
                    .OrderBy(dict => dict[Constant.DatabaseColumn.RelativePath], StringComparer.Ordinal)
                    .ThenBy(dict => dict[Constant.DatabaseColumn.File], StringComparer.Ordinal)
                    .ToList();
                int sortedRowDictionaryListCount = sortedRowDictionaryList.Count;

                // The pending updates for non-duplicate rows, written to the database in batches
                List<ColumnTuplesWithWhere> imagesToUpdate = new List<ColumnTuplesWithWhere>();

                int nextRowIndex = 0;                    // index of the row following the current one (i.e. the count of rows examined so far)
                string duplicatePath = String.Empty;     // the path shared by the duplicate sequence being collected, if any
                List<Dictionary<string, string>> duplicatesDictionaryList = new List<Dictionary<string, string>>();

                // Hand the collected duplicate rows for the given path to UpdateDuplicatesInDatabase,
                // record any error it returns, and empty the collection for the next sequence.
                void UpdateCollectedDuplicates(string path)
                {
                    string error = UpdateDuplicatesInDatabase(fileDatabase, duplicatesDictionaryList, Path.GetDirectoryName(path), Path.GetFileName(path));
                    if (false == String.IsNullOrEmpty(error))
                    {
                        importErrors.Add(error);
                    }
                    duplicatesDictionaryList.Clear();
                }

                foreach (Dictionary<string, string> rowDict in sortedRowDictionaryList)
                {
                    // For every row...
                    nextRowIndex++;
                    string currentPath = Path.Combine(rowDict[Constant.DatabaseColumn.RelativePath], rowDict[Constant.DatabaseColumn.File]);

                    #region Handle duplicates
                    // Duplicates are special cases, where we have to update each set of duplicates separately as a chunk.
                    // A row is a duplicate if its path (RelativePath/File) is identical to that of a neighbouring row.
                    if (currentPath == duplicatePath)
                    {
                        // We are in the middle of a sequence: this row has the same path as the previously identified duplicate.
                        duplicatesDictionaryList.Add(rowDict);

                        // If this is the very last row, no later row will end the sequence, so process it now
                        if (nextRowIndex >= sortedRowDictionaryListCount)
                        {
                            UpdateCollectedDuplicates(duplicatePath);
                        }
                        continue;
                    }

                    // The path differs from the sequence being collected (if any), so this row ends that sequence. Process it.
                    if (duplicatesDictionaryList.Count > 0)
                    {
                        UpdateCollectedDuplicates(duplicatePath);
                    }

                    // Determine if the current row is the first duplicate in a new sequence by checking its path against the next row (if there is one)
                    bool startsDuplicateSequence = false;
                    if (nextRowIndex < sortedRowDictionaryListCount)
                    {
                        Dictionary<string, string> nextRow = sortedRowDictionaryList[nextRowIndex];
                        string nextPath = Path.Combine(nextRow[Constant.DatabaseColumn.RelativePath], nextRow[Constant.DatabaseColumn.File]);
                        startsDuplicateSequence = nextPath == currentPath;
                    }

                    if (startsDuplicateSequence)
                    {
                        // Its the beginning of a sequence. The collection is empty here, as any prior sequence was processed above.
                        duplicatePath = currentPath;
                        duplicatesDictionaryList.Add(rowDict);
                        continue;
                    }

                    // It must be a singleton in the CSV file. This includes the last row, which has no following row to compare against.
                    duplicatePath = String.Empty;
                    if (databaseDuplicates.Contains(currentPath))
                    {
                        // But, if the database contains duplicates with the same RelativePath/File, route the row through the duplicate
                        // update rather than letting it fall through, as the regular update below would apply the same values
                        // to all the database rows matching that RelativePath/File.
                        duplicatesDictionaryList.Add(rowDict);
                        UpdateCollectedDuplicates(currentPath);
                        continue;
                    }
                    #endregion Handle duplicates

                    #region Process each column in a row by its header type
                    // Process each non-duplicate row.
                    // Columns for which IsStandardColumn is true are copied as is. DateTime, Date and Time columns are parsed
                    // and written further below as a single consistent date/time. Columns of any other type are not updated.
                    ColumnTuplesWithWhere imageToUpdate = new ColumnTuplesWithWhere();
                    CultureInfo provider = CultureInfo.InvariantCulture;
                    DateTime datePortion = DateTime.MinValue;   // DateTime.MinValue means 'not (successfully) parsed'
                    DateTime timePortion = DateTime.MinValue;
                    DateTime dateTime = DateTime.MinValue;
                    foreach (KeyValuePair<string, string> column in rowDict)
                    {
                        // For every column ...
                        string header = column.Key;
                        string columnValue = column.Value;
                        ControlRow controlRow = fileDatabase.GetControlFromTemplateTable(header);

                        if (IsStandardColumn(controlRow.Type))
                        {
                            imageToUpdate.Columns.Add(new ColumnTuple(header, columnValue));
                        }
                        else if (controlRow.Type == Constant.DatabaseColumn.DateTime)
                        {
                            // Try the standard DateTime format first, then the variant with the T separator.
                            // If both fail, TryParseExact leaves dateTime as DateTime.MinValue.
                            if (false == DateTime.TryParseExact(columnValue, Constant.Time.DateTimeCSVWithoutTSeparator, provider, DateTimeStyles.None, out dateTime))
                            {
                                DateTime.TryParseExact(columnValue, Constant.Time.DateTimeCSVWithTSeparator, provider, DateTimeStyles.None, out dateTime);
                            }
                        }
                        else if (controlRow.Type == Constant.DatabaseColumn.Date)
                        {
                            // Date only
                            if (DateTime.TryParseExact(columnValue, Constant.Time.DateFormat, provider, DateTimeStyles.None, out DateTime parsedDate))
                            {
                                datePortion = parsedDate;
                            }
                        }
                        else if (controlRow.Type == Constant.DatabaseColumn.Time)
                        {
                            // Time only
                            if (DateTime.TryParseExact(columnValue, Constant.Time.TimeFormat, provider, DateTimeStyles.None, out DateTime parsedTime))
                            {
                                timePortion = parsedTime;
                            }
                        }
                    }
                    #endregion Process each column by its header type

                    // We've now looked at all the columns in a row, so continue processing that row as needed
                    totalFilesProcessed++;

                    bool hasSeparateDateAndTime = datePortion != DateTime.MinValue && timePortion != DateTime.MinValue;
                    if (dateTime != DateTime.MinValue || hasSeparateDateAndTime)
                    {
                        if (hasSeparateDateAndTime)
                        {
                            // We have a valid separate date and time. Combine it (this takes precedence over the DateTime column).
                            dateTime = datePortion.Date + timePortion.TimeOfDay;
                        }
                        // Because we expect a UTC date/time, set its kind
                        dateTime = DateTime.SpecifyKind(dateTime, DateTimeKind.Utc);

                        // We should now have a valid dateTime. Add it to the update, along with the Date and Time derived from it.
                        // Note that this resets UtcOffset to 0.
                        imageToUpdate.Columns.Add(new ColumnTuple(Constant.DatabaseColumn.DateTime, dateTime));
                        imageToUpdate.Columns.Add(new ColumnTuple(Constant.DatabaseColumn.UtcOffset, new TimeSpan(0)));
                        imageToUpdate.Columns.Add(new ColumnTuple(Constant.DatabaseColumn.Date, DateTimeHandler.ToStringDisplayDate(dateTime)));
                        imageToUpdate.Columns.Add(new ColumnTuple(Constant.DatabaseColumn.Time, DateTimeHandler.ToStringDisplayTime(dateTime)));
                    }
                    else
                    {
                        // Only counted here; a summary is added to importErrors after all rows are processed
                        dateTimeErrors++;
                    }

                    // NOTE: We currently do NOT report an error if there is a row in the csv file whose location does not match
                    // the location in the database. We could do this by performing a check before submitting a query, eg. something like:
                    //  Select Count (*) from DataTable where File='IMG_00197.JPG' or File='IMG_01406.JPG' or File='XX.JPG'
                    // where we would then compare the counts against the rows. However, this likely has a performance hit, and it doesn't
                    // return the erroneous rows... So its not done yet.

                    // Add to the query only if there are columns to add!
                    if (imageToUpdate.Columns.Count > 0)
                    {
                        // Match on both RelativePath and File when a non-blank RelativePath is available, otherwise on File alone
                        if (rowDict.ContainsKey(Constant.DatabaseColumn.RelativePath) && !String.IsNullOrWhiteSpace(rowDict[Constant.DatabaseColumn.RelativePath]))
                        {
                            imageToUpdate.SetWhere(rowDict[Constant.DatabaseColumn.RelativePath], rowDict[Constant.DatabaseColumn.File]);
                        }
                        else
                        {
                            imageToUpdate.SetWhere(rowDict[Constant.DatabaseColumn.File]);
                        }
                        imagesToUpdate.Add(imageToUpdate);
                    }

                    // Write the current batch of updates to the database once it holds bulkFilesToHandle rows.
                    // Progress is reported as the number of CSV rows examined so far (duplicates included), so that it is
                    // measured on the same scale as the total row count it is compared against.
                    // We should probably put in a cancellation token somewhere around here...
                    if (imagesToUpdate.Count >= bulkFilesToHandle)
                    {
                        progress.Report(new ProgressBarArguments(Convert.ToInt32(((double)nextRowIndex) / sortedRowDictionaryListCount * 100.0), String.Format("Processing {0}/{1} files. Please wait...", nextRowIndex, sortedRowDictionaryListCount), false, false));
                        fileDatabase.UpdateFiles(imagesToUpdate);
                        imagesToUpdate.Clear();
                    }
                }

                if (dateTimeErrors != 0)
                {
                    // Summarize the rows whose date/time could not be extracted.
                    // TODO: Need to check if this works for files with no date-time fields!
                    importErrors.Add(String.Format("The Date/Time was not updated for {0} / {1} files. ", dateTimeErrors, totalFilesProcessed));
                    if (dataLabelsFromCSV.Contains(Constant.DatabaseColumn.DateTime) || (dataLabelsFromCSV.Contains(Constant.DatabaseColumn.Date) && dataLabelsFromCSV.Contains(Constant.DatabaseColumn.Time)))
                    {
                        importErrors.Add("- some date / time values in the DateTime, Date or Time columns are in an unexpected format (see manual)");
                    }
                    else
                    {
                        importErrors.Add("- the CSV file is missing either a DateTime column or both Date and Time columns (this is ok if it was intended)");
                    }
                }

                // Perform any remaining updates (the last, partially filled batch)
                fileDatabase.UpdateFiles(imagesToUpdate);
                return new Tuple<bool, List<string>>(true, importErrors);
            }).ConfigureAwait(true);
        }