/// <summary>
/// Exchanges two columns in every row held by CsvUtils (no row is skipped, so the
/// header row is swapped as well) and saves the result over the input file.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and later to Save_File_As_CSV.</param>
/// <param name="ColumnName1">Name of the first column to exchange.</param>
/// <param name="ColumnName2">Name of the second column to exchange.</param>
/// <returns>
/// <c>true</c> when both columns were found and the file was saved;
/// <c>false</c> when either column is unknown (nothing is saved in that case).
/// </returns>
public Boolean Switch_Columns(String InputFile, String ColumnName1, String ColumnName2)
{
    CsvUtils cu = new CsvUtils();
    cu.SetFile(InputFile);

    int firstIndex = cu.Get_Column_Index(ColumnName1);
    int secondIndex = cu.Get_Column_Index(ColumnName2);
    if (firstIndex < 0 || secondIndex < 0)
    {
        return false;
    }

    foreach (KeyValuePair<int, List<String>> row in cu.dict)
    {
        // Only the row's cell list is handed to Swap; the dictionary itself is
        // not modified here while it is being enumerated.
        cu.Swap(row.Value, firstIndex, secondIndex);
    }

    cu.Save_File_As_CSV(InputFile);
    return true;
}
/// <summary>
/// For every data row (key greater than 0), applies a regular expression to the cell
/// of <paramref name="OriginColumn"/> and writes capture group 1 of the first match
/// into the cell of <paramref name="TargetColumn"/>. The file is saved only when at
/// least one cell was copied.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and Save_File_As_CSV.</param>
/// <param name="OriginColumn">Name of the column whose cells are read.</param>
/// <param name="RegexPatternGroupToCopy">Pattern that must define at least one capture group.</param>
/// <param name="TargetColumn">Name of the column whose cells are written.</param>
/// <returns>
/// A human-readable status message: a column-missing message, "Cell Content is Null"
/// as soon as a cell lookup returns null (nothing is saved), a no-match message, or
/// the number of cells copied.
/// </returns>
public String Copy_Column_Content_To_Other_Column(String InputFile, String OriginColumn, String RegexPatternGroupToCopy, String TargetColumn)
{
    CsvUtils csv = new CsvUtils();
    csv.SetFile(InputFile);

    int targetIndex = csv.Get_Column_Index(TargetColumn);
    int originIndex = csv.Get_Column_Index(OriginColumn);
    if (targetIndex < 0 || originIndex < 0)
    {
        return "At Least 1 Column in parameter does not exist.";
    }

    int copied = 0;
    foreach (KeyValuePair<int, List<String>> row in csv.dict)
    {
        // Key 0 is the header row; it is left untouched.
        if (row.Key <= 0)
        {
            continue;
        }

        String source = csv.Get_Cell_Content(OriginColumn, row.Key);
        if (source == null)
        {
            return "Cell Content is Null";
        }

        Match first = Regex.Match(source, RegexPatternGroupToCopy);
        if (!first.Success || first.Groups.Count <= 1)
        {
            continue;
        }

        csv.Set_Cell_Content(TargetColumn, row.Key, first.Groups[1].Value);
        copied++;
    }

    if (copied == 0)
    {
        return "No Match Found or No Regex Group defined in Regular Expression Pattern";
    }

    csv.Save_File_As_CSV(InputFile);
    return "Number of Matching Cells Found and moved: " + copied;
}
/// <summary>
/// Looks up the index of a named column in the given file by delegating to
/// CsvUtils.Get_Column_Index.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile.</param>
/// <param name="ColumnName">Name of the column to locate.</param>
/// <returns>
/// Whatever CsvUtils.Get_Column_Index returns; sibling methods in this file treat a
/// negative value as "column not found".
/// </returns>
public int Get_Column_Index(String InputFile, String ColumnName)
{
    CsvUtils reader = new CsvUtils();
    reader.SetFile(InputFile);

    int position = reader.Get_Column_Index(ColumnName);
    return position;
}
/// <summary>
/// Removes every occurrence of <paramref name="StringToRemove"/> from the cells of one
/// column (data rows only, i.e. keys greater than 0) and saves the file.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and Save_File_As_CSV.</param>
/// <param name="ColumnName">Name of the column to clean.</param>
/// <param name="StringToRemove">Text to strip from each cell. Null or empty means there is nothing to remove.</param>
/// <returns>
/// <c>false</c> when the column does not exist; <c>true</c> otherwise. When
/// <paramref name="StringToRemove"/> is null or empty the file is left untouched.
/// </returns>
public Boolean Remove_String_In_Column_Content(String InputFile, String ColumnName, String StringToRemove)
{
    CsvUtils cu = new CsvUtils();
    cu.SetFile(InputFile);

    if (cu.Get_Column_Index(ColumnName) < 0)
    {
        return false;
    }

    // String.Replace rejects a null or empty search string with an exception;
    // with nothing to remove there is no change to make.
    if (String.IsNullOrEmpty(StringToRemove))
    {
        return true;
    }

    foreach (KeyValuePair<int, List<String>> entry in cu.dict)
    {
        // Key 0 is the header row; column names must not be altered.
        if (entry.Key <= 0)
        {
            continue;
        }

        String current = cu.Get_Cell_Content(ColumnName, entry.Key);
        if (current == null)
        {
            // Other methods in this file treat a null cell as a possible lookup
            // result; there is nothing to strip from it.
            continue;
        }

        cu.Save_Cell_Value_No_Save(ColumnName, entry.Key, current.Replace(StringToRemove, ""));
    }

    cu.Save_File_As_CSV(InputFile);
    return true;
}
/// <summary>
/// Rewrites the file so that its columns appear in the order given by a
/// comma-separated list of column names.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and Save_File_As_CSV.</param>
/// <param name="ColumnOrderTemplate">
/// Comma-separated column names in the desired order. Names are trimmed. The list
/// must name every existing column exactly once.
/// </param>
/// <returns>
/// <c>true</c> when the file was reordered and saved; <c>false</c> (file untouched)
/// when the template is null, its length differs from the column count, a name is
/// unknown, or a column is listed more than once.
/// </returns>
public Boolean Enforce_Column_Order(String InputFile, String ColumnOrderTemplate)
{
    CsvUtils cu = new CsvUtils();
    cu.SetFile(InputFile);

    if (ColumnOrderTemplate == null)
    {
        return false;
    }

    List<String> templateNames = ColumnOrderTemplate.Split(',').Select(name => name.Trim()).ToList();
    if (templateNames.Count != cu.Get_Number_Of_Columns())
    {
        // The template must describe exactly as many columns as the file has.
        return false;
    }

    List<int> order = new List<int>();
    foreach (String name in templateNames)
    {
        int index = cu.Get_Column_Index(name);

        // A repeated name would pass the count check above while silently dropping
        // some other column from the output, so it is rejected like an unknown name.
        if (index < 0 || order.Contains(index))
        {
            return false;
        }

        order.Add(index);
    }

    IDictionary<int, List<String>> reordered = new Dictionary<int, List<String>>();
    foreach (KeyValuePair<int, List<String>> entry in cu.dict)
    {
        List<String> cells = entry.Value;
        reordered.Add(entry.Key, order.Select(i => cells[i]).ToList());
    }

    cu.dict = reordered;
    cu.Save_File_As_CSV(InputFile);
    return true;
}
/// <summary>
/// Adds a string to the cell of one column on every data row whose cell in another
/// (or the same) column matches a regular expression. Example: if "Description"
/// contains "Credit", add "-" to column "Amount".
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and Save_File_As_CSV.</param>
/// <param name="ColumnNameToMatch">Column whose cell is tested against the pattern.</param>
/// <param name="RegExPattern">Regular expression applied to the tested cell.</param>
/// <param name="ColumnNameToModify">Column whose cell receives the extra text.</param>
/// <param name="StringToAppend">Text to add.</param>
/// <param name="AppendAtEnd">
/// <c>true</c> to add the text after the existing value, <c>false</c> to add it in
/// front. When the value is wrapped in double quotes the text is placed inside them.
/// </param>
/// <returns>Number of cells modified, or -1 when either column does not exist (nothing is saved).</returns>
public int Append_If_Column_Matches_Pattern(String InputFile, String ColumnNameToMatch, String RegExPattern, String ColumnNameToModify, String StringToAppend, Boolean AppendAtEnd)
{
    CsvUtils cu = new CsvUtils();
    cu.SetFile(InputFile);

    int matchIndex = cu.Get_Column_Index(ColumnNameToMatch);
    int modifyIndex = cu.Get_Column_Index(ColumnNameToModify);
    if (matchIndex < 0 || modifyIndex < 0)
    {
        return -1;
    }

    int modified = 0;
    foreach (KeyValuePair<int, List<String>> entry in cu.dict)
    {
        // Key 0 is the header row and is never modified.
        if (entry.Key <= 0)
        {
            continue;
        }

        if (!Regex.IsMatch(entry.Value[matchIndex], RegExPattern))
        {
            continue;
        }

        String original = entry.Value[modifyIndex];
        bool startsQuoted = original.StartsWith("\"", StringComparison.Ordinal);
        String updated;

        if (AppendAtEnd)
        {
            bool fullyQuoted = startsQuoted
                && original.Length > 1
                && original.EndsWith("\"", StringComparison.Ordinal);

            // Keep the added text inside the closing quote of a quoted value.
            updated = fullyQuoted
                ? original.Substring(0, original.Length - 1) + StringToAppend + "\""
                : original + StringToAppend;
        }
        else
        {
            // Keep the added text inside the opening quote of a quoted value.
            updated = startsQuoted
                ? cu.ReplaceFirst(original, "\"", "\"" + StringToAppend)
                : StringToAppend + original;
        }

        entry.Value[modifyIndex] = updated;
        modified++;
    }

    cu.Save_File_As_CSV(InputFile);
    return modified;
}
/// <summary>
/// Inserts a new column immediately to the right of an existing one in the rows
/// held by <paramref name="cu"/>. Nothing is saved here; the caller decides when to
/// write the file. When the anchor column is not found the rows are left unchanged.
/// </summary>
/// <param name="cu">Loaded CsvUtils instance whose rows are modified in place.</param>
/// <param name="ColumnName">Name of the existing column to insert after.</param>
/// <param name="ColumnNameToAdd">Header text for the new column (written on the row with key 0).</param>
/// <param name="EmptyCellFiller">Value written into the new cell on every other row.</param>
private void Add_Column_After(CsvUtils cu, String ColumnName, String ColumnNameToAdd, String EmptyCellFiller)
{
    int anchor = cu.Get_Column_Index(ColumnName);
    if (anchor < 0)
    {
        return;
    }

    int insertAt = anchor + 1;
    foreach (KeyValuePair<int, List<String>> row in cu.dict)
    {
        bool isHeader = row.Key == 0;
        row.Value.Insert(insertAt, isHeader ? ColumnNameToAdd : EmptyCellFiller);
    }
}
/// <summary>
/// Deletes an entire column (header cell included) from every row and saves the file.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and Save_File_As_CSV.</param>
/// <param name="ColumnName">Name of the column to delete.</param>
/// <returns>
/// The index the column had before deletion, or the negative value reported by
/// CsvUtils.Get_Column_Index when the column does not exist (nothing is saved then).
/// </returns>
public int Delete_Column(String InputFile, String ColumnName)
{
    CsvUtils cu = new CsvUtils();
    cu.SetFile(InputFile);

    int columnIndex = cu.Get_Column_Index(ColumnName);
    if (columnIndex < 0)
    {
        return columnIndex;
    }

    foreach (KeyValuePair<int, List<String>> entry in cu.dict)
    {
        // A row with fewer cells than the header has nothing at this position;
        // skipping it avoids an ArgumentOutOfRangeException from RemoveAt.
        if (columnIndex < entry.Value.Count)
        {
            entry.Value.RemoveAt(columnIndex);
        }
    }

    cu.Save_File_As_CSV(InputFile);
    return columnIndex;
}
/// <summary>
/// Keeps only the lines whose cell in a given column matches a regular expression.
/// The header line (key 0) is always kept. The filtered content is saved over the file.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and Save_File_As_CSV.</param>
/// <param name="ColumnName">Name of the column whose cells are tested.</param>
/// <param name="RegExPattern">Regular expression a cell must match for its line to be kept.</param>
/// <returns>
/// Number of data lines kept (the header is not counted). Returns 0 without saving
/// when the column does not exist.
/// </returns>
public int Keep_Line_If_Cell_Matches_Pattern(String InputFile, String ColumnName, String RegExPattern)
{
    int linesKept = 0;

    CsvUtils cu = new CsvUtils();
    cu.SetFile(InputFile);

    int columnIndex = cu.Get_Column_Index(ColumnName);
    if (columnIndex < 0)
    {
        return linesKept;
    }

    IDictionary<int, List<String>> survivors = new Dictionary<int, List<String>>();
    foreach (KeyValuePair<int, List<String>> entry in cu.dict)
    {
        if (entry.Key == 0)
        {
            // The header is kept regardless of the pattern, so it is not tested.
            survivors.Add(entry.Key, entry.Value);
            continue;
        }

        if (Regex.IsMatch(entry.Value[columnIndex], RegExPattern))
        {
            // Surviving lines keep their original keys.
            survivors.Add(entry.Key, entry.Value);
            linesKept++;
        }
    }

    cu.dict = survivors;
    cu.Save_File_As_CSV(InputFile);
    return linesKept;
}
/// <summary>
/// Transforms the content of an entire column: on each data row (key greater than 0)
/// the cell is replaced by capture group 1 of the first regular-expression match.
/// Cells without a match keep their value. The file is saved in every case once the
/// column has been found.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and Save_File_As_CSV.</param>
/// <param name="ColumnName">Name of the column to transform.</param>
/// <param name="RegExPattern">Pattern that must define at least one capture group.</param>
/// <returns>
/// "Column Does Not Exist." when the column is unknown; otherwise a message that is
/// either the no-match text or the number of cells transformed.
/// </returns>
public String Transform_Column_Content(String InputFile, String ColumnName, String RegExPattern)
{
    CsvUtils cu = new CsvUtils();
    cu.SetFile(InputFile);

    if (cu.Get_Column_Index(ColumnName) < 0)
    {
        return "Column Does Not Exist.";
    }

    int transformed = 0;
    foreach (KeyValuePair<int, List<String>> entry in cu.dict)
    {
        // Key 0 is the header row; column names are not transformed.
        if (entry.Key <= 0)
        {
            continue;
        }

        String content = cu.Get_Cell_Content(ColumnName, entry.Key);
        if (content == null)
        {
            // Regex.Match throws on null input; a missing cell has nothing to transform.
            continue;
        }

        String newValue = content;
        Match first = Regex.Match(content, RegExPattern);
        if (first.Success && first.Groups.Count > 1)
        {
            newValue = first.Groups[1].Value;
            transformed++;
        }

        // The cell is written back even when unchanged.
        cu.Save_Cell_Value_No_Save(ColumnName, entry.Key, newValue);
    }

    cu.Save_File_As_CSV(InputFile);

    if (transformed == 0)
    {
        return "No Match Found or No Regex Group defined in Regular Expression Pattern";
    }

    return "Number of Matching Cells Found and transformed: " + transformed;
}
/// <summary>
/// Renames a column by overwriting its cell in the header row (key 0) and saving the file.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and Save_File_As_CSV.</param>
/// <param name="CurrentColumnName">Existing name of the column.</param>
/// <param name="NewColumnName">Name to write into the header.</param>
/// <returns>
/// <c>true</c> when the header cell was renamed and the file saved; <c>false</c>
/// (file untouched) when the column or the header row cannot be found.
/// </returns>
public Boolean Rename_Column(String InputFile, String CurrentColumnName, String NewColumnName)
{
    CsvUtils cu = new CsvUtils();
    cu.SetFile(InputFile);

    int idx = cu.Get_Column_Index(CurrentColumnName);
    if (idx < 0)
    {
        return false;
    }

    foreach (KeyValuePair<int, List<String>> entry in cu.dict)
    {
        // Keep scanning until the header row is reached. Success is reported only
        // after the rename has actually happened, not after the first row enumerated.
        if (entry.Key != 0)
        {
            continue;
        }

        entry.Value[idx] = NewColumnName;
        cu.Save_File_As_CSV(InputFile);
        return true;
    }

    return false;
}
/// <summary>
/// Overwrites a single cell, identified by column name and line number, then saves
/// the file.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and Save_File_As_CSV.</param>
/// <param name="ColumnName">Name of the column containing the cell.</param>
/// <param name="LineNumber">Line of the cell; rejected when greater than CsvUtils.Get_Number_Of_Lines().</param>
/// <param name="NewValue">Value to store in the cell.</param>
/// <returns>An empty string on success, otherwise an error message (nothing is saved on error).</returns>
public String Set_Cell_Content(String InputFile, String ColumnName, int LineNumber, String NewValue)
{
    CsvUtils editor = new CsvUtils();
    editor.SetFile(InputFile);

    int lineCount = editor.Get_Number_Of_Lines();
    if (LineNumber > lineCount)
    {
        return "Line Number exceeds the number of total lines in the input file.";
    }

    bool columnMissing = editor.Get_Column_Index(ColumnName) < 0;
    if (columnMissing)
    {
        return "Column Does Not Exist.";
    }

    editor.Set_Cell_Content(ColumnName, LineNumber, NewValue);
    editor.Save_File_As_CSV(InputFile);
    return "";
}
/// <summary>
/// Inserts a new column immediately to the left of an existing one and saves the file.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and Save_File_As_CSV.</param>
/// <param name="ColumnName">Name of the existing column to insert before.</param>
/// <param name="ColumnNameToAdd">Header text for the new column (written on the row with key 0).</param>
/// <param name="EmptyCellFiller">Value written into the new cell on every other row.</param>
/// <returns>
/// The index at which the new column was inserted, or the negative value reported by
/// CsvUtils.Get_Column_Index when the anchor column does not exist (nothing is saved then).
/// </returns>
public int Add_Column_Before(String InputFile, String ColumnName, String ColumnNameToAdd, String EmptyCellFiller)
{
    CsvUtils csv = new CsvUtils();
    csv.SetFile(InputFile);

    int insertAt = csv.Get_Column_Index(ColumnName);
    if (insertAt < 0)
    {
        return insertAt;
    }

    foreach (KeyValuePair<int, List<String>> row in csv.dict)
    {
        String cell = row.Key == 0 ? ColumnNameToAdd : EmptyCellFiller;
        row.Value.Insert(insertAt, cell);
    }

    csv.Save_File_As_CSV(InputFile);
    return insertAt;
}
/// <summary>
/// Copies part of one cell into another column on the same line. Example: on line 3,
/// take the content of column "Currency" ("Currency (USD)") and copy regex group 1
/// into column "Clean_Currency" using the pattern "Currency \(([A-Z]{3})\)".
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and Save_File_As_CSV.</param>
/// <param name="OriginColumn">Name of the column whose cell is read.</param>
/// <param name="LineNumber">Line of the cell to read and of the cell to write.</param>
/// <param name="RegexPatternGroupToCopy">Pattern that must define at least one capture group.</param>
/// <param name="TargetColumn">Name of the column whose cell is written.</param>
/// <returns>
/// An empty string on success; otherwise an error message: "Cell Content is Null",
/// the no-match text, or a column-missing message naming the target column. Nothing
/// is saved on error.
/// </returns>
public String Copy_Cell_Content_To_Other_Column(String InputFile, String OriginColumn, int LineNumber, String RegexPatternGroupToCopy, String TargetColumn)
{
    CsvUtils cu = new CsvUtils();
    cu.SetFile(InputFile);

    String cellContent = cu.Get_Cell_Content(OriginColumn, LineNumber);
    if (cellContent == null)
    {
        return "Cell Content is Null";
    }

    Match first = Regex.Match(cellContent, RegexPatternGroupToCopy);
    if (!first.Success || first.Groups.Count <= 1)
    {
        return "No Match Found or No Regex Group defined in Regular Expression Pattern";
    }

    if (cu.Get_Column_Index(TargetColumn) < 0)
    {
        // Report the failure explicitly; an empty string is reserved for success.
        return "Column Does Not Exist: " + TargetColumn;
    }

    cu.Set_Cell_Content(TargetColumn, LineNumber, first.Groups[1].Value);
    cu.Save_File_As_CSV(InputFile);
    return "";
}
/// <summary>
/// Sets the same value in one column on every line of an inclusive range of line
/// numbers (example: set "USD" in column "Currency" from line 3 to 15), then saves
/// the file.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and Save_File_As_CSV.</param>
/// <param name="ColumnName">Name of the column to modify.</param>
/// <param name="LineNumberStart">First line of the range (inclusive).</param>
/// <param name="LineNumberEnd">Last line of the range (inclusive); may equal the start to target one line.</param>
/// <param name="NewValue">Value written into each cell of the range.</param>
/// <returns>
/// <c>true</c> when the file was saved; <c>false</c> (file untouched) when the column
/// does not exist or the start lies after the end.
/// </returns>
public Boolean Save_Cell_Value_On_Range(String InputFile, String ColumnName, int LineNumberStart, int LineNumberEnd, String NewValue)
{
    CsvUtils cu = new CsvUtils();
    cu.SetFile(InputFile);

    int columnIndex = cu.Get_Column_Index(ColumnName);

    // Both bounds are inclusive, so start == end is a valid one-line range.
    if (columnIndex < 0 || LineNumberStart > LineNumberEnd)
    {
        return false;
    }

    foreach (KeyValuePair<int, List<String>> entry in cu.dict)
    {
        if (entry.Key >= LineNumberStart && entry.Key <= LineNumberEnd)
        {
            entry.Value[columnIndex] = NewValue;
        }
    }

    cu.Save_File_As_CSV(InputFile);
    return true;
}
/// <summary>
/// Takes one row and builds one copy of it per regular-expression match found in a
/// given column; in each copy that column holds the text of one match. The copies
/// are handed to Add_Rows_After2 together with the row number, and the file is saved.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and Save_File_As_CSV.</param>
/// <param name="ColumnNameToRead">Name of the column whose cell is split.</param>
/// <param name="RegExPattern">Regular expression; every match (whole match text) yields one new row.</param>
/// <param name="RowNumber">Key of the source row in the CsvUtils row dictionary.</param>
/// <returns>
/// An empty string on success; otherwise "Column Does Not Exist.",
/// "Row Does Not Exist." or "Cell Content is Null" (nothing is saved on error).
/// </returns>
public String Split_Column_Content_into_rows_based_on_matches(String InputFile, String ColumnNameToRead, String RegExPattern, int RowNumber)
{
    CsvUtils cu = new CsvUtils();
    cu.SetFile(InputFile);

    int colIdx = cu.Get_Column_Index(ColumnNameToRead);
    if (colIdx < 0)
    {
        return "Column Does Not Exist.";
    }

    // Look the row up defensively: indexing the dictionary with an unknown key throws.
    List<String> sourceRow;
    if (!cu.dict.TryGetValue(RowNumber, out sourceRow) || sourceRow == null)
    {
        return "Row Does Not Exist.";
    }

    if (colIdx >= sourceRow.Count || sourceRow[colIdx] == null)
    {
        return "Cell Content is Null";
    }

    List<List<String>> newRows = new List<List<String>>();
    foreach (Match match in Regex.Matches(sourceRow[colIdx], RegExPattern))
    {
        // Each new row is an independent copy so the source row stays unchanged here.
        List<String> rowCopy = new List<String>(sourceRow);
        rowCopy[colIdx] = match.Value;
        newRows.Add(rowCopy);
    }

    Add_Rows_After2(cu, RowNumber, newRows);
    cu.Save_File_As_CSV(InputFile);
    return "";
}
/// <summary>
/// Renders selected columns of a CSV file as a JSON object of the form
/// <c>{"TitleForSingleValues":{...},"TitleForItemValues":[{...},{...}]}</c>.
/// The first member holds one value per "standard" column; the optional second
/// member holds one object per data row (key greater than 0) with the cells of the
/// "itemized" columns, keyed by column header. All keys and values are emitted as
/// escaped JSON strings.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile.</param>
/// <param name="TitleForSingleValues">Member name for the object of standard fields.</param>
/// <param name="ListOfColumsOfStandardFields">Comma-separated names of the standard columns (not trimmed).</param>
/// <param name="TitleForItemValues">Member name for the array of itemized rows.</param>
/// <param name="ListOfColumnsOfItemizedFields">
/// Comma-separated names of the itemized columns (not trimmed). Null, or a list whose
/// first entry is empty, disables the itemized section.
/// </param>
/// <returns>
/// The JSON text, or the result of GetErrorJson when a requested column does not exist.
/// </returns>
public string Get_Output_As_Json(String InputFile, String TitleForSingleValues, String ListOfColumsOfStandardFields, String TitleForItemValues, String ListOfColumnsOfItemizedFields)
{
    CsvUtils cu = new CsvUtils();
    cu.SetFile(InputFile);

    // Renders a value as a quoted JSON string, escaping quotes, backslashes and
    // control characters so that cell content cannot break the document.
    Func<String, String> toJsonString = raw =>
    {
        System.Text.StringBuilder quoted = new System.Text.StringBuilder("\"");
        foreach (char c in raw ?? "")
        {
            switch (c)
            {
                case '"': quoted.Append("\\\""); break;
                case '\\': quoted.Append("\\\\"); break;
                case '\n': quoted.Append("\\n"); break;
                case '\r': quoted.Append("\\r"); break;
                case '\t': quoted.Append("\\t"); break;
                default:
                    if (c < ' ')
                    {
                        quoted.Append("\\u").Append(((int)c).ToString("x4"));
                    }
                    else
                    {
                        quoted.Append(c);
                    }
                    break;
            }
        }
        return quoted.Append('"').ToString();
    };

    String[] standardColumns = (ListOfColumsOfStandardFields ?? "").Split(',');
    String[] itemizedColumns = (ListOfColumnsOfItemizedFields ?? "").Split(',');

    // "name":"value" members for the standard (single-value) fields.
    List<String> standardMembers = new List<String>();
    foreach (String colName in standardColumns)
    {
        if (cu.Get_Column_Index(colName) < 0)
        {
            return GetErrorJson("Standard Column Not Found: " + colName);
        }

        List<String> distinct = cu.GetAllValuesFromColumn(colName).Distinct().ToList();

        // With more than one distinct value the second one is used; presumably the
        // first is the column header (TODO confirm against GetAllValuesFromColumn).
        String value = distinct.Count > 1
            ? distinct[1]
            : (distinct.Count == 1 ? distinct[0] : "");

        standardMembers.Add(toJsonString(colName) + ":" + toJsonString(value));
    }

    // One JSON object per data row for the itemized fields.
    List<String> itemObjects = new List<String>();
    if (itemizedColumns.Length > 0 && itemizedColumns[0] != "")
    {
        List<int> columnsToExtract = new List<int>();
        foreach (String colName in itemizedColumns)
        {
            int idx = cu.Get_Column_Index(colName);
            if (idx < 0)
            {
                return GetErrorJson("Itemized Column Not Found: " + colName);
            }
            columnsToExtract.Add(idx);
        }

        foreach (KeyValuePair<int, List<String>> entry in cu.dict)
        {
            if (entry.Key <= 0)
            {
                continue; // header row carries no item data
            }

            // Members follow the column order of the row, not the order of the list.
            List<String> members = new List<String>();
            for (int idx = 0; idx < entry.Value.Count; idx++)
            {
                if (columnsToExtract.Contains(idx))
                {
                    members.Add(toJsonString(cu.Get_Column_name(idx)) + ":" + toJsonString(entry.Value[idx]));
                }
            }

            itemObjects.Add("{" + String.Join(",", members.ToArray()) + "}");
        }
    }

    System.Text.StringBuilder json = new System.Text.StringBuilder();
    json.Append("{")
        .Append(toJsonString(TitleForSingleValues))
        .Append(":{")
        .Append(String.Join(",", standardMembers.ToArray()))
        .Append("}");

    if (itemObjects.Count > 0)
    {
        json.Append(",")
            .Append(toJsonString(TitleForItemValues))
            .Append(":[")
            .Append(String.Join(",", itemObjects.ToArray()))
            .Append("]");
    }

    json.Append("}");
    return json.ToString();
}
/// <summary>
/// Splits the content of one column into several new columns, one per
/// regular-expression match:
/// 1. find the largest number of matches (N) over all data rows;
/// 2. add N empty columns after <paramref name="InsertAfterColumnName"/>, named
///    <paramref name="ColumnNameStub"/> followed by 1..N;
/// 3. on each data row, write capture group 1 of the k-th match into the k-th new
///    column (wrapped in double quotes when the value contains a comma).
/// The file is then saved.
/// </summary>
/// <param name="InputFile">Path passed to CsvUtils.SetFile and Save_File_As_CSV.</param>
/// <param name="ColumnNameToRead">Name of the column whose cells are split.</param>
/// <param name="RegExPattern">Regular expression; capture group 1 of each match is extracted.</param>
/// <param name="InsertAfterColumnName">Existing column after which the new columns are added.</param>
/// <param name="ColumnNameStub">Prefix for the names of the new columns.</param>
/// <returns>An empty string on success, or a message naming the column that does not exist.</returns>
public String Split_Column_Content_based_on_matches(String InputFile, String ColumnNameToRead, String RegExPattern, String InsertAfterColumnName, String ColumnNameStub)
{
    CsvUtils cu = new CsvUtils();
    cu.SetFile(InputFile);

    if (cu.Get_Column_Index(ColumnNameToRead) < 0)
    {
        return "Column Does Not Exist: " + ColumnNameToRead;
    }

    int colIdxInsert = cu.Get_Column_Index(InsertAfterColumnName);
    if (colIdxInsert < 0)
    {
        return "Column Does Not Exist:" + InsertAfterColumnName;
    }

    // Pass 1: how many new columns are needed?
    int maxMatches = 0;
    foreach (KeyValuePair<int, List<String>> entry in cu.dict)
    {
        if (entry.Key <= 0)
        {
            continue; // header row is not split
        }

        String content = cu.Get_Cell_Content(ColumnNameToRead, entry.Key);
        if (content == null)
        {
            continue; // Regex.Matches throws on null input
        }

        int count = Regex.Matches(content, RegExPattern).Count;
        if (count > maxMatches)
        {
            maxMatches = count;
        }
    }

    // Each call inserts directly after the anchor column, so adding the highest
    // number first leaves the new columns in ascending order (stub1 .. stubN).
    for (int n = maxMatches; n >= 1; n--)
    {
        Add_Column_After(cu, InsertAfterColumnName, ColumnNameStub + n, "");
    }

    // Pass 2: fill the new columns.
    foreach (KeyValuePair<int, List<String>> entry in cu.dict)
    {
        if (entry.Key <= 0)
        {
            continue;
        }

        String content = cu.Get_Cell_Content(ColumnNameToRead, entry.Key);
        if (content == null)
        {
            continue;
        }

        int offset = 0;
        foreach (Match match in Regex.Matches(content, RegExPattern))
        {
            offset++; // the k-th match goes k columns to the right of the anchor

            String captured = match.Groups[1].Value;
            bool needsQuotes = captured.IndexOf(',') >= 0;
            entry.Value[colIdxInsert + offset] = needsQuotes ? "\"" + captured + "\"" : captured;
        }
    }

    cu.Save_File_As_CSV(InputFile);
    return "";
}