/// <summary>
/// Converts the workbook "sample.xlsx" into comma-delimited text and
/// writes the result to "convert.csv".
/// </summary>
public static void Convert()
        {
            var xlsxConverter = new XlsxToCsvConverter();

            // Feed the workbook to the converter; the input stream is disposed
            // as soon as Load returns.
            using (Stream input = File.OpenRead("sample.xlsx"))
            {
                xlsxConverter.Load(input);
            }

            // Cells are separated by a comma in the generated text.
            xlsxConverter.Delimiter = ',';

            string csvText = xlsxConverter.SaveAsString();
            File.WriteAllText("convert.csv", csvText);
        }
// Example #2
// 0
        /// <summary>
        /// Walks every cell of the table loaded from <paramref name="file"/>, column by column,
        /// and groups consecutive cells that look like fund / sub-account names into "combos".
        /// A combo maps an issuer caption (the most recent all-uppercase cell, or "" when none
        /// was seen) to entries of the form "rowIndex,columnIndex,normalizedValue".
        /// The combos are also written to "<paramref name="file"/>_result.csv".
        /// </summary>
        /// <param name="file">Path of the xlsx file to scan.</param>
        /// <returns>
        /// All combos that passed the size threshold, ordered by descending number of issuer keys.
        /// Combos that are skipped when writing the report (fewer than five not-yet-seen values)
        /// are still part of the returned list.
        /// </returns>
        private List<Dictionary<string, List<string>>> ParseSubAccFromFile(string file)
        {
            // Everything except lowercase ASCII letters, digits and spaces is stripped from a cell.
            var removeSpecCharRegexp = new Regex(@"[^a-z0-9 ]", RegexOptions.Compiled);

            const string wordBoundary = @"\b";
            var wordsToRemove = "fund,class,the".Split(',')
                                .Select(x => new Regex(wordBoundary + x + wordBoundary, RegexOptions.Compiled))
                                .ToList();

            var removeDuplicateCharRegexp = new Regex(@"[ ]{2,}", RegexOptions.Compiled);

            var removeValInBktRegexp = new Regex(@"\(.*\)", RegexOptions.Compiled);

            string[] tikerMarkWordsArr = { "GROWTH",    "CAPITAL", "INCOME",    "INVESTMENT", "ALLOCATION", "VALUE",      "EQUITY",    "INDEX",     "INTERNATIONAL", "TOTAL",    "SMALL",       "INSTITUTIONAL",
                                           "INFLATION", "MARKET",  "SELECT",    "RETURN",     "FINANCIAL",  "ASSET",      "CORPORATE", "FUND",      "FUNDS",         "ENHANCED", "CONVERTIBLE", "RETIREMENT",   "MODERATE",
                                           "BOND",      "SHORT",   "INVESTORS", "STOCK",      "HEALTH",     "BALANCED",   "GLOBAL",    "INSIGHTS",  "GOVERNMENT",    "EMERGING", "WORLD",       "HEALTHCARE",   "TREASURY",
                                           "INFO",      "REAL",    "RESERVES",  "MARKETS",    "ENERGY",     "TECHNOLOGY", "CASH",      "RESOURCES", "COMPANY",       "LONG",     "TERM",        "APPRECIATION", "HIGH",
                                           "LARGE",     "MID",     "Portfolio" };

            // Invariant lowercasing: the patterns are matched against ASCII-normalized text, so the
            // result must not depend on the current culture (e.g. Turkish "I" -> dotless "ı").
            var tickerMarkWordsArrRegex = tikerMarkWordsArr
                                          .Select(x => new Regex(wordBoundary + x.ToLowerInvariant() + wordBoundary, RegexOptions.Compiled))
                                          .ToList();

            var resultIssuers = new List<Dictionary<string, List<string>>>();
            var comboList     = new Dictionary<string, List<string>>();

            // Number of upcoming "plain" cells that may still be attached to the current combo.
            var    probabilityMass = 0;
            string possibleIsser   = null;

            using (var stream = new FileStream(file, FileMode.Open, FileAccess.Read))
            {
                var dt = XlsxToCsvConverter.XlsxToDataTable(stream, false, 0, 0);

                // The bar is incremented once per cell, so its range must cover rows * columns.
                prgLine.Maximum = dt.Rows.Count * dt.Columns.Count;

                for (int j = 0; j < dt.Columns.Count; j++)
                {
                    for (int i = 0; i < dt.Rows.Count; i++)
                    {
                        prgLine.Increment(1);

                        // Pump pending window messages so the progress bar repaints during the scan.
                        Application.DoEvents();

                        string rawVal = dt.Rows[i][j].ToString().Trim();
                        string val    = rawVal.ToLowerInvariant();

                        // Marker words are looked up before any word is stripped from the value.
                        bool hasTickerMark = tickerMarkWordsArrRegex.Any(regex => regex.IsMatch(val));

                        val = removeValInBktRegexp.Replace(val, "");
                        val = removeSpecCharRegexp.Replace(val, "");
                        val = wordsToRemove.Aggregate(val, (current, regex) => regex.Replace(current, ""));

                        // Collapse runs of spaces into a single space; replacing them with nothing
                        // would glue neighbouring words together.
                        val = removeDuplicateCharRegexp.Replace(val, " ").Trim();
                        if (string.IsNullOrEmpty(val))
                        {
                            continue;
                        }

                        if (val.Length <= 100)
                        {
                            // A cell of 4+ characters whose letters are all uppercase is treated as an issuer caption.
                            if (rawVal.Length >= 4 && rawVal.Any(char.IsLetter) && rawVal.All(x => !char.IsLetter(x) || char.IsUpper(x)))
                            {
                                possibleIsser   = rawVal;
                                probabilityMass = probabilityMass > 1 ? probabilityMass : 1;
                            }

                            if (hasTickerMark)
                            {
                                possibleIsser   = possibleIsser ?? "";
                                probabilityMass = probabilityMass > 2 ? probabilityMass : 2;
                                comboList.AddIfNotExist(possibleIsser, new List<string>());
                                comboList[possibleIsser].Add(string.Format("{0},{1},{2}", i, j, val));
                                continue;
                            }

                            if (probabilityMass > 0)
                            {
                                probabilityMass--;

                                // probabilityMass is only raised together with a non-null possibleIsser,
                                // so the key used below is never null.
                                long longVal;
                                if (val.Length > 5 && !long.TryParse(val, out longVal))
                                {
                                    comboList.AddIfNotExist(possibleIsser, new List<string>());
                                    comboList[possibleIsser].Add(string.Format("{0},{1},{2}", i, j, val));
                                }

                                continue;
                            }
                        }

                        // The cell does not belong to the running combo: close it and start over.
                        if (IsSignificantCombo(comboList))
                        {
                            resultIssuers.Add(comboList);
                        }

                        comboList       = new Dictionary<string, List<string>>();
                        possibleIsser   = null;
                        probabilityMass = 0;
                    }

                    // A combo never spans two columns: close whatever is pending at the end of a column.
                    if (IsSignificantCombo(comboList))
                    {
                        resultIssuers.Add(comboList);
                    }

                    comboList       = new Dictionary<string, List<string>>();
                    possibleIsser   = null;
                    probabilityMass = 0;
                }
            }

            using (var fileWriter = new StreamWriter(file + "_result.csv"))
            {
                resultIssuers = resultIssuers.OrderByDescending(x => x.Keys.Count).ToList();
                var allreadyAddedSubAcc = new HashSet<string>();

                foreach (var dict in resultIssuers)
                {
                    // Entries are "row,column,value"; the value itself cannot contain a comma because
                    // special characters were stripped, so index 2 is the normalized value.
                    // HashSet.Add doubles as the "not seen in an earlier combo" filter.
                    var uniqList = dict.Values.SelectMany(x => x).Where(value => allreadyAddedSubAcc.Add(value.Split(',')[2])).ToList();
                    if (uniqList.Count < 5)
                    {
                        continue;
                    }

                    fileWriter.WriteLine("New Combo. KeyCount {0}, ValueCount {1}", dict.Keys.Count, dict.Values.Sum(x => x.Count));

                    foreach (var issuerToValListPair in dict)
                    {
                        foreach (var value in issuerToValListPair.Value)
                        {
                            fileWriter.WriteLine("{0},{1}", issuerToValListPair.Key, value);
                        }
                    }

                    fileWriter.WriteLine();
                }
            }

            return resultIssuers;
        }

        /// <summary>
        /// Tells whether a combo is worth keeping: it has at least two issuer keys
        /// or at least five entries in total.
        /// </summary>
        private static bool IsSignificantCombo(Dictionary<string, List<string>> combo)
        {
            return combo.Count >= 2 || combo.Values.Sum(x => x.Count) >= 5;
        }