Пример #1
0
        /// <summary>
        /// For every combination of selected group and selected hash group, reads the matching
        /// raw ID files, collects their rows into a table of <see cref="StandarizedIDData"/>
        /// buckets indexed by <c>getIDHash</c>, and writes that table to one output file.
        /// </summary>
        /// <remarks>
        /// The combinations are the full cross product of the distinct groups and the distinct
        /// hash groups, so an output file is written even for a pair that has no matching input
        /// file (it then contains only the title line).
        /// </remarks>
        private void ReadAndStandarizeIDFile()
        {
            // Materialize the queries once. Left deferred, they would re-scan rawDataFileList
            // on every pass of the nested loops below.
            var selectedFiles = (from q in rawDataFileList where q.selected == true select q).ToList();
            var distinctGroups = (from q in selectedFiles select q.@group).Distinct().ToList();
            var distinctHashGroups = (from q in selectedFiles select q.hashGroup).Distinct().ToList();

            foreach (string currentDistinctGroup in distinctGroups)
            {
                foreach (string currentDistinctHashGroup in distinctHashGroups)
                {
                    var filesInThisGroup = (from q in selectedFiles
                                            where (q.@group == currentDistinctGroup
                                            && q.hashGroup == currentDistinctHashGroup)
                                            select q).ToList();

                    // One bucket per hash value; the value returned by getIDHash is used
                    // directly as the array index, so it must be below hashTableElementCount.
                    DistinctList<StandarizedIDData>[] standarizedIDDataTable = new DistinctList<StandarizedIDData>[hashTableElementCount];
                    for (int i = 0; i < hashTableElementCount; i++)
                    {
                        standarizedIDDataTable[i] = new DistinctList<StandarizedIDData>();
                    }

                    foreach (File currentFile in filesInThisGroup)
                    {
                        List<StringDataFormat> stringDataFormatsForCurrentFile = getStringDataFormatsWithRightYear(currentFile);
                        List<NumberDataFormat> numberDataFormatsForCurrentFile = getNumberDataFormatsWithRightYear(currentFile);

                        // Column positions of the ID fields within a parsed row.
                        // NOTE(review): FindIndex returns -1 when no format has the given key;
                        // the row accesses below would then presumably fail on the first row read.
                        int indexID = stringDataFormatsForCurrentFile.FindIndex(x => x.key == "ID");
                        int indexBirthday = stringDataFormatsForCurrentFile.FindIndex(x => x.key == "ID_BIRTHDAY");
                        int indexSex = stringDataFormatsForCurrentFile.FindIndex(x => x.key == "ID_SEX");
                        int indexInDate = stringDataFormatsForCurrentFile.FindIndex(x => x.key == "ID_IN_DATE");
                        int indexOutDate = stringDataFormatsForCurrentFile.FindIndex(x => x.key == "ID_OUT_DATE");

                        using (var sr = new StreamReader(currentFile.path, Encoding.Default))
                        {
                            while (!sr.EndOfStream)
                            {
                                var dataRow = ReadRow(sr, stringDataFormatsForCurrentFile, numberDataFormatsForCurrentFile);

                                // Skip rows whose birthday is rejected by isMatchBirthYearRange.
                                if (!isMatchBirthYearRange(dataRow.stringData[indexBirthday]))
                                {
                                    continue;
                                }

                                // A single row supplies both the "first" and the "last" value of each date.
                                var newIDData = new StandarizedIDData()
                                {
                                    ID = dataRow.stringData[indexID],
                                    Birthday = dataRow.stringData[indexBirthday],
                                    isMale = dataRow.stringData[indexSex] == "M",
                                    firstInDate = dataRow.stringData[indexInDate].StringToDate(),
                                    lastInDate = dataRow.stringData[indexInDate].StringToDate(),
                                    firstOutDate = dataRow.stringData[indexOutDate].StringToDate(),
                                    lastOutDate = dataRow.stringData[indexOutDate].StringToDate()
                                };

                                uint hash = getIDHash(newIDData);
                                var bucket = standarizedIDDataTable[hash];
                                int index = bucket.AddDistinct(newIDData);

                                // A non-negative result is treated as the position of the bucket
                                // entry to refresh with this row's sex and dates (presumably an
                                // entry that was already present - confirm against DistinctList).
                                // NOTE(review): all four dates are overwritten with this row's
                                // values; no min/max is taken - confirm this is intended.
                                if (index >= 0)
                                {
                                    bucket[index].isMale = newIDData.isMale;
                                    bucket[index].firstInDate = newIDData.firstInDate;
                                    bucket[index].firstOutDate = newIDData.firstOutDate;
                                    bucket[index].lastInDate = newIDData.lastInDate;
                                    bucket[index].lastOutDate = newIDData.lastOutDate;
                                }
                            }
                        }
                    }

                    // Dump every bucket, in hash order, below a single title line.
                    var outputFilePath = getOutputFilePath(currentDistinctGroup, currentDistinctHashGroup);
                    using (var sw = new StreamWriter(outputFilePath, false, Encoding.Default))
                    {
                        sw.WriteLine(StandarizedIDData.ToTitle());
                        foreach (var thisTable in standarizedIDDataTable)
                        {
                            foreach (var thisstandarizedIDData in thisTable)
                            {
                                sw.WriteLine(thisstandarizedIDData.ToWriteLine());
                            }
                        }
                    }
                }
            }
        }
Пример #2
0
 /// <summary>
 /// Derives the hash value of an ID record by parsing the four characters of its
 /// <c>ID</c> at offsets 4 to 7 as a hexadecimal number.
 /// </summary>
 /// <remarks>
 /// The second group of four digits is used as the hash; the first group is left
 /// for splitting the data into groups by hash.
 /// </remarks>
 /// <param name="inputIDData">Record whose <c>ID</c> supplies the hexadecimal digits.</param>
 /// <returns>The numeric value of the four hexadecimal digits.</returns>
 private uint getIDHash(StandarizedIDData inputIDData)
 {
     const int hashDigitsStart = 4;
     const int hashDigitsLength = 4;

     var hexDigits = inputIDData.ID.Substring(hashDigitsStart, hashDigitsLength);
     uint hashValue = uint.Parse(hexDigits, System.Globalization.NumberStyles.HexNumber);
     return hashValue;
 }