/// <summary>
/// For every combination of a distinct group and a distinct hash group found among the
/// selected raw data files, reads the matching files, collects their ID rows into a
/// hash-bucketed table of distinct entries, and writes that table to one output file.
/// </summary>
/// <remarks>
/// An output file is written for every (group, hash group) combination, including
/// combinations that no selected file matches; such a file holds only the title line.
/// </remarks>
private void ReadAndStandarizeIDFile()
{
    // Snapshot the selected files once so the nested loops below do not re-run the
    // filter over rawDataFileList on every pass.
    var selectedFiles = rawDataFileList.Where(q => q.selected == true).ToList();
    var distinctGroups = selectedFiles.Select(q => q.@group).Distinct().ToList();
    var distinctHashGroups = selectedFiles.Select(q => q.hashGroup).Distinct().ToList();

    foreach (string currentDistinctGroup in distinctGroups)
    {
        foreach (string currentDistinctHashGroup in distinctHashGroups)
        {
            var filesInThisGroup = selectedFiles.Where(
                q => q.@group == currentDistinctGroup && q.hashGroup == currentDistinctHashGroup);

            // Bucket array indexed by getIDHash(...).
            // NOTE(review): assumes hashTableElementCount is larger than every hash value
            // getIDHash can return (four hex digits, i.e. up to 0xFFFF) - confirm.
            DistinctList<StandarizedIDData>[] standarizedIDDataTable =
                new DistinctList<StandarizedIDData>[hashTableElementCount];
            for (int i = 0; i < hashTableElementCount; i++)
            {
                standarizedIDDataTable[i] = new DistinctList<StandarizedIDData>();
            }

            foreach (File currentFile in filesInThisGroup)
            {
                List<StringDataFormat> stringDataFormatsForCurrentFile = getStringDataFormatsWithRightYear(currentFile);
                List<NumberDataFormat> numberDataFormatsForCurrentFile = getNumberDataFormatsWithRightYear(currentFile);

                // Column positions of the ID-related fields within a parsed row.
                // NOTE(review): FindIndex yields -1 when a key is absent; that would
                // presumably fail as an index error on the first row read below.
                int indexID = stringDataFormatsForCurrentFile.FindIndex(x => x.key == "ID");
                int indexBirthday = stringDataFormatsForCurrentFile.FindIndex(x => x.key == "ID_BIRTHDAY");
                int indexSex = stringDataFormatsForCurrentFile.FindIndex(x => x.key == "ID_SEX");
                int indexInDate = stringDataFormatsForCurrentFile.FindIndex(x => x.key == "ID_IN_DATE");
                int indexOutDate = stringDataFormatsForCurrentFile.FindIndex(x => x.key == "ID_OUT_DATE");

                using (var sr = new StreamReader(currentFile.path, Encoding.Default))
                {
                    while (!sr.EndOfStream)
                    {
                        var dataRow = ReadRow(sr, stringDataFormatsForCurrentFile, numberDataFormatsForCurrentFile);

                        // Skip rows whose birthday is rejected by isMatchBirthYearRange.
                        if (!isMatchBirthYearRange(dataRow.stringData[indexBirthday]))
                        {
                            continue;
                        }

                        // A freshly read row starts with first* and last* set to the same dates.
                        var newIDData = new StandarizedIDData()
                        {
                            ID = dataRow.stringData[indexID],
                            Birthday = dataRow.stringData[indexBirthday],
                            isMale = dataRow.stringData[indexSex] == "M",
                            firstInDate = dataRow.stringData[indexInDate].StringToDate(),
                            lastInDate = dataRow.stringData[indexInDate].StringToDate(),
                            firstOutDate = dataRow.stringData[indexOutDate].StringToDate(),
                            lastOutDate = dataRow.stringData[indexOutDate].StringToDate()
                        };

                        uint hash = getIDHash(newIDData);
                        int index = standarizedIDDataTable[hash].AddDistinct(newIDData);

                        // A non-negative index is treated as "an entry is already stored at
                        // that position"; its fields are overwritten with this row's values.
                        // NOTE(review): this replaces first* as well as last*, so both end up
                        // holding the most recently read row - confirm that is intended.
                        if (index >= 0)
                        {
                            standarizedIDDataTable[hash][index].isMale = newIDData.isMale;
                            standarizedIDDataTable[hash][index].firstInDate = newIDData.firstInDate;
                            standarizedIDDataTable[hash][index].firstOutDate = newIDData.firstOutDate;
                            standarizedIDDataTable[hash][index].lastInDate = newIDData.lastInDate;
                            standarizedIDDataTable[hash][index].lastOutDate = newIDData.lastOutDate;
                        }
                    }
                }
            }

            // Write the title line followed by every entry, bucket by bucket.
            var outputFilePath = getOutputFilePath(currentDistinctGroup, currentDistinctHashGroup);
            using (var sw = new StreamWriter(outputFilePath, false, Encoding.Default))
            {
                sw.WriteLine(StandarizedIDData.ToTitle());
                foreach (var thisTable in standarizedIDDataTable)
                {
                    foreach (var thisstandarizedIDData in thisTable)
                    {
                        sw.WriteLine(thisstandarizedIDData.ToWriteLine());
                    }
                }
            }
        }
    }
}
/// <summary>
/// Uses the second group of four digits of the ID as the hash
/// (the first group is reserved for "split group by hash").
/// </summary>
/// <param name="inputIDData">Entry whose <c>ID</c> supplies the digits to hash.</param>
/// <returns>The four characters at positions 4-7 of the ID, parsed as a hexadecimal number.</returns>
private uint getIDHash(StandarizedIDData inputIDData)
{
    // Skip the first four characters and take the next four.
    string secondHexGroup = inputIDData.ID.Substring(4, 4);
    uint hashValue = uint.Parse(secondHexGroup, System.Globalization.NumberStyles.HexNumber);
    return hashValue;
}