/// <summary>
/// Looks for a value in the description lines of one sample and stores it on
/// <paramref name="item"/> through SetValue. Three strategies are tried in order:
/// a key prefix, a regular expression, and a question/answer line pair.
/// When none of them matches, DefaultValue is stored instead.
/// </summary>
/// <param name="lst">Description lines of the sample.</param>
/// <param name="item">Sample item that receives the value.</param>
/// <param name="defaultReturnValue">Result returned when only DefaultValue could be applied.</param>
/// <returns>
/// <c>true</c> when one of the three strategies found a value;
/// otherwise <paramref name="defaultReturnValue"/>.
/// </returns>
public bool Parse(List<string> lst, BreastCancerSampleItem item, bool defaultReturnValue)
{
    // First of all, using key to find value.
    foreach (var key in Keys)
    {
        // Ordinal comparison keeps the prefix length equal to key.Length, which the
        // Substring arithmetic below relies on.
        var keyLine = lst.Find(m => m.StartsWith(key, System.StringComparison.Ordinal));
        if (keyLine != null)
        {
            // The value starts one character after the key (presumably a separator such as
            // ':' or '='). A line that consists of the key alone has no value part; the
            // original Substring call threw ArgumentOutOfRangeException in that case.
            var keyValue = keyLine.Length > key.Length
                ? keyLine.Substring(key.Length + 1).Trim()
                : string.Empty;
            SetValue(item, keyValue);
            return true;
        }
    }

    // If there is line matching regex, using regex to assign value.
    if (ValueRegex != null)
    {
        foreach (var reg in ValueRegex)
        {
            foreach (var candidate in lst)
            {
                // Match once and reuse the result instead of matching again on the found line.
                var match = reg.Match(candidate);
                if (match.Success)
                {
                    SetValue(item, match.Groups[1].Value);
                    return true;
                }
            }
        }
    }

    // Finally, find question and answer. The answer is read from the line that follows the
    // question, so the last line can never be a usable question: stop one element early
    // instead of indexing past the end of the list.
    for (int j = 0; j + 1 < lst.Count; j++)
    {
        if (lst[j].StartsWith("Pathological Question:", System.StringComparison.Ordinal)
            && GetAnswer(lst[j]).Equals(Question))
        {
            SetValue(item, GetAnswer(lst[j + 1]));
            return true;
        }
    }

    SetValue(item, DefaultValue);
    return defaultReturnValue;
}
/// <summary>
/// Reads the sample descriptions found in <paramref name="datasetDirectory"/> and copies
/// every answer that has a registered converter onto the matching entry of
/// <paramref name="sampleMap"/>, creating the entry when it does not exist yet.
/// </summary>
/// <param name="datasetDirectory">Directory of the dataset; its last path segment is used as dataset name.</param>
/// <param name="sampleMap">Samples collected so far, keyed by sample name; updated in place.</param>
public void ParseDataset(string datasetDirectory, Dictionary<string, BreastCancerSampleItem> sampleMap)
{
    var gsmFiles = GeoUtils.GetGsmNameFileMap(datasetDirectory);
    var datasetName = Path.GetFileName(datasetDirectory);
    var descriptions = new RawSampleInfoReader().ReadDescriptionFromDirectory(datasetDirectory);

    foreach (var sampleName in descriptions.Keys)
    {
        // Samples are looked up in the file map by their lower-cased name;
        // those without an entry are ignored.
        if (!gsmFiles.ContainsKey(sampleName.ToLower()))
        {
            continue;
        }

        BreastCancerSampleItem sample;
        if (!sampleMap.TryGetValue(sampleName, out sample))
        {
            sample = new BreastCancerSampleItem(datasetName, sampleName);
            sampleMap[sampleName] = sample;
        }

        // Parse information: only the first answer of each known question is used.
        var answers = descriptions[sampleName];
        foreach (var question in answers.Keys)
        {
            if (!converters.ContainsKey(question))
            {
                continue;
            }

            converters[question].SetProperty(sample, answers[question].First());
        }

        // Set default values; these run after the parsed answers for every sample.
        foreach (var defaultEntry in defaultConverters)
        {
            defaultEntry.Key.SetProperty(sample, defaultEntry.Value);
        }
    }
}
/// <summary>
/// Populates <paramref name="sampleMap"/> from the sample descriptions stored in
/// <paramref name="datasetDirectory"/>. For each described sample that also appears in the
/// GSM file map, the answers of questions with a registered converter are applied to the
/// sample item, followed by the default converters.
/// </summary>
/// <param name="datasetDirectory">Dataset directory; its file name part becomes the dataset name of new items.</param>
/// <param name="sampleMap">Sample items keyed by sample name; missing items are added.</param>
public void ParseDataset(string datasetDirectory, Dictionary<string, BreastCancerSampleItem> sampleMap)
{
    var fileMap = GeoUtils.GetGsmNameFileMap(datasetDirectory);
    var dataset = Path.GetFileName(datasetDirectory);
    var sampleInfo = new RawSampleInfoReader().ReadDescriptionFromDirectory(datasetDirectory);

    foreach (var gsm in sampleInfo.Keys)
    {
        // The file map is queried with the lower-cased sample name.
        var hasFile = fileMap.ContainsKey(gsm.ToLower());
        if (hasFile)
        {
            BreastCancerSampleItem target;
            var known = sampleMap.TryGetValue(gsm, out target);
            if (!known)
            {
                target = new BreastCancerSampleItem(dataset, gsm);
                sampleMap[gsm] = target;
            }

            var questionMap = sampleInfo[gsm];

            // Parse information: apply the first answer of every question that has a converter.
            foreach (var q in questionMap.Keys)
            {
                if (converters.ContainsKey(q))
                {
                    var conv = converters[q];
                    var firstAnswer = questionMap[q].First();
                    conv.SetProperty(target, firstAnswer);
                }
            }

            // Set default values after the parsed ones.
            foreach (var fallback in defaultConverters)
            {
                var conv = fallback.Key;
                conv.SetProperty(target, fallback.Value);
            }
        }
    }
}
/// <summary>
/// Reads the series matrix in <paramref name="datasetDirectory"/> and fills ER, PR,
/// tumor status and grade of each sample from its title.
/// </summary>
/// <remarks>
/// A sample whose title does not match the title pattern is still registered in
/// <paramref name="sampleMap"/>, but none of its fields are assigned. Previously the
/// empty groups of a failed match were stored as ER = "neg", PR = "neg" and empty
/// tumor status / grade.
/// </remarks>
/// <param name="datasetDirectory">Dataset directory; its file name part becomes the dataset name of new items.</param>
/// <param name="sampleMap">Sample items keyed by upper-cased sample name; updated in place.</param>
public void ParseDataset(string datasetDirectory, Dictionary<string, BreastCancerSampleItem> sampleMap)
{
    var files = GeoUtils.GetGsmNameFileMap(datasetDirectory);
    var dirname = Path.GetFileName(datasetDirectory);

    // The status of ER, PR is on the sample title.
    var samples = new GseSeriesMatrixReader().ReadFromDirectory(datasetDirectory);
    foreach (var a in samples)
    {
        // The file map is queried with the lower-cased sample name.
        var filename = a.Key.ToLower();
        if (!files.ContainsKey(filename))
        {
            continue;
        }

        var title = a.Value[GsmConsts.SampleTitle];
        var m = r.Match(title.First());

        var key = filename.ToUpper();
        BreastCancerSampleItem item;
        if (!sampleMap.TryGetValue(key, out item))
        {
            item = new BreastCancerSampleItem(dirname, key);
            sampleMap[key] = item;
        }

        // Regex.Match returns an unsuccessful match with empty groups when the title has
        // an unexpected shape; do not turn that into fabricated "neg" statuses.
        if (!m.Success)
        {
            continue;
        }

        // Groups 1 and 2 hold "p" for a positive status; anything else is stored as negative.
        item.ER = m.Groups[1].Value.Equals("p") ? "pos" : "neg";
        item.PR = m.Groups[2].Value.Equals("p") ? "pos" : "neg";
        item.TumorStatus = m.Groups[3].Value;
        // NOTE(review): group 4 was read into an unused local by the previous code and never
        // stored on the item - confirm whether it should populate a field.
        item.Grade = m.Groups[5].Value;
    }
}
/// <summary>
/// Parses the series matrix of <paramref name="datasetDirectory"/> and derives ER, PR,
/// tumor status and grade for every sample from the sample title.
/// </summary>
/// <remarks>
/// When a title does not match the title pattern the sample is added to
/// <paramref name="sampleMap"/> without assigning any field, rather than recording the
/// empty groups of the failed match as "neg" statuses.
/// </remarks>
/// <param name="datasetDirectory">Dataset directory; its file name part is used as dataset name of new items.</param>
/// <param name="sampleMap">Sample items keyed by upper-cased sample name; updated in place.</param>
public void ParseDataset(string datasetDirectory, Dictionary<string, BreastCancerSampleItem> sampleMap)
{
    var files = GeoUtils.GetGsmNameFileMap(datasetDirectory);
    var dirname = Path.GetFileName(datasetDirectory);

    // The status of ER, PR is on the sample title.
    var samples = new GseSeriesMatrixReader().ReadFromDirectory(datasetDirectory);
    foreach (var a in samples)
    {
        // Lower-cased name is used for the file lookup, upper-cased name as map key.
        var filename = a.Key.ToLower();
        if (files.ContainsKey(filename))
        {
            var title = a.Value[GsmConsts.SampleTitle];
            var m = r.Match(title.First());

            var key = filename.ToUpper();
            BreastCancerSampleItem item;
            if (!sampleMap.TryGetValue(key, out item))
            {
                item = new BreastCancerSampleItem(dirname, key);
                sampleMap[key] = item;
            }

            // An unsuccessful match exposes empty groups; assigning them would mark the
            // sample ER/PR negative without any evidence, so only a real match is stored.
            if (m.Success)
            {
                // "p" in group 1 / group 2 means positive, every other value negative.
                item.ER = m.Groups[1].Value.Equals("p") ? "pos" : "neg";
                item.PR = m.Groups[2].Value.Equals("p") ? "pos" : "neg";
                item.TumorStatus = m.Groups[3].Value;
                // NOTE(review): group 4 was captured into an unused local before and is not
                // stored anywhere - confirm whether a field should receive it.
                item.Grade = m.Groups[5].Value;
            }
        }
    }
}
/// <summary>
/// Reads the first "*.sdrf.txt" annotation file of <paramref name="datasetDirectory"/> and
/// copies the clinical columns it provides onto the matching items of
/// <paramref name="sampleMap"/>. Only annotation rows whose sample name matches a CEL file
/// of the directory (file name without extension) are used.
/// </summary>
/// <param name="datasetDirectory">Dataset directory; its file name part is stored as dataset name of new items.</param>
/// <param name="sampleMap">Sample items keyed by sample name; missing items are added.</param>
/// <exception cref="ArgumentException">The directory contains no "*.sdrf.txt" file.</exception>
public void ParseDataset(string datasetDirectory, Dictionary<string, BreastCancerSampleItem> sampleMap)
{
    var celNames = new HashSet<string>(
        CelFile.GetCelFiles(datasetDirectory, false).Select(f => Path.GetFileNameWithoutExtension(f)));

    var sdrfFiles = Directory.GetFiles(datasetDirectory, "*.sdrf.txt");
    if (sdrfFiles.Length == 0)
    {
        throw new ArgumentException("Cannot find sdrf file in directory " + datasetDirectory);
    }

    var annotations = new AnnotationFormat("^#").ReadFromFile(sdrfFiles[0]);
    var dataset = Path.GetFileName(datasetDirectory);

    foreach (var a in annotations)
    {
        var sampleName = Path.GetFileNameWithoutExtension(FindValue(a, ColumnName.Sample));
        if (!celNames.Contains(sampleName))
        {
            continue;
        }

        BreastCancerSampleItem item;
        if (!sampleMap.TryGetValue(sampleName, out item))
        {
            item = new BreastCancerSampleItem { Dataset = dataset, Sample = sampleName };
            sampleMap[sampleName] = item;
        }

        // A field is only touched when FindValue reports a value for its column.
        string value;

        if (FindValue(a, ColumnName.Age, out value))
        {
            item.Age = value;
        }

        // Receptor statuses go through StatusValue.TransferStatus; all other columns are stored as read.
        if (FindValue(a, ColumnName.ER, out value))
        {
            item.ER = StatusValue.TransferStatus(value);
        }

        if (FindValue(a, ColumnName.PR, out value))
        {
            item.PR = StatusValue.TransferStatus(value);
        }

        if (FindValue(a, ColumnName.HER2, out value))
        {
            item.HER2 = StatusValue.TransferStatus(value);
        }

        if (FindValue(a, ColumnName.Stage, out value))
        {
            item.Stage = value;
        }

        if (FindValue(a, ColumnName.TumorStage, out value))
        {
            item.TumorStatus = value;
        }

        if (FindValue(a, ColumnName.Grade, out value))
        {
            item.Grade = value;
        }

        if (FindValue(a, ColumnName.NodalStatus, out value))
        {
            item.NodalStatus = value;
        }

        if (FindValue(a, ColumnName.PCR, out value))
        {
            item.PCR = value;
        }

        if (FindValue(a, ColumnName.DFS, out value))
        {
            item.DFS = value;
        }

        if (FindValue(a, ColumnName.DFSTime, out value))
        {
            item.DFSTime = value;
        }

        if (FindValue(a, ColumnName.RFS, out value))
        {
            item.RFS = value;
        }

        if (FindValue(a, ColumnName.RFSTime, out value))
        {
            item.RFSTime = value;
        }

        if (FindValue(a, ColumnName.DMFS, out value))
        {
            item.DMFS = value;
        }

        if (FindValue(a, ColumnName.DMFSTime, out value))
        {
            item.DMFSTime = value;
        }

        if (FindValue(a, ColumnName.OverallServive, out value))
        {
            item.OverallSurvival = value;
        }

        if (FindValue(a, ColumnName.DeadOfDisease, out value))
        {
            item.DeadOfDisease = value;
        }
    }
}
/// <summary>
/// Loads the clinical annotation of a dataset from the first "*.sdrf.txt" file found in
/// <paramref name="datasetDirectory"/> and stores the available columns on the sample items
/// in <paramref name="sampleMap"/>. Annotation rows are matched to CEL files of the
/// directory by file name without extension; rows without a matching CEL file are skipped.
/// </summary>
/// <param name="datasetDirectory">Dataset directory; its file name part becomes the dataset name of new items.</param>
/// <param name="sampleMap">Sample items keyed by sample name; new items are created on demand.</param>
/// <exception cref="ArgumentException">No "*.sdrf.txt" file exists in the directory.</exception>
public void ParseDataset(string datasetDirectory, Dictionary<string, BreastCancerSampleItem> sampleMap)
{
    // Names of the CEL files (without extension) that annotation rows must match.
    var knownSamples = new HashSet<string>();
    foreach (var celFile in CelFile.GetCelFiles(datasetDirectory, false))
    {
        knownSamples.Add(Path.GetFileNameWithoutExtension(celFile));
    }

    var candidates = Directory.GetFiles(datasetDirectory, "*.sdrf.txt");
    if (candidates.Length == 0)
    {
        throw new ArgumentException("Cannot find sdrf file in directory " + datasetDirectory);
    }

    var rows = new AnnotationFormat("^#").ReadFromFile(candidates[0]);
    var datasetName = Path.GetFileName(datasetDirectory);

    foreach (var a in rows)
    {
        var name = Path.GetFileNameWithoutExtension(FindValue(a, ColumnName.Sample));
        if (knownSamples.Contains(name))
        {
            BreastCancerSampleItem item;
            if (!sampleMap.TryGetValue(name, out item))
            {
                item = new BreastCancerSampleItem();
                sampleMap[name] = item;
                item.Dataset = datasetName;
                item.Sample = name;
            }

            // Each column is optional: the field keeps its current content when the
            // two-argument FindValue overload reports no value.
            string value;

            if (FindValue(a, ColumnName.Age, out value)) { item.Age = value; }

            // ER, PR and HER2 are converted with StatusValue.TransferStatus before being stored.
            if (FindValue(a, ColumnName.ER, out value)) { item.ER = StatusValue.TransferStatus(value); }
            if (FindValue(a, ColumnName.PR, out value)) { item.PR = StatusValue.TransferStatus(value); }
            if (FindValue(a, ColumnName.HER2, out value)) { item.HER2 = StatusValue.TransferStatus(value); }

            // Staging and grading columns, stored as read.
            if (FindValue(a, ColumnName.Stage, out value)) { item.Stage = value; }
            if (FindValue(a, ColumnName.TumorStage, out value)) { item.TumorStatus = value; }
            if (FindValue(a, ColumnName.Grade, out value)) { item.Grade = value; }
            if (FindValue(a, ColumnName.NodalStatus, out value)) { item.NodalStatus = value; }
            if (FindValue(a, ColumnName.PCR, out value)) { item.PCR = value; }

            // Follow-up columns, stored as read.
            if (FindValue(a, ColumnName.DFS, out value)) { item.DFS = value; }
            if (FindValue(a, ColumnName.DFSTime, out value)) { item.DFSTime = value; }
            if (FindValue(a, ColumnName.RFS, out value)) { item.RFS = value; }
            if (FindValue(a, ColumnName.RFSTime, out value)) { item.RFSTime = value; }
            if (FindValue(a, ColumnName.DMFS, out value)) { item.DMFS = value; }
            if (FindValue(a, ColumnName.DMFSTime, out value)) { item.DMFSTime = value; }
            if (FindValue(a, ColumnName.OverallServive, out value)) { item.OverallSurvival = value; }
            if (FindValue(a, ColumnName.DeadOfDisease, out value)) { item.DeadOfDisease = value; }
        }
    }
}