/// <summary>
/// Builds a <see cref="SampleItem"/> for every described sample in a dataset directory
/// that also has an entry in the GSM name/file map, and fills its annotations from the
/// configured column mappings and default values.
/// </summary>
/// <param name="datasetDirectory">Dataset directory; its last path segment is used as the dataset name.</param>
/// <returns>The created items keyed by sample name as it appears in the description data.</returns>
public Dictionary<string, SampleItem> ParseDataset(string datasetDirectory)
{
    var parsed = new Dictionary<string, SampleItem>();

    // Only mappings that name a target property take part; they are looked up by annotation name.
    var mappedColumns = maps
        .Where(m => !string.IsNullOrEmpty(m.PropertyName))
        .ToDictionary(m => m.AnnotationName);

    // Default values with an empty value are ignored.
    var defaults = maps.DefaultValues
        .Where(m => !string.IsNullOrEmpty(m.Value))
        .ToList();

    var gsmFiles = GeoUtils.GetGsmNameFileMap(datasetDirectory);
    var datasetName = Path.GetFileName(datasetDirectory);
    var descriptions = new RawSampleInfoReader().ReadDescriptionFromDirectory(datasetDirectory);

    foreach (var sampleName in descriptions.Keys)
    {
        // The file map is queried with the upper-cased sample name; samples without a file are skipped.
        string sampleFile;
        if (!gsmFiles.TryGetValue(sampleName.ToUpper(), out sampleFile))
        {
            continue;
        }

        var item = new SampleItem() { Dataset = datasetName, Sample = sampleName, SampleFile = sampleFile };
        parsed[sampleName] = item;

        var sampleAnnotations = descriptions[sampleName];
        foreach (var column in mappedColumns)
        {
            List<string> values;
            if (sampleAnnotations.TryGetValue(column.Key, out values))
            {
                // Only the first value of a multi-valued annotation is kept.
                item.Annotations[column.Value.PropertyName] = values.FirstOrDefault();
            }
        }

        // Defaults only fill properties that were not set from the sample's own annotations.
        foreach (var dv in defaults)
        {
            if (item.Annotations.ContainsKey(dv.PropertyName))
            {
                continue;
            }

            item.Annotations[dv.PropertyName] = dv.Value;
        }
    }

    return parsed;
}
/// <summary>
/// Writes a tab-separated table of sample annotations to <c>options.OutputFile</c>:
/// one row per key of the GSM name/file map (upper-cased, sorted) and one column per
/// distinct annotation name (sorted). Samples without description data are reported
/// to a companion ".error" file, which is deleted again when no error occurred.
/// </summary>
/// <returns>An array containing only the output file name.</returns>
public override IEnumerable<string> Process()
{
    var rawInfo = new RawSampleInfoReader().ReadDescriptionFromDirectory(options.InputDirectory);

    // Re-key the description data by upper-cased sample name so it matches the row names below.
    var infoBySample = rawInfo.ToList().ToDictionary(m => m.Key.ToUpper(), m => m.Value);

    var gsmFiles = GeoUtils.GetGsmNameFileMap(options.InputDirectory);
    var sampleNames = gsmFiles.Keys.Select(k => k.ToUpper()).OrderBy(m => m).ToList();
    var columnNames = infoBySample.Values.SelectMany(d => d.Keys).Distinct().OrderBy(m => m).ToList();

    var hasError = false;
    var errorFile = options.OutputFile + ".error";

    using (var sw = new StreamWriter(options.OutputFile))
    using (var swErr = new StreamWriter(errorFile))
    {
        sw.WriteLine("Sample\t{0}", columnNames.Merge("\t"));

        foreach (var sampleName in sampleNames)
        {
            if (infoBySample.ContainsKey(sampleName))
            {
                var info = infoBySample[sampleName];
                sw.Write(sampleName);

                foreach (var columnName in columnNames)
                {
                    if (!info.ContainsKey(columnName))
                    {
                        // Keep the column position with an empty cell.
                        sw.Write("\t");
                        continue;
                    }

                    // Multiple values of one annotation share a cell, joined by " ! ".
                    sw.Write("\t{0}", info[columnName].Merge(" ! "));
                }

                sw.WriteLine();
            }
            else
            {
                // No row is written for a sample that has a file but no description.
                var error = string.Format("Cannot find {0} in {1}", sampleName, options.InputDirectory);
                swErr.WriteLine(error);
                Progress.SetMessage(error);
                hasError = true;
            }
        }
    }

    // The error file is always created above; remove it when it stayed empty.
    if (!hasError)
    {
        File.Delete(errorFile);
    }

    return new string[] { options.OutputFile };
}
/// <summary>
/// Reads the sample descriptions of a dataset directory and converts each sample that
/// has a matching entry in the GSM name/file map into a <see cref="SampleItem"/>.
/// </summary>
/// <param name="datasetDirectory">Directory of the dataset; its file-name part becomes the dataset name.</param>
/// <returns>Sample items keyed by the sample name found in the description data.</returns>
public Dictionary<string, SampleItem> ParseDataset(string datasetDirectory)
{
    // Annotation name -> mapping, restricted to mappings that have a property name.
    var columnsByAnnotation = maps
        .Where(m => !string.IsNullOrEmpty(m.PropertyName))
        .ToDictionary(m => m.AnnotationName);

    // Default values that actually carry a value.
    var defaultValues = maps.DefaultValues
        .Where(m => !string.IsNullOrEmpty(m.Value))
        .ToList();

    var fileByGsm = GeoUtils.GetGsmNameFileMap(datasetDirectory);
    var dataset = Path.GetFileName(datasetDirectory);
    var rawInfo = new RawSampleInfoReader().ReadDescriptionFromDirectory(datasetDirectory);

    var samples = new Dictionary<string, SampleItem>();
    foreach (var entry in rawInfo)
    {
        var name = entry.Key;

        // File map keys are matched against the upper-cased sample name.
        string file;
        bool hasFile = fileByGsm.TryGetValue(name.ToUpper(), out file);
        if (hasFile)
        {
            var current = new SampleItem();
            current.Dataset = dataset;
            current.Sample = name;
            current.SampleFile = file;
            samples[name] = current;

            var answers = entry.Value;

            // Copy the first value of every mapped annotation present for this sample.
            foreach (var mapping in columnsByAnnotation)
            {
                List<string> found;
                if (!answers.TryGetValue(mapping.Key, out found))
                {
                    continue;
                }

                current.Annotations[mapping.Value.PropertyName] = found.FirstOrDefault();
            }

            // Apply defaults for properties the sample did not provide.
            foreach (var fallback in defaultValues)
            {
                bool alreadySet = current.Annotations.ContainsKey(fallback.PropertyName);
                if (!alreadySet)
                {
                    current.Annotations[fallback.PropertyName] = fallback.Value;
                }
            }
        }
    }

    return samples;
}
/// <summary>
/// Exports the sample descriptions found in <c>options.InputDirectory</c> as a
/// tab-delimited matrix (samples as rows, annotation names as columns) into
/// <c>options.OutputFile</c>. Samples listed in the GSM name/file map but missing from
/// the descriptions are logged to "&lt;OutputFile&gt;.error" and to <c>Progress</c>.
/// </summary>
/// <returns>A single-element array holding the output file name.</returns>
public override IEnumerable<string> Process()
{
    var described = new RawSampleInfoReader().ReadDescriptionFromDirectory(options.InputDirectory);
    var lookup = described.ToList().ToDictionary(m => m.Key.ToUpper(), m => m.Value);

    var fileMap = GeoUtils.GetGsmNameFileMap(options.InputDirectory);

    // Rows: upper-cased file map keys in sorted order.
    var upperNames = fileMap.Keys.Select(k => k.ToUpper());
    var rows = upperNames.OrderBy(m => m).ToList();

    // Columns: every annotation name seen in any sample, de-duplicated and sorted.
    var allAnnotationNames = lookup.Values.SelectMany(d => d.Keys);
    var header = allAnnotationNames.Distinct().OrderBy(m => m).ToList();

    bool failed = false;
    var errorFile = options.OutputFile + ".error";

    using (var sw = new StreamWriter(options.OutputFile))
    using (var swErr = new StreamWriter(errorFile))
    {
        sw.WriteLine("Sample\t{0}", header.Merge("\t"));

        for (int r = 0; r < rows.Count; r++)
        {
            var row = rows[r];

            if (!lookup.ContainsKey(row))
            {
                // Report the missing sample and write no row for it.
                var error = string.Format("Cannot find {0} in {1}", row, options.InputDirectory);
                swErr.WriteLine(error);
                Progress.SetMessage(error);
                failed = true;
                continue;
            }

            var cells = lookup[row];
            sw.Write(row);

            for (int c = 0; c < header.Count; c++)
            {
                var name = header[c];
                if (cells.ContainsKey(name))
                {
                    // All values of the annotation go into one cell, separated by " ! ".
                    sw.Write("\t{0}", cells[name].Merge(" ! "));
                }
                else
                {
                    sw.Write("\t");
                }
            }

            sw.WriteLine();
        }
    }

    // An error file without content is not kept.
    if (!failed)
    {
        File.Delete(errorFile);
    }

    return new string[] { options.OutputFile };
}
/// <summary>
/// Parses the sample descriptions of a dataset directory into <paramref name="sampleMap"/>.
/// Each described sample with an entry in the GSM name/file map gets (or reuses) a
/// <see cref="BreastCancerSampleItem"/>, whose properties are set through the registered
/// converters and then through the default converters.
/// </summary>
/// <param name="datasetDirectory">Dataset directory; its file-name part is passed to newly created items.</param>
/// <param name="sampleMap">Map of sample name to item; new samples are added, existing ones are updated in place.</param>
public void ParseDataset(string datasetDirectory, Dictionary<string, BreastCancerSampleItem> sampleMap)
{
    var gsmFiles = GeoUtils.GetGsmNameFileMap(datasetDirectory);
    var datasetName = Path.GetFileName(datasetDirectory);
    var descriptions = new RawSampleInfoReader().ReadDescriptionFromDirectory(datasetDirectory);

    foreach (var sampleName in descriptions.Keys)
    {
        // The file map is queried with the lower-cased sample name; samples without a file are skipped.
        if (!gsmFiles.ContainsKey(sampleName.ToLower()))
        {
            continue;
        }

        // Reuse an item already present in the caller's map, otherwise create and register one.
        BreastCancerSampleItem item;
        if (!sampleMap.TryGetValue(sampleName, out item))
        {
            item = new BreastCancerSampleItem(datasetName, sampleName);
            sampleMap[sampleName] = item;
        }

        // Parse information: feed the first answer of every question that has a converter.
        var answersByQuestion = descriptions[sampleName];
        foreach (var question in answersByQuestion.Keys)
        {
            if (!converters.ContainsKey(question))
            {
                continue;
            }

            var converter = converters[question];
            // NOTE(review): First() throws if a question has an empty answer list — confirm the reader never produces one.
            var firstAnswer = answersByQuestion[question].First();
            converter.SetProperty(item, firstAnswer);
        }

        // Set default values.
        // NOTE(review): defaults are applied after the parsed values and unconditionally here;
        // whether SetProperty preserves an already-set value cannot be seen from this method — confirm.
        foreach (var defaultEntry in defaultConverters)
        {
            defaultEntry.Key.SetProperty(item, defaultEntry.Value);
        }
    }
}
/// <summary>
/// Rebuilds the annotation/property mapping list from the sample descriptions found in
/// <paramref name="subdir"/>. If the directory contains a "*.siformat" file, its property
/// names (after renaming TumorStage/Metastasis) and its default values are carried over
/// to the new list.
/// </summary>
/// <param name="subdir">Dataset directory to scan; its file-name part is appended to the window title.</param>
public void NewFromData(string subdir)
{
    var siformat = Directory.GetFiles(subdir, "*.siformat");
    TextFileDefinition prefile = new TextFileDefinition();
    if (siformat.Length > 0)
    {
        prefile.ReadFromFile(siformat[0]);

        // Rename the property names "TumorStage" and "Metastasis" and write the file back
        // when at least one entry was changed.
        bool bFound = false;
        foreach (var m in prefile)
        {
            // "==" is used instead of PropertyName.Equals(...) so that entries without a
            // property name (null) are skipped rather than raising NullReferenceException;
            // the carry-over loop below already treats PropertyName as possibly null/empty.
            if (m.PropertyName == "TumorStage")
            {
                m.PropertyName = "TumorStatus";
                bFound = true;
            }

            if (m.PropertyName == "Metastasis")
            {
                m.PropertyName = "MetastasisStatus";
                bFound = true;
            }
        }

        if (bFound)
        {
            prefile.WriteToFile(siformat[0]);
        }
    }

    var map = new RawSampleInfoReader().ReadDescriptionFromDirectory(subdir);

    lastDirectory = subdir;
    lastFile = String.Empty;

    // GSM names of the CEL files in the directory; only samples with such a file contribute.
    var files = new HashSet<string>(from f in CelFile.GetCelFiles(subdir, false)
                                    select GeoUtils.GetGsmName(f));

    // Annotation name -> set of all distinct values seen for it across the matching samples.
    Dictionary<string, HashSet<string>> headers = new Dictionary<string, HashSet<string>>();
    foreach (var m in map)
    {
        // The file set is queried with the lower-cased sample name.
        var gsm = m.Key.ToLower();
        if (!files.Contains(gsm))
        {
            continue;
        }

        foreach (var entry in m.Value)
        {
            // Single lookup instead of ContainsKey + indexer.
            HashSet<string> values;
            if (!headers.TryGetValue(entry.Key, out values))
            {
                values = new HashSet<string>();
                headers[entry.Key] = values;
            }

            values.UnionWith(entry.Value);
        }
    }

    ClearDataSource();

    // One item per annotation, with its sorted distinct values joined by ";" as the example text.
    items.Clear();
    foreach (var part in headers)
    {
        items.Add(new FileDefinitionItem()
        {
            AnnotationName = part.Key,
            Example = (from v in part.Value orderby v select v).Merge(";")
        });
    }

    // Carry over property names from the previous definition for annotations that still exist.
    foreach (var olditem in prefile)
    {
        if (string.IsNullOrEmpty(olditem.PropertyName))
        {
            continue;
        }

        var newitem = items.Find(m => m.AnnotationName.Equals(olditem.AnnotationName));
        if (newitem != null)
        {
            newitem.PropertyName = olditem.PropertyName;
        }
    }

    // Carry over default values whose property is among the known property names.
    items.DefaultValues.Clear();
    foreach (var olddv in prefile.DefaultValues)
    {
        if (propertyNames.Contains(olddv.PropertyName))
        {
            items.DefaultValues.Add(new DefaultValue()
            {
                PropertyName = olddv.PropertyName,
                Value = olddv.Value
            });
        }
    }

    items.Sort((m1, m2) => m1.AnnotationName.CompareTo(m2.AnnotationName));

    UpdateDataSource();

    this.Text = title + " - " + Path.GetFileName(subdir);
}
/// <summary>
/// Click handler of the test button: lets the user pick a dataset directory, parses it
/// with the mapping currently defined in the form, and shows the resulting samples
/// (sorted by sample name) in a modal sample information form.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnTest_Click(object sender, EventArgs e)
{
    // Nothing to do when the directory dialog was cancelled.
    if (dlgOpenDirectory.ShowDialog() != System.Windows.Forms.DialogResult.OK)
    {
        return;
    }

    FormToDefinition();

    var parser = new PropertyMappingParser(items);
    var map = new Dictionary<string, BreastCancerSampleItem>();
    parser.ParseDataset(dlgOpenDirectory.SelectedPath, map);

    var lst = (from v in map.Values orderby v.Sample select v).ToList();

    // A form shown with ShowDialog() is not disposed when it is closed, so it is
    // disposed explicitly here to release its window resources.
    using (var form = new BreastCancerSampleInformationForm())
    {
        var reader = new RawSampleInfoReader();
        form.SetRawInfoReader(reader, Path.GetFileNameWithoutExtension(dlgOpenDirectory.SelectedPath));
        form.SetDataSource(lst);
        form.ShowDialog();
    }
}
/// <summary>
/// Rebuilds the annotation/property mapping list from the sample descriptions in
/// <paramref name="subdir"/>, re-applying property names from an existing "*.siformat"
/// file of that directory, and points the open/save dialogs at the directory.
/// Any exception is shown to the user in an error message box instead of propagating.
/// </summary>
/// <param name="subdir">Dataset directory to scan.</param>
public void NewFromData(string subdir)
{
    try
    {
        // Load the previous definition, if the directory has one.
        var formatFiles = Directory.GetFiles(subdir, "*.siformat");
        TextFileDefinition previous = new TextFileDefinition();
        if (formatFiles.Length > 0)
        {
            previous.ReadFromFile(formatFiles[0]);
        }

        var descriptions = new RawSampleInfoReader().ReadDescriptionFromDirectory(subdir);

        LastDirectory = subdir;
        lastFile = String.Empty;

        // GSM names of the CEL files present in the directory.
        var gsmNames = new HashSet<string>(
            CelFile.GetCelFiles(subdir, false).Select(f => GeoUtils.GetGsmName(f)));

        // Annotation name -> distinct values collected over all samples that have a CEL file.
        var valuesByAnnotation = new Dictionary<string, HashSet<string>>();
        foreach (var sample in descriptions)
        {
            // The file set is queried with the upper-cased sample name.
            if (!gsmNames.Contains(sample.Key.ToUpper()))
            {
                continue;
            }

            foreach (var annotation in sample.Value)
            {
                HashSet<string> collected;
                if (!valuesByAnnotation.TryGetValue(annotation.Key, out collected))
                {
                    collected = new HashSet<string>();
                    valuesByAnnotation[annotation.Key] = collected;
                }

                collected.UnionWith(annotation.Value);
            }
        }

        ClearDataSource();

        // One item per annotation; the example text is the sorted values joined by ";".
        items.Clear();
        foreach (var pair in valuesByAnnotation)
        {
            var item = new FileDefinitionItem()
            {
                AnnotationName = pair.Key,
                Example = pair.Value.OrderBy(v => v).Merge(";")
            };
            items.Add(item);
        }

        // Re-apply property names from the previous definition where the annotation still exists.
        foreach (var old in previous)
        {
            if (string.IsNullOrEmpty(old.PropertyName))
            {
                continue;
            }

            var match = items.Find(m => m.AnnotationName.Equals(old.AnnotationName));
            if (match == null)
            {
                continue;
            }

            match.PropertyName = old.PropertyName;
        }

        items.DefaultValues.Clear();

        items.Sort((m1, m2) => m1.AnnotationName.CompareTo(m2.AnnotationName));

        UpdateDataSource();

        var directoryName = Path.GetFileName(subdir);
        label1.Text = "Annotation/property mapping - " + directoryName;
        dlgOpenDirectory.SelectedPath = subdir;
        dlgSaveFormatFile.FileName = Path.Combine(subdir, Path.GetFileName(subdir) + ".siformat");
    }
    catch (Exception ex)
    {
        MessageBox.Show(this, ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
/// <summary>
/// Creates a fresh annotation/property mapping from the data in <paramref name="subdir"/>:
/// collects every annotation (with its distinct values as an example) from the samples
/// that have a CEL file, restores property names from a "*.siformat" file found in the
/// directory, and updates the label and dialog paths. Failures are reported in a message box.
/// </summary>
/// <param name="subdir">Directory containing the dataset.</param>
public void NewFromData(string subdir)
{
    try
    {
        TextFileDefinition oldDefinition = new TextFileDefinition();
        var existing = Directory.GetFiles(subdir, "*.siformat");
        if (existing.Length > 0)
        {
            // Only the first definition file found is read.
            oldDefinition.ReadFromFile(existing[0]);
        }

        var sampleInfo = new RawSampleInfoReader().ReadDescriptionFromDirectory(subdir);

        LastDirectory = subdir;
        lastFile = String.Empty;

        var celSamples = new HashSet<string>(from celFile in CelFile.GetCelFiles(subdir, false)
                                             select GeoUtils.GetGsmName(celFile));

        // Gather, per annotation name, the union of values over all samples with a CEL file.
        var examples = new Dictionary<string, HashSet<string>>();
        foreach (var info in sampleInfo)
        {
            // Sample names are upper-cased before being matched against the CEL file names.
            var upperName = info.Key.ToUpper();
            if (celSamples.Contains(upperName))
            {
                foreach (var field in info.Value)
                {
                    if (!examples.ContainsKey(field.Key))
                    {
                        examples.Add(field.Key, new HashSet<string>());
                    }

                    examples[field.Key].UnionWith(field.Value);
                }
            }
        }

        ClearDataSource();
        items.Clear();

        foreach (var example in examples)
        {
            // Example text: the annotation's values in sorted order, separated by ";".
            var sortedValues = from v in example.Value orderby v select v;
            items.Add(new FileDefinitionItem()
            {
                AnnotationName = example.Key,
                Example = sortedValues.Merge(";")
            });
        }

        // Restore the property name of every annotation that was mapped in the old definition.
        foreach (var oldItem in oldDefinition)
        {
            if (!string.IsNullOrEmpty(oldItem.PropertyName))
            {
                var current = items.Find(m => m.AnnotationName.Equals(oldItem.AnnotationName));
                if (current != null)
                {
                    current.PropertyName = oldItem.PropertyName;
                }
            }
        }

        items.DefaultValues.Clear();
        items.Sort((m1, m2) => m1.AnnotationName.CompareTo(m2.AnnotationName));
        UpdateDataSource();

        label1.Text = "Annotation/property mapping - " + Path.GetFileName(subdir);
        dlgOpenDirectory.SelectedPath = subdir;

        // Suggested save location: "<subdir>/<directory name>.siformat".
        var suggestedName = Path.GetFileName(subdir) + ".siformat";
        dlgSaveFormatFile.FileName = Path.Combine(subdir, suggestedName);
    }
    catch (Exception ex)
    {
        MessageBox.Show(this, ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}