/// <summary>
/// Imports a FASTQ file (".fastq", ".fq", ".fastq.gz" or ".fq.gz").
/// The sequence letters are appended to a <c>SeqBlob</c> named after the file,
/// and a free table with one row per read (blob offset, length, header) is saved
/// as a dataset and opened in the current folder.
/// </summary>
/// <param name="fileName">Path of the file to import.</param>
/// <returns>
/// <c>false</c> if the file extension is not recognized or the dataset could not be
/// saved; <c>true</c> otherwise.
/// </returns>
public bool ImportFile(string fileName) {
    IVisuMap app = GeneticAnalysis.App.ScriptApp;
    var fn = fileName.ToLower();
    if (!(fn.EndsWith(".fastq") || fn.EndsWith(".fq") || fn.EndsWith(".fq.gz") || fn.EndsWith(".fastq.gz"))) {
        return false;
    }

    FileInfo fInfo = new FileInfo(fileName);
    string shortName = fInfo.Name.Substring(0, fInfo.Name.LastIndexOf('.'));
    List<string> headList = new List<string>();
    List<long> seqIdx = new List<long>();
    List<long> seqLen = new List<long>();
    var seqBlob = new SeqBlob(shortName);

    using (StreamReader trH = new StreamReader(fileName)) {
        // For gzip-compressed input the text is read through a decompressing stream
        // layered on top of the file stream; otherwise the file reader is used directly.
        StreamReader tr = null;
        if (fn.EndsWith(".gz")) {
            GZipInputStream gzStream = new GZipInputStream(trH.BaseStream);
            tr = new StreamReader(gzStream);
        } else {
            tr = trH;
        }

        int unknowLetters = 0;
        List<char> unknownList = new List<char>();   // at most the first 100 unknown letters, for the report.

        while (!tr.EndOfStream) {
            string line = null;
            try {
                line = tr.ReadLine();
            } catch (ICSharpCode.SharpZipLib.SharpZipBaseException) {
                // Stop reading on a decompression error and keep what has been read so far.
                break;
            }
            if (line == null) {
                break;
            }
            if (!line.StartsWith("@")) {
                continue;
            }

            // Header line: keep the text between '@' and the first space.
            string sHeader = line.Split(' ')[0];
            sHeader = sHeader.Substring(1);
            headList.Add(sHeader);
            long sBegin = seqBlob.Length;
            seqIdx.Add(sBegin);

            // Sequence line. A file truncated right after a header yields null here;
            // treat it as an empty sequence so that the three lists stay aligned.
            line = tr.ReadLine() ?? string.Empty;
            foreach (char c in line) {
                char cc = char.ToUpper(c);
                int k = ACGT.IndexOf(cc);
                if (k >= 0) {
                    seqBlob.AddLetter(k);
                } else if (!char.IsWhiteSpace(c)) {
                    seqBlob.AddLetter(SeqBlob.UKNOWN_LETTER);   // unknown letter.
                    if (unknownList.Count < 100) {
                        unknownList.Add(c);
                    }
                    unknowLetters++;
                }
            }
            seqLen.Add(seqBlob.Length - sBegin);   // add the length

            // Ignore the two lines that follow the sequence (separator and quality indicators).
            tr.ReadLine();
            tr.ReadLine();

            if ((headList.Count % 1000) == 0) {
                // Progress feedback in the application title.
                app.Title = "N: " + headList.Count;
                Application.DoEvents();
            }
        }

        if (unknowLetters > 0) {
            StringBuilder sb = new StringBuilder();
            foreach (char c in unknownList) {
                sb.Append(c);
                sb.Append(',');
            }
            if (unknownList.Count == 100) {
                sb.Append("...");
            }
            MessageBox.Show("Converted " + unknowLetters + " unknown/meta letters to 'N': " + sb.ToString());
        }
    }

    // The folder's number precision is switched to "g10" while the table is built and
    // saved. The finally block restores the previous setting on every exit path,
    // including a failed save.
    string oldPresion = app.Folder.NumberPrecision;
    app.Folder.NumberPrecision = "g10";
    try {
        IFreeTable table = app.New.FreeTable();
        table.AddColumn("SeqIdx", true);
        table.AddColumn("SeqLen", true);
        table.AddColumn("Header", false);
        table.ColumnSpecList[0].Name = seqBlob.Name;
        table.AddRows("Id", headList.Count);
        for (int row = 0; row < headList.Count; row++) {
            var R = table.Matrix[row];
            R[0] = seqIdx[row].ToString();
            R[1] = seqLen[row].ToString();
            R[2] = headList[row];
        }
        seqBlob.Dispose();

        string dsName = table.SaveAsDataset(shortName, "");
        if (dsName == null) {
            MessageBox.Show("Cannot import the data: " + app.LastError);
            return false;
        }
        app.Folder.OpenDataset(dsName);
        return true;
    } finally {
        app.Folder.NumberPrecision = oldPresion;
    }
}
/// <summary>
/// Imports an ".fcs" file: reads the text segment into a key/value table, reads the
/// data segment into a number table, optionally applies spill-over compensation and
/// a logicle transformation, saves the result as a dataset and creates an initial map
/// (FSC-A/SSC-A scatter plot if both columns exist, otherwise a PCA view).
/// </summary>
/// <param name="fileName">Path of the file to import.</param>
/// <returns>
/// <c>false</c> if the file name does not end with ".fcs" or the dataset could not be
/// saved; <c>true</c> otherwise.
/// </returns>
public bool ImportFile0(string fileName) {
    IVisuMap app = CustomImporter.App.ScriptApp;
    if (!fileName.ToLower().EndsWith(".fcs")) {
        return false;   // Let other import handle it.
    }

    // Numbers inside the file are machine-formatted; parse them independently of the
    // user's regional settings.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    INumberTable dataTable = null;
    IFreeTable textTable = null;
    bool compensated = false;
    bool logScaled = false;

    using (StreamReader sr = new StreamReader(fileName)) {
        //
        // Read the text segment. The first 42 characters hold four 8-character
        // integer fields at offsets 10, 18, 26 and 34: begin/end of the text segment
        // and begin/end of the data segment.
        //
        char[] header = new char[42];
        sr.ReadBlock(header, 0, header.Length);
        int textBegin = int.Parse(new string(header, 10, 8));
        int textEnd = int.Parse(new string(header, 18, 8));
        int beginData = int.Parse(new string(header, 26, 8));
        int endData = int.Parse(new string(header, 34, 8));

        // 'line' is indexed by file offset; the header part is left unfilled.
        char[] line = new char[textEnd + 4];
        sr.ReadBlock(line, header.Length, line.Length - header.Length);
        string textSeg = new string(line, textBegin + 1, textEnd - textBegin);
        char delimiter = line[textBegin];   // the first character of the text segment is the field delimiter.

        // The text segment is a flat list "key, value, key, value, ...".
        textTable = app.New.FreeTable();
        textTable.AddColumn("Value", false);
        string[] textFields = textSeg.Split(delimiter);
        textTable.AddRows("P", textFields.Length / 2);
        IList<IRowSpec> rowSpecList = textTable.RowSpecList;
        for (int row = 0; row < textTable.Rows; row++) {
            rowSpecList[row].Id = textFields[2 * row];
            textTable.Matrix[row][0] = textFields[2 * row + 1];
        }

        //
        // Read in the data segment.
        //
        fcsInfo = new FcsInfo(textTable, header);

        // When the header holds 0 for a data offset, fall back to the corresponding
        // keyword of the text segment. Row index 0 is a valid hit, hence ">= 0".
        if (beginData == 0) {
            int beginIdx = textTable.IndexOfRow("$BEGINDATA");
            if (beginIdx >= 0) {
                beginData = int.Parse(textTable.Matrix[beginIdx][0]);
            }
        }
        if (endData == 0) {
            int endIdx = textTable.IndexOfRow("$ENDDATA");
            if (endIdx >= 0) {
                endData = int.Parse(textTable.Matrix[endIdx][0]);
            }
        }

        dataTable = app.New.NumberTable(fcsInfo.Rows, fcsInfo.Columns);
        using (BinaryReader br = new BinaryReader(sr.BaseStream)) {
            br.BaseStream.Seek(beginData, SeekOrigin.Begin);
            Byte[] buf4 = new byte[4];
            Byte[] buf8 = new byte[8];
            int bitOffset = 0;
            for (int row = 0; row < fcsInfo.Rows; row++) {
                for (int col = 0; col < fcsInfo.Columns; col++) {
                    ColumnInfo ci = fcsInfo.ColumnInfo[col];
                    Byte[] data = ReadBits(br, ci.Bits, ref bitOffset);
                    if (data.Length < ci.Bytes) {
                        row = fcsInfo.Rows;   // enforce premature loop-end.
                        break;
                    }

                    Byte[] buf = (fcsInfo.DataType == FcsInfo.DataTypes.Double) ? buf8 : buf4;
                    Array.Clear(buf, 0, buf.Length);
                    Array.Copy(data, 0, buf, 0, ci.Bytes);
                    if (!fcsInfo.BigEndian) {
                        // NOTE(review): the bytes are reversed when the file is NOT flagged
                        // big-endian; this presumably depends on the byte order in which
                        // ReadBits returns its data - confirm against ReadBits.
                        Array.Reverse(buf, 0, ci.Bytes);
                    }

                    switch (fcsInfo.DataType) {
                        case FcsInfo.DataTypes.Integer:
                            dataTable.Matrix[row][col] = (BitConverter.ToUInt32(buf, 0) & ci.RangeMask);
                            break;

                        case FcsInfo.DataTypes.Float:
                            dataTable.Matrix[row][col] = BitConverter.ToSingle(buf, 0);
                            break;

                        case FcsInfo.DataTypes.Double:
                            dataTable.Matrix[row][col] = BitConverter.ToDouble(buf, 0);
                            break;
                    }
                }
            }
        }
    }

    // Post processing: column ids/names, and scaling of the time column by $TIMESTEP.
    for (int col = 0; col < fcsInfo.Columns; col++) {
        IColumnSpec cs = dataTable.ColumnSpecList[col];
        ColumnInfo cInfo = fcsInfo.ColumnInfo[col];
        cs.Id = cInfo.ShortName;
        cs.Name = cInfo.Name;
        if ((cs.Id == "TIME") || (cs.Id == "TIME1")) {
            int timeStepIdx = textTable.IndexOfRow("$TIMESTEP");
            if (timeStepIdx >= 0) {
                double timeStep = double.Parse(textTable.Matrix[timeStepIdx][0], invariant);
                for (int row = 0; row < fcsInfo.Rows; row++) {
                    dataTable.Matrix[row][col] *= timeStep;
                }
            }
        }
    }

    IList<IRowSpec> rSpecList = dataTable.RowSpecList;
    for (int row = 0; row < fcsInfo.Rows; row++) {
        rSpecList[row].Id = row.ToString();
    }

    FileInfo fInfo = new FileInfo(fileName);
    string shortName = fInfo.Name.Substring(0, fInfo.Name.LastIndexOf('.'));

    if (saveMetaInfo >= 1) {
        SaveParameterTable(textTable, shortName);
        if (saveMetaInfo >= 2) {
            textTable.SaveAsDataset(shortName + " (Text Seg)", "Text segement of data table " + shortName);
        }
    }

    if (autoCompensation) {
        // Within this block an exception with an empty message means "skip the
        // compensation silently"; any other message is shown to the user.
        try {
            // Look for the spill-over matrix keyword ("spill" / "spillover", with or
            // without a leading '$', case-insensitive).
            string sMatrix = null;
            for (int i = 0; i < textTable.Rows; i++) {
                string id = textTable.RowSpecList[i].Id.ToLower();
                if (id.StartsWith("$")) {
                    id = id.Substring(1);
                }
                if ((id == "spill") || (id == "spillover")) {
                    sMatrix = textTable.Matrix[i][0];
                    break;
                }
            }
            if (sMatrix == null) {
                throw new Exception("");   // silently ignore compensation as no compensation matrix available.
            }

            // Layout: n, name_1..name_n, followed by n*n matrix values.
            string[] fs = sMatrix.Split(',');
            int dimension = int.Parse(fs[0]);
            if (fs.Length != (dimension * dimension + dimension + 1)) {
                throw new Exception("Invalid spill over matrix.");
            }

            INumberTable cMatrix = app.New.NumberTable(dimension, dimension);
            List<string> parList = fs.Skip(1).Take(dimension).ToList();
            int idx;
            if (parList.All(p => int.TryParse(p, out idx))) {
                // The columns are specified by a list of indexes. We convert them here to ids.
                for (int i = 0; i < parList.Count; i++) {
                    parList[i] = dataTable.ColumnSpecList[int.Parse(parList[i]) - 1].Id;   // index starts with 1 !
                }
            }
            for (int col = 0; col < cMatrix.Columns; col++) {
                cMatrix.ColumnSpecList[col].Id = parList[col];
            }

            int fsIdx = dimension + 1;
            for (int row = 0; row < cMatrix.Rows; row++) {
                for (int col = 0; col < cMatrix.Columns; col++) {
                    cMatrix.Matrix[row][col] = double.Parse(fs[fsIdx++], invariant);
                }
            }

            var cData = dataTable.SelectColumnsById(parList);
            if (cData.Columns != parList.Count) {
                if (dataTable.ColumnSpecList.Count(cl => cl.Id.Equals("<" + parList[0] + ">")) == 1) {
                    // silently ignore already compensated data.
                    throw new Exception("");
                } else {
                    throw new Exception("Invalid spill over matrix: unknown names.");
                }
            }

            // Perform the compensation with the inverse matrix of the spill-over matrix.
            var math = CustomImporter.App.GetMathAdaptor();
            var m = math.InvertMatrix((double[][])cMatrix.Matrix);
            for (int row = 0; row < cMatrix.Rows; row++) {
                cMatrix.Matrix[row] = m[row];
            }
            cData = cData.Multiply(cMatrix);
            cData.CopyValuesTo(dataTable);
            compensated = true;
        } catch (Exception ex) {
            if (ex.Message != "") {
                MessageBox.Show("Cannot perform compensation: " + ex.Message);
            }
        }
    }

    if (logTransform) {
        double[][] m = (double[][])dataTable.Matrix;

        // Logicle parameters; the defaults can be overridden by an application
        // property holding three ';'-separated numbers.
        double T = 262144;
        double W = 1.0;
        double M = 4.5;
        string[] settings = app.GetProperty(
            "CustomImporter.Logicle.Settings", "262144; 1.0; 4.5").Split(';');
        if (settings.Length == 3) {
            double.TryParse(settings[0], out T);
            double.TryParse(settings[1], out W);
            double.TryParse(settings[2], out M);
        }

        var fastLogicle = new VisuMap.DataCleansing.FastLogicle(T, W, M);
        double maxVal = fastLogicle.MaxValue;
        double minVal = fastLogicle.MinValue;
        for (int col = 0; col < fcsInfo.Columns; col++) {
            if (fcsInfo.ColumnInfo[col].LogTrans) {
                for (int row = 0; row < fcsInfo.Rows; row++) {
                    // Clamp into the range supported by the transformation before scaling.
                    m[row][col] = M * fastLogicle.scale(Math.Min(maxVal, Math.Max(minVal, m[row][col])));
                }
                logScaled = true;
            }
        }
    }

    string msg = "Dataset imported from " + fileName + ". Version: " + fcsInfo.version;
    if (compensated) {
        msg += ", compensated";
    }
    if (logScaled) {
        msg += ", log-scaled";
    }

    string dsName = dataTable.SaveAsDataset(shortName, msg);
    if (dsName == null) {
        // Same failure handling as the other importers: report and give up instead of
        // continuing with a dataset that does not exist.
        MessageBox.Show("Cannot import the data: " + app.LastError);
        fcsInfo = null;
        return false;
    }
    app.Folder.OpenDataset(dsName);
    INumberTable nTable = app.GetNumberTable();

    // Locate the first "FSC-A" and "SSC-A" columns (-1 if absent).
    int fsc = -1;
    int ssc = -1;
    for (int col = 0; col < nTable.Columns; col++) {
        string colId = nTable.ColumnSpecList[col].Id;
        if ((fsc < 0) && (colId == "FSC-A")) {
            fsc = col;
        }
        if ((ssc < 0) && (colId == "SSC-A")) {
            ssc = col;
        }
    }

    if ((fsc >= 0) && (ssc >= 0)) {
        var xy = app.New.XyPlot(nTable);
        xy.Show();
        xy.XAxisIndex = fsc;
        xy.YAxisIndex = ssc;
        xy.AutoScaling = true;
        xy.Redraw();
        xy.CaptureMap();
        xy.Close();
        app.Map.Name = "FSC/SSC";
    } else {
        // We just create a simple PCA view.
        IPcaView pca = nTable.ShowPcaView();
        pca.ResetView();
        pca.CaptureMap();
        app.Map.Redraw();
        pca.Close();
        app.Map.Name = "PCA-All";
    }

    app.Map.GlyphType = glyphName;
    app.Map.Redraw();
    if (showResult) {
        app.New.SpectrumBand(nTable).Show();
    }

    fcsInfo = null;
    return true;
}
/// <summary>
/// Imports an ".arff" file: the "@attribute" lines define the columns, every line
/// that is neither empty nor starts with '@' or '%' becomes a table row. The table is
/// saved as a dataset, opened, and optionally shown through a PCA view.
/// </summary>
/// <param name="fileName">Path of the file to import.</param>
/// <returns>
/// <c>false</c> if the file name does not end with ".arff"; <c>true</c> otherwise
/// (also when saving the dataset failed and an error message was shown).
/// </returns>
public bool ImportFile(string fileName) {
    IVisuMap app = CustomImporter.App.ScriptApp;

    // This importer object only recognizes files with name ending with .arff
    if (!fileName.ToLower().EndsWith(".arff")) {
        return false;
    }

    List<string> columnName = new List<string>();
    List<string> columnDesc = new List<string>();
    List<bool> isNumber = new List<bool>();
    List<string> dataLines = new List<string>();

    // Single pass over the file: collect the attribute declarations and keep the data
    // lines. This avoids rewinding the reader, and the using block guarantees that the
    // file is closed even when parsing throws.
    using (StreamReader tr = new StreamReader(fileName)) {
        string line;
        while ((line = tr.ReadLine()) != null) {
            if (line.StartsWith("@")) {
                string[] fs = line.Split(' ');
                if (fs[0].ToLower() == "@attribute") {
                    string[] fields = line.Split(new char[] { ' ', '\t', '\"' }, StringSplitOptions.RemoveEmptyEntries);
                    columnName.Add(fields[1]);

                    // With four or more fields, everything between the name and the
                    // last field is used as the column description.
                    if (fields.Length >= 4) {
                        StringBuilder sb = new StringBuilder();
                        for (int i = 2; i < (fields.Length - 1); i++) {
                            if (i != 2) {
                                sb.Append(" ");
                            }
                            sb.Append(fields[i]);
                        }
                        columnDesc.Add(sb.ToString());
                    } else {
                        columnDesc.Add("");
                    }

                    // The last field is the type; "numeric" and "real" are numeric columns.
                    string typeStr = fields[fields.Length - 1].ToLower();
                    isNumber.Add((typeStr == "numeric") || (typeStr == "real"));
                }
                // Other '@' lines carry no information needed here.
            } else if (line.StartsWith("%") || string.IsNullOrEmpty(line)) {
                // Comment or empty line.
            } else {
                dataLines.Add(line);
            }
        }
    }

    //
    // Load the file into a table and save the table as
    // a dataset into the current folder.
    //
    IFreeTable table = app.New.FreeTable();
    Dictionary<string, int> uniqId = new Dictionary<string, int>();
    for (int col = 0; col < columnName.Count; col++) {
        // Repeated attribute names get a "_<n>" suffix to keep the column ids unique.
        string id = columnName[col];
        int seen;
        if (uniqId.TryGetValue(id, out seen)) {
            seen++;
            uniqId[id] = seen;
            id += "_" + seen;
        } else {
            uniqId[id] = 0;
        }
        table.AddColumn(id, columnDesc[col], isNumber[col]);
    }

    table.AddRows("r", dataLines.Count);
    for (int row = 0; row < dataLines.Count; row++) {
        string[] fs = dataLines[row].Split(',', '\t');
        IList<string> r = table.Matrix[row];
        for (int col = 0; col < table.Columns; col++) {
            // "?" marks a missing value; a row with too few fields is handled the same
            // way instead of failing with an index error.
            if ((col >= fs.Length) || fs[col].Equals("?")) {
                r[col] = app.MissingValueReplacement.ToString();
            } else {
                r[col] = fs[col];
            }
        }
    }

    // Set row type to indicate the class attributes.
    int classCol = table.IndexOfColumn("class");
    if ((classCol >= 0) && (table.ColumnSpecList[classCol].DataType == 'e')) {
        Dictionary<string, short> uniType = new Dictionary<string, short>();
        IList<IRowSpec> rsList = table.RowSpecList;
        for (int rw = 0; rw < table.Rows; rw++) {
            string sClass = table.Matrix[rw][classCol];
            short typeIdx;
            if (!uniType.TryGetValue(sClass, out typeIdx)) {
                // Types are numbered in order of first appearance.
                typeIdx = (short)uniType.Count;
                uniType.Add(sClass, typeIdx);
            }
            rsList[rw].Type = typeIdx;
        }
    }

    FileInfo fInfo = new FileInfo(fileName);
    string shortName = fInfo.Name.Substring(0, fInfo.Name.LastIndexOf('.'));
    string dsName = table.SaveAsDataset(shortName, "Dataset imported from ARFF file.");
    if (dsName == null) {
        MessageBox.Show("Cannot import the data: " + app.LastError);
    } else {
        app.Folder.OpenDataset(dsName);
        if (app.GetProperty("CustomImporter.Arff.DoPca", "1").Equals("1")) {
            IPcaView pca = app.New.PcaView();
            pca.Show();
            pca.ResetView();
            pca.CaptureMap();
            app.Map.Redraw();
            pca.Close();
        }
    }
    return true;
}
/// <summary>
/// Imports an ".sdf" or ".mol" file. Only the first molecule block is read: each atom
/// becomes a table row with its X/Y/Z coordinates plus one column per atom holding
/// the bond value to that atom. After saving and opening the dataset the bodies are
/// positioned on the map, centered and scaled to 85% of the smaller map side.
/// </summary>
/// <param name="fileName">Path of the file to import.</param>
/// <returns>
/// <c>false</c> if the extension is not recognized or the dataset could not be saved;
/// <c>true</c> otherwise (also when loading failed with an exception and an error
/// message was shown).
/// </returns>
public bool ImportFile(string fileName) {
    IVisuMap app = CustomImporter.App.ScriptApp;
    string fn = fileName.ToLower();
    if (!(fn.EndsWith(".sdf") || fn.EndsWith(".mol"))) {
        return false;   // Let other import handle it.
    }

    char[] sep = new char[] { ' ', '\t' };   // field separators.

    //
    // Frequently used atoms will have fixed body types.
    //
    Dictionary<string, short> typeNames = new Dictionary<string, short>();   // used to determine body types.
    short atomIdx = 0;
    typeNames["C"] = atomIdx++;
    typeNames["O"] = atomIdx++;
    typeNames["H"] = atomIdx++;
    typeNames["N"] = atomIdx++;
    typeNames["S"] = atomIdx++;
    typeNames["P"] = atomIdx++;
    typeNames["F"] = atomIdx++;
    typeNames["Na"] = atomIdx++;
    typeNames["Cl"] = atomIdx++;
    typeNames["Br"] = atomIdx++;

    try {
        using (StreamReader tr = new StreamReader(fileName)) {
            // Three header lines, then the counts line (number of atoms, number of bonds).
            string header = tr.ReadLine();
            string cmmt = tr.ReadLine() + ";" + tr.ReadLine();
            // NOTE(review): fields are split on whitespace; if the file uses fixed-width
            // count/bond fields, values of 100 or more may run together - confirm
            // against the format specification.
            string[] info = tr.ReadLine().Split(sep, StringSplitOptions.RemoveEmptyEntries);
            int atoms = int.Parse(info[0]);
            int bonds = int.Parse(info[1]);

            IFreeTable table = app.New.FreeTable();
            table.AddColumn("X", true);
            table.AddColumn("Y", true);
            table.AddColumn("Z", true);
            for (int i = 0; i < atoms; i++) {
                table.AddColumn("A" + i, true);
            }
            table.AddRows("A", atoms);

            // Atom block: x, y, z, symbol. Row 'row' and column '3 + row' describe the same atom.
            bool is2D = true;
            for (int row = 0; row < atoms; row++) {
                string[] fs = tr.ReadLine().Split(sep, StringSplitOptions.RemoveEmptyEntries);
                string symbol = fs[3];
                table.RowSpecList[row].Id = table.ColumnSpecList[3 + row].Id = symbol + "." + (1 + row);
                table.RowSpecList[row].Name = table.ColumnSpecList[3 + row].Name = symbol;

                short bodyType;
                if (!typeNames.TryGetValue(symbol, out bodyType)) {
                    // Atoms outside the predefined list get the next free type index.
                    bodyType = atomIdx++;
                    typeNames[symbol] = bodyType;
                }
                table.RowSpecList[row].Type = table.ColumnSpecList[row + 3].Group = bodyType;

                table.Matrix[row][0] = fs[0];
                table.Matrix[row][1] = fs[1];
                table.Matrix[row][2] = fs[2];

                // Coordinates are machine-formatted; parse independently of regional settings.
                if (double.Parse(fs[2], System.Globalization.CultureInfo.InvariantCulture) != 0) {
                    is2D = false;
                }
            }

            // Bond block: atom1, atom2 (1-based), bond value; stored symmetrically.
            for (int j = 0; j < bonds; j++) {
                string[] fs = tr.ReadLine().Split(sep, StringSplitOptions.RemoveEmptyEntries);
                int atom1 = int.Parse(fs[0]) - 1;
                int atom2 = int.Parse(fs[1]) - 1;
                table.Matrix[atom1][atom2 + 3] = table.Matrix[atom2][atom1 + 3] = fs[2];
            }

            FileInfo fInfo = new FileInfo(fileName);
            string shortName = fInfo.Name.Substring(0, fInfo.Name.LastIndexOf('.'));
            string dsName = table.SaveAsDataset(shortName, header + "; " + cmmt);
            if (dsName == null) {
                MessageBox.Show("Cannot import the data: " + app.LastError);
                return false;
            }

            app.Folder.OpenDataset(dsName);
            double cX = app.Map.Width / 2;
            double cY = app.Map.Height / 2;
            app.Map.Depth = is2D ? 0 : Math.Max(app.Map.Width, app.Map.Height);
            double cZ = app.Map.Depth / 2;

            // Bounding box of the atom coordinates.
            INumberTable numTable = app.Dataset.GetNumberTable();
            double minX = double.MaxValue;
            double minY = double.MaxValue;
            double minZ = double.MaxValue;
            double maxX = double.MinValue;
            double maxY = double.MinValue;
            double maxZ = double.MinValue;
            for (int row = 0; row < numTable.Rows; row++) {
                IList<double> R = numTable.Matrix[row];
                minX = Math.Min(minX, R[0]);
                minY = Math.Min(minY, R[1]);
                minZ = Math.Min(minZ, R[2]);
                maxX = Math.Max(maxX, R[0]);
                maxY = Math.Max(maxY, R[1]);
                maxZ = Math.Max(maxZ, R[2]);
            }
            double cx = (maxX + minX) / 2;
            double cy = (maxY + minY) / 2;
            double cz = (maxZ + minZ) / 2;
            double mSize = Math.Max(maxX - minX, Math.Max(maxY - minY, maxZ - minZ));

            // A zero extent (single atom, or all atoms at one point) would make the
            // factor infinite and the body coordinates NaN; keep the coordinates
            // unscaled in that case so the bodies end up at the map center.
            double factor = (mSize > 0)
                ? Math.Min(app.Map.Width, app.Map.Height) * 0.85 / mSize
                : 1.0;

            for (int row = 0; row < numTable.Rows; row++) {
                IBody body = app.Dataset.BodyList[row];
                IList<double> R = numTable.Matrix[row];
                body.ShowName = true;
                body.X = (R[0] - cx) * factor + cX;
                body.Y = (R[1] - cy) * factor + cY;
                body.Z = (R[2] - cz) * factor + cZ;
            }

            // The following stmt will cause the window to refresh.
            app.Map.GlyphType = "Colored Glyphs";
            app.Map.MapType = is2D ? "Rectangle" : "Cube";
        }
    } catch (Exception ex) {
        MessageBox.Show("Failed to load SDF/MOL file: " + ex.Message);
    }
    return true;
}