Beispiel #1
0
        /// <summary>
        /// Imports a FASTQ file (plain or gzip-compressed) as a dataset with one row per read.
        /// </summary>
        /// <remarks>
        /// A record is read as four lines: a header starting with "@", the sequence line, and
        /// two further lines that are skipped. Sequence letters are appended to a
        /// <c>SeqBlob</c>; the saved table stores, for every read, its start index in the
        /// blob, its length and its header (the text up to the first space, without the "@").
        /// </remarks>
        /// <param name="fileName">Path of the file to import.</param>
        /// <returns>
        /// <c>false</c> if the file name does not end with .fastq/.fq (optionally followed by
        /// .gz) or if the dataset could not be saved; <c>true</c> otherwise.
        /// </returns>
        public bool ImportFile(string fileName)
        {
            IVisuMap app = GeneticAnalysis.App.ScriptApp;
            var      fn  = fileName.ToLower();

            if (!(
                    fn.EndsWith(".fastq") ||
                    fn.EndsWith(".fq") ||
                    fn.EndsWith(".fq.gz") ||
                    fn.EndsWith(".fastq.gz")
                    ))
            {
                return(false);
            }

            FileInfo      fInfo     = new FileInfo(fileName);
            string        shortName = fInfo.Name.Substring(0, fInfo.Name.LastIndexOf('.'));
            List <string> headList  = new List <string>();
            List <long>   seqIdx    = new List <long>();
            List <long>   seqLen    = new List <long>();
            var           seqBlob   = new SeqBlob(shortName);

            using (StreamReader trH = new StreamReader(fileName)) {
                StreamReader tr = null;
                if (fn.EndsWith(".gz"))
                {
                    GZipInputStream gzStream = new GZipInputStream(trH.BaseStream);
                    tr = new StreamReader(gzStream);
                }
                else
                {
                    tr = trH;
                }

                int         unknowLetters = 0;
                List <char> unknownList   = new List <char>();

                // The whole read loop is guarded: a damaged gzip stream can fail in
                // EndOfStream or in any ReadLine(), not only in the first one of a record.
                // The reads imported so far are kept in that case.
                try {
                    while (!tr.EndOfStream)
                    {
                        string line = tr.ReadLine();
                        if ((line == null) || !line.StartsWith("@"))
                        {
                            continue;
                        }

                        string bases = tr.ReadLine();
                        if (bases == null)
                        {
                            // Truncated file: header without a sequence line. Drop the record
                            // so that headList, seqIdx and seqLen keep the same length.
                            break;
                        }

                        string sHeader = line.Split(' ')[0];
                        sHeader = sHeader.Substring(1);
                        headList.Add(sHeader);
                        long sBegin = seqBlob.Length;
                        seqIdx.Add(sBegin);

                        foreach (char c in bases)
                        {
                            char cc = char.ToUpper(c);
                            int  k  = ACGT.IndexOf(cc);
                            if (k >= 0)
                            {
                                seqBlob.AddLetter(k);
                            }
                            else if (!char.IsWhiteSpace(c))
                            {
                                seqBlob.AddLetter(SeqBlob.UKNOWN_LETTER);  // unknown letter.
                                if (unknownList.Count < 100)
                                {
                                    // Only the first 100 unknown letters are reported.
                                    unknownList.Add(c);
                                }
                                unknowLetters++;
                            }
                        }
                        seqLen.Add(seqBlob.Length - sBegin); // add the length

                        // Ignore the two lines following the sequence.
                        tr.ReadLine();
                        tr.ReadLine();  // the quality indicators.

                        if ((headList.Count % 1000) == 0)
                        {
                            // Progress feedback every 1000 reads.
                            app.Title = "N: " + headList.Count; Application.DoEvents();
                        }
                    }
                } catch (ICSharpCode.SharpZipLib.SharpZipBaseException) {
                    // Stop reading; continue with the complete records read so far.
                }

                if (unknowLetters > 0)
                {
                    StringBuilder sb = new StringBuilder();
                    foreach (char c in unknownList)
                    {
                        sb.Append(c); sb.Append(',');
                    }
                    if (unknownList.Count == 100)
                    {
                        sb.Append("...");
                    }
                    MessageBox.Show("Converted " + unknowLetters + " unknown/meta letters to 'N': " + sb.ToString());
                }
            }

            string oldPresion = app.Folder.NumberPrecision;

            app.Folder.NumberPrecision = "g10";
            try {
                IFreeTable table = app.New.FreeTable();

                table.AddColumn("SeqIdx", true);
                table.AddColumn("SeqLen", true);
                table.AddColumn("Header", false);
                table.ColumnSpecList[0].Name = seqBlob.Name;
                table.AddRows("Id", headList.Count);
                for (int row = 0; row < headList.Count; row++)
                {
                    var R = table.Matrix[row];
                    R[0] = seqIdx[row].ToString();
                    R[1] = seqLen[row].ToString();
                    R[2] = headList[row];
                }

                seqBlob.Dispose();
                string dsName = table.SaveAsDataset(shortName, "");

                if (dsName == null)
                {
                    MessageBox.Show("Cannot import the data: " + app.LastError);
                    return(false);
                }

                app.Folder.OpenDataset(dsName);

                // ValidateTable();
                return(true);
            } finally {
                // Restore the folder setting on every exit path, including the failure return.
                app.Folder.NumberPrecision = oldPresion;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Imports an .fcs file: reads the text segment into a keyword table, reads the data
        /// segment into a number table, optionally applies spill-over compensation and a
        /// logicle transformation, saves the result as a dataset and creates an initial map.
        /// </summary>
        /// <param name="fileName">Path of the file to import.</param>
        /// <returns>
        /// <c>false</c> if the file name does not end with ".fcs" or the dataset could not be
        /// saved; <c>true</c> otherwise.
        /// </returns>
        public bool ImportFile0(string fileName)
        {
            IVisuMap app = CustomImporter.App.ScriptApp;

            if (!fileName.ToLower().EndsWith(".fcs"))
            {
                return(false);  // Let other import handle it.
            }

            INumberTable dataTable   = null;
            IFreeTable   textTable   = null;
            bool         compensated = false;
            bool         logScaled   = false;

            using (StreamReader sr = new StreamReader(fileName)) {
                //
                // Read the text segment.
                //
                // The first 42 characters hold four 8-character offsets starting at
                // position 10: text begin/end and data begin/end.
                char[] header = new char[42];
                sr.ReadBlock(header, 0, header.Length);
                int textBegin = int.Parse(new string(header, 10, 8));
                int textEnd   = int.Parse(new string(header, 18, 8));
                int beginData = int.Parse(new string(header, 26, 8));
                int endData   = int.Parse(new string(header, 34, 8));

                // 'line' is indexed by file offset; the header part is left unfilled.
                char[] line = new char[textEnd + 4];
                sr.ReadBlock(line, header.Length, line.Length - header.Length);
                string textSeg   = new string(line, textBegin + 1, textEnd - textBegin);
                char   delimiter = line[textBegin];

                textTable = app.New.FreeTable();
                textTable.AddColumn("Value", false);

                // The text segment is a delimiter-separated list of alternating keys and values.
                string[] textFields = textSeg.Split(delimiter);
                textTable.AddRows("P", textFields.Length / 2);
                IList <IRowSpec> rowSpecList = textTable.RowSpecList;
                for (int row = 0; row < textTable.Rows; row++)
                {
                    rowSpecList[row].Id      = textFields[2 * row];
                    textTable.Matrix[row][0] = textFields[2 * row + 1];
                }

                //
                // Read in the data segment
                //
                fcsInfo = new FcsInfo(textTable, header);

                // When the header offsets are 0, fall back to the text-segment keywords.
                // Row index 0 is a valid hit, so the test is ">= 0" (as for $TIMESTEP below).
                if (beginData == 0)
                {
                    int beginRow = textTable.IndexOfRow("$BEGINDATA");
                    if (beginRow >= 0)
                    {
                        beginData = int.Parse(textTable.Matrix[beginRow][0]);
                    }
                }
                if (endData == 0)
                {
                    int endRow = textTable.IndexOfRow("$ENDDATA");
                    if (endRow >= 0)
                    {
                        endData = int.Parse(textTable.Matrix[endRow][0]);
                    }
                }
                dataTable = app.New.NumberTable(fcsInfo.Rows, fcsInfo.Columns);

                using (BinaryReader br = new BinaryReader(sr.BaseStream)) {
                    br.BaseStream.Seek(beginData, SeekOrigin.Begin);

                    Byte[] buf4 = new byte[4];
                    Byte[] buf8 = new byte[8];

                    int bitOffset = 0;
                    for (int row = 0; row < fcsInfo.Rows; row++)
                    {
                        for (int col = 0; col < fcsInfo.Columns; col++)
                        {
                            ColumnInfo ci   = fcsInfo.ColumnInfo[col];
                            Byte[]     data = ReadBits(br, ci.Bits, ref bitOffset);
                            if (data.Length < ci.Bytes)
                            {
                                row = fcsInfo.Rows; // enforce premature loop-end.
                                break;
                            }
                            Byte[] buf = (fcsInfo.DataType == FcsInfo.DataTypes.Double) ? buf8 : buf4;
                            Array.Clear(buf, 0, buf.Length);
                            Array.Copy(data, 0, buf, 0, ci.Bytes);
                            if (!fcsInfo.BigEndian)
                            {
                                // NOTE(review): the bytes are reversed when the file is NOT
                                // big-endian; whether this is right depends on the byte order
                                // ReadBits() returns, which is not visible here - confirm.
                                Array.Reverse(buf, 0, ci.Bytes);
                            }

                            switch (fcsInfo.DataType)
                            {
                            case FcsInfo.DataTypes.Integer:
                                dataTable.Matrix[row][col] = (BitConverter.ToUInt32(buf, 0) & ci.RangeMask);
                                break;

                            case FcsInfo.DataTypes.Float:
                                dataTable.Matrix[row][col] = BitConverter.ToSingle(buf, 0);
                                break;

                            case FcsInfo.DataTypes.Double:
                                dataTable.Matrix[row][col] = BitConverter.ToDouble(buf, 0);
                                break;
                            }
                        }
                    }
                }
            }

            // Post processing
            for (int col = 0; col < fcsInfo.Columns; col++)
            {
                IColumnSpec cs    = dataTable.ColumnSpecList[col];
                ColumnInfo  cInfo = fcsInfo.ColumnInfo[col];
                cs.Id = cInfo.ShortName;
                //cs.Name = cInfo.Name + ( cInfo.IsLinear ? ".Lin" : ".Log");
                cs.Name = cInfo.Name;

                if ((cs.Id == "TIME") || (cs.Id == "TIME1"))
                {
                    // Scale the time column by $TIMESTEP when that keyword is present.
                    int timeStepIdx = textTable.IndexOfRow("$TIMESTEP");
                    if (timeStepIdx >= 0)
                    {
                        // File values use '.' as decimal separator regardless of the UI culture.
                        double timeStep = double.Parse(textTable.Matrix[timeStepIdx][0],
                            System.Globalization.CultureInfo.InvariantCulture);
                        for (int row = 0; row < fcsInfo.Rows; row++)
                        {
                            dataTable.Matrix[row][col] *= timeStep;
                        }
                    }
                }
            }

            IList <IRowSpec> rSpecList = dataTable.RowSpecList;

            for (int row = 0; row < fcsInfo.Rows; row++)
            {
                rSpecList[row].Id = row.ToString();
            }


            FileInfo fInfo     = new FileInfo(fileName);
            string   shortName = fInfo.Name.Substring(0, fInfo.Name.LastIndexOf('.'));

            if (saveMetaInfo >= 1)
            {
                SaveParameterTable(textTable, shortName);
                if (saveMetaInfo >= 2)
                {
                    textTable.SaveAsDataset(shortName + " (Text Seg)", "Text segement of data table " + shortName);
                }
            }

            if (autoCompensation)
            {
                // Exceptions with an empty message are used below to leave the compensation
                // silently; any other message is shown to the user.
                try {
                    string sMatrix = null;
                    for (int i = 0; i < textTable.Rows; i++)
                    {
                        string id = textTable.RowSpecList[i].Id.ToLower();
                        if (id.StartsWith("$"))
                        {
                            id = id.Substring(1);
                        }
                        if ((id == "spill") || (id == "spillover"))
                        {
                            sMatrix = textTable.Matrix[i][0];
                            break;
                        }
                    }

                    if (sMatrix == null)
                    {
                        throw new Exception("");                   // silently ignore compensation as no compensation matrix available.
                    }

                    // Layout: dimension n, then n column names (or 1-based indexes), then n*n values.
                    string[] fs        = sMatrix.Split(',');
                    int      dimension = int.Parse(fs[0]);

                    if (fs.Length != (dimension * dimension + dimension + 1))
                    {
                        throw new Exception("Invalid spill over matrix.");
                    }

                    INumberTable  cMatrix = app.New.NumberTable(dimension, dimension);
                    List <string> parList = fs.Skip(1).Take(dimension).ToList();

                    int idx;
                    if (parList.Count(id => int.TryParse(id, out idx)) == parList.Count)
                    {
                        // The columns are specified by a list of indexes. We convert them here to ids
                        for (int i = 0; i < parList.Count; i++)
                        {
                            parList[i] = dataTable.ColumnSpecList[int.Parse(parList[i]) - 1].Id;   // index starts with 1 !
                        }
                    }

                    for (int col = 0; col < cMatrix.Columns; col++)
                    {
                        cMatrix.ColumnSpecList[col].Id = parList[col];
                    }
                    int fsIdx = dimension + 1;
                    for (int row = 0; row < cMatrix.Rows; row++)
                    {
                        for (int col = 0; col < cMatrix.Columns; col++)
                        {
                            cMatrix.Matrix[row][col] = double.Parse(fs[fsIdx++],
                                System.Globalization.CultureInfo.InvariantCulture);
                        }
                    }

                    var cData = dataTable.SelectColumnsById(parList);
                    if (cData.Columns != parList.Count)
                    {
                        if (dataTable.ColumnSpecList.Count(cl => cl.Id.Equals("<" + parList[0] + ">")) == 1)
                        {
                            // silently ignore already compensated data.
                            throw new Exception("");
                        }
                        else
                        {
                            throw new Exception("Invalid spill over matrix: unknown names.");
                        }
                    }

                    var math = CustomImporter.App.GetMathAdaptor();
                    var m    = math.InvertMatrix((double[][])cMatrix.Matrix);
                    for (int row = 0; row < cMatrix.Rows; row++)
                    {
                        cMatrix.Matrix[row] = m[row];
                    }

                    cData = cData.Multiply(cMatrix);  // perform the compensation with the inverse matrix of the spill over matrix.
                    cData.CopyValuesTo(dataTable);
                    compensated = true;
                } catch (Exception ex) {
                    if (ex.Message != "")
                    {
                        MessageBox.Show("Cannot perform compensation: " + ex.Message);
                    }
                }
            }

            if (logTransform)
            {
                double[][] m = (double[][])dataTable.Matrix;

                // Defaults, overridable through the "T; W; M" application property.
                double   T        = 262144;
                double   W        = 1.0;
                double   M        = 4.5;
                string[] settings = app.GetProperty(
                    "CustomImporter.Logicle.Settings", "262144; 1.0; 4.5").Split(';');
                if (settings.Length == 3)
                {
                    double.TryParse(settings[0], out T);
                    double.TryParse(settings[1], out W);
                    double.TryParse(settings[2], out M);
                }
                var    fastLogicle = new VisuMap.DataCleansing.FastLogicle(T, W, M);
                double maxVal      = fastLogicle.MaxValue;
                double minVal      = fastLogicle.MinValue;

                for (int col = 0; col < fcsInfo.Columns; col++)
                {
                    if (fcsInfo.ColumnInfo[col].LogTrans)
                    {
                        for (int row = 0; row < fcsInfo.Rows; row++)
                        {
                            // Clamp into [minVal, maxVal] before scaling.
                            m[row][col] = M * fastLogicle.scale(Math.Min(maxVal, Math.Max(minVal, m[row][col])));
                        }
                        logScaled = true;
                    }
                }
            }

            string msg = "Dataset imported from " + fileName + ". Version: " + fcsInfo.version;

            if (compensated)
            {
                msg += ", compensated";
            }
            if (logScaled)
            {
                msg += ", log-scaled";
            }
            string dsName = dataTable.SaveAsDataset(shortName, msg);

            if (dsName == null)
            {
                // Report the failure instead of passing a null name to OpenDataset().
                MessageBox.Show("Cannot import the data: " + app.LastError);
                fcsInfo = null;
                return(false);
            }

            app.Folder.OpenDataset(dsName);
            INumberTable nTable = app.GetNumberTable();

            // Locate the first FSC-A and SSC-A columns through the list indexer, so that no
            // assumption about the concrete type of ColumnSpecList is needed.
            int fsc = -1;
            int ssc = -1;
            for (int col = 0; col < nTable.Columns; col++)
            {
                string colId = nTable.ColumnSpecList[col].Id;
                if ((fsc < 0) && (colId == "FSC-A"))
                {
                    fsc = col;
                }
                if ((ssc < 0) && (colId == "SSC-A"))
                {
                    ssc = col;
                }
            }

            if ((fsc >= 0) && (ssc >= 0))
            {
                var xy = app.New.XyPlot(nTable);
                xy.Show();
                xy.XAxisIndex  = fsc;
                xy.YAxisIndex  = ssc;
                xy.AutoScaling = true;
                xy.Redraw();
                xy.CaptureMap();
                xy.Close();
                app.Map.Name = "FSC/SSC";
            }
            else
            {
                // We just do create a simple PCA view.
                IPcaView pca = nTable.ShowPcaView();
                pca.ResetView();
                pca.CaptureMap();
                app.Map.Redraw();
                pca.Close();
                app.Map.Name = "PCA-All";
            }

            app.Map.GlyphType = glyphName;
            app.Map.Redraw();

            if (showResult)
            {
                app.New.SpectrumBand(nTable).Show();
            }

            fcsInfo = null;
            return(true);
        }
Beispiel #3
0
        /// <summary>
        /// Imports an .arff file as a dataset in the current folder.
        /// </summary>
        /// <remarks>
        /// The file is read twice: the first pass collects the "@attribute" declarations and
        /// counts the data lines, the second pass copies the data lines into the table. Each
        /// pass opens its own reader, so the second pass always starts from a clean reader
        /// state and both readers are closed even if an exception is thrown.
        /// </remarks>
        /// <param name="fileName">Path of the file to import.</param>
        /// <returns>
        /// <c>false</c> if the file name does not end with ".arff"; <c>true</c> otherwise,
        /// also when saving the dataset failed (an error message is shown in that case).
        /// </returns>
        public bool ImportFile(string fileName)
        {
            IVisuMap app = CustomImporter.App.ScriptApp;

            // This importer object only recognizes files with name ending
            // with .arff
            if (!(fileName.ToLower().EndsWith(".arff")))
            {
                return(false);
            }

            List <string> columnName = new List <string>();
            List <string> columnDesc = new List <string>();
            List <bool>   isNumber   = new List <bool>();

            int rows = 0;

            //
            // First pass: collect attribute declarations and count data lines.
            //
            using (StreamReader tr = new StreamReader(fileName)) {
                while (!tr.EndOfStream)
                {
                    string line = tr.ReadLine();
                    if (line.StartsWith("@"))
                    {
                        string[] fs = line.Split(' ');
                        if (fs[0].ToLower() == "@attribute")
                        {
                            // fields: "@attribute", name, optional description words, type.
                            string[] fields = line.Split(new char[] { ' ', '\t', '\"' }, StringSplitOptions.RemoveEmptyEntries);
                            columnName.Add(fields[1]);
                            if (fields.Length >= 4)
                            {
                                // Everything between the name and the type is the description.
                                StringBuilder sb = new StringBuilder();
                                for (int i = 2; i < (fields.Length - 1); i++)
                                {
                                    if (i != 2)
                                    {
                                        sb.Append(" ");
                                    }
                                    sb.Append(fields[i]);
                                }
                                columnDesc.Add(sb.ToString());
                            }
                            else
                            {
                                columnDesc.Add("");
                            }
                            string typeStr = fields[fields.Length - 1].ToLower();
                            isNumber.Add((typeStr == "numeric") || (typeStr == "real"));
                        }
                        // Other "@" lines are neither attributes nor data.
                    }
                    else if (line.StartsWith("%") || string.IsNullOrEmpty(line))
                    {
                        // Lines starting with "%" and empty lines are skipped.
                    }
                    else
                    {
                        rows++;
                    }
                }
            }

            //
            // Load the file into a table and save the table as
            // a dataset into the current folder.
            //
            IFreeTable table = app.New.FreeTable();
            Dictionary <string, int> uniqId = new Dictionary <string, int>();

            for (int col = 0; col < columnName.Count; col++)
            {
                // Repeated attribute names get a "_<n>" suffix to keep column ids unique.
                string id = columnName[col];
                if (uniqId.ContainsKey(id))
                {
                    uniqId[id]++;
                    id += "_" + uniqId[id];
                }
                else
                {
                    uniqId[id] = 0;
                }
                table.AddColumn(id, columnDesc[col], isNumber[col]);
            }

            table.AddRows("r", rows);
            int row = 0;

            //
            // Second pass: copy the data lines, using the same line classification as above.
            //
            using (StreamReader tr = new StreamReader(fileName)) {
                while (!tr.EndOfStream)
                {
                    string line = tr.ReadLine();
                    if (string.IsNullOrEmpty(line) || line.StartsWith("@") || line.StartsWith("%"))
                    {
                        continue;
                    }
                    string[]       fs = line.Split(',', '\t');
                    IList <string> r  = table.Matrix[row];
                    for (int col = 0; col < table.Columns; col++)
                    {
                        if (fs[col].Equals("?"))
                        {
                            // "?" fields are replaced by the application's replacement value.
                            r[col] = app.MissingValueReplacement.ToString();
                        }
                        else
                        {
                            r[col] = fs[col];
                        }
                    }
                    row++;
                }
            }

            // Set row type to indicate the class attributes.
            int classCol = table.IndexOfColumn("class");

            if ((classCol >= 0) && (table.ColumnSpecList[classCol].DataType == 'e'))
            {
                // Each distinct class value gets the next free type index, in order of appearance.
                Dictionary <string, short> uniType = new Dictionary <string, short>();
                IList <IRowSpec>           rsList  = table.RowSpecList;
                for (int rw = 0; rw < table.Rows; rw++)
                {
                    string sClass = table.Matrix[rw][classCol];
                    if (!uniType.ContainsKey(sClass))
                    {
                        uniType.Add(sClass, (short)uniType.Count);
                    }
                    rsList[rw].Type = uniType[sClass];
                }
            }


            FileInfo fInfo     = new FileInfo(fileName);
            string   shortName = fInfo.Name.Substring(0, fInfo.Name.LastIndexOf('.'));
            string   dsName    = table.SaveAsDataset(shortName, "Dataset imported from ARFF file.");


            if (dsName == null)
            {
                MessageBox.Show("Cannot import the data: " + app.LastError);
            }
            else
            {
                app.Folder.OpenDataset(dsName);

                if (app.GetProperty("CustomImporter.Arff.DoPca", "1").Equals("1"))
                {
                    IPcaView pca = app.New.PcaView();
                    pca.Show();
                    pca.ResetView();
                    pca.CaptureMap();
                    app.Map.Redraw();
                    pca.Close();
                }
            }

            return(true);
        }
Beispiel #4
0
        /// <summary>
        /// Imports an .sdf or .mol file: one row per atom with its X/Y/Z coordinates and one
        /// additional column per atom holding the bond values, then places the bodies on the
        /// map, centred and scaled to 85% of the smaller map side.
        /// </summary>
        /// <param name="fileName">Path of the file to import.</param>
        /// <returns>
        /// <c>false</c> if the file name does not end with ".sdf" or ".mol", or if the dataset
        /// could not be saved; <c>true</c> otherwise, also when reading failed with an
        /// exception (an error message is shown in that case).
        /// </returns>
        public bool ImportFile(string fileName)
        {
            IVisuMap app = CustomImporter.App.ScriptApp;
            string   fn  = fileName.ToLower();

            if (!(fn.EndsWith(".sdf") || fn.EndsWith(".mol")))
            {
                return(false);  // Let other import handle it.
            }

            char[] sep = new char[] { ' ', '\t' };  // field separators.
            //
            // Frequently used atoms will have fixed body types.
            //
            Dictionary <string, short> typeNames = new Dictionary <string, short>(); // used to determine body types.
            short atomIdx = 0;

            typeNames["C"]  = atomIdx++;
            typeNames["O"]  = atomIdx++;
            typeNames["H"]  = atomIdx++;
            typeNames["N"]  = atomIdx++;
            typeNames["S"]  = atomIdx++;
            typeNames["P"]  = atomIdx++;
            typeNames["F"]  = atomIdx++;
            typeNames["Na"] = atomIdx++;
            typeNames["Cl"] = atomIdx++;
            typeNames["Br"] = atomIdx++;

            try {
                using (StreamReader tr = new StreamReader(fileName)) {
                    // Three leading lines (kept as dataset description), then the counts line.
                    string   header = tr.ReadLine();
                    string   cmmt   = tr.ReadLine() + ";" + tr.ReadLine();
                    string[] info   = tr.ReadLine().Split(sep, StringSplitOptions.RemoveEmptyEntries);
                    int      atoms  = int.Parse(info[0]);
                    int      bonds  = int.Parse(info[1]);

                    IFreeTable table = app.New.FreeTable();
                    table.AddColumn("X", true);
                    table.AddColumn("Y", true);
                    table.AddColumn("Z", true);
                    for (int i = 0; i < atoms; i++)
                    {
                        table.AddColumn("A" + i, true);
                    }
                    table.AddRows("A", atoms);

                    bool is2D = true;

                    for (int row = 0; row < atoms; row++)
                    {
                        // Atom line: x, y, z, element symbol, ...
                        string[] fs = tr.ReadLine().Split(sep, StringSplitOptions.RemoveEmptyEntries);
                        table.RowSpecList[row].Id              =
                            table.ColumnSpecList[3 + row].Id   = fs[3] + "." + (1 + row);
                        table.RowSpecList[row].Name            =
                            table.ColumnSpecList[3 + row].Name = fs[3];
                        if (!typeNames.ContainsKey(fs[3]))
                        {
                            // Elements outside the predefined list get the next free type index.
                            typeNames[fs[3]] = atomIdx++;
                        }
                        table.RowSpecList[row].Type = table.ColumnSpecList[row + 3].Group = typeNames[fs[3]];

                        table.Matrix[row][0] = fs[0];
                        table.Matrix[row][1] = fs[1];
                        table.Matrix[row][2] = fs[2];

                        // Coordinates in the file use '.' as decimal separator regardless of
                        // the UI culture.
                        if (double.Parse(fs[2], System.Globalization.CultureInfo.InvariantCulture) != 0)
                        {
                            is2D = false;
                        }
                    }

                    for (int j = 0; j < bonds; j++)
                    {
                        // Bond line: 1-based atom numbers followed by the bond value, which is
                        // stored symmetrically for both atoms.
                        string[] fs    = tr.ReadLine().Split(sep, StringSplitOptions.RemoveEmptyEntries);
                        int      atom1 = int.Parse(fs[0]) - 1;
                        int      atom2 = int.Parse(fs[1]) - 1;
                        table.Matrix[atom1][atom2 + 3]     =
                            table.Matrix[atom2][atom1 + 3] = fs[2];
                    }

                    FileInfo fInfo     = new FileInfo(fileName);
                    string   shortName = fInfo.Name.Substring(0, fInfo.Name.LastIndexOf('.'));
                    string   dsName    = table.SaveAsDataset(shortName, header + "; " + cmmt);

                    if (dsName == null)
                    {
                        MessageBox.Show("Cannot import the data: " + app.LastError);
                        return(false);
                    }
                    else
                    {
                        app.Folder.OpenDataset(dsName);
                        double cX = app.Map.Width / 2;
                        double cY = app.Map.Height / 2;
                        app.Map.Depth = is2D ? 0 : Math.Max(app.Map.Width, app.Map.Height);
                        double cZ = app.Map.Depth / 2;

                        // Bounding box of the atom coordinates.
                        INumberTable numTable = app.Dataset.GetNumberTable();
                        double       minX     = double.MaxValue;
                        double       minY     = double.MaxValue;
                        double       minZ     = double.MaxValue;
                        double       maxX     = double.MinValue;
                        double       maxY     = double.MinValue;
                        double       maxZ     = double.MinValue;
                        for (int row = 0; row < numTable.Rows; row++)
                        {
                            IList <double> R = numTable.Matrix[row];
                            minX = Math.Min(minX, R[0]);
                            minY = Math.Min(minY, R[1]);
                            minZ = Math.Min(minZ, R[2]);
                            maxX = Math.Max(maxX, R[0]);
                            maxY = Math.Max(maxY, R[1]);
                            maxZ = Math.Max(maxZ, R[2]);
                        }
                        double cx    = (maxX + minX) / 2;
                        double cy    = (maxY + minY) / 2;
                        double cz    = (maxZ + minZ) / 2;
                        double mSize = Math.Max(maxX - minX, Math.Max(maxY - minY, maxZ - minZ));

                        // With a single atom (or coincident atoms) the extent is 0. Dividing
                        // by it would give an infinite factor and NaN body coordinates
                        // (0 * Infinity), so such bodies are simply placed at the map centre.
                        double factor = (mSize > 0)
                            ? Math.Min(app.Map.Width, app.Map.Height) * 0.85 / mSize
                            : 0;

                        for (int row = 0; row < numTable.Rows; row++)
                        {
                            IBody          body = app.Dataset.BodyList[row];
                            IList <double> R    = numTable.Matrix[row];
                            body.ShowName = true;
                            body.X        = (R[0] - cx) * factor + cX;
                            body.Y        = (R[1] - cy) * factor + cY;
                            body.Z        = (R[2] - cz) * factor + cZ;
                        }

                        // The following stmt will cause the window to refresh.
                        app.Map.GlyphType = "Colored Glyphs";
                        app.Map.MapType   = is2D ? "Rectangle" : "Cube";
                    }
                }
            } catch (Exception ex) {
                MessageBox.Show("Failed to load SDF/MOL file: " + ex.Message);
            }
            return(true);
        }