Пример #1
0
        /// <summary>
        /// Imports an ARFF file: reads the <c>@attribute</c> declarations to build the
        /// columns of a new free table, loads the data rows into it, saves the table as
        /// a dataset and opens that dataset (optionally capturing a PCA map).
        /// </summary>
        /// <param name="fileName">Path of the file to import.</param>
        /// <returns>
        /// <c>false</c> when the file name does not end with ".arff" (the file is not
        /// opened); <c>true</c> otherwise, including the case where saving the dataset
        /// fails, which is reported through a message box.
        /// </returns>
        public bool ImportFile(string fileName)
        {
            IVisuMap app = CustomImporter.App.ScriptApp;

            // This importer object only recognizes files with name ending
            // with .arff. The comparison is ordinal so that it does not depend
            // on the current culture's casing rules.
            if (!fileName.EndsWith(".arff", StringComparison.OrdinalIgnoreCase))
            {
                return(false);
            }

            char[]        headerSeparators = new char[] { ' ', '\t', '\"' };
            List <string> columnName       = new List <string>();
            List <string> columnDesc       = new List <string>();
            List <bool>   isNumber         = new List <bool>();
            IFreeTable    table            = app.New.FreeTable();

            // The using-block guarantees the file is closed even if parsing throws.
            using (StreamReader tr = new StreamReader(fileName))
            {
                //
                // Pass 1: collect the attribute declarations and count the data rows.
                //
                int rows = 0;
                while (!tr.EndOfStream)
                {
                    string line = tr.ReadLine();

                    // Skip blank (or whitespace-only) lines and '%' comment lines.
                    if ((line == null) || (line.Trim().Length == 0) || (line[0] == '%'))
                    {
                        continue;
                    }

                    // Anything that is not a '@' header line counts as a data row.
                    if (line[0] != '@')
                    {
                        rows++;
                        continue;
                    }

                    // Header line: only @attribute declarations are of interest.
                    // Splitting on blanks AND tabs lets tab-separated headers be recognized.
                    string[] fields = line.Split(headerSeparators, StringSplitOptions.RemoveEmptyEntries);
                    if ((fields.Length < 2) ||
                        !fields[0].Equals("@attribute", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    columnName.Add(fields[1]);

                    // Everything between the name and the last token is used as description.
                    if (fields.Length >= 4)
                    {
                        columnDesc.Add(string.Join(" ", fields, 2, fields.Length - 3));
                    }
                    else
                    {
                        columnDesc.Add("");
                    }

                    // The last token is taken as the attribute type.
                    string typeStr = fields[fields.Length - 1];
                    isNumber.Add(
                        typeStr.Equals("numeric", StringComparison.OrdinalIgnoreCase) ||
                        typeStr.Equals("real", StringComparison.OrdinalIgnoreCase));
                }

                //
                // Create the table columns; repeated attribute names get a
                // "_<n>" suffix so that column ids differ.
                //
                Dictionary <string, int> uniqId = new Dictionary <string, int>();
                for (int col = 0; col < columnName.Count; col++)
                {
                    string baseId = columnName[col];
                    string id     = baseId;
                    int    seen;
                    if (uniqId.TryGetValue(baseId, out seen))
                    {
                        seen++;
                        uniqId[baseId] = seen;
                        id = baseId + "_" + seen;
                    }
                    else
                    {
                        uniqId[baseId] = 0;
                    }
                    table.AddColumn(id, columnDesc[col], isNumber[col]);
                }

                table.AddRows("r", rows);

                //
                // Pass 2: rewind and load the data rows. DiscardBufferedData() must be
                // called when repositioning the underlying stream so that the reader's
                // internal buffer and decoder state are reset.
                //
                tr.DiscardBufferedData();
                tr.BaseStream.Seek(0, SeekOrigin.Begin);

                int row = 0;
                while (!tr.EndOfStream)
                {
                    string line = tr.ReadLine();

                    // Same line classification as in pass 1, so 'row' never exceeds 'rows'.
                    if ((line == null) || (line.Trim().Length == 0) ||
                        (line[0] == '@') || (line[0] == '%'))
                    {
                        continue;
                    }

                    string[]       fs = line.Split(',', '\t');
                    IList <string> r  = table.Matrix[row];
                    for (int col = 0; col < table.Columns; col++)
                    {
                        // A row with fewer fields than declared columns is padded
                        // with the missing-value marker instead of throwing.
                        string cell = (col < fs.Length) ? fs[col] : "?";
                        if (cell.Equals("?"))
                        {
                            r[col] = app.MissingValueReplacement.ToString();
                        }
                        else
                        {
                            r[col] = cell;
                        }
                    }
                    row++;
                }
            }

            // Set row type to indicate the class attributes.
            int classCol = table.IndexOfColumn("class");

            if ((classCol >= 0) && (table.ColumnSpecList[classCol].DataType == 'e'))
            {
                // Each distinct class label is mapped to a type index in order of first appearance.
                Dictionary <string, short> uniType = new Dictionary <string, short>();
                IList <IRowSpec>           rsList  = table.RowSpecList;
                for (int rw = 0; rw < table.Rows; rw++)
                {
                    string sClass = table.Matrix[rw][classCol];
                    short  type;
                    if (!uniType.TryGetValue(sClass, out type))
                    {
                        type = (short)uniType.Count;
                        uniType.Add(sClass, type);
                    }
                    rsList[rw].Type = type;
                }
            }

            // The dataset is named after the file, without directory and extension.
            string shortName = Path.GetFileNameWithoutExtension(fileName);
            string dsName    = table.SaveAsDataset(shortName, "Dataset imported from ARFF file.");

            if (dsName == null)
            {
                MessageBox.Show("Cannot import the data: " + app.LastError);
            }
            else
            {
                app.Folder.OpenDataset(dsName);

                // Unless disabled through the property, capture a PCA view as the initial map.
                if (app.GetProperty("CustomImporter.Arff.DoPca", "1").Equals("1"))
                {
                    IPcaView pca = app.New.PcaView();
                    pca.Show();
                    pca.ResetView();
                    pca.CaptureMap();
                    app.Map.Redraw();
                    pca.Close();
                }
            }

            return(true);
        }
Пример #2
0
        /// <summary>
        /// Imports an FCS file: parses the header and text segment, reads the data
        /// segment into a number table, optionally applies spill-over compensation and
        /// the logicle transformation, saves the result as a dataset and shows an
        /// initial map (FSC-A/SSC-A scatter plot when both columns exist, PCA otherwise).
        /// </summary>
        /// <param name="fileName">Path of the file to import.</param>
        /// <returns>
        /// <c>false</c> when the file name does not end with ".fcs" so that another
        /// importer can handle it; <c>true</c> otherwise, including the case where
        /// saving the dataset fails, which is reported through a message box.
        /// </returns>
        public bool ImportFile0(string fileName)
        {
            IVisuMap app = CustomImporter.App.ScriptApp;

            if (!fileName.EndsWith(".fcs", StringComparison.OrdinalIgnoreCase))
            {
                return(false);  // Let other import handle it.
            }

            // Numeric keyword values use '.' as decimal separator, so they are
            // parsed with the invariant culture rather than the user's culture.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            INumberTable dataTable   = null;
            IFreeTable   textTable   = null;
            bool         compensated = false;
            bool         logScaled   = false;

            using (StreamReader sr = new StreamReader(fileName)) {
                //
                // Read the text segment.
                //
                char[] header = new char[42];
                sr.ReadBlock(header, 0, header.Length);
                int textBegin = int.Parse(new string(header, 10, 8));
                int textEnd   = int.Parse(new string(header, 18, 8));
                int beginData = int.Parse(new string(header, 26, 8));
                int endData   = int.Parse(new string(header, 34, 8));

                // 'line' is indexed by file offset: the first header.Length slots are
                // left unused and the remainder is filled from the current position.
                char[] line = new char[textEnd + 4];
                sr.ReadBlock(line, header.Length, line.Length - header.Length);
                string textSeg   = new string(line, textBegin + 1, textEnd - textBegin);
                char   delimiter = line[textBegin];

                textTable = app.New.FreeTable();
                textTable.AddColumn("Value", false);

                // The text segment is a delimiter-separated list of keyword/value
                // pairs; each pair becomes one row (row id = keyword).
                string[] textFields = textSeg.Split(delimiter);
                textTable.AddRows("P", textFields.Length / 2);
                IList <IRowSpec> rowSpecList = textTable.RowSpecList;
                for (int row = 0; row < textTable.Rows; row++)
                {
                    rowSpecList[row].Id      = textFields[2 * row];
                    textTable.Matrix[row][0] = textFields[2 * row + 1];
                }

                //
                // Read in the data segment
                //
                fcsInfo = new FcsInfo(textTable, header);

                // When the header offsets are zero, fall back to the text-segment
                // keywords. A keyword located at row 0 is valid (not-found is negative).
                if (beginData == 0)
                {
                    int beginIdx = textTable.IndexOfRow("$BEGINDATA");
                    if (beginIdx >= 0)
                    {
                        beginData = int.Parse(textTable.Matrix[beginIdx][0]);
                    }
                }
                if (endData == 0)
                {
                    int endIdx = textTable.IndexOfRow("$ENDDATA");
                    if (endIdx >= 0)
                    {
                        endData = int.Parse(textTable.Matrix[endIdx][0]);
                    }
                }
                dataTable = app.New.NumberTable(fcsInfo.Rows, fcsInfo.Columns);

                using (BinaryReader br = new BinaryReader(sr.BaseStream)) {
                    br.BaseStream.Seek(beginData, SeekOrigin.Begin);

                    Byte[] buf4 = new byte[4];
                    Byte[] buf8 = new byte[8];

                    int bitOffset = 0;
                    for (int row = 0; row < fcsInfo.Rows; row++)
                    {
                        for (int col = 0; col < fcsInfo.Columns; col++)
                        {
                            ColumnInfo ci   = fcsInfo.ColumnInfo[col];
                            Byte[]     data = ReadBits(br, ci.Bits, ref bitOffset);
                            if (data.Length < ci.Bytes)
                            {
                                row = fcsInfo.Rows; // enforce premature loop-end.
                                break;
                            }
                            Byte[] buf = (fcsInfo.DataType == FcsInfo.DataTypes.Double) ? buf8 : buf4;
                            Array.Clear(buf, 0, buf.Length);
                            Array.Copy(data, 0, buf, 0, ci.Bytes);
                            if (!fcsInfo.BigEndian)
                            {
                                // NOTE(review): the previous comment here read "Intel CPU
                                // expects BigEndian format", but x86 is little-endian. The
                                // bytes are reversed when fcsInfo.BigEndian is false; confirm
                                // against ReadBits/FcsInfo that this is the intended condition.
                                Array.Reverse(buf, 0, ci.Bytes);
                            }

                            switch (fcsInfo.DataType)
                            {
                            case FcsInfo.DataTypes.Integer:
                                dataTable.Matrix[row][col] = (BitConverter.ToUInt32(buf, 0) & ci.RangeMask);
                                break;

                            case FcsInfo.DataTypes.Float:
                                dataTable.Matrix[row][col] = BitConverter.ToSingle(buf, 0);
                                break;

                            case FcsInfo.DataTypes.Double:
                                dataTable.Matrix[row][col] = BitConverter.ToDouble(buf, 0);
                                break;
                            }
                        }
                    }
                }
            }

            // Post processing: name the columns and scale the time column by $TIMESTEP.
            for (int col = 0; col < fcsInfo.Columns; col++)
            {
                IColumnSpec cs    = dataTable.ColumnSpecList[col];
                ColumnInfo  cInfo = fcsInfo.ColumnInfo[col];
                cs.Id   = cInfo.ShortName;
                cs.Name = cInfo.Name;

                if ((cs.Id == "TIME") || (cs.Id == "TIME1"))
                {
                    int timeStepIdx = textTable.IndexOfRow("$TIMESTEP");
                    if (timeStepIdx >= 0)
                    {
                        double timeStep = double.Parse(textTable.Matrix[timeStepIdx][0], invariant);
                        for (int row = 0; row < fcsInfo.Rows; row++)
                        {
                            dataTable.Matrix[row][col] *= timeStep;
                        }
                    }
                }
            }

            // Row ids are the zero-based row numbers.
            IList <IRowSpec> rSpecList = dataTable.RowSpecList;

            for (int row = 0; row < fcsInfo.Rows; row++)
            {
                rSpecList[row].Id = row.ToString();
            }

            // The dataset is named after the file, without directory and extension.
            FileInfo fInfo     = new FileInfo(fileName);
            string   shortName = fInfo.Name.Substring(0, fInfo.Name.LastIndexOf('.'));

            if (saveMetaInfo >= 1)
            {
                SaveParameterTable(textTable, shortName);
                if (saveMetaInfo >= 2)
                {
                    textTable.SaveAsDataset(shortName + " (Text Seg)", "Text segement of data table " + shortName);
                }
            }

            if (autoCompensation)
            {
                try {
                    // Look for the spill-over matrix keyword ("spill" or "spillover",
                    // with or without a leading '$', case-insensitive).
                    string sMatrix = null;
                    for (int i = 0; i < textTable.Rows; i++)
                    {
                        string id = textTable.RowSpecList[i].Id;
                        if (id.StartsWith("$", StringComparison.Ordinal))
                        {
                            id = id.Substring(1);
                        }
                        if (id.Equals("spill", StringComparison.OrdinalIgnoreCase) ||
                            id.Equals("spillover", StringComparison.OrdinalIgnoreCase))
                        {
                            sMatrix = textTable.Matrix[i][0];
                            break;
                        }
                    }

                    // Without a compensation matrix the compensation is silently skipped.
                    if (sMatrix != null)
                    {
                        // Layout: dimension, <dimension> parameter names, <dimension^2> coefficients.
                        string[] fs        = sMatrix.Split(',');
                        int      dimension = int.Parse(fs[0]);

                        if (fs.Length != (dimension * dimension + dimension + 1))
                        {
                            throw new InvalidOperationException("Invalid spill over matrix.");
                        }

                        INumberTable  cMatrix = app.New.NumberTable(dimension, dimension);
                        List <string> parList = fs.Skip(1).Take(dimension).ToList();

                        int idx;
                        if (parList.All(p => int.TryParse(p, out idx)))
                        {
                            // The columns are specified by a list of indexes. We convert them here to ids
                            for (int i = 0; i < parList.Count; i++)
                            {
                                parList[i] = dataTable.ColumnSpecList[int.Parse(parList[i]) - 1].Id;   // index starts with 1 !
                            }
                        }

                        for (int col = 0; col < cMatrix.Columns; col++)
                        {
                            cMatrix.ColumnSpecList[col].Id = parList[col];
                        }
                        int fsIdx = dimension + 1;
                        for (int row = 0; row < cMatrix.Rows; row++)
                        {
                            for (int col = 0; col < cMatrix.Columns; col++)
                            {
                                cMatrix.Matrix[row][col] = double.Parse(fs[fsIdx++], invariant);
                            }
                        }

                        var cData = dataTable.SelectColumnsById(parList);
                        if (cData.Columns == parList.Count)
                        {
                            var math = CustomImporter.App.GetMathAdaptor();
                            var m    = math.InvertMatrix((double[][])cMatrix.Matrix);
                            for (int row = 0; row < cMatrix.Rows; row++)
                            {
                                cMatrix.Matrix[row] = m[row];
                            }

                            // Perform the compensation with the inverse of the spill-over matrix.
                            cData = cData.Multiply(cMatrix);
                            cData.CopyValuesTo(dataTable);
                            compensated = true;
                        }
                        else if (dataTable.ColumnSpecList.Count(cl => cl.Id.Equals("<" + parList[0] + ">")) != 1)
                        {
                            throw new InvalidOperationException("Invalid spill over matrix: unknown names.");
                        }
                        // Otherwise a column named "<name>" exists: the data is treated as
                        // already compensated and the step is silently skipped.
                    }
                } catch (Exception ex) {
                    // Compensation is best-effort: report the problem and continue the import.
                    MessageBox.Show("Cannot perform compensation: " + ex.Message);
                }
            }

            if (logTransform)
            {
                double[][] m = (double[][])dataTable.Matrix;

                double   T        = 262144;
                double   W        = 1.0;
                double   M        = 4.5;
                string[] settings = app.GetProperty(
                    "CustomImporter.Logicle.Settings", "262144; 1.0; 4.5").Split(';');
                if (settings.Length == 3)
                {
                    // TryParse zeroes its out argument on failure, so parse into a
                    // temporary and keep the default when a setting is malformed.
                    System.Globalization.NumberStyles style =
                        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;
                    double parsed;
                    if (double.TryParse(settings[0], style, invariant, out parsed))
                    {
                        T = parsed;
                    }
                    if (double.TryParse(settings[1], style, invariant, out parsed))
                    {
                        W = parsed;
                    }
                    if (double.TryParse(settings[2], style, invariant, out parsed))
                    {
                        M = parsed;
                    }
                }
                var    fastLogicle = new VisuMap.DataCleansing.FastLogicle(T, W, M);
                double maxVal      = fastLogicle.MaxValue;
                double minVal      = fastLogicle.MinValue;

                for (int col = 0; col < fcsInfo.Columns; col++)
                {
                    if (fcsInfo.ColumnInfo[col].LogTrans)
                    {
                        for (int row = 0; row < fcsInfo.Rows; row++)
                        {
                            // Values are clamped to [minVal, maxVal] before scaling.
                            m[row][col] = M * fastLogicle.scale(Math.Min(maxVal, Math.Max(minVal, m[row][col])));
                        }
                        logScaled = true;
                    }
                }
            }

            string msg = "Dataset imported from " + fileName + ". Version: " + fcsInfo.version;

            if (compensated)
            {
                msg += ", compensated";
            }
            if (logScaled)
            {
                msg += ", log-scaled";
            }
            string dsName = dataTable.SaveAsDataset(shortName, msg);

            if (dsName == null)
            {
                // Saving failed: report it instead of trying to open a null dataset.
                MessageBox.Show("Cannot import the data: " + app.LastError);
                fcsInfo = null;
                return(true);
            }

            app.Folder.OpenDataset(dsName);
            INumberTable nTable = app.GetNumberTable();

            // Locate the first FSC-A and SSC-A columns by index; this does not
            // depend on the concrete collection type behind ColumnSpecList.
            var csList = nTable.ColumnSpecList;
            int fsc    = -1;
            int ssc    = -1;
            for (int col = 0; col < nTable.Columns; col++)
            {
                string colId = csList[col].Id;
                if ((fsc < 0) && (colId == "FSC-A"))
                {
                    fsc = col;
                }
                if ((ssc < 0) && (colId == "SSC-A"))
                {
                    ssc = col;
                }
            }

            if ((fsc >= 0) && (ssc >= 0))
            {
                var xy = app.New.XyPlot(nTable);
                xy.Show();
                xy.XAxisIndex  = fsc;
                xy.YAxisIndex  = ssc;
                xy.AutoScaling = true;
                xy.Redraw();
                xy.CaptureMap();
                xy.Close();
                app.Map.Name = "FSC/SSC";
            }
            else
            {
                // No FSC-A/SSC-A pair: just create a simple PCA view.
                IPcaView pca = nTable.ShowPcaView();
                pca.ResetView();
                pca.CaptureMap();
                app.Map.Redraw();
                pca.Close();
                app.Map.Name = "PCA-All";
            }

            app.Map.GlyphType = glyphName;
            app.Map.Redraw();

            if (showResult)
            {
                app.New.SpectrumBand(nTable).Show();
            }

            fcsInfo = null;
            return(true);
        }