Ejemplo n.º 1
0
        /// <summary>
        /// Builds the taxon table for a sample from a tab-separated summary file.
        /// </summary>
        /// <remarks>
        /// The first line is treated as a header and skipped. Every other non-blank line is used
        /// only when it splits into exactly two tab-separated fields: a ';'-separated lineage
        /// (segments shaped like <c>g__Name</c>) and a numeric value. The value is multiplied by
        /// 1,000,000 and stored in the <c>BaseOneMillion</c> column next to the deepest named
        /// lineage segment and its rank.
        /// </remarks>
        /// <param name="file">File to read; its name becomes <c>SampleId</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
        public AmericanGut(FileInfo file)
        {
            if (file == null)
            {
                throw new ArgumentNullException(nameof(file));
            }

            const double scaleToMillion = 1000000;
            char[] fieldSeparator   = { '\t' };
            char[] lineageSeparator = { ';' };

            SampleId   = file.Name;
            _dataTable = DataInterfaces.GetTaxNameRankDataTable();
            var lines = File.ReadAllLines(file.FullName);

            // Index 0 is the header line, so data starts at index 1.
            for (var i = 1; i < lines.Length; i++)
            {
                if (string.IsNullOrWhiteSpace(lines[i]))
                {
                    continue;
                }

                var fields = lines[i].Split(fieldSeparator, StringSplitOptions.RemoveEmptyEntries);
                if (fields.Length != 2)
                {
                    continue;
                }

                // Parse with the invariant culture: the file is machine-generated data, so the
                // result must not depend on the regional settings of the machine running the import.
                // When the value cannot be parsed, TryParse leaves count at 0 and the row is still added.
                double count;
                double.TryParse(
                    fields[1],
                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out count);
                count *= scaleToMillion;

                var rank = "no rank";
                var name = "";
                var segments = fields[0].Split(lineageSeparator, StringSplitOptions.RemoveEmptyEntries);

                // Walk from the most specific segment back towards the root and keep the first
                // segment that actually carries a name.
                // NOTE(review): the loop stops before index 0, so the first segment is never
                // considered and the 'k' case below cannot be reached when the lineage starts
                // with the kingdom. Confirm whether kingdom-only lines are meant to be dropped.
                for (var j = segments.Length - 1; j > 0; j--)
                {
                    // Trim so that "; "-separated lineages still expose the rank letter first.
                    var segment = segments[j].Trim();

                    // Segments of five characters or fewer are skipped; this eliminates empty
                    // placeholders such as "g__" with nothing after the prefix.
                    if (segment.Length <= 5)
                    {
                        continue;
                    }

                    switch (segment[0])
                    {
                    case 'k': rank = "kingdom"; break;
                    case 'p': rank = "phylum"; break;
                    case 'c': rank = "class"; break;
                    case 'o': rank = "order"; break;
                    case 'f': rank = "family"; break;
                    case 'g': rank = "genus"; break;
                    case 's': rank = "species"; break;
                    }

                    // Drop the three-character rank prefix (e.g. "g__").
                    name = segment.Substring(3);
                    break;
                }

                if (name.Length > 3)
                {
                    var row = _dataTable.NewRow();
                    row["tax_rank"]       = rank;
                    row["tax_name"]       = name;
                    row["BaseOneMillion"] = count;
                    _dataTable.Rows.Add(row);
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the taxon table for a sample from the text extracted from a PDF report.
        /// </summary>
        /// <remarks>
        /// The text returned by <c>ReadPdfFile</c> is split into lines. Lines starting with one of
        /// the known header/footer words are ignored. A remaining line yields a row only when it
        /// starts with an integer followed by a space, has a rank code as its third
        /// space-separated token, a name made of the tokens between the rank code and the last
        /// token, and a last token that is a number written with a decimal comma. The number is
        /// multiplied by 10,000 and stored in the <c>BaseOneMillion</c> column.
        /// </remarks>
        /// <param name="file">PDF file to read; its name becomes <c>SampleId</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
        public XenoGene(FileInfo file)
        {
            if (file == null)
            {
                throw new ArgumentNullException(nameof(file));
            }

            _dataTable     = DataInterfaces.GetTaxNameRankDataTable();
            SampleDateTime = DateTime.UtcNow;

            char[]   lineSeparators  = { '\r', '\n' };
            char[]   tokenSeparators = { ' ' };
            string[] ignoredPrefixes = { "ORDEN", "INFO", "REPORT", "Xeno", "Inscrita", "Pág" };

            var text  = ReadPdfFile(file.FullName);
            var lines = text.Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries);

            foreach (var line in lines)
            {
                // Header / footer lines are recognised by their first word, ignoring case.
                var ignored = false;
                foreach (var prefix in ignoredPrefixes)
                {
                    if (line.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
                    {
                        ignored = true;
                        break;
                    }
                }
                if (ignored)
                {
                    continue;
                }

                // Data lines start with an integer line number followed by a space.
                var firstSpace = line.IndexOf(' ');
                int lineNo;
                if (firstSpace < 0 ||
                    !int.TryParse(
                        line.Substring(0, firstSpace),
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out lineNo))
                {
                    continue;
                }

                // Without a comma the line cannot contain a decimal-comma amount.
                if (line.IndexOf(',') < 0)
                {
                    continue;
                }

                var parts = line.Split(tokenSeparators, StringSplitOptions.RemoveEmptyEntries);

                // The rank code is read from parts[2]; shorter lines cannot be data rows.
                if (parts.Length < 3)
                {
                    continue;
                }

                var lastItem = parts[parts.Length - 1];
                if (lastItem.IndexOf(',') < 0)
                {
                    continue;
                }

                // The report writes numbers with a decimal comma. Normalise to '.' and parse with
                // the invariant culture so the result does not depend on the machine's locale.
                double amount;
                if (!double.TryParse(
                        lastItem.Replace(',', '.'),
                        System.Globalization.NumberStyles.Float,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out amount))
                {
                    continue;
                }

                // NOTE(review): presumably the report lists percentages, so x10,000 converts to
                // parts per million for the BaseOneMillion column - confirm against a sample report.
                amount *= 10000.0;

                string rank;
                switch (parts[2])
                {
                case "G":
                case "G1":
                case "G2":
                    rank = "genus";
                    break;

                case "C":
                case "C1":
                case "C2":
                    rank = "class";
                    break;

                case "F":
                    rank = "family";
                    break;

                case "S":
                    rank = "species";
                    break;

                case "S1":
                case "S2":
                case "S3":
                    rank = "strain";
                    break;

                case "O":
                    // Lower-case like every other rank written to tax_rank.
                    rank = "order";
                    break;

                case "P":
                    rank = "phylum";
                    break;

                case "F1":
                case "F2":
                case "F3":
                case "O1":
                case "O2":
                case "O3":
                case "P1":
                    rank = "no rank";
                    break;

                case "D1":
                case "D2":
                case "D3":
                case "U":
                case "R":
                case "R1":
                    rank = "skip";
                    break;

                default:
                    rank = "skip";
                    Console.WriteLine($"Unknown {parts[2]}");
                    break;
                }

                // The name is every token after the rank code except the trailing amount.
                var name = "";
                if (parts.Length > 4)
                {
                    name = string.Join(" ", parts, 3, parts.Length - 4);
                }

                if (!rank.Equals("skip", StringComparison.OrdinalIgnoreCase) && amount > 0.0 && !string.IsNullOrWhiteSpace(name))
                {
                    Console.WriteLine(name);
                    var row = _dataTable.NewRow();
                    row["tax_rank"]       = rank;
                    row["tax_name"]       = name;
                    row["BaseOneMillion"] = amount;
                    _dataTable.Rows.Add(row);
                }
            }

            SampleId = file.Name;
        }