/// <summary>
/// Populates the taxonomy table from a tab-separated text file.
/// </summary>
/// <remarks>
/// The first line of the file is treated as a header and skipped. Every other non-blank line
/// must contain exactly two tab-separated fields: a ';'-separated lineage whose entries carry a
/// three-character prefix (for example "g__Name") and a numeric value. Lines with any other
/// field count are ignored. For each accepted line, the deepest lineage entry that has a usable
/// name produces one row with the columns <c>tax_rank</c>, <c>tax_name</c> and
/// <c>BaseOneMillion</c> (the numeric value multiplied by 1,000,000).
/// </remarks>
/// <param name="file">The file to read. Its name is stored in <c>SampleId</c>.</param>
public AmericanGut(FileInfo file)
{
    SampleId = file.Name;
    char[] fieldSeparator = { '\t' };
    char[] lineageSeparator = { ';' };
    _dataTable = DataInterfaces.GetTaxNameRankDataTable();
    var lines = File.ReadAllLines(file.FullName);

    // Index 0 is the header line, so start at 1.
    for (var i = 1; i < lines.Length; i++)
    {
        if (string.IsNullOrWhiteSpace(lines[i]))
        {
            continue;
        }

        var fields = lines[i].Split(fieldSeparator, StringSplitOptions.RemoveEmptyEntries);
        if (fields.Length != 2)
        {
            continue;
        }

        // The file is machine-generated, so the number is parsed with the invariant culture.
        // Parsing with the current culture would misread "0.0123" on machines whose locale
        // uses '.' as a group separator. A value that cannot be parsed is kept as 0.
        double count;
        double.TryParse(
            fields[1],
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture,
            out count);
        // NOTE(review): presumably the value is a fraction of the sample, so this scales it
        // to parts per million - confirm against the file format.
        count = count * 1000000;

        var rank = "no rank";
        var name = "";
        var lineage = fields[0].Split(lineageSeparator, StringSplitOptions.RemoveEmptyEntries);

        // Walk back from the deepest lineage entry and stop at the first one with a name.
        // NOTE(review): the loop stops before index 0, so the first lineage entry is never
        // used as the name. Kept as in the original - confirm whether that is intended.
        for (var j = lineage.Length - 1; j > 0; j--)
        {
            var entry = lineage[j];

            // Entries of five characters or fewer are treated as empty (e.g. "g__" with
            // nothing after the prefix).
            if (entry.Length <= 5)
            {
                continue;
            }

            // The first character of the prefix selects the rank; anything else stays "no rank".
            switch (entry[0])
            {
                case 'k':
                    rank = "kingdom";
                    break;
                case 'p':
                    rank = "phylum";
                    break;
                case 'c':
                    rank = "class";
                    break;
                case 'o':
                    rank = "order";
                    break;
                case 'f':
                    rank = "family";
                    break;
                case 'g':
                    rank = "genus";
                    break;
                case 's':
                    rank = "species";
                    break;
            }

            // Drop the three-character prefix (e.g. "g__").
            name = entry.Substring(3);
            break;
        }

        // Names of three characters or fewer are not recorded.
        if (name.Length > 3)
        {
            var row = _dataTable.NewRow();
            row["tax_rank"] = rank;
            row["tax_name"] = name;
            row["BaseOneMillion"] = count;
            _dataTable.Rows.Add(row);
        }
    }
}
// Line prefixes (compared case-insensitively) that mark report lines carrying no taxonomy data.
private static readonly string[] BoilerplatePrefixes = { "ORDEN", "INFO", "REPORT", "Xeno", "Inscrita", "Pág" };

/// <summary>
/// Populates the taxonomy table from the text extracted from a PDF report.
/// </summary>
/// <remarks>
/// The text returned by <c>ReadPdfFile</c> is split into lines. A line is processed only when it
/// does not start with one of the boilerplate prefixes, its first space-delimited token parses
/// as an integer, it contains a comma, and its last token is a number written with a decimal
/// comma. For such a line the third token is a rank code, the tokens between the rank code and
/// the last token form the name, and the last token is the amount. The second token is not used.
/// One row (<c>tax_rank</c>, <c>tax_name</c>, <c>BaseOneMillion</c>) is added when the rank is
/// not "skip", the amount is positive and the name is not blank.
/// <c>SampleDateTime</c> is set to the current UTC time and <c>SampleId</c> to the file name.
/// </remarks>
/// <param name="file">The PDF file to read.</param>
public XenoGene(FileInfo file)
{
    _dataTable = DataInterfaces.GetTaxNameRankDataTable();
    SampleDateTime = DateTime.UtcNow;
    char[] lineSeparators = { '\r', '\n' };
    char[] tokenSeparator = { ' ' };
    var text = ReadPdfFile(file.FullName);
    var lines = text.Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries);

    foreach (var line in lines)
    {
        if (IsReportBoilerplate(line))
        {
            continue;
        }

        // Data lines start with an integer token followed by a space.
        var firstSpace = line.IndexOf(' ');
        var leadingToken = firstSpace < 0 ? "na" : line.Substring(0, firstSpace);
        int lineNo;
        if (!int.TryParse(leadingToken, out lineNo))
        {
            continue;
        }

        // Without a comma there is no decimal-comma amount on the line.
        if (line.IndexOf(',') < 0)
        {
            continue;
        }

        var parts = line.Split(tokenSeparator, StringSplitOptions.RemoveEmptyEntries);
        var lastItem = parts[parts.Length - 1];
        if (!lastItem.Contains(","))
        {
            continue;
        }

        // The amount uses a decimal comma. Convert it to a decimal point and parse with the
        // invariant culture so the result does not depend on the machine's locale (parsing
        // "0.5" with a comma-decimal current culture would treat '.' as a group separator).
        double amount;
        var parsed = double.TryParse(
            lastItem.Replace(",", "."),
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture,
            out amount);
        if (!parsed)
        {
            continue;
        }

        // NOTE(review): presumably the report value is a percentage, so x10,000 yields parts
        // per million - confirm against the report format.
        amount = amount * 10000.0;

        // A line too short to carry a rank code cannot be classified; skipping it avoids an
        // IndexOutOfRangeException on parts[2].
        if (parts.Length < 3)
        {
            continue;
        }

        var rank = RankFromCode(parts[2]);

        // The name is every token between the rank code and the trailing amount.
        var name = parts.Length > 4
            ? string.Join(" ", parts, 3, parts.Length - 4)
            : "";

        if (!rank.Equals("skip", StringComparison.OrdinalIgnoreCase)
            && amount > 0.0
            && !string.IsNullOrWhiteSpace(name))
        {
            Console.WriteLine(name);
            var row = _dataTable.NewRow();
            row["tax_rank"] = rank;
            row["tax_name"] = name;
            row["BaseOneMillion"] = amount;
            _dataTable.Rows.Add(row);
        }
    }

    SampleId = file.Name;
}

// Returns true when the line starts (ignoring case) with one of the boilerplate prefixes.
private static bool IsReportBoilerplate(string line)
{
    foreach (var prefix in BoilerplatePrefixes)
    {
        if (line.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}

// Maps a rank code from the report to a taxonomy rank name, or to "skip" for codes that must
// not produce a row. Unknown codes are reported on the console and skipped.
private static string RankFromCode(string code)
{
    switch (code)
    {
        case "P":
            return "phylum";
        case "C":
        case "C1":
        case "C2":
            return "class";
        case "O":
            // Lower-cased to match every other rank name emitted here.
            return "order";
        case "F":
            return "family";
        case "G":
        case "G1":
        case "G2":
            return "genus";
        case "S":
            return "species";
        case "S1":
        case "S2":
        case "S3":
            return "strain";
        case "F1":
        case "F2":
        case "F3":
        case "O1":
        case "O2":
        case "O3":
        case "P1":
            return "no rank";
        case "D1":
        case "D2":
        case "D3":
        case "U":
        case "R":
        case "R1":
            return "skip";
        default:
            Console.WriteLine($"Unknown {code}");
            return "skip";
    }
}