/// <summary>
/// Reads cleavage sites from a comma-separated file.
/// </summary>
/// <param name="file">Path passed to <c>FileExtension.ReadList</c> to obtain the lines.</param>
/// <returns>
/// One <see cref="CleavageSite"/> per accepted line, in file order, with
/// <c>MiRNA</c>, <c>Gene</c> and <c>StartAt</c> taken from the first three fields.
/// <c>Extendability</c> is not computed here.
/// </returns>
public static List<CleavageSite> ReadCleavageSite(string file)
{
    List<CleavageSite> sites = new List<CleavageSite>();
    foreach (string line in FileExtension.ReadList(file))
    {
        string[] arr = line.Split(',');

        // A line with fewer than three fields cannot describe a site; skip it
        // instead of failing with IndexOutOfRangeException on arr[2].
        if (arr.Length < 3)
        {
            continue;
        }

        // Skip lines whose third field fails the IsDigit check
        // (presumably header or malformed rows -- confirm against IsDigit).
        if (!arr[2].IsDigit())
        {
            continue;
        }

        CleavageSite site = new CleavageSite();
        site.MiRNA = arr[0];
        site.Gene = arr[1];
        site.StartAt = int.Parse(arr[2]);
        sites.Add(site);
    }

    return sites;
}
/// <summary>
/// Reads underscore-separated cleavage-site keys (<c>miRNA_gene_start</c>) from
/// <paramref name="keyFile"/> and writes one FASTA-style header line per accepted
/// key to <paramref name="fastaFile"/>.
/// </summary>
/// <param name="keyFile">Path of the text file holding one key per line.</param>
/// <param name="fastaFile">Path of the file to create or overwrite.</param>
/// <remarks>
/// Only the header line (<c>&gt;miRNA:[start,end]</c>) is emitted. No sequence line is
/// written because this method has no sequence source.
/// </remarks>
public static void ToFasta(string keyFile, string fastaFile)
{
    List<CleavageSite> sites = new List<CleavageSite>();

    string content;
    using (StreamReader reader = new StreamReader(keyFile))
    {
        content = reader.ReadToEnd();
    }

    // Split on both CR and LF so Windows line endings do not leave a stray '\r'
    // glued to the last field of every line.
    var lines = content.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (string line in lines)
    {
        var arr = line.Split('_');

        // Need at least miRNA, gene and start position.
        if (arr.Length < 3)
        {
            continue;
        }

        if (!arr[2].IsDigit())
        {
            continue;
        }

        CleavageSite site = new CleavageSite();
        site.MiRNA = arr[0];
        site.Gene = arr[1];
        site.StartAt = int.Parse(arr[2]);
        sites.Add(site);
    }

    using (StreamWriter writer = new StreamWriter(fastaFile))
    {
        foreach (CleavageSite site in sites)
        {
            string id = ">" + site.MiRNA + ":[" + site.StartAt.ToString() + "," + site.EndAt.ToString() + "]";

            // Previously the header was built and then discarded, leaving an empty file.
            writer.WriteLine(id);
        }
    }
}
/// <summary>
/// Writes a reactivity file for the half-open position range
/// [<paramref name="startAt"/>, <paramref name="endAt"/>) of the site's gene.
/// </summary>
/// <param name="site">Site whose <c>Gene</c> is passed to <c>Reactivity.GetReactivity</c>.</param>
/// <param name="startAt">First position (inclusive).</param>
/// <param name="endAt">End position (exclusive).</param>
/// <param name="fileName">Destination path handed to <c>FileExtension.Save</c>.</param>
/// <remarks>
/// Each row is "<c>index value</c>" followed by CRLF, where <c>index</c> starts at 1 for
/// <paramref name="startAt"/>. An empty range produces empty content.
/// </remarks>
private static void GenerateShapeFile(CleavageSite site, int startAt, int endAt, string fileName)
{
    // Build the rows in an array and join once; repeated string += is quadratic.
    int rowCount = endAt > startAt ? endAt - startAt : 0;
    string[] rows = new string[rowCount];

    for (int index = 0; index < rowCount; index++)
    {
        int position = startAt + index;
        rows[index] = $"{index + 1} {Reactivity.GetReactivity(site.Gene, position)}\r\n";
    }

    FileExtension.Save(string.Concat(rows), fileName);
}
/// <summary>
/// Builds the cleavage-site list from <c>Config.AllCleavegSiteFile</c>, sorts it by
/// extendability (largest first) and saves two files in <c>Config.WorkingFolder</c>:
/// <c>cleavage_sites_repeat.csv</c> (every accepted site) and <c>cleavage_sites.csv</c>
/// (the result of <c>Combine</c>).
/// </summary>
/// <remarks>
/// Input lines are underscore-separated (<c>miRNA_gene_start</c>). A line is skipped when it
/// has fewer than three fields, when the third field fails <c>IsDigit</c>, or when the gene
/// is not in <c>Config.ValidNames</c>. Extendability is the smaller of the number of
/// positions left of the site and the number right of its 21-position window.
/// </remarks>
public static void GenerateCleavegeSites()
{
    string[] lines = FileExtension.ReadList(Config.AllCleavegSiteFile);
    Console.WriteLine($"Total Cleavage Sites:{lines.Length}");

    List<CleavageSite> sites = new List<CleavageSite>();
    foreach (string line in lines)
    {
        var arr = line.Split('_');

        // Guard against short lines before touching arr[1] / arr[2].
        if (arr.Length < 3)
        {
            continue;
        }

        if (!arr[2].IsDigit())
        {
            continue;
        }

        if (!Config.ValidNames.Contains(arr[1]))
        {
            continue;
        }

        CleavageSite site = new CleavageSite();
        site.MiRNA = arr[0];
        site.Gene = arr[1];
        site.StartAt = int.Parse(arr[2]);

        int extendLeft = site.StartAt - 1;
        int extendRight = Gene.GetCount(site.Gene) - (site.StartAt - 1 + 21);
        site.Extendability = Math.Min(extendLeft, extendRight);

        sites.Add(site);
    }

    // Descending order: the most extendable sites come first.
    sites.Sort((a, b) => -a.Extendability.CompareTo(b.Extendability));

    // File 1: every accepted site, one per line.
    StringBuilder site_content = new StringBuilder();
    foreach (var site in sites)
    {
        site_content.AppendLine(site.ToString());
    }

    FileExtension.Save(site_content.ToString(), Config.WorkingFolder + "\\cleavage_sites_repeat.csv");

    // File 2: the combined sites returned by Combine.
    List<CleavageSite> combinedSites = Combine(sites);
    StringBuilder combined_site_content = new StringBuilder();
    foreach (var site in combinedSites)
    {
        combined_site_content.AppendLine(site.ToString());
    }

    FileExtension.Save(combined_site_content.ToString(), Config.WorkingFolder + "\\cleavage_sites.csv");
}
/// <summary>
/// Reports whether the data loaded for <paramref name="dType"/> has an entry for the
/// site's gene at any position from <c>StartAt - 1</c> through <c>StartAt - 1 + 21</c>.
/// </summary>
/// <param name="site">Site supplying the gene name and start position.</param>
/// <param name="dType">Degradome dataset passed to <c>LoadDict</c>.</param>
/// <returns><c>true</c> as soon as one position is found; otherwise <c>false</c>.</returns>
public static bool HasEfficiency_21(CleavageSite site, DegradomeType dType)
{
    var byGene = LoadDict(dType);
    var positions = byGene[site.Gene];
    int firstPosition = site.StartAt - 1;

    // NOTE(review): the bound is inclusive, so 22 positions (offsets 0..21) are probed.
    // Confirm whether a 21-position window (offsets 0..20) was intended.
    int offset = 0;
    while (offset <= 21)
    {
        if (positions.ContainsKey(firstPosition + offset))
        {
            return true;
        }

        offset++;
    }

    return false;
}
/// <summary>
/// Builds a <see cref="CleavageSite"/> from text whose fields are separated by
/// <c>'_'</c> or <c>','</c>: miRNA, gene, start position and, optionally, extendability.
/// </summary>
/// <param name="s">The text to parse.</param>
/// <returns>
/// A site with <c>MiRNA</c>, <c>Gene</c> and <c>StartAt</c> set; <c>Extendability</c> is
/// set only when a fourth field is present.
/// </returns>
public static CleavageSite Parse(string s)
{
    char[] separators = new char[] { '_', ',' };
    string[] fields = s.Split(separators);

    CleavageSite parsed = new CleavageSite
    {
        MiRNA = fields[0],
        Gene = fields[1],
        StartAt = int.Parse(fields[2])
    };

    bool hasExtendability = fields.Length > 3;
    if (hasExtendability)
    {
        parsed.Extendability = int.Parse(fields[3]);
    }

    return parsed;
}
/// <summary>
/// Reads <c>cleavage_sites.csv</c> from <c>Config.WorkingFolder</c> and, for every
/// <c>DegradomeType</c>, writes three files that bucket the sites by how much
/// efficiency evidence exists for them.
/// </summary>
/// <remarks>
/// For each site the first matching bucket wins:
/// <c>_1</c> when <c>Efficiency.GetEfficiency</c> is greater than zero,
/// <c>_3</c> when <c>Efficiency.HasEfficiency_21</c> is true,
/// <c>_4</c> when <c>Efficiency.HasEfficiency_Gene</c> is true.
/// Sites matching none of these are written to no file.
/// </remarks>
public static void GenerateCleavageSiteFiles_Yang()
{
    List<CleavageSite> sites = new List<CleavageSite>();

    // Use the same "\\"-separated path that the other file paths in this code use;
    // without the separator the path is only correct if WorkingFolder ends in one.
    string[] lines = FileExtension.ReadList(Config.WorkingFolder + "\\cleavage_sites.csv");
    Console.WriteLine($"Total Cleavage Sites:{lines.Length}");

    foreach (string line in lines)
    {
        // Blank lines cannot be parsed into a site.
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        sites.Add(CleavageSite.Parse(line));
    }

    Console.WriteLine($"After filtered by clevage efficiency and reactivity:{sites.Count}");

    // One set of three files per degradome dataset.
    foreach (DegradomeType dType in EnumUtil.GetValues<DegradomeType>())
    {
        StringBuilder site_content_1 = new StringBuilder();
        StringBuilder site_content_3 = new StringBuilder();
        StringBuilder site_content_4 = new StringBuilder();

        foreach (CleavageSite site in sites)
        {
            float efficiency = Efficiency.GetEfficiency(site, dType);
            if (efficiency > 0)
            {
                site_content_1.AppendLine(site.ToStringWithMiRNANames());
            }
            else if (Efficiency.HasEfficiency_21(site, dType))
            {
                site_content_3.AppendLine(site.ToStringWithMiRNANames());
            }
            else if (Efficiency.HasEfficiency_Gene(site, dType))
            {
                site_content_4.AppendLine(site.ToStringWithMiRNANames());
            }
        }

        FileExtension.Save(site_content_1.ToString(), Config.WorkingFolder + "\\cleavage_sites_" + dType + "_1.csv");
        FileExtension.Save(site_content_3.ToString(), Config.WorkingFolder + "\\cleavage_sites_" + dType + "_3.csv");
        FileExtension.Save(site_content_4.ToString(), Config.WorkingFolder + "\\cleavage_sites_" + dType + "_4.csv");
    }
}
/// <summary>
/// Merges sites that share the same gene and start position into a single site whose
/// <c>MiRNA</c> is the <c>'|'</c>-joined list of the merged sites' miRNA names.
/// </summary>
/// <param name="sites">Sites to merge.</param>
/// <returns>
/// One site per distinct (gene, start) pair, in order of first appearance.
/// <c>Extendability</c>, <c>EndAt</c>, <c>Gene</c> and <c>StartAt</c> are copied from the
/// first site of each group.
/// </returns>
private static List<CleavageSite> Combine(List<CleavageSite> sites)
{
    List<CleavageSite> result = new List<CleavageSite>();

    // Group on a composite key. Concatenating gene and start into one string is
    // ambiguous ("AT1" + 23 and "AT12" + 3 both give "AT123") and could merge
    // unrelated sites.
    var groups = sites.GroupBy(site => new { site.Gene, site.StartAt });

    foreach (var group in groups)
    {
        CleavageSite first = group.First();

        CleavageSite cs = new CleavageSite();
        cs.Extendability = first.Extendability;
        cs.EndAt = first.EndAt;
        cs.Gene = first.Gene;
        cs.StartAt = first.StartAt;
        cs.MiRNA = string.Join("|", group.Select(c => c.MiRNA));
        result.Add(cs);
    }

    return result;
}
/// <summary>
/// Reports whether the data loaded for <paramref name="dType"/> holds at least one
/// entry for the site's gene.
/// </summary>
/// <param name="site">Site supplying the gene name.</param>
/// <param name="dType">Degradome dataset passed to <c>LoadDict</c>.</param>
/// <returns><c>true</c> when the gene's entry collection is non-empty.</returns>
public static bool HasEfficiency_Gene(CleavageSite site, DegradomeType dType)
{
    var byGene = LoadDict(dType);
    var geneEntries = byGene[site.Gene];
    bool hasAnyEntry = geneEntries.Count > 0;
    return hasAnyEntry;
}
/// <summary>
/// Returns the efficiency for a site by delegating to the gene/position overload with
/// the two positions <c>StartAt - 1 + 10</c> and <c>StartAt - 1 + 11</c>.
/// </summary>
/// <param name="site">Site supplying the gene name and start position.</param>
/// <param name="dType">Degradome dataset to query.</param>
/// <returns>Whatever the four-argument <c>GetEfficiency</c> overload returns.</returns>
public static float GetEfficiency(CleavageSite site, DegradomeType dType)
{
    int firstPosition = site.StartAt - 1 + 10;
    int secondPosition = site.StartAt - 1 + 11;
    return GetEfficiency(site.Gene, firstPosition, secondPosition, dType);
}
/// <summary>
/// For every <c>DegradomeType</c> and for flank sizes 25 and 50, writes a <c>.seq</c> and
/// a <c>.shape</c> file per cleavage site, folds them with <c>RnaFoldWrapper.Fold</c> and
/// saves the resulting structures to one list file per (length, dataset) pair.
/// Finishes by calling <c>GenerateRnaStructPlots</c>.
/// </summary>
/// <remarks>
/// Sites are read from <c>cleavage_site_{dType}.csv</c> in <c>Config.WorkingFolder</c>.
/// A site is skipped, and the offending bound printed to the console, when its flanked
/// window would start before position 0 or end past the gene sequence.
/// </remarks>
public static void GenerateStructureFiles()
{
    if (!Directory.Exists(Config.CsStrucFolder))
    {
        Directory.CreateDirectory(Config.CsStrucFolder);
    }

    foreach (DegradomeType dType in EnumUtil.GetValues<DegradomeType>())
    {
        string degDir = Config.CsStrucFolder + dType.ToString();
        if (!Directory.Exists(degDir))
        {
            Directory.CreateDirectory(degDir);
        }

        string[] siteLines = FileExtension.ReadList($"{Config.WorkingFolder}\\cleavage_site_{dType}.csv");

        int[] flankSizes = new int[] { 25, 50 };
        foreach (int extend in flankSizes)
        {
            // Window = flank + 21 positions + flank.
            int length = extend * 2 + 21;

            string lengthDir = $"{degDir}\\{length}";
            if (!Directory.Exists(lengthDir))
            {
                Directory.CreateDirectory(lengthDir);
            }

            List<string> dotBrackets = new List<string>();

            for (int i = 0; i < siteLines.Length; i++)
            {
                string siteLine = siteLines[i];
                if (string.IsNullOrWhiteSpace(siteLine))
                {
                    continue;
                }

                CleavageSite site = CleavageSite.Parse(siteLine);
                int anchor = site.StartAt - 1;

                // The window must not begin before the start of the sequence.
                int startAt = anchor - extend;
                if (startAt < 0)
                {
                    Console.WriteLine(startAt);
                    continue;
                }

                // ... nor run past its end.
                int endAt = anchor + extend + 21;
                string fullSequence = Gene.GetSequence(site.Gene);
                if (endAt > fullSequence.Length)
                {
                    Console.WriteLine(endAt);
                    continue;
                }

                // File names use the line index, so skipped sites leave gaps in the numbering.
                string seqPath = $"{lengthDir}\\{i}.seq";
                string shapePath = $"{lengthDir}\\{i}.shape";

                // .seq file: header line plus the window with U replaced by T.
                string window = fullSequence.Substring(startAt, endAt - startAt);
                string seqFileContent = $">{site.Gene}[{startAt},{endAt}]\r\n{window.Replace("U", "T")}";
                FileExtension.Save(seqFileContent, seqPath);

                // .shape file: reactivity for the same window.
                GenerateShapeFile(site, startAt, endAt, shapePath);

                // Structure via ViennaRNA / RNAfold (RNAFold --shape = x.shape < x.seq).
                string dotBracket = RnaFoldWrapper.Fold(shapePath, seqPath);
                dotBrackets.Add(dotBracket);
            }

            FileExtension.SaveList($"{Config.CsStrucFolder}\\cs_structure_{length}_{dType}.txt", dotBrackets);
        }
    }

    GenerateRnaStructPlots();
}
/// <summary>
/// Reads <c>cleavage_sites.csv</c> from <c>Config.WorkingFolder</c> and, for every
/// <c>DegradomeType</c>, writes six files: the retained sites, their efficiencies, the
/// natural log of those efficiencies, and reactivity rows for the 21-position site with
/// flanks of 0, 25 and 50 positions.
/// </summary>
/// <remarks>
/// Iteration over the sites stops at the first one whose <c>Extendability</c> is below 50,
/// so the input is assumed to be sorted by extendability, largest first. Sites whose
/// efficiency is exactly zero are left out of all six files.
/// </remarks>
public static void GenerateCleavageSiteFiles()
{
    List<CleavageSite> sites = new List<CleavageSite>();

    // Use the same "\\"-separated path that the other file paths in this code use;
    // without the separator the path is only correct if WorkingFolder ends in one.
    string[] lines = FileExtension.ReadList(Config.WorkingFolder + "\\cleavage_sites.csv");
    Console.WriteLine($"Total Cleavage Sites:{lines.Length}");

    foreach (string line in lines)
    {
        // Blank lines cannot be parsed into a site.
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        sites.Add(CleavageSite.Parse(line));
    }

    Console.WriteLine($"After filtered by cleavage efficiency and reactivity:{sites.Count}");

    // One set of files per degradome dataset.
    foreach (DegradomeType dType in EnumUtil.GetValues<DegradomeType>())
    {
        // Builders instead of string +=, which is quadratic over many sites.
        var siteContent = new System.Text.StringBuilder();
        var efficiencyContent = new System.Text.StringBuilder();
        var efficiencyLogContent = new System.Text.StringBuilder();
        var reactivityContent = new System.Text.StringBuilder();
        var reactivity25Content = new System.Text.StringBuilder();
        var reactivity50Content = new System.Text.StringBuilder();

        foreach (CleavageSite site in sites)
        {
            // Stop at the first site without room for a 50-position flank.
            if (site.Extendability < 50)
            {
                break;
            }

            // Zero efficiency means this is not treated as a cleavage site here.
            float efficiency = Efficiency.GetEfficiency(site, dType);
            if (efficiency == 0)
            {
                continue;
            }

            siteContent.Append(site.ToString()).Append("\n");
            efficiencyContent.Append(efficiency.ToString()).Append("\n");
            efficiencyLogContent.Append(Math.Log(efficiency).ToString()).Append("\n");

            reactivityContent.Append(BuildReactivityRow(site, 0)).Append("\n");
            reactivity25Content.Append(BuildReactivityRow(site, 25)).Append("\n");
            reactivity50Content.Append(BuildReactivityRow(site, 50)).Append("\n");
        }

        FileExtension.Save(siteContent.ToString(), Config.WorkingFolder + "\\cleavage_site_" + dType + ".csv");
        FileExtension.Save(efficiencyContent.ToString(), Config.WorkingFolder + "\\cs_efficiencies_" + dType + ".csv");
        FileExtension.Save(efficiencyLogContent.ToString(), Config.WorkingFolder + "\\cs_efficiencies_log_" + dType + ".csv");
        FileExtension.Save(reactivityContent.ToString(), Config.WorkingFolder + "\\cs_reactivity_" + dType + ".csv");
        FileExtension.Save(reactivity25Content.ToString(), Config.WorkingFolder + "\\cs_reactivity_" + dType + "_25.csv");
        FileExtension.Save(reactivity50Content.ToString(), Config.WorkingFolder + "\\cs_reactivity_" + dType + "_50.csv");
    }
}

// Builds one comma-separated reactivity row covering the 21 site positions plus
// `flank` positions on each side, with trailing commas trimmed.
private static string BuildReactivityRow(CleavageSite site, int flank)
{
    var row = new System.Text.StringBuilder();
    for (int j = 0 - flank; j < 21 + flank; j++)
    {
        row.Append(Reactivity.GetReactivity(site.Gene, site.StartAt - 1 + j) + ",");
    }

    return row.ToString().TrimEnd(',');
}