Ejemplo n.º 1
0
    /// <summary>
    /// Advances the population by one generation: scores the current genomes,
    /// builds a replacement list of children (untouched champions, cloned
    /// genomes, then crossovers), mutates the non-champion children and finally
    /// rebuilds the species and the neural nets.
    /// </summary>
    public void NextGeneration()
    {
        CalculateFitness();
        generation++;

        survivors.Clear();

        int championCount = (int)(GenomeUtils.POP_SIZE * GenomeUtils.CHAMPION_RATE);
        int asexualCount  = (int)(GenomeUtils.POP_SIZE * GenomeUtils.ASEXUAL_RATE);

        List <Genome> offspring = new List <Genome>();

        // Champions: the leading CHAMPION_RATE share of genomes is cloned and
        // also recorded as survivors. These clones are never mutated below.
        // NOTE(review): presumably 'genomes' is ordered best-first after CalculateFitness() - confirm.
        for (int c = 0; c < championCount; c++)
        {
            Genome champion = GenomeUtils.Clone(genomes[c]);
            offspring.Add(champion);
            survivors.Add(champion);
        }

        // Asexual offspring: plain clones of the leading ASEXUAL_RATE share;
        // they only differ from their parent through the mutation pass below.
        for (int a = 0; a < asexualCount; a++)
        {
            offspring.Add(GenomeUtils.Clone(genomes[a]));
        }

        // Fill the rest of the population with crossovers of two genomes
        // drawn from the same, randomly chosen species.
        while (offspring.Count < GenomeUtils.POP_SIZE)
        {
            Species pool = species[Random.Range(0, species.Count)];

            Genome firstParent  = pool.GetRandomGenome();
            Genome secondParent = pool.GetRandomGenome();

            offspring.Add(GenomeUtils.Crossover(firstParent, secondParent));
        }

        // Mutation pass over every child that is not a champion. Each kind of
        // mutation is rolled independently, in this fixed order.
        for (int k = championCount; k < offspring.Count; k++)
        {
            Genome child = offspring[k];

            if (Random.value < GenomeUtils.MUTATION_RATE)
            {
                child.Mutate();
            }
            if (Random.value < GenomeUtils.ADD_CONNECTION_RATE)
            {
                child.AddConnectionMutation();
            }
            if (Random.value < GenomeUtils.ADD_NODE_RATE)
            {
                child.AddNodeMutation();
            }
        }

        genomes = offspring;

        SetSpecies();
        MakeNNets();
    }
Ejemplo n.º 2
0
    /// <summary>
    /// Rebuilds the species list and the genome-to-species map from scratch.
    /// Each genome joins the first existing species whose mascot is closer than
    /// GenomeUtils.SPECIES_DIST; otherwise it founds a new species.
    /// </summary>
    void SetSpecies()
    {
        species.Clear();
        speciesMap.Clear();

        foreach (Genome candidate in genomes)
        {
            // First compatible species in list order wins.
            Species home = null;
            for (int k = 0; k < species.Count && home == null; k++)
            {
                Species existing = species[k];
                if (GenomeUtils.CompatibilityDistance(candidate, existing.mascot, C1, C2, C3) < GenomeUtils.SPECIES_DIST)
                {
                    home = existing;
                }
            }

            if (home == null)
            {
                // Nothing compatible: the genome is handed to the constructor of a new species.
                home = new Species(candidate);
                species.Add(home);
            }
            else
            {
                home.AddMember(candidate);
            }

            speciesMap.Add(candidate, home);
        }

        Debug.Log("Gen: " + generation + ", Population: " + population + ", Species: " + species.Count);
    }
Ejemplo n.º 3
0
    /// <summary>
    /// Chooses the heading and thrust for this frame and passes them to
    /// SetAcceleration. In AI mode the values come from the brain's outputs;
    /// in every other mode they are derived from the mouse position.
    /// </summary>
    void GetTargetDestination()
    {
        if (mode != Mode.AI)
        {
            // Manual control: steer towards the cursor, thrust grows with the
            // cursor distance and is capped at 1.
            Vector3 cursor = Camera.main.ScreenToWorldPoint(Input.mousePosition);
            float   dx     = cursor.x - player.x;
            float   dy     = cursor.y - player.y;

            float heading = Mathf.Atan2(dy, dx);
            float reach   = Mathf.Sqrt(dx * dx + dy * dy);

            reach = Mathf.Clamp(reach, 0f, 1f);

            SetAcceleration(heading, reach);
            return;
        }

        float[] signals = brain.GetOutput();

        // Output 0 is scaled by 2*PI to give the angle; output 1 is squashed
        // through the sigmoid to give the speed.
        float steer    = (signals[0]) * 2 * Mathf.PI;
        float throttle = GenomeUtils.Sigmoid(signals[1]);
        SetAcceleration(steer, throttle);

        // Optional third output: split when its sigmoid reaches 0.8.
        if (signals.Length > 2)
        {
            float splitUrge = GenomeUtils.Sigmoid(signals[2]);
            if (splitUrge >= 0.8f)
            {
                Split();
            }
        }
    }
Ejemplo n.º 4
0
 /// <summary>
 /// Sorts the groups in place with GenomeUtils.SortChromosome, but only when
 /// every group name matches the TRNA pattern; otherwise the list is left
 /// untouched. Capture group 1 of the match is the first sort key and capture
 /// group 2, parsed as an integer, is the second.
 /// </summary>
 public static void SortTRna(this List<FeatureItemGroup> items)
 {
   bool everyNameMatches = items.All(m => TRNA.Match(m.Name).Success);
   if (!everyNameMatches)
   {
     return;
   }

   GenomeUtils.SortChromosome(
     items,
     m => TRNA.Match(m.Name).Groups[1].Value,
     m => int.Parse(TRNA.Match(m.Name).Groups[2].Value));
 }
        /// <summary>
        /// Reads the input items, merges them into ranges per sequence name and
        /// writes two tab-separated reports: the per-item listing
        /// (options.OutputFile) and a per-range matrix with one column per input
        /// file (options.OutputFile + ".cnvr").
        /// </summary>
        /// <returns>The paths of the two files written.</returns>
        public override IEnumerable <string> Process()
        {
            var data = new CnMOPsItemReader().ReadFromFile(options.InputFile);

            Dictionary <string, List <ItemRange> > result = MergeRange(data);

            var seqnames = result.Keys.OrderBy(m => m).ToList();
            GenomeUtils.SortChromosome(seqnames, l => l, l => 1);

            // Report 1: one line per item of every merged range.
            using (var sw = new StreamWriter(options.OutputFile))
            {
                sw.WriteLine("seqname\tstart\tend\tlocus\tsample\tsample_start\tsample_end\tsample_type");

                foreach (var seqname in seqnames)
                {
                    foreach (var range in result[seqname])
                    {
                        foreach (var cn in range.Items)
                        {
                            // With IgnoreCN1CN3 set, CN1 and CN3 calls are left out.
                            bool ignored = options.IgnoreCN1CN3 && (cn.CN.Equals("CN1") || cn.CN.Equals("CN3"));
                            if (!ignored)
                            {
                                sw.WriteLine("{0}\t{1}\t{2}\t{0}:{1}-{2}\t{3}\t{4}\t{5}\t{6}",
                                             seqname, range.Start, range.End, cn.FileName, cn.Start, cn.End, cn.CN);
                            }
                        }
                    }
                }
            }

            var filenames = data.Select(d => d.FileName).Distinct().OrderBy(l => l).ToArray();

            // Report 2: one line per merged range, one column per input file.
            using (var sw = new StreamWriter(options.OutputFile + ".cnvr"))
            {
                sw.WriteLine("seqname\tstart\tend\tfile\t{0}", filenames.Merge("\t"));

                foreach (var seqname in seqnames)
                {
                    foreach (var range in result[seqname])
                    {
                        // A file with no item in this range is reported as "CN2".
                        var cns = filenames
                                  .Select(filename => range.Items.FirstOrDefault(l => l.FileName.Equals(filename)))
                                  .Select(cn => cn == null ? "CN2" : cn.CN)
                                  .ToArray();

                        // With IgnoreCN1CN3 set, ranges made only of CN1/CN2/CN3 are left out.
                        bool ignored = options.IgnoreCN1CN3 && cns.All(l => l.Equals("CN1") || l.Equals("CN2") || l.Equals("CN3"));
                        if (!ignored)
                        {
                            sw.WriteLine("{0}\t{1}\t{2}\t{0}_{1}_{2}\t{3}",
                                         seqname, range.Start, range.End, cns.Merge("\t"));
                        }
                    }
                }
            }

            return new[] { options.OutputFile, options.OutputFile + ".cnvr" };
        }
Ejemplo n.º 6
0
    // Adds a random connection between two nodes.
    //
    // Two nodes are drawn at random (with replacement). A pair is rejected and
    // redrawn when both nodes have the same type and that type is not Hidden.
    // The connection is oriented so that it never runs Hidden->Input,
    // Output->Input or Output->Hidden. Nothing is added when a connection
    // between the two nodes already exists in either direction.
    //
    // The redraw is done in a loop rather than by recursion so a long run of
    // rejected pairs cannot grow the call stack, and the method returns without
    // doing anything when the genome has no node or no acceptable pair at all.
    public void AddConnectionMutation()
    {
        List <NodeGene> values = Enumerable.ToList(nodes.Values);
        if (values.Count == 0)
        {
            return;
        }

        // An acceptable pair exists only if there is a Hidden node or at least
        // two different node types; otherwise redrawing would never end.
        bool          pairPossible = false;
        NodeGene.Type firstType    = values[0].type;
        foreach (NodeGene candidate in values)
        {
            if (candidate.type == NodeGene.Type.Hidden || candidate.type != firstType)
            {
                pairPossible = true;
                break;
            }
        }
        if (!pairPossible)
        {
            return;
        }

        int index1;
        int index2;
        do
        {
            index1 = Random.Range(0, values.Count);
            index2 = Random.Range(0, values.Count);
        }
        while (values[index1].type == values[index2].type && values[index1].type != NodeGene.Type.Hidden);

        NodeGene node1 = values[index1];
        NodeGene node2 = values[index2];

        // Swap the direction for the three orientations that must not occur.
        bool reversed =
            (node1.type == NodeGene.Type.Hidden && node2.type == NodeGene.Type.Input) ||
            (node1.type == NodeGene.Type.Output && node2.type == NodeGene.Type.Input) ||
            (node1.type == NodeGene.Type.Output && node2.type == NodeGene.Type.Hidden);

        // Drawn before the duplicate check, so the weight draw happens on every call that reaches this point.
        float weight = GenomeUtils.RandomWeight();

        foreach (ConnectionGene con in connections.Values)
        {
            bool sameDirection     = con.inNode == node1.id && con.outNode == node2.id;
            bool oppositeDirection = con.inNode == node2.id && con.outNode == node1.id;
            if (sameDirection || oppositeDirection)
            {
                // Already connected: leave the genome unchanged.
                return;
            }
        }

        ConnectionGene newCon = new ConnectionGene(reversed ? node2.id : node1.id, reversed ? node1.id : node2.id, weight, true, Counter.NextConnection());

        connections.Add(newCon.innovation, newCon);
    }
Ejemplo n.º 7
0
 /// <summary>
 /// Mutates the weight of every connection gene. With probability
 /// GenomeUtils.PERTURB_RATE the weight is shifted by a random amount drawn
 /// between WEIGHT_MIN / 2 and WEIGHT_MAX / 2; otherwise it is replaced by a
 /// fresh random weight. Perturbed weights are not clamped afterwards.
 /// </summary>
 public void Mutate()
 {
     foreach (ConnectionGene con in connections.Values)
     {
         if (Random.value < GenomeUtils.PERTURB_RATE) //TODO CHANGE TO NORMAL DISTRIBUTION
         {
             // Single compound assignment (the old code assigned the result twice).
             con.weight += Random.Range(GenomeUtils.WEIGHT_MIN / 2, GenomeUtils.WEIGHT_MAX / 2);
         }
         else
         {
             con.weight = GenomeUtils.RandomWeight();
         }
     }
 }
        /// <summary>
        /// Reads the sequences from the input file, orders them by name and
        /// writes them to the output file in FASTA format.
        /// Names are split at "_": the part before it (after "chr") is compared
        /// with GenomeUtils.CompareChromosome, the part after it is the suffix.
        /// Sequences without a suffix come before those with one; among
        /// suffixed sequences ties are broken by comparing the suffixes.
        /// </summary>
        /// <returns>The path of the file written.</returns>
        public override IEnumerable <string> Process()
        {
            Progress.SetMessage("Reading sequences from: " + _options.InputFile + "...");
            var seqs = SequenceUtils.Read(_options.InputFile);

            seqs.Sort((left, right) =>
            {
                var leftChr     = left.Name.StringBefore("_").StringAfter("chr");
                var leftSuffix  = left.Name.Contains("_") ? left.Name.StringAfter("_") : string.Empty;
                var rightChr    = right.Name.StringBefore("_").StringAfter("chr");
                var rightSuffix = right.Name.Contains("_") ? right.Name.StringAfter("_") : string.Empty;

                bool leftPlain  = string.IsNullOrWhiteSpace(leftSuffix);
                bool rightPlain = string.IsNullOrWhiteSpace(rightSuffix);

                // Exactly one side lacks a suffix: that side sorts first.
                if (leftPlain != rightPlain)
                {
                    return leftPlain ? -1 : 1;
                }

                var order = GenomeUtils.CompareChromosome(leftChr, rightChr);

                // Both sides carry a suffix and the chromosomes tie: compare the suffixes.
                if (order == 0 && !leftPlain)
                {
                    order = leftSuffix.CompareTo(rightSuffix);
                }
                return order;
            });

            Progress.SetMessage("Writing sequences to: " + _options.OutputFile + "...");
            SequenceUtils.Write(new FastaFormat(), _options.OutputFile, seqs);

            Progress.SetMessage("Finished.");

            return new[] { _options.OutputFile };
        }
Ejemplo n.º 9
0
    /// <summary>
    /// Assigns every genome to a species and records the assignment in a fresh
    /// speciesMap. A genome joins the first species in speciesList whose mascot
    /// is closer than compatiblityThreshold; otherwise it founds a new species.
    /// Afterwards species with no members are removed and every remaining
    /// species picks a new random mascot.
    /// </summary>
    private void AssignSpecies()
    {
        speciesMap = new Dictionary <Genome, Species>();

        foreach (Genome candidate in genomes)
        {
            // First compatible species in list order wins.
            Species home = null;
            for (int k = 0; k < speciesList.Count && home == null; k++)
            {
                Species existing = speciesList[k];
                float   gap      = GenomeUtils.CompatiblityDistance(candidate, existing.GetMascot(), C1, C2, C3);
                if (gap < compatiblityThreshold)
                {
                    home = existing;
                }
            }

            if (home == null)
            {
                home = new Species(candidate);
                speciesList.Add(home);
            }
            else
            {
                home.AddMember(candidate);
            }

            speciesMap.Add(candidate, home);
        }

        System.Random rng = new System.Random();

        // Walk backwards so RemoveAt does not shift the entries still to be visited.
        int index = speciesList.Count - 1;
        while (index >= 0)
        {
            Species current = speciesList[index];
            if (current.GetCount() == 0)
            {
                speciesList.RemoveAt(index);
            }
            else
            {
                current.RandomizeMascot(rng);
            }
            index--;
        }
    }
        /// <summary>
        /// Groups the items into ranges per sequence name. The input list is
        /// first sorted in place by sequence name and start. Each item is then
        /// added to the first existing range of its sequence that it overlaps
        /// (widening that range to cover the item), or starts a new range.
        /// </summary>
        /// <param name="data">Items to merge; this list is reordered by the call.</param>
        /// <returns>The ranges of each sequence name, keyed by sequence name.</returns>
        public static Dictionary <string, List <ItemRange> > MergeRange(List <CnMOPsItem> data)
        {
            GenomeUtils.SortChromosome(data, m => m.Seqname, m => m.Start);

            var merged = new Dictionary <string, List <ItemRange> >();

            foreach (var item in data)
            {
                List <ItemRange> bucket;
                if (!merged.TryGetValue(item.Seqname, out bucket))
                {
                    bucket = new List <ItemRange>();
                    merged[item.Seqname] = bucket;
                }

                // Look for the first range of this sequence that overlaps the item.
                ItemRange host = null;
                foreach (var candidate in bucket)
                {
                    if (candidate.Overlap(item, 0))
                    {
                        host = candidate;
                        break;
                    }
                }

                if (host != null)
                {
                    host.Items.Add(item);
                    host.Start = Math.Min(host.Start, item.Start);
                    host.End   = Math.Max(host.End, item.End);
                }
                else
                {
                    var fresh = new ItemRange();
                    fresh.Items.Add(item);
                    fresh.Seqname = item.Seqname;
                    fresh.Start   = item.Start;
                    fresh.End     = item.End;
                    bucket.Add(fresh);
                }
            }

            return merged;
        }
Ejemplo n.º 11
0
 /// <summary>
 /// Combines the weighted values of all incoming connections into this
 /// node's value, resets each connection after reading it and finally applies
 /// the node's activation. Inputs are multiplied together for the Multiply
 /// activation and summed for every other activation.
 /// </summary>
 public void ReceiveValue()
 {
     bool isProduct = activation == GenomeUtils.Activation.Multiply;

     // Start from the neutral element of the chosen operation.
     if (isProduct)
     {
         value = 1;
     }
     else
     {
         value = 0;
     }

     foreach (Connection incoming in inConnections)
     {
         if (isProduct)
         {
             value *= incoming.value * incoming.weight;
         }
         else
         {
             value += incoming.value * incoming.weight;
         }
         incoming.Reset();
     }

     value = GenomeUtils.Activate(value, activation);
 }
Ejemplo n.º 12
0
    /// <summary>
    /// Creates a genome with the given number of input and output nodes and a
    /// randomly weighted, enabled connection from every input to every output.
    /// The shared Counter is reset first.
    /// </summary>
    /// <param name="inputNodes">Number of input nodes to create.</param>
    /// <param name="outputNodes">Number of output nodes to create.</param>
    public Genome(int inputNodes, int outputNodes)
    {
        Counter.Reset();

        nodes       = new Dictionary <int, NodeGene>();
        connections = new Dictionary <int, ConnectionGene>();

        int createdInputs = 0;
        while (createdInputs < inputNodes)
        {
            AddNodeGene(new NodeGene(NodeGene.Type.Input, Counter.NextNode(), GenomeUtils.Activation.None));
            createdInputs++;
        }

        // Connections refer to inputs as 1..inputNodes and to outputs as
        // inputNodes+1..inputNodes+outputNodes.
        // NOTE(review): this assumes Counter.NextNode() hands out 1, 2, 3, ... after Reset() - confirm.
        int lastOutputId = inputNodes + outputNodes;
        for (int outputId = inputNodes + 1; outputId <= lastOutputId; outputId++)
        {
            AddNodeGene(new NodeGene(NodeGene.Type.Output, Counter.NextNode(), GenomeUtils.Activation.None));

            for (int inputId = 1; inputId <= inputNodes; inputId++)
            {
                AddConnectionGene(new ConnectionGene(inputId, outputId, GenomeUtils.RandomWeight(), true, Counter.NextConnection()));
            }
        }
    }
Ejemplo n.º 13
0
        /// <summary>
        /// Converts a PLINK data set into one ".gen" file (plus a ".sample" file)
        /// per chromosome, naming SNPs and alleles from the dbSNP file.
        /// Along the way it writes OutputPrefix + ".namemap" (new name / old name
        /// pairs) and OutputPrefix + ".stat" (one line per SNP with the suggested
        /// strand action).
        /// </summary>
        /// <returns>
        /// The stat file followed by the .gen files. The .namemap and .sample
        /// files are written but are not part of the returned list.
        /// </returns>
        public override IEnumerable <string> Process()
        {
            var result = new List <string>();

            // The SNP list is read from the .bim file that sits next to the input file.
            var bimfile = Path.ChangeExtension(options.InputFile, ".bim");

            var snps = PlinkLocus.ReadFromBimFile(bimfile, false, false);

            // Drop every locus for which IsIndel or IsMissing returns true.
            snps.RemoveAll(m => IsIndel(m) || IsMissing(m));

            // Only the first element of Allele1 is kept; Allele2 is copied as is.
            var snpItems = (from snp in snps
                            select new SNPItem()
            {
                Chrom = snp.Chromosome,
                Name = snp.MarkerId,
                Position = snp.PhysicalPosition,
                Allele1 = snp.Allele1[0],
                Allele2 = snp.Allele2
            }).ToList();

            // NOTE(review): presumably this renames the items to their dbSNP ids and
            // returns new-name -> old-name pairs (see the header written below) - confirm.
            var nameMap = snpItems.FillDbsnpIdByPosition(options.DbsnpFile, this.Progress);

            using (var sw = new StreamWriter(options.OutputPrefix + ".namemap"))
            {
                sw.WriteLine("NewName\tOldName");
                foreach (var n in nameMap)
                {
                    sw.WriteLine("{0}\t{1}", n.Key, n.Value);
                }
            }

            //remove all snps without corresponding dbsnp entry
            // (these are recognised by a blank DbsnpRefAllele)
            snpItems.RemoveAll(m => m.DbsnpRefAllele == ' ');

            var nameDic = snpItems.ToGroupDictionary(m => m.Name);

            // Names shared by several SNPs are only reported on stderr; nothing is removed.
            foreach (var n in nameDic)
            {
                if (n.Value.Count > 1)
                {
                    Console.Error.WriteLine("Duplicated SNP:" + n.Key);
                    foreach (var v in n.Value)
                    {
                        Console.Error.WriteLine("{0}:{1}-{2}:{3},{4}:{5},{6}", n.Key, v.Chrom, v.Position, v.Allele1, v.Allele2, v.DbsnpRefAllele, v.DbsnpAltAllele);
                    }
                }
            }

            // Both annotation steps are optional and run only when their file exists.
            if (File.Exists(options.G1000File))
            {
                snpItems.FindAllele2FrequencyFrom1000GomeByName(options.G1000File, this.Progress);
            }

            if (File.Exists(options.FastaFile))
            {
                snpItems.FillReferenceAlleleFromFasta(options.FastaFile, this.Progress);
            }

            // Suggested action per SNP name, filled while the stat file is written.
            Dictionary <string, StrandAction> actionMap = new Dictionary <string, StrandAction>();

            var statFile = options.OutputPrefix + ".stat";

            result.Add(statFile);
            using (var sw = new StreamWriter(statFile))
            {
                sw.WriteLine("Name\tChromosome\tPosition\tSource_Allele1\tSource_Allele2\tReference_Allele\tDbsnp_RefAllele\tDbsnp_AltAllele\tDbsnp_IsReversed\tG1000_RefAllele\tG1000_AltAllele\tG1000_MAF\tAction");

                foreach (var v in snpItems)
                {
                    StrandAction action = v.SuggestAction();
                    sw.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11:0.####}\t{12}", v.Name, v.Chrom, v.Position, v.Allele1, v.Allele2, v.RefChar, v.DbsnpRefAllele, v.DbsnpAltAllele, v.DbsnpIsReversed, v.G1000Allele1, v.G1000Allele2, v.G1000Allele2Frequency, action);
                    // With duplicated names the last SNP written wins.
                    actionMap[v.Name] = action;
                }
            }

            using (var reader = new PlinkBedRandomFile(options.InputFile)
            {
                Progress = this.Progress
            })
            {
                var data = reader.Data;

                // One .gen / .sample pair per chromosome; the chromosome is zero-padded to two characters in the file name.
                var chrs = (from v in snpItems select v.Chrom).Distinct().OrderBy(m => m).ToArray();
                foreach (var chr in chrs)
                {
                    var genfile = string.Format("{0}.{1}.gen", options.OutputPrefix, chr.ToString().PadLeft(2, '0'));
                    result.Add(genfile);
                    var map = FileUtils.ChangeExtension(genfile, ".sample");

                    // The same individual list is written for every chromosome.
                    new GwasSampleFormat().WriteToFile(map, data.Individual);

                    //save gen file
                    using (var sw = new StreamWriter(genfile))
                    {
                        sw.NewLine = Environment.NewLine;
                        var chrItems = snpItems.Where(m => m.Chrom == chr).ToList();
                        GenomeUtils.SortChromosome(chrItems, m => chr.ToString(), m => m.Position);
                        foreach (var snp in chrItems)
                        {
                            // Genotypes are looked up through the name stored in nameMap for this SNP.
                            var ldata  = reader.Read(nameMap[snp.Name]);
                            var action = actionMap[snp.Name];

                            sw.Write("{0} {1} {2} {3} {4}", snp.Chrom, snp.Name, snp.Position, snp.DbsnpRefAllele, snp.DbsnpAltAllele);
                            for (int individualIndex = 0; individualIndex < data.Individual.Count; individualIndex++)
                            {
                                // Each individual contributes three columns; a missing genotype is all zeros.
                                if (PlinkData.IsMissing(ldata[0, individualIndex], ldata[1, individualIndex]))
                                {
                                    sw.Write(" 0 0 0");
                                }
                                else
                                {
                                    // Translate the two boolean allele flags into dbSNP alleles.
                                    // For Switch and FlipSwitch a true flag means the alt allele,
                                    // for every other action it means the ref allele.
                                    char alle1, alle2;
                                    if (StrandAction.Switch == action || StrandAction.FlipSwitch == action)
                                    {
                                        alle1 = ldata[0, individualIndex] ? snp.DbsnpAltAllele : snp.DbsnpRefAllele;
                                        alle2 = ldata[1, individualIndex] ? snp.DbsnpAltAllele : snp.DbsnpRefAllele;
                                    }
                                    else
                                    {
                                        alle1 = ldata[0, individualIndex] ? snp.DbsnpRefAllele : snp.DbsnpAltAllele;
                                        alle2 = ldata[1, individualIndex] ? snp.DbsnpRefAllele : snp.DbsnpAltAllele;
                                    }

                                    // Columns are: both ref, one of each, both alt.
                                    if (alle1 != alle2)
                                    {
                                        sw.Write(" 0 1 0");
                                    }
                                    else if (alle1 == snp.DbsnpRefAllele)
                                    {
                                        sw.Write(" 1 0 0");
                                    }
                                    else
                                    {
                                        sw.Write(" 0 0 1");
                                    }
                                }
                            }
                            sw.WriteLine();
                        }
                    }
                }
            }

            return(result);
        }
Ejemplo n.º 14
0
    /// <summary>
    /// Loads a genome from a text file and replaces the whole population with
    /// it, then rebuilds the species and the neural nets.
    ///
    /// The file consists of a "Nodes" section with lines of the form
    /// "id,type,activation" and a "Connections" section with lines of the form
    /// "innovation,expressed,inNode,outNode,weight". Blank lines are skipped.
    /// A data line that appears before either section header is reported with
    /// Debug.LogError and ignored.
    /// </summary>
    /// <param name="filename">Path of the genome file to read.</param>
    /// <param name="all">
    /// When true the rest of the population is filled with clones of the loaded
    /// genome; when false it is filled with new genomes built from
    /// inputNodes / outputNodes.
    /// </param>
    public void LoadGenome(string filename, bool all)
    {
        Genome genome = new Genome();

        using (StreamReader reader = new StreamReader(filename))
        {
            bool   node       = false;
            bool   connection = false;
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                // A blank line (for example a trailing empty line) carries no
                // data; parsing it below would throw a FormatException.
                if (line.Trim().Length == 0)
                {
                    continue;
                }

                if (line.Equals("Nodes"))
                {
                    node       = true;
                    connection = false;
                }
                else if (line.Equals("Connections"))
                {
                    node       = false;
                    connection = true;
                }
                else
                {
                    string[] info = line.Split(',');

                    if (node)
                    {
                        int                    id         = int.Parse(info[0]);
                        NodeGene.Type          type       = (NodeGene.Type)System.Enum.Parse(typeof(NodeGene.Type), info[1]);
                        GenomeUtils.Activation activation = (GenomeUtils.Activation)System.Enum.Parse(typeof(GenomeUtils.Activation), info[2]);
                        genome.AddNodeGene(new NodeGene(type, id, activation));
                        // The counter is set from every node line, so it ends up at the id of the last one read.
                        Counter.SetNodeCounter(id);
                    }
                    else if (connection)
                    {
                        int   innovation = int.Parse(info[0]);
                        bool  expressed  = bool.Parse(info[1]);
                        int   inNode     = int.Parse(info[2]);
                        int   outNode    = int.Parse(info[3]);
                        float weight     = float.Parse(info[4]);
                        genome.AddConnectionGene(new ConnectionGene(inNode, outNode, weight, expressed, innovation));
                        Counter.SetConnectionCounter(innovation);
                    }
                    else
                    {
                        Debug.LogError("Invalid genome file");
                    }
                }
            }
        }

        genomes.Clear();
        genomes.Add(genome);

        // The loaded genome occupies slot 0; fill the remaining POP_SIZE - 1 slots.
        if (all)
        {
            for (int i = 1; i < GenomeUtils.POP_SIZE; i++)
            {
                genomes.Add(GenomeUtils.Clone(genome));
            }
        }
        else
        {
            // NOTE(review): if the Genome(int, int) constructor resets Counter, the
            // counter values set while loading are lost in this branch - confirm.
            for (int i = 1; i < GenomeUtils.POP_SIZE; i++)
            {
                genomes.Add(new Genome(inputNodes, outputNodes));
            }
        }

        SetSpecies();
        MakeNNets();
    }
        /// <summary>
        /// Reads the GTF items, filters and names them, and writes them as
        /// six-column tab-separated lines (start shifted by -1, score fixed at 0)
        /// to options.OutputFile.
        /// Items with feature "region" are dropped, as is any item for which
        /// Contains(...) against an earlier item returns -1. The remaining items
        /// are grouped by location, and within each group the gene / exon / CDS
        /// entries are pruned before names are assigned.
        /// </summary>
        /// <returns>The path of the file written.</returns>
        public override IEnumerable <string> Process()
        {
            var items = ReadGtfItems();

            items.RemoveAll(m => m.Feature.Equals("region"));

            // Walk backwards so removing the current item does not disturb the
            // earlier items it is still to be compared against.
            for (int current = items.Count - 1; current > 0; current--)
            {
                for (int earlier = current - 1; earlier >= 0; earlier--)
                {
                    if (items[current].Contains(items[earlier]) == -1)
                    {
                        items.RemoveAt(current);
                        break;
                    }
                }
            }

            var groups = items.ToGroupDictionary(m => m.GetLocation());

            foreach (var g in groups.Values)
            {
                bool hasOtherFeature = g.Any(l => !(IsCDS(l) || IsExon(l) || IsGene(l)));
                if (hasOtherFeature)
                {
                    // A more specific feature shares this location: keep only those
                    // and name them from the ID (plus the product when it has no space).
                    g.RemoveAll(l => IsCDS(l) || IsExon(l) || IsGene(l));
                    foreach (var entry in g)
                    {
                        entry.Name = entry.Feature + ":" + entry.Attributes.StringAfter("ID=").StringBefore(";");
                        if (entry.Attributes.Contains("product="))
                        {
                            var product = entry.Attributes.StringAfter("product=").StringBefore(";");
                            if (!product.Contains(" "))
                            {
                                entry.Name = entry.Name + ":" + product;
                            }
                        }
                    }
                }
                else
                {
                    // Only gene / exon / CDS entries here: a CDS makes the gene entries redundant.
                    if (g.Any(l => IsCDS(l)))
                    {
                        g.RemoveAll(l => IsGene(l));
                    }
                    foreach (var entry in g)
                    {
                        entry.Name = entry.Feature + ":" + entry.Attributes.StringAfter("Name=").StringBefore(";");
                    }
                }

                // Report locations that still hold more than one feature.
                if (g.Count > 1)
                {
                    Console.WriteLine(g[0].GetLocation() + " : " + g.Select(l => l.Feature).Merge("/"));
                }
            }

            var values = groups.Values.ToList();
            GenomeUtils.SortChromosome(values, m => m[0].Seqname, m => m[0].Start);

            using (StreamWriter sw = new StreamWriter(options.OutputFile))
            {
                foreach (var group in values)
                {
                    foreach (var gtf in group)
                    {
                        sw.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                                     gtf.Seqname,
                                     gtf.Start - 1,
                                     gtf.End,
                                     gtf.Name,
                                     0,
                                     gtf.Strand);
                    }
                }
            }

            return new string[] { options.OutputFile };
        }
        public override IEnumerable <string> Process()
        {
            if (!_options.PrepareOptions())
            {
                throw new Exception(_options.ParsingErrors.Merge("\n"));
            }

            HashSet <int>       sampleCodes   = new HashSet <int>(_options.GetTCGASampleCodes().ToList().ConvertAll(m => m.Code));
            Func <string, bool> acceptBarcode = m => sampleCodes.Contains(new BarInfo(m, null).Sample);
            var tec = _options.GetTechnology();

            var items = new List <MutationItem>();

            foreach (var tumor in _options.TumorTypes)
            {
                var dir = Path.Combine(_options.TCGADirectory, tumor);
                if (!Directory.Exists(dir))
                {
                    continue;
                }

                var tecdir = tec.GetTechnologyDirectory(dir);

                if (!Directory.Exists(tecdir))
                {
                    continue;
                }

                foreach (var platform in _options.Platforms)
                {
                    var platdir = Path.Combine(tecdir, platform);

                    var datadirs = Directory.GetDirectories(platdir, "*Level_2*");
                    foreach (var datadir in datadirs)
                    {
                        var maffiles = Directory.GetFiles(datadir, "*.somatic.maf");
                        if (maffiles.Length == 0)
                        {
                            continue;
                        }

                        foreach (var maffile in maffiles)
                        {
                            using (var sr = new StreamReader(maffile))
                            {
                                string line;

                                //skip comments
                                while ((line = sr.ReadLine()) != null && line.StartsWith("#"))
                                {
                                }

                                if (string.IsNullOrEmpty(line))
                                {
                                    continue;
                                }

                                //read header
                                var headers                    = line.Split('\t');
                                var nameIndex                  = Array.IndexOf(headers, "Hugo_Symbol");
                                var ncbiIndex                  = Array.IndexOf(headers, "NCBI_Build");
                                var chromosomeIndex            = Array.IndexOf(headers, "Chromosome");
                                var startIndex                 = Array.IndexOf(headers, "Start_position");
                                var endIndex                   = Array.IndexOf(headers, "End_position");
                                var strandIndex                = Array.IndexOf(headers, "Strand");
                                var variantClassificationIndex = Array.IndexOf(headers, "Variant_Classification");
                                var variantTypeIndex           = Array.IndexOf(headers, "Variant_Type");
                                var barcodeIndex               = Array.IndexOf(headers, "Tumor_Sample_Barcode");

                                while ((line = sr.ReadLine()) != null)
                                {
                                    var parts = line.Split('\t');
                                    var item  = new MutationItem()
                                    {
                                        Tumor                 = tumor,
                                        Platform              = platform,
                                        Name                  = parts[nameIndex],
                                        NcbiBuild             = parts[ncbiIndex],
                                        Chromosome            = parts[chromosomeIndex],
                                        Start                 = parts[startIndex],
                                        End                   = parts[endIndex],
                                        Strand                = parts[strandIndex],
                                        VariantClassification = parts[variantClassificationIndex],
                                        VariantType           = parts[variantTypeIndex],
                                        TumorBarcode          = parts[barcodeIndex]
                                    };
                                    item.InitLocus();
                                    item.InitPaticipant();

                                    items.Add(item);
                                }
                            }
                        }
                    }
                }
            }

            using (var sw = new StreamWriter(_options.OutputFile))
            {
                var paticipants = (from item in items
                                   select item.Paticipant).Distinct().OrderBy(m => m).ToList();
                var itemMap   = items.ToDoubleDictionaryGroup(m => m.Locus, m => m.Paticipant);
                var locusList = itemMap.Keys.ToList();
                GenomeUtils.SortChromosome(locusList, m => m.StringBefore(":"), m => int.Parse(m.StringAfter(":").StringBefore("-")));

                sw.WriteLine("Hugo_Symbol\tNCBI_Build\tChromosome\tStart_position\tEnd_position\tStrand\tVariant_Classification\tVariant_Type\t{0}", paticipants.Merge("\t"));
                foreach (var locus in locusList)
                {
                    var dic  = itemMap[locus];
                    var item = dic.Values.First().First();
                    sw.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}",
                             item.Name,
                             item.NcbiBuild,
                             item.Chromosome,
                             item.Start,
                             item.End,
                             item.Start,
                             item.VariantClassification,
                             item.VariantType);

                    foreach (var paticipant in paticipants)
                    {
                        if (dic.ContainsKey(paticipant))
                        {
                            sw.Write("\t1");
                        }
                        else
                        {
                            sw.Write("\t0");
                        }
                    }
                    sw.WriteLine();
                }
            }

            var genefile = FileUtils.ChangeExtension(_options.OutputFile, ".gene.tsv");

            using (var sw = new StreamWriter(genefile))
            {
                var paticipants = (from item in items
                                   select item.Paticipant).Distinct().OrderBy(m => m).ToList();
                var itemMap  = items.ToDoubleDictionaryGroup(m => m.Name, m => m.Paticipant);
                var nameList = itemMap.Keys.OrderBy(m => m).ToList();

                sw.WriteLine("Hugo_Symbol\t{0}", paticipants.Merge("\t"));
                foreach (var name in nameList)
                {
                    var dic  = itemMap[name];
                    var item = dic.Values.First().First();
                    sw.Write("{0}", item.Name);

                    foreach (var paticipant in paticipants)
                    {
                        if (dic.ContainsKey(paticipant))
                        {
                            sw.Write("\t1");
                        }
                        else
                        {
                            sw.Write("\t0");
                        }
                    }
                    sw.WriteLine();
                }
            }

            return(new[] { _options.OutputFile, genefile });
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Runs the pileup step when no non-empty base file exists, runs the filter step on
        /// the base file, and then writes one <c>FilterItem</c> per validation-list entry to
        /// "<c>OutputSuffix</c>.tsv" and "<c>OutputSuffix</c>.vcf".
        /// </summary>
        /// <returns>The .tsv path followed by the .vcf path.</returns>
        /// <exception cref="Exception">Thrown when the filter options cannot be prepared.</exception>
        public override IEnumerable <string> Process()
        {
            // Only redo the pileup when the base file is missing or empty.
            if (!File.Exists(_options.BaseFilename) || new FileInfo(_options.BaseFilename).Length == 0)
            {
                base.Process();
            }
            else
            {
                Progress.SetMessage("Base file {0} exists, ignore pileup ...", _options.BaseFilename);
            }

            var filterOptions = options.GetFilterOptions();

            if (new FileInfo(_options.BaseFilename).Length > 0)
            {
                if (!filterOptions.PrepareOptions())
                {
                    throw new Exception("Filter options failed: " + filterOptions.ParsingErrors.Merge("\n"));
                }

                new FilterProcessor(filterOptions).Process();

                // The first line of the R output is skipped (NOTE(review): presumably a header - confirm).
                var lines        = File.ReadAllLines(filterOptions.ROutputFile).Skip(1).ToArray();
                var glmfailed    = lines.Count(m => m.Contains("GLM_PVALUE"));
                var summarylines = File.ReadAllLines(_options.SummaryFilename).ToList();

                // Replace a trailing "glm pvalue" line left by an earlier run. The count check keeps
                // Last() from throwing when the summary file is empty.
                if (summarylines.Count > 0 && summarylines.Last().StartsWith("glm pvalue"))
                {
                    summarylines.RemoveAt(summarylines.Count - 1);
                }
                summarylines.Add(string.Format("glm pvalue > {0}\t{1}\t{2}", options.GlmPvalue, glmfailed, lines.Length - glmfailed));
                File.WriteAllLines(_options.SummaryFilename, summarylines);
            }

            var mutationList = GetValidationList();
            var candidates   = new MpileupFisherResultFileFormat().ReadFromFile(options.CandidatesFilename).ToDictionary(m => GenomeUtils.GetKey(m.Item.SequenceIdentifier, m.Item.Position));
            var items        = new FilterItemTextFormat().ReadFromFile(filterOptions.ROutputFile).ToDictionary(m => GenomeUtils.GetKey(m.Chr, m.Start));

            var result = new List <FilterItem>();

            foreach (var mutation in mutationList.Items)
            {
                var key = GenomeUtils.GetKey(mutation.Chr, mutation.Pos);
                if (items.ContainsKey(key))
                {
                    // The locus is present in the filter output: report it unchanged.
                    result.Add(items[key]);
                }
                else
                {
                    // The locus is absent from the filter output: build a placeholder entry.
                    var item = new FilterItem();
                    item.Chr            = mutation.Chr;
                    item.Start          = mutation.Pos.ToString();
                    item.End            = item.Start;
                    item.FisherNormal   = string.Empty;
                    item.BrglmConverged = string.Empty;
                    item.BrglmGroup     = 1.0;
                    item.BrglmGroupFdr  = 1.0;
                    item.BrglmScore     = string.Empty;
                    item.BrglmStrand    = string.Empty;
                    item.BrglmPosition  = string.Empty;
                    item.Identity       = string.Empty;
                    result.Add(item);

                    if (candidates.ContainsKey(key))
                    {
                        // A candidate record exists: copy its alleles, counts and failure reason.
                        var cand = candidates[key];
                        item.ReferenceAllele  = cand.Item.Nucleotide.ToString();
                        item.MajorAllele      = cand.Group.SucceedName;
                        item.MinorAllele      = cand.Group.FailedName;
                        item.NormalMajorCount = cand.Group.Sample1.Succeed;
                        item.NormalMinorCount = cand.Group.Sample1.Failed;
                        item.TumorMajorCount  = cand.Group.Sample2.Succeed;
                        item.TumorMinorCount  = cand.Group.Sample2.Failed;
                        item.FisherGroup      = cand.Group.PValue;
                        item.Filter           = cand.FailedReason;
                        Console.WriteLine("In candidates : " + item.Filter);
                    }
                    else
                    {
                        // No candidate record either: report zero counts.
                        item.NormalMajorCount = 0;
                        item.NormalMinorCount = 0;
                        item.TumorMajorCount  = 0;
                        item.TumorMinorCount  = 0;
                        item.Filter           = "No coverage";
                        Console.WriteLine("No read : " + item.Filter);
                    }
                }
            }

            new FilterItemVcfWriter(filterOptions).WriteToFile(_options.OutputSuffix + ".vcf", result);
            new FilterItemTextFormat().WriteToFile(_options.OutputSuffix + ".tsv", result);

            return(new string[] { _options.OutputSuffix + ".tsv", _options.OutputSuffix + ".vcf" });
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Reads a tab-separated list of (sample key, file) pairs from <c>options.InputFile</c>,
        /// parses each file into somatic items, and writes one row per distinct locus to
        /// <c>options.OutputFile</c> with one column per sample and a trailing "DetectedTimes" count.
        /// </summary>
        /// <returns>A single-element array holding <c>options.OutputFile</c>.</returns>
        public override IEnumerable <string> Process()
        {
            var itemMap = new Dictionary <string, Dictionary <string, SomaticItem> >();
            var files   = (from line in File.ReadAllLines(options.InputFile)
                           where !string.IsNullOrWhiteSpace(line)
                           let parts = line.Split('\t')
                                       select new { Key = parts[0], File = parts[1] }).ToList();

            // If the first entry does not name an existing file, treat it as a header line.
            if (files.Count > 0 && !File.Exists(files[0].File))
            {
                files.RemoveAt(0);
            }

            foreach (var file in files)
            {
                var items = SomaticMutationUtils.ParseGlmvcFile(file.File, options.AcceptChromosome);
                itemMap[file.Key] = items.ToDictionary(m => m.Key);
            }

            using (var sw = new StreamWriter(options.OutputFile))
            {
                var samples = itemMap.Keys.OrderBy(m => m).ToArray();

                // Column name plus the accessor that produces the column's value.
                List <Tuple <string, Func <SomaticItem, string> > > funcs = new List <Tuple <string, Func <SomaticItem, string> > >();
                funcs.Add(new Tuple <string, Func <SomaticItem, string> >("#chr", m => m.Chrom));
                funcs.Add(new Tuple <string, Func <SomaticItem, string> >("start", m => m.StartPosition.ToString()));
                // NOTE(review): the "end" column also writes StartPosition - confirm this is intended.
                funcs.Add(new Tuple <string, Func <SomaticItem, string> >("end", m => m.StartPosition.ToString()));

                // Annotation columns are written only when at least one item has a gene name.
                if (itemMap.Values.Any(m => m.Values.Any(l => !string.IsNullOrWhiteSpace(l.RefGeneName))))
                {
                    funcs.Add(new Tuple <string, Func <SomaticItem, string> >("gene", m => m.RefGeneName));
                    funcs.Add(new Tuple <string, Func <SomaticItem, string> >("func", m => m.RefGeneFunc));
                    funcs.Add(new Tuple <string, Func <SomaticItem, string> >("exonic_func", m => m.RefGeneExonicFunc));
                    funcs.Add(new Tuple <string, Func <SomaticItem, string> >("aa_change", m => m.RefGeneAAChange));
                }

                sw.Write(funcs.ConvertAll(l => l.Item1).Merge("\t"));
                foreach (var sample in samples)
                {
                    sw.Write("\t{0}", sample);
                }
                sw.WriteLine("\tDetectedTimes");

                var locus = (from v in itemMap.Values from vv in v.Keys select vv).Distinct().ToList();
                GenomeUtils.SortChromosome(locus, m => m.StringBefore("_"), m => int.Parse(m.StringAfter("_")));

                // Representative item per locus: the item from the first sample dictionary (in
                // itemMap.Values order) that contains the locus. Built once so that each row does
                // not have to rescan every item of every sample.
                var representatives = new Dictionary <string, SomaticItem>();
                foreach (var sampleItems in itemMap.Values)
                {
                    foreach (var entry in sampleItems)
                    {
                        if (!representatives.ContainsKey(entry.Key))
                        {
                            representatives.Add(entry.Key, entry.Value);
                        }
                    }
                }

                foreach (var loc in locus)
                {
                    var item = representatives[loc];
                    sw.Write("{0}", funcs.ConvertAll(l => l.Item2(item)).Merge("\t"));
                    var count = 0;
                    foreach (var sample in samples)
                    {
                        SomaticItem curitem;
                        if (itemMap[sample].TryGetValue(loc, out curitem))
                        {
                            sw.Write("\t{0}/{1}={2}/{3}|{4}/{5}", curitem.RefAllele, curitem.AltAllele, curitem.NormalMajorCount, curitem.NormalMinorCount, curitem.TumorMajorCount, curitem.TumorMinorCount);
                            count++;
                        }
                        else
                        {
                            // Sample has no call at this locus: leave the cell empty.
                            sw.Write("\t");
                        }
                    }
                    sw.WriteLine("\t{0}", count);
                }
            }

            return(new[] { options.OutputFile });
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Merges the annovar result files returned by <c>_options.GetAnnovarFiles()</c> into one
        /// table written to <c>_options.OutputFile</c>: the comment lines of the first file (plus
        /// the "##MuTect=" line of every other file), the shared annotation columns, and one
        /// allele column per distinct normal and tumor sample name.
        /// </summary>
        /// <returns>A single-element array holding <c>_options.OutputFile</c>.</returns>
        /// <exception cref="Exception">
        /// Thrown when a sample name from an annovar result is not a column of its detail vcf file.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            var files    = _options.GetAnnovarFiles();
            var filelist = files.Keys.ToArray();

            using (var sw = new StreamWriter(_options.OutputFile))
            {
                //deal with comments
                using (var sr = new StreamReader(filelist[0]))
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        if (line.StartsWith("##MuTect="))
                        {
                            // Write this file's MuTect line, then the first MuTect line of every other file.
                            sw.WriteLine(line);
                            for (var i = 1; i < filelist.Length; i++)
                            {
                                using (var sr2 = new StreamReader(filelist[i]))
                                {
                                    while ((line = sr2.ReadLine()) != null)
                                    {
                                        if (!line.StartsWith("##MuTect="))
                                        {
                                            continue;
                                        }

                                        sw.WriteLine(line);
                                        break;
                                    }
                                }
                            }
                        }
                        else if (!line.StartsWith("#"))
                        {
                            // First non-comment line ends the comment section.
                            break;
                        }
                        else
                        {
                            sw.WriteLine(line);
                        }
                    }
                }

                //deal with data
                var data = new List <FileData>();
                foreach (var file in filelist)
                {
                    var    lines = File.ReadAllLines(file);
                    var    mutect = lines.FirstOrDefault(m => m.StartsWith("##MuTect="));
                    string normal, tumor, normalName, tumorName;
                    if (mutect != null)
                    {
                        // Sample names are taken from the MuTect command line recorded in the file.
                        normal     = mutect.StringAfter("normal_sample_name=").StringBefore(" ");
                        tumor      = mutect.StringAfter("tumor_sample_name=").StringBefore(" ");
                        normalName = normal;
                        tumorName  = tumor;
                    }
                    else
                    {
                        // No MuTect line: use the fixed column names and derive display names from the file name.
                        normal     = "NORMAL";
                        tumor      = "TUMOR";
                        normalName = Path.GetFileName(file).StringBefore(".") + "_normal";
                        tumorName  = Path.GetFileName(file).StringBefore(".") + "_tumor";
                    }
                    var header      = lines.First(m => !m.StartsWith("#"));
                    var headers     = header.Split('\t');
                    var infoIndex   = Array.IndexOf(headers, "INFO");
                    var formatIndex = Array.IndexOf(headers, "FORMAT");
                    var normalIndex = Array.IndexOf(headers, normal);
                    var tumorIndex  = Array.IndexOf(headers, tumor);
                    var dictionary  = new Dictionary <string, FileDataValue>();
                    foreach (var line in lines)
                    {
                        if (string.IsNullOrWhiteSpace(line) || line.StartsWith("#") || line.StartsWith("Chr"))
                        {
                            continue;
                        }

                        var parts = line.Split('\t');
                        if (parts.Length != headers.Length)
                        {
                            continue;
                        }

                        var vnormal = GetAllele(parts, normalIndex);
                        var vtumor  = GetAllele(parts, tumorIndex);
                        var value   = new FileDataValue()
                        {
                            Key     = parts[0] + "_" + parts[1],
                            Parts   = parts,
                            VNormal = vnormal,
                            VTumor  = vtumor
                        };
                        dictionary.Add(value.Key, value);
                    }
                    data.Add(new FileData()
                    {
                        File        = file,
                        Normal      = normalName,
                        Tumor       = tumorName,
                        Headers     = headers,
                        InfoIndex   = infoIndex,
                        FormatIndex = formatIndex,
                        NormalIndex = normalIndex,
                        TumorIndex  = tumorIndex,
                        Data        = dictionary
                    });
                }

                //get all positions
                var keys = (from d in data
                            from k in d.Data.Keys
                            select k).Distinct().ToList().ConvertAll(m =>
                {
                    var p = m.Split('_');
                    return(new
                    {
                        Key = m,
                        Chr = p[0],
                        Position = int.Parse(p[1])
                    });
                }
                                                                     );

                GenomeUtils.SortChromosome(keys, m => m.Chr, m => m.Position);

                var keyMap = keys.ToDictionary(m => m.Key);

                //check by original vcf file to fill the other columns
                foreach (var d in data)
                {
                    var vcf = files[d.File];
                    if (string.IsNullOrEmpty(vcf))
                    {
                        continue;
                    }

                    var vd = d.Data;

                    using (var sr = new StreamReader(vcf))
                    {
                        string line;
                        var    normalIndex = -1;
                        var    tumorIndex  = -1;

                        // Skip ahead to the "#CHROM" header line and locate the sample columns.
                        while ((line = sr.ReadLine()) != null)
                        {
                            if (!line.StartsWith("#CHROM"))
                            {
                                continue;
                            }

                            var parts = line.Split('\t');
                            normalIndex = Array.IndexOf(parts, d.Normal);
                            tumorIndex  = Array.IndexOf(parts, d.Tumor);
                            break;
                        }
                        if (normalIndex == -1)
                        {
                            // {2} reports the annovar file; the message previously repeated {1} (the vcf file).
                            throw new Exception(string.Format("Normal {0} is not included in detail vcf file {1} but in annovar result {2}", d.Normal, vcf, d.File));
                        }
                        if (tumorIndex == -1)
                        {
                            throw new Exception(string.Format("Tumor {0} is not included in detail vcf file {1} but in annovar result {2}", d.Tumor, vcf, d.File));
                        }

                        var minIndex = Math.Max(normalIndex, tumorIndex) + 1;

                        while ((line = sr.ReadLine()) != null)
                        {
                            var parts = line.Split('\t');
                            if (parts.Length < minIndex)
                            {
                                // A line too short to hold both sample columns ends the scan.
                                break;
                            }

                            var key = parts[0] + "_" + parts[1];
                            if (!keyMap.ContainsKey(key))
                            {
                                continue;
                            }

                            FileDataValue fdv;
                            if (!vd.TryGetValue(key, out fdv))
                            {
                                // Position is known from another file only: no annotation columns (Parts stays null).
                                fdv = new FileDataValue()
                                {
                                    Key   = key,
                                    Parts = null
                                };
                                vd[key] = fdv;
                            }
                            fdv.VNormal = GetAllele(parts, normalIndex);
                            fdv.VTumor  = GetAllele(parts, tumorIndex);
                        }
                    }
                }

                //write header
                for (var i = 0; i < data[0].Headers.Length; i++)
                {
                    if (i == data[0].NormalIndex || i == data[0].TumorIndex || i == data[0].InfoIndex || i == data[0].FormatIndex)
                    {
                        continue;
                    }
                    else
                    {
                        if (i != 0)
                        {
                            sw.Write("\t");
                        }
                        sw.Write(data[0].Headers[i]);
                    }
                }

                var normalnames = (from d in data
                                   select d.Normal).Distinct().ToArray();
                sw.Write("\t{0}", normalnames.Merge('\t'));
                var tumornames = (from d in data
                                  select d.Tumor).Distinct().ToArray();
                sw.WriteLine("\t{0}", tumornames.Merge('\t'));

                foreach (var key in keys)
                {
                    // Annotation columns come from the first file that has a full record for this position.
                    var d1 = data.First(d => d.Data.ContainsKey(key.Key) && d.Data[key.Key].Parts != null);
                    var v1 = d1.Data[key.Key];
                    for (var i = 0; i < v1.Parts.Length; i++)
                    {
                        if (i == 0)
                        {
                            sw.Write("{0}", v1.Parts[0]);
                        }
                        else if (i == d1.InfoIndex || i == d1.FormatIndex || i == d1.NormalIndex || i == d1.TumorIndex)
                        {
                            continue;
                        }
                        else
                        {
                            sw.Write("\t{0}", v1.Parts[i]);
                        }
                    }

                    foreach (var name in normalnames)
                    {
                        var dn = (from d in data
                                  where d.Normal.Equals(name) && d.Data.ContainsKey(key.Key)
                                  select d).FirstOrDefault();
                        if (dn == null)
                        {
                            sw.Write("\t");
                        }
                        else
                        {
                            var vn = dn.Data[key.Key].VNormal;
                            sw.Write("\t{0}", vn);
                        }
                    }

                    foreach (var name in tumornames)
                    {
                        var dn = (from d in data
                                  where d.Tumor.Equals(name) && d.Data.ContainsKey(key.Key)
                                  select d).FirstOrDefault();
                        if (dn == null)
                        {
                            sw.Write("\t");
                        }
                        else
                        {
                            var vn = dn.Data[key.Key].VTumor;
                            sw.Write("\t{0}", vn);
                        }
                    }
                    sw.WriteLine();
                }
            }

            return(new[] { _options.OutputFile });
        }
Ejemplo n.º 20
0
    /// <summary>
    /// Restarts the game and builds the next generation of genomes: the first
    /// <c>population * survivalChance</c> genomes of <c>nets</c> are carried over, each species
    /// contributes crossover children in proportion to its share of the total fitness, any
    /// remaining slots are filled with children of <c>speciesList[0]</c>, and every genome in
    /// the new list then receives at most one kind of mutation.
    /// </summary>
    private void NextGen()
    {
        Global.game.restart();
        generation++;
        float         totalFitness   = 0;
        float         leftPopulation = population * (1 - survivalChance);
        List <Genome> nextGenomes    = new List <Genome>();

        foreach (Species species in speciesList)
        {
            totalFitness += species.GetFitness();
        }

        // Carry over the leading genomes of nets (NOTE(review): presumably sorted best-first - confirm).
        for (int i = 0; i < (int)(population * survivalChance); i++)
        {
            nextGenomes.Add(nets[i].GetGenome());
        }

        foreach (Species species in speciesList)
        {
            // Offspring quota proportional to the species' share of the total fitness.
            int offspringCount = (int)(species.GetFitness() / totalFitness * leftPopulation);
            for (int i = 0; i < offspringCount; i++)
            {
                nextGenomes.Add(BreedChild(species));
            }
        }

        // Integer truncation above can leave free slots; fill them from the first species.
        while (nextGenomes.Count < population)
        {
            nextGenomes.Add(BreedChild(speciesList[0]));
        }

        foreach (Genome genome in nextGenomes)
        {
            // A single roll selects at most one mutation kind per genome.
            double roll = random.NextDouble();

            if (roll < weightMutationChance)
            {
                genome.Mutate(randomWeightChance, random);
            }
            else if (roll < weightMutationChance + addNodeChance)
            {
                genome.AddNodeMutation(random);
            }
            else if (roll < weightMutationChance + addNodeChance + addConnectionChance)
            {
                genome.AddConnectionMutation(random);
            }
        }

        foreach (Species species in speciesList)
        {
            species.Reset();
        }
        genomes = nextGenomes;
    }

    /// <summary>
    /// Picks two random genomes from <paramref name="species"/> and crosses them over, passing
    /// the parent whose network has the strictly higher fitness as the first argument.
    /// </summary>
    /// <param name="species">The species both parents are drawn from.</param>
    /// <returns>The genome returned by <c>GenomeUtils.Crossover</c>.</returns>
    private Genome BreedChild(Species species)
    {
        Genome parent1 = species.GetRandomGenome(random);
        Genome parent2 = species.GetRandomGenome(random);

        if (networkMap[parent1].GetFitness() > networkMap[parent2].GetFitness())
        {
            return GenomeUtils.Crossover(parent1, parent2, random);
        }

        return GenomeUtils.Crossover(parent2, parent1, random);
    }
Ejemplo n.º 21
0
        /// <summary>
        /// Reads pileup lines from the source selected by <c>_options.From</c> and parses only
        /// those whose locus appears in the validation list, collecting the parse results.
        /// </summary>
        /// <returns>
        /// The result object holding the parsed entries, with <c>TotalCount</c> set to the size
        /// of the validation list and <c>NotCovered</c> set to the entries without a parsed line.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when samtools cannot be started or when a pileup line fails to parse; in the
        /// latter case the original exception is attached as the inner exception.
        /// </exception>
        protected override MpileupResult GetMpileupResult()
        {
            var result = new MpileupResult(string.Empty, _options.CandidatesDirectory);

            Progress.SetMessage("Single thread mode ...");
            var parser = _options.GetPileupItemParser(false);
            var pfile  = new PileupFile(parser);

            var mutationList = GetValidationList();

            result.TotalCount = mutationList.Items.Length;

            var map = mutationList.Items.ToDictionary(m => GenomeUtils.GetKey(m.Chr, m.Pos));

            switch (_options.From)
            {
            case DataSourceType.Mpileup:
                pfile.Open(_options.MpileupFile);
                break;

            case DataSourceType.BAM:
                // Restrict samtools to the validation positions written to a bed file.
                var posFile = Path.Combine(_options.CandidatesDirectory, "pos.bed");
                mutationList.WriteToFile(posFile, 500);
                var proc = new MpileupProcessor(_options).ExecuteSamtools(new[] { _options.NormalBam, _options.TumorBam }, "", posFile);
                if (proc == null)
                {
                    throw new Exception("Cannot execute mpileup.");
                }

                pfile.Open(proc.StandardOutput);
                pfile.Samtools = proc;
                break;

            case DataSourceType.Console:
                pfile.Open(Console.In);
                break;
            }

            Progress.SetMessage("Total {0} entries in validation list", mutationList.Items.Length);
            foreach (var m in map)
            {
                Console.WriteLine(m.Key);
            }

            using (pfile)
            {
                try
                {
                    IMpileupParser proc = new ValidationParser(_options, result);

                    string line;
                    while ((line = pfile.ReadLine()) != null)
                    {
                        try
                        {
                            var locus    = parser.GetSequenceIdentifierAndPosition(line);
                            var locusKey = GenomeUtils.GetKey(locus.SequenceIdentifier, locus.Position);

                            // Only loci from the validation list are parsed.
                            if (!map.ContainsKey(locusKey))
                            {
                                continue;
                            }

                            var parres = proc.Parse(line, true);
                            if (!string.IsNullOrEmpty(parres.FailedReason))
                            {
                                Progress.SetMessage("{0}\t{1}\t{2} ~ {3}\t{4}", parres.Item.SequenceIdentifier, parres.Item.Position, parres.Group.Sample1, parres.Group.Sample2, parres.FailedReason);
                            }
                            result.Results.Add(parres);
                        }
                        catch (Exception ex)
                        {
                            var error = string.Format("parsing error {0}\n{1}", ex.Message, line);
                            Progress.SetMessage(error);
                            Console.Error.WriteLine(ex.StackTrace);
                            // Keep the original exception so its type and stack trace are not lost.
                            throw new Exception(error, ex);
                        }
                    }
                }
                finally
                {
                    if (pfile.Samtools != null)
                    {
                        // Best effort: stop samtools; a failure to kill it is deliberately ignored.
                        try
                        {
                            pfile.Samtools.Kill();
                        }
                        // ReSharper disable once EmptyGeneralCatchClause
                        catch (Exception)
                        {
                        }
                    }
                }
            }

            result.NotCovered = result.TotalCount - result.Results.Count;

            return(result);
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Runs samtools over <c>options.BamFiles</c> restricted to the positions of the bed file,
        /// and writes each bed entry followed by one "event:count" column per sample to
        /// <c>options.OutputFile</c>.
        /// </summary>
        /// <returns>
        /// A single-element array holding <c>options.OutputFile</c>, or <c>null</c> when samtools
        /// could not be started.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when a pileup line fails to parse (the original exception is attached as the
        /// inner exception) or when no bed entry was found in the pileup output.
        /// </exception>
        public override IEnumerable <string> Process()
        {
            options.PrintParameter(Console.Out);

            Progress.SetMessage("Single thread mode ...");
            var parser = options.GetPileupItemParser();
            var pfile  = new PileupFile(parser);

            var mutationList = new ValidationFile().ReadFromFile(options.BedFile);
            var map          = mutationList.Items.ToDictionary(m => GenomeUtils.GetKey(m.Chr, m.Pos));

            var posFile = Path.Combine(options.OutputFile + ".pos.bed");

            mutationList.WriteToFile(posFile, 500);
            var proc = new MpileupProcessor(options).ExecuteSamtools(options.BamFiles, "", posFile);

            if (proc == null)
            {
                return(null);
            }

            pfile.Open(proc.StandardOutput);
            pfile.Samtools = proc;

            Progress.SetMessage("Total {0} entries in extraction list", mutationList.Items.Length);

            var result = new Dictionary <ValidationItem, PileupItem>();

            using (pfile)
            {
                try
                {
                    string line;
                    string lastChrom = string.Empty;
                    while ((line = pfile.ReadLine()) != null)
                    {
                        try
                        {
                            var locus    = parser.GetSequenceIdentifierAndPosition(line);
                            var locusKey = GenomeUtils.GetKey(locus.SequenceIdentifier, locus.Position);

                            // Report progress once per chromosome change.
                            if (!locus.SequenceIdentifier.Equals(lastChrom))
                            {
                                Progress.SetMessage("Processing chromosome " + locus.SequenceIdentifier + " ...");
                                lastChrom = locus.SequenceIdentifier;
                            }

                            // Only loci from the bed file are kept.
                            ValidationItem vitem = null;
                            if (!map.TryGetValue(locusKey, out vitem))
                            {
                                continue;
                            }

                            result[vitem] = parser.GetValue(line);
                        }
                        catch (Exception ex)
                        {
                            var error = string.Format("Parsing error {0}\n{1}", ex.Message, line);
                            Progress.SetMessage(error);
                            Console.Error.WriteLine(ex.StackTrace);
                            // Keep the original exception so its type and stack trace are not lost.
                            throw new Exception(error, ex);
                        }
                    }
                }
                finally
                {
                    if (pfile.Samtools != null)
                    {
                        // Best effort: stop samtools; a failure to kill it is deliberately ignored.
                        try
                        {
                            pfile.Samtools.Kill();
                        }
                        // ReSharper disable once EmptyGeneralCatchClause
                        catch (Exception)
                        {
                        }
                    }
                }
            }

            if (result.Count == 0)
            {
                throw new Exception("Nothing found. Look at the log file for error please.");
            }

            using (var sw = new StreamWriter(options.OutputFile))
            {
                sw.WriteLine("{0}\t{1}", mutationList.Header, options.GetBamNames().Merge("\t"));

                // One empty cell per bam file, used for entries without any pileup line.
                var emptyevents = new string('\t', options.BamFiles.Count);
                foreach (var mu in mutationList.Items)
                {
                    sw.Write("{0}", mu.Line);
                    PileupItem item;
                    if (result.TryGetValue(mu, out item))
                    {
                        foreach (var sample in item.Samples)
                        {
                            sample.InitEventCountList(false);
                            if (sample.EventCountList.Count > 0)
                            {
                                sw.Write("\t{0}", (from ecl in sample.EventCountList
                                                   let v = string.Format("{0}:{1}", ecl.Event, ecl.Count)
                                                           select v).Merge(","));
                            }
                            else
                            {
                                sw.Write("\t");
                            }
                        }
                    }
                    else
                    {
                        sw.Write(emptyevents);
                    }
                    sw.WriteLine();
                }
            }

            return(new[] { options.OutputFile });
        }
Ejemplo n.º 23
0
 /// <summary>
 /// Creates a node gene with the supplied type and id. The activation is taken
 /// from <c>GenomeUtils.RandomActivation()</c> at construction time.
 /// </summary>
 /// <param name="type">Value stored in the <c>type</c> member.</param>
 /// <param name="id">Value stored in the <c>id</c> member.</param>
 public NodeGene(Type type, int id)
 {
     activation = GenomeUtils.RandomActivation();
     this.id    = id;
     this.type  = type;
 }
Ejemplo n.º 24
0
        /// <summary>
        /// Reads the seed candidates from the options, builds a seed-to-target map with
        /// <c>ParclipUtils.BuildTargetSeedMap</c>, and writes one tab-delimited row per
        /// extended target to <c>options.OutputFile</c>.
        /// </summary>
        /// <returns>A one-element array holding the path of the written output file.</returns>
        public override IEnumerable<string> Process()
        {
            var candidates = options.ReadSeeds();

            Progress.SetMessage("Total {0} seeds readed.", candidates.Length);
            var offsets = GetPossibleOffsets(string.Empty);

            Progress.SetMessage("Build target {0} mers...", options.MinimumSeedLength);

            // Only take substrings that fit inside the candidate. Without this guard a
            // candidate shorter than offset + MinimumSeedLength makes Substring throw
            // before the (already guarded) main loop below is ever reached.
            var seeds = (from seq in candidates
                         from offset in offsets
                         where seq.Length >= offset + options.MinimumSeedLength
                         select seq.Substring(offset, options.MinimumSeedLength)).ToList();
            var targetSeedMap = ParclipUtils.BuildTargetSeedMap(options, seeds, this.Progress);

            Progress.SetMessage("Finding target...");
            using (var sw = new StreamWriter(options.OutputFile))
            {
                sw.WriteLine("Sequence\tSeed\tSeedOffset\tSeedLength\tTarget\tTargetCoverage\tTargetGeneSymbol\tTargetName");

                foreach (var seq in candidates)
                {
                    foreach (var offset in offsets)
                    {
                        // Stops at the first offset that does not fit.
                        // NOTE(review): this presumes offsets are enumerated in ascending order - confirm.
                        if (seq.Length < offset + options.MinimumSeedLength)
                        {
                            break;
                        }

                        var seed = seq.Substring(offset, options.MinimumSeedLength);

                        List<SeedItem> target;
                        if (!targetSeedMap.TryGetValue(seed, out target))
                        {
                            continue;
                        }

                        if (target.ConvertAll(l => l.Coverage).Distinct().Count() == 1)
                        {
                            // All targets share one coverage value: order them by sequence name and start.
                            GenomeUtils.SortChromosome(target, m => m.Seqname, m => m.Start);
                        }
                        else
                        {
                            // Otherwise order by coverage, highest first.
                            target.Sort((m1, m2) => m2.Coverage.CompareTo(m1.Coverage));
                        }

                        var longest = ParclipUtils.ExtendToLongestTarget(target, null, seq, offset, options.MinimumSeedLength, int.MaxValue, options.MinimumCoverage);

                        for (int j = 0; j < longest.Count; j++)
                        {
                            var t         = longest[j];
                            var finalSeed = seq.Substring(offset, (int)t.Length);
                            sw.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}:{5}-{6}:{7}\t{8}\t{9}\t{10}",
                                         seq,
                                         finalSeed,
                                         offset,
                                         finalSeed.Length,
                                         t.Seqname,
                                         t.Start,
                                         t.End,
                                         t.Strand,
                                         t.Coverage,
                                         t.GeneSymbol,
                                         t.Name);
                        }
                    }
                }
            }

            return new[] { options.OutputFile };
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Builds the small RNA coordinate files from the optional miRBase, UCSC tRNA,
        /// Ensembl GTF and rRNA inputs named in the options.
        /// Writes, next to <c>options.OutputFile</c>: the option snapshot (".param"), the
        /// combined BED file, a miRNA-only BED file, the ".miss1" and ".miss0" subsets,
        /// a per-biotype summary (".info"), optionally a combined ".fasta", and a ".fa"
        /// sequence file produced by <c>Bed2FastaProcessor</c>.
        /// </summary>
        /// <returns>
        /// The output BED file path, followed by the combined FASTA path when that file is written.
        /// </returns>
        public override IEnumerable<string> Process()
        {
            var paramFile = options.OutputFile + ".param";
            options.SaveToFile(paramFile);

            var bedfile = new BedItemFile<BedItem>(6);

            Progress.SetMessage("building chromosome name map ...");

            var mitoName   = "M";
            var chrNameMap = new Dictionary<string, string>();

            // Prefer the .fai index (first tab-separated column of each line is the
            // sequence name); otherwise read the names from the FASTA file itself.
            var faiFile = options.FastaFile + ".fai";
            if (File.Exists(faiFile))
            {
                using (var sr = new StreamReader(faiFile))
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        RegisterChromosomeName(chrNameMap, line.Split('\t')[0], ref mitoName);
                    }
                }
            }
            else
            {
                var ff = new FastaFormat(int.MaxValue);
                using (var sr = new StreamReader(options.FastaFile))
                {
                    Sequence seq;
                    while ((seq = ff.ReadSequence(sr)) != null)
                    {
                        RegisterChromosomeName(chrNameMap, seq.Name, ref mitoName);
                    }
                }
            }

            // A reference without any M/MT sequence has no entry for mitoName; fall back
            // to the short name instead of failing with KeyNotFoundException.
            string longMitoName;
            if (!chrNameMap.TryGetValue(mitoName, out longMitoName))
            {
                longMitoName = mitoName;
            }

            Progress.SetMessage("mitochondral chromosome name = {0}", longMitoName);

            var mirnas = new List<BedItem>();

            if (File.Exists(options.MiRBaseFile))
            {
                Progress.SetMessage("Processing {0} ...", options.MiRBaseFile);

                if (options.MiRBaseFile.EndsWith(".bed"))
                {
                    mirnas = bedfile.ReadFromFile(options.MiRBaseFile);
                    mirnas.ForEach(m =>
                    {
                        m.Seqname = m.Seqname.StringAfter("chr");
                        m.Name    = options.MiRBaseKey + ":" + m.Name;
                    });
                }
                else
                {
                    using (var gf = new GtfItemFile(options.MiRBaseFile))
                    {
                        GtfItem item;
                        while ((item = gf.Next(options.MiRBaseKey)) != null)
                        {
                            var loc = new BedItem();
                            loc.Seqname = item.Seqname.StringAfter("chr");
                            // NOTE(review): start is shifted by -1, presumably 1-based GTF to 0-based BED - confirm.
                            loc.Start   = item.Start - 1;
                            loc.End     = item.End;
                            loc.Name    = options.MiRBaseKey + ":" + item.Attributes.StringAfter("Name=").StringBefore(";");
                            loc.Score   = 1000;
                            loc.Strand  = item.Strand;
                            mirnas.Add(loc);
                        }
                    }
                }

                Progress.SetMessage("{0} miRNA readed.", mirnas.Count);
            }

            var trnas = new List<BedItem>();

            if (File.Exists(options.UcscTrnaFile))
            {
                // Read tRNA from the UCSC table; mitochondrial tRNA is taken from the Ensembl GTF below.
                Progress.SetMessage("Processing {0} ...", options.UcscTrnaFile);
                trnas = bedfile.ReadFromFile(options.UcscTrnaFile);
                trnas.ForEach(m => m.Seqname = m.Seqname.StringAfter("chr"));

                // Entries whose sequence name is longer than one character and not purely numeric.
                Predicate<BedItem> hasNonStandardName = m => (m.Seqname.Length > 1) && !m.Seqname.All(n => char.IsDigit(n));

                // Filter only when at least one entry would survive; if every entry has a
                // non-standard name the list is left untouched.
                if (trnas.FindAll(hasNonStandardName).Count != trnas.Count)
                {
                    trnas.RemoveAll(hasNonStandardName);

                    // Mitochondrial tRNA will be extracted from the Ensembl GTF file.
                    trnas.RemoveAll(m => m.Seqname.Equals("M") || m.Seqname.Equals("MT"));
                }

                trnas.ForEach(m => m.Name = GetTRNAName(m.Name));

                Progress.SetMessage("{0} tRNA from ucsc readed.", trnas.Count);

                if (File.Exists(options.UcscMatureTrnaFastaFile))
                {
                    // Each mature tRNA sequence becomes its own entry spanning the whole sequence.
                    var seqs = SequenceUtils.Read(options.UcscMatureTrnaFastaFile);
                    foreach (var seq in seqs)
                    {
                        var tRNAName = GetTRNAName(seq.Name);
                        trnas.Add(new BedItem()
                        {
                            Seqname  = seq.Name,
                            Start    = 0,
                            End      = seq.SeqString.Length,
                            Strand   = '+',
                            Name     = tRNAName,
                            Sequence = seq.SeqString
                        });
                    }
                }
            }

            var others = new List<BedItem>();

            if (File.Exists(options.EnsemblGtfFile))
            {
                // Read smallRNA / mitochondrial tRNA genes from the Ensembl GTF file.
                Progress.SetMessage("Processing {0} ...", options.EnsemblGtfFile);
                using (var gf = new GtfItemFile(options.EnsemblGtfFile))
                {
                    var biotypes = new HashSet<string>(SmallRNAConsts.Biotypes);
                    biotypes.Remove(SmallRNAConsts.miRNA);

                    // Loop-invariant: checked once instead of hitting the file system per gene record.
                    var hasUcscTrna = File.Exists(options.UcscTrnaFile);

                    GtfItem item;
                    int     count = 0;
                    while ((item = gf.Next("gene")) != null)
                    {
                        string biotype;
                        if (item.Attributes.Contains("gene_biotype"))
                        {
                            biotype = item.Attributes.StringAfter("gene_biotype \"").StringBefore("\"");
                        }
                        else if (item.Attributes.Contains("gene_type"))
                        {
                            biotype = item.Attributes.StringAfter("gene_type \"").StringBefore("\"");
                        }
                        else
                        {
                            continue;
                        }

                        // Nuclear tRNA already came from the UCSC table when that file exists.
                        if (hasUcscTrna && biotype.Equals(SmallRNAConsts.tRNA))
                        {
                            continue;
                        }

                        if (biotype.Equals("Mt_tRNA"))
                        {
                            count++;
                            var gene_name = item.Attributes.Contains("gene_name") ? item.Attributes.StringAfter("gene_name \"").StringBefore("\"") : item.GeneId;
                            var loc       = new BedItem();
                            loc.Seqname = mitoName;
                            loc.Start   = item.Start - 1;
                            loc.End     = item.End;
                            // Names are passed as arguments so a brace in a sequence name cannot corrupt the format string.
                            loc.Name    = string.Format("{0}:{1}.tRNA{2}-{3}", SmallRNAConsts.mt_tRNA, longMitoName, count, gene_name.StringAfter("-"));
                            loc.Score   = 1000;
                            loc.Strand  = item.Strand;
                            trnas.Add(loc);
                        }
                        else if (biotypes.Contains(biotype))
                        {
                            string seqName;
                            if (item.Seqname.ToLower().StartsWith("chr"))
                            {
                                seqName = item.Seqname.Substring(3);
                            }
                            else
                            {
                                seqName = item.Seqname;
                            }
                            if (seqName.Equals("M") || seqName.Equals("MT"))
                            {
                                seqName = mitoName;
                            }

                            var gene_name   = item.Attributes.StringAfter("gene_name \"").StringBefore("\"");
                            var lowGeneName = gene_name.ToLower();
                            if (lowGeneName.StartsWith("rny") || lowGeneName.Equals("y_rna"))
                            {
                                // Genes named rny* / y_rna are relabelled as yRNA regardless of the GTF biotype.
                                biotype = "yRNA";
                            }

                            var loc = new BedItem();
                            loc.Seqname = seqName;
                            loc.Start   = item.Start - 1;
                            loc.End     = item.End;
                            loc.Name    = biotype + ":" + gene_name + ":" + item.GeneId;
                            loc.Score   = 1000;
                            loc.Strand  = item.Strand;

                            others.Add(loc);
                        }
                    }
                }
            }

            var all = new List<BedItem>();

            all.AddRange(mirnas);
            all.AddRange(trnas);
            all.AddRange(others);

            // Translate every known sequence name to the spelling used by the reference FASTA.
            foreach (var bi in all)
            {
                string referenceName;
                if (chrNameMap.TryGetValue(bi.Seqname, out referenceName))
                {
                    bi.Seqname = referenceName;
                }
            }

            // rRNA database sequences are added after the name translation, so their names stay as read.
            if (File.Exists(options.RRNAFile))
            {
                var seqs = SequenceUtils.Read(options.RRNAFile);
                foreach (var seq in seqs)
                {
                    all.Add(new BedItem()
                    {
                        Seqname = seq.Name,
                        Start   = 0,
                        End     = seq.SeqString.Length,
                        Strand  = '+',
                        Name    = "rRNA:" + SmallRNAConsts.rRNADB_KEY + seq.Name
                    });
                }
            }

            Progress.SetMessage("Saving smallRNA coordinates to " + options.OutputFile + "...");
            using (var sw = new StreamWriter(options.OutputFile))
            {
                foreach (var pir in SmallRNAConsts.Biotypes)
                {
                    var locs = all.Where(m => m.Name.StartsWith(pir)).ToList();
                    WriteSortedLocations(sw, bedfile, pir, locs);
                }
            }

            var miRNA_bed = FileUtils.ChangeExtension(options.OutputFile, ".miRNA.bed");

            Progress.SetMessage("Saving miRNA coordinates to " + miRNA_bed + "...");
            using (var sw = new StreamWriter(miRNA_bed))
            {
                var pir  = SmallRNAConsts.miRNA;
                var locs = all.Where(m => m.Name.StartsWith(pir)).ToList();
                WriteSortedLocations(sw, bedfile, pir, locs);
            }

            // ".miss1": every biotype except lincRNA/lncRNA, without the rRNA database entries.
            Progress.SetMessage("Saving smallRNA miss1 coordinates to " + options.OutputFile + ".miss1 ...");
            using (var sw = new StreamWriter(options.OutputFile + ".miss1"))
            {
                foreach (var pir in SmallRNAConsts.Biotypes)
                {
                    if (pir == SmallRNABiotype.lincRNA.ToString() || pir == SmallRNABiotype.lncRNA.ToString())
                    {
                        continue;
                    }
                    var locs = all.Where(m => m.Name.StartsWith(pir)).ToList();
                    locs.RemoveAll(l => l.Name.Contains(SmallRNAConsts.rRNADB_KEY));

                    WriteSortedLocations(sw, bedfile, pir, locs);
                }
            }

            // ".miss0": lincRNA, lncRNA, and only the rRNA entries that come from the rRNA database.
            Progress.SetMessage("Saving smallRNA miss0 coordinates to " + options.OutputFile + ".miss0 ...");
            using (var sw = new StreamWriter(options.OutputFile + ".miss0"))
            {
                foreach (var pir in SmallRNAConsts.Biotypes)
                {
                    if (pir != SmallRNABiotype.lincRNA.ToString() && pir != SmallRNABiotype.lncRNA.ToString() && pir != SmallRNABiotype.rRNA.ToString())
                    {
                        continue;
                    }
                    var locs = all.Where(m => m.Name.StartsWith(pir)).ToList();
                    if (pir == SmallRNABiotype.rRNA.ToString())
                    {
                        locs.RemoveAll(l => !l.Name.Contains(SmallRNAConsts.rRNADB_KEY));
                    }

                    WriteSortedLocations(sw, bedfile, pir, locs);
                }
            }

            var summaryFile = options.OutputFile + ".info";

            Progress.SetMessage("Writing summary to " + summaryFile + "...");
            using (var sw = new StreamWriter(summaryFile))
            {
                sw.WriteLine("Biotype\tCount");

                // Count distinct entry names per biotype (the text before the first ':'), largest first.
                var biotypeGroups = all.ConvertAll(m => m.Name)
                                       .Distinct()
                                       .GroupBy(m => m.StringBefore(":"))
                                       .OrderByDescending(m => m.Count());
                foreach (var biotypeGroup in biotypeGroups)
                {
                    sw.WriteLine("{0}\t{1}", biotypeGroup.Key, biotypeGroup.Count());
                }
            }

            var result = new List<string>(new[] { options.OutputFile });

            var fasta = Path.ChangeExtension(options.OutputFile, ".fasta");

            if ((File.Exists(options.UcscTrnaFile) && File.Exists(options.UcscMatureTrnaFastaFile)) || File.Exists(options.RRNAFile))
            {
                // Combined FASTA: the reference genome followed by the extra sequence files in use.
                result.Add(fasta);
                using (var sw = new StreamWriter(fasta))
                {
                    AppendFileLines(sw, options.FastaFile);

                    if (File.Exists(options.UcscTrnaFile) && File.Exists(options.UcscMatureTrnaFastaFile))
                    {
                        AppendFileLines(sw, options.UcscMatureTrnaFastaFile);
                    }

                    if (File.Exists(options.RRNAFile))
                    {
                        AppendFileLines(sw, options.RRNAFile);
                    }
                }
            }

            var faFile = options.OutputFile + ".fa";

            Progress.SetMessage("Extracting sequence from " + options.FastaFile + "...");
            var b2foptions = new Bed2FastaProcessorOptions()
            {
                GenomeFastaFile = options.FastaFile,
                InputFile       = options.OutputFile,
                OutputFile      = faFile,
                KeepChrInName   = false,
            };

            // When a mature tRNA FASTA exists, tRNA sequences are appended from it below
            // instead of being extracted from the genome.
            if (!File.Exists(options.UcscMatureTrnaFastaFile))
            {
                b2foptions.AcceptName = m => m.StartsWith(SmallRNAConsts.miRNA) || m.StartsWith(SmallRNAConsts.mt_tRNA) || m.StartsWith(SmallRNAConsts.tRNA);
            }
            else
            {
                b2foptions.AcceptName = m => m.StartsWith(SmallRNAConsts.miRNA) || m.StartsWith(SmallRNAConsts.mt_tRNA);
            }

            new Bed2FastaProcessor(b2foptions)
            {
                Progress = this.Progress
            }.Process();

            if (File.Exists(options.UcscMatureTrnaFastaFile))
            {
                Progress.SetMessage("Extracting sequence from " + options.UcscMatureTrnaFastaFile + " ...");

                // Append (not overwrite) the tRNA entries that carry their own sequence.
                using (var sw = new StreamWriter(faFile, true))
                {
                    foreach (var tRNA in trnas)
                    {
                        if (!string.IsNullOrEmpty(tRNA.Sequence))
                        {
                            sw.WriteLine(">{0}", tRNA.Name);
                            sw.WriteLine("{0}", tRNA.Sequence);
                        }
                    }
                }
            }

            return result;
        }

        /// <summary>
        /// Records a reference sequence name under its own spelling and under the
        /// alternative spelling with/without the "chr" prefix, and updates the short
        /// mitochondrial name when the sequence is M/MT (with or without "chr").
        /// </summary>
        private static void RegisterChromosomeName(Dictionary<string, string> chrNameMap, string name, ref string mitoName)
        {
            chrNameMap[name] = name;
            if (name.StartsWith("chr"))
            {
                chrNameMap[name.StringAfter("chr")] = name;
            }
            else
            {
                chrNameMap["chr" + name] = name;
            }

            if (name.Equals("chrMT") || name.Equals("MT"))
            {
                mitoName = "MT";
            }
            if (name.Equals("chrM") || name.Equals("M"))
            {
                mitoName = "M";
            }
        }

        /// <summary>
        /// Reports the entry count for one biotype, sorts the entries with
        /// <c>GenomeUtils.SortChromosome</c> by sequence name and start, and writes them as BED lines.
        /// </summary>
        private void WriteSortedLocations(StreamWriter sw, BedItemFile<BedItem> bedfile, string biotype, List<BedItem> locs)
        {
            Progress.SetMessage("{0} : {1}", biotype, locs.Count);

            GenomeUtils.SortChromosome(locs, m => m.Seqname, m => (int)m.Start);

            foreach (var loc in locs)
            {
                sw.WriteLine(bedfile.GetValue(loc));
            }
        }

        /// <summary>
        /// Copies a text file line by line to the given writer.
        /// </summary>
        private static void AppendFileLines(StreamWriter sw, string fileName)
        {
            using (var sr = new StreamReader(fileName))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    sw.WriteLine(line);
                }
            }
        }