Ejemplo n.º 1
0
    static void Main()
    {
        int[] data = { 1, 2, 3, 4, 5, 6 };

        int lastOdd = SequenceUtil.Last <int>(
            data, delegate(int i) { return((i % 2) == 1); });
    }
        public static void Run()
        {
            //Load IMDb dataset
            var((x_train, y_train), (x_test, y_test)) = IMDB.LoadData();

            var X = np.concatenate(new NDarray[] { x_train, x_test }, axis: 0);
            var Y = np.concatenate(new NDarray[] { y_train, y_test }, axis: 0);

            Console.WriteLine("Shape of X: " + X.shape);
            Console.WriteLine("Shape of Y: " + Y.shape);

            //We can get an idea of the total number of unique words in the dataset.
            Console.WriteLine("Number of words: ");
            var hstack = np.hstack(new NDarray[] { X });
            //var unique = hstack.unique();
            //Console.WriteLine(np.unique(np.hstack(new NDarray[] { X })).Item1);

            // Load the dataset but only keep the top n words, zero the rest
            int top_words = 1000;// 5000;

            ((x_train, y_train), (x_test, y_test)) = IMDB.LoadData(num_words: top_words);

            int max_words = 500;

            x_train = SequenceUtil.PadSequences(x_train, maxlen: max_words);
            x_test  = SequenceUtil.PadSequences(x_test, maxlen: max_words);

            //Create model
            Sequential model = new Sequential();

            model.Add(new Embedding(top_words, 32, input_length: max_words));
            model.Add(new Conv1D(filters: 32, kernel_size: 3, padding: "same", activation: "relu"));
            model.Add(new MaxPooling1D(pool_size: 2));
            model.Add(new Flatten());
            model.Add(new Dense(250, activation: "relu"));
            model.Add(new Dense(1, activation: "sigmoid"));

            model.Compile(loss: "binary_crossentropy", optimizer: "adam", metrics: new string[] { "accuracy" });
            model.Summary();

            // Fit the model
            model.Fit(x_train, y_train, validation_data: new NDarray[] { x_test, y_test },
                      epochs: 1 /*10*/, batch_size: 128, verbose: 2);
            // Final evaluation of the model
            var scores = model.Evaluate(x_test, y_test, verbose: 0);

            Console.WriteLine("Accuracy: " + (scores[1] * 100));

            model.Save("model.h5");
            File.WriteAllText("model.json", model.ToJson());    //save model
            //model.SaveTensorflowJSFormat("./");   //error - Cannot perform runtime binding on a null reference
        }
Ejemplo n.º 3
0
        public void Predict(string text, Accord.MachineLearning.TFIDF codebook, int max_news_len)
        {
            var    model  = Sequential.LoadModel("best_model_gru.h5");
            string result = "";

            string[] words   = TextUtil.TextToWordSequence(text);
            double[] tokens  = codebook.Transform(words);
            var      newItem = tokens.Where(value => value != 0).ToArray();
            NDarray  x       = np.array(newItem);

            x = x.reshape(1, x.shape[0]);
            x = SequenceUtil.PadSequences(x, maxlen: max_news_len, dtype: "double");

            var y = model.Predict(x);

            Console.WriteLine(y.str);
        }
Ejemplo n.º 4
0
        private void TestNeuralNetwork(string testCsvPath, int nb_classes, Dictionary <string, int> dictionaryLikeIMDB, int max_news_len)
        {
            NDarray x_test = null;
            NDarray y_test = null;

            var testCSV          = Frame.ReadCsv(testCsvPath, false, separators: ";");
            var testYFloat       = testCSV.Rows.Select(kvp => { return(kvp.Value.GetAs <float>("Column1")); }).ValuesAll.ToList();
            var testXString      = testCSV.Rows.Select(kvp => { return(kvp.Value.GetAs <string>("Column2")); }).ValuesAll.ToList();
            var testXStringArray = testXString.ToArray();

            y_test = np.array(testYFloat.ToArray());

            y_test = Util.ToCategorical(y_test, nb_classes);

            string[][] tokens_test = testXStringArray.Tokenize();

            int[][] bow_test = FrequencyDictionary.Transform(tokens_test, dictionaryLikeIMDB);
            //double[][] bow_test = codebook.Transform(tokens_test);

            var list_test = new List <NDarray>();

            foreach (var item in bow_test)
            {
                //var newItem = item.Take(100).ToArray();
                //var ndarray = np.array(newItem);
                var ndarray = np.array(item);
                list_test.Add(ndarray);
            }

            var sequences_test = np.array(list_test);

            x_test = SequenceUtil.PadSequences(sequences_test, maxlen: max_news_len);

            //Load model and weight
            var loaded_model = Sequential.ModelFromJson(File.ReadAllText("model.json"));

            loaded_model.LoadWeight("best_model_gru.h5");

            loaded_model.Compile(optimizer: "adam", loss: "categorical_crossentropy", metrics: new string[] { "accuracy" });
            loaded_model.Summary();

            var scores = loaded_model.Evaluate(x_test, y_test, verbose: 0);

            Console.WriteLine("Test loss:" + scores[0] * 100);
            Console.WriteLine("Test accuracy:" + scores[1] * 100);
        }
Ejemplo n.º 5
0
        public static void Predict(string text)
        {
            var    model  = Sequential.LoadModel("model.h5");
            string result = "";

            var indexes = IMDB.GetWordIndex();

            string[] words  = TextUtil.TextToWordSequence(text);
            float[]  tokens = words.Select(i => ((float)indexes[i])).ToArray();

            NDarray x = np.array(tokens);

            x = x.reshape(1, x.shape[0]);
            x = SequenceUtil.PadSequences(x, maxlen: 500);
            var y      = model.Predict(x);
            var binary = Math.Round(y[0].asscalar <float>());

            result = binary == 0 ? "Negative" : "Positive";
            Console.WriteLine("Sentiment for \"{0}\": {1}", text, result);
        }
Ejemplo n.º 6
0
        private (History, Sequential, Dictionary <string, int>) LearnNeuralNetwork(string trainCsvPath, int num_words, int max_news_len, int nb_classes)
        {
            NDarray x_train = null;
            NDarray y_train = null;

            var trainCSV          = Frame.ReadCsv(trainCsvPath, false, separators: ";");
            var trainYFloat       = trainCSV.Rows.Select(kvp => { return(kvp.Value.GetAs <float>("Column1")); }).ValuesAll.ToList();
            var trainXString      = trainCSV.Rows.Select(kvp => { return(kvp.Value.GetAs <string>("Column2")); }).ValuesAll.ToList();
            var trainXStringArray = trainXString.ToArray();

            //x_train = np.array(new float[,] { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 } });
            y_train = np.array(trainYFloat.ToArray());

            y_train = Util.ToCategorical(y_train, nb_classes);

            string[][] tokens = trainXStringArray.Tokenize();

            var dictionaryLikeIMDB = FrequencyDictionary.Learn(tokens);

            var bow = FrequencyDictionary.Transform(tokens, dictionaryLikeIMDB);

            // Create a new TF-IDF with options:

            /*var codebook = new Accord.MachineLearning.TFIDF()
             * {
             *  Tf = TermFrequency.Log,
             *  Idf = InverseDocumentFrequency.Default
             * };
             *
             * codebook.Learn(tokens);
             *
             * double[][] bow = codebook.Transform(tokens);*/

            var list = new List <NDarray>();

            foreach (var item in bow)
            {
                //var newItem = item.Take(max_news_len).ToArray();
                //var ndarray = np.array(newItem);
                var ndarray = np.array(item);
                list.Add(ndarray);
            }

            var sequences = np.array(list);

            //x_train = SequenceUtil.PadSequences(sequences, maxlen: max_news_len, dtype: "double");
            x_train = SequenceUtil.PadSequences(sequences, maxlen: max_news_len);

            var model = new Sequential();

            model.Add(new Embedding(num_words, 32, null, null, null, null, false, max_news_len));
            model.Add(new GRU(138));//16
            model.Add(new Dense(12, activation: "softmax"));

            model.Compile(optimizer: "adam", loss: "categorical_crossentropy", metrics: new string[] { "accuracy" });

            model.Summary();

            var model_gru_save_path     = "best_model_gru.h5";
            var checkpoint_callback_gru = new ModelCheckpoint(
                model_gru_save_path,
                "val_accuracy",
                1,
                true
                );

            var callbacks = new List <Callback>()
            {
                checkpoint_callback_gru
            };

            float validation_split = (float)0.1;

            var history_gru = model.Fit(x_train,
                                        y_train,
                                        batch_size: 128,
                                        epochs: 10,
                                        validation_split: validation_split,
                                        callbacks: callbacks.ToArray());

            //Save model and weights
            string json = model.ToJson();

            File.WriteAllText("model.json", json);

            return(history_gru, model, dictionaryLikeIMDB);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Executes the cross-link search for LC-IMS-TOF data.
        /// </summary>
        /// <param name="settings">Settings object to control parameters for cross-linking.</param>
        /// <param name="proteinSequenceEnumerable">IEnumerable of protein sequences, as a .NET Bio ISequence object.</param>
        /// <param name="featureList">List of LC-IMS-MS Features, as LcImsMsFeature.</param>
        /// <param name="peakList">List of Isotopic Peaks, as IsotopicPeak.</param>
        /// <returns>An enumerable of CrossLinkResult objects.</returns>
        public static IList <CrossLinkResult> Execute(
            CrossLinkSettings settings,
            IEnumerable <ISequence> proteinSequenceEnumerable,
            List <LcImsMsFeature> featureList,
            List <IsotopicPeak> peakList)
        {
            var massToleranceBase  = settings.MassTolerance;
            var maxMissedCleavages = settings.MaxMissedCleavages;
            var digestionRule      = settings.TrypticType;

            CrossLinkUtil.StaticDeltaMass = settings.StaticDeltaMass;
            CrossLinkUtil.UseC13          = settings.UseC13;
            CrossLinkUtil.UseN15          = settings.UseN15;

            Console.WriteLine();
            Console.WriteLine("Mass Tolerance:          " + massToleranceBase + " ppm");
            Console.WriteLine("Max missed cleavages:    " + maxMissedCleavages);
            Console.WriteLine("Digestion rule:          " + settings.TrypticType);
            Console.WriteLine("Delta mass uses C13:     " + settings.UseC13);
            Console.WriteLine("Delta mass uses N15:     " + settings.UseN15);
            Console.WriteLine("Static delta mass addon: " + settings.StaticDeltaMass + " Da");

            // Used for finding Isotopic Profiles in the data
            var msFeatureFinder = new BasicTFF();

            var crossLinkList     = new List <CrossLink>();
            var lastProgress      = DateTime.UtcNow;
            var proteinsProcessed = 0;

            // Create CrossLink objects from all of the protein sequences
            foreach (var proteinSequence in proteinSequenceEnumerable)
            {
                var proteinSequenceString = new string(proteinSequence.Select((a => (char)a)).ToArray());
                var proteinId             = proteinSequence.ID;

                // Get a List of Peptides from the Protein Sequence
                var peptideList = SequenceUtil.DigestProtein(proteinSequenceString, digestionRule, maxMissedCleavages);

                // Find all possible cross links from the peptide list
                var crossLinkEnumerable = CrossLinkUtil.GenerateTheoreticalCrossLinks(peptideList, proteinSequenceString, proteinId);
                crossLinkList.AddRange(crossLinkEnumerable);

                proteinsProcessed++;
                if (DateTime.UtcNow.Subtract(lastProgress).TotalSeconds >= 15)
                {
                    lastProgress = DateTime.UtcNow;
                    Console.WriteLine("Creating cross linked peptide list; " + proteinsProcessed + " proteins processed");
                }
            }

            Console.WriteLine("Sorting cross-linked peptides");

            // Sort the CrossLinks by mass so that the results are ordered in a friendly way
            IEnumerable <CrossLink> orderedCrossLinkEnumerable = crossLinkList.OrderBy(x => x.Mass);

            // Sort Feature by mass so we can use binary search
            featureList = featureList.OrderBy(x => x.MassMonoisotopic).ToList();

            // Set up a Feature Comparer and Peak Comparer to use for binary search later on
            var featureComparer = new AnonymousComparer <LcImsMsFeature>((x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic));
            var peakComparer    = new AnonymousComparer <IsotopicPeak>((x, y) => x.ScanLc != y.ScanLc ? x.ScanLc.CompareTo(y.ScanLc) : x.ScanIms != y.ScanIms ? x.ScanIms.CompareTo(y.ScanIms) : x.Mz.CompareTo(y.Mz));

            // Sort the Isotopic Peaks by LC Scan, IMS Scan, and m/z to set them up for binary search later on
            peakList.Sort(peakComparer);

            var crossLinkResultList    = new List <CrossLinkResult>();
            var totalCandidatePeptides = crossLinkList.Count;

            Console.WriteLine("Searching isotopic data vs. " + totalCandidatePeptides.ToString("#,##0") + " candidate cross-linked peptides");
            lastProgress = DateTime.UtcNow;
            var crosslinkCandidatesProcessed = 0;

            // Search the data for the existence of cross-links
            foreach (var crossLink in orderedCrossLinkEnumerable)
            {
                // Calculate mass tolerance to use for binary search
                var massTolerance = massToleranceBase * crossLink.Mass / GeneralConstants.PPM_DIVISOR;

                var lowFeature = new LcImsMsFeature {
                    MassMonoisotopic = crossLink.Mass - massTolerance
                };
                var highFeature = new LcImsMsFeature {
                    MassMonoisotopic = crossLink.Mass + massTolerance
                };

                var lowFeaturePosition  = featureList.BinarySearch(lowFeature, featureComparer);
                var highFeaturePosition = featureList.BinarySearch(highFeature, featureComparer);

                lowFeaturePosition  = lowFeaturePosition < 0 ? ~lowFeaturePosition : lowFeaturePosition;
                highFeaturePosition = highFeaturePosition < 0 ? ~highFeaturePosition : highFeaturePosition;

                // Iterate over all LC-IMS-MS Features that match the Unmodified cross-link mass
                for (var i = lowFeaturePosition; i < highFeaturePosition; i++)
                {
                    var feature = featureList[i];

                    // Search for a mass shift in each of the LC Scans the unmodified cross-link mass was found
                    for (var currentScanLc = feature.ScanLcStart; currentScanLc <= feature.ScanLcEnd; currentScanLc++)
                    {
                        var crossLinkResult = new CrossLinkResult(crossLink, feature, currentScanLc);

                        var candidatePeaks  = PeakUtil.FindCandidatePeaks(peakList, feature.MzMonoisotopic, currentScanLc, feature.ScanImsRep);
                        var massShiftList   = crossLink.MassShiftList;
                        var shiftedMassList = new List <double>();

                        // Calculate the shifted mass values that we want to search for
                        switch (massShiftList.Count)
                        {
                        case 1:
                        {
                            var firstNewMass = feature.MassMonoisotopic + massShiftList[0];
                            shiftedMassList.Add(firstNewMass);
                        }
                        break;

                        case 2:
                        {
                            var firstNewMass  = feature.MassMonoisotopic + massShiftList[0];
                            var secondNewMass = feature.MassMonoisotopic + massShiftList[1];
                            var thirdNewMass  = feature.MassMonoisotopic + massShiftList[0] + massShiftList[1];

                            shiftedMassList.Add(firstNewMass);
                            shiftedMassList.Add(secondNewMass);
                            shiftedMassList.Add(thirdNewMass);
                        }
                        break;
                        }

                        // Search for shifted mass values in Isotopic Peaks
                        foreach (var shiftedMass in shiftedMassList)
                        {
                            var shiftedMz = (shiftedMass / feature.ChargeState) + GeneralConstants.MASS_OF_PROTON;

                            // Create theoretical Isotopic Peaks that will later form a theoretical Isotopic Profile
                            var theoreticalPeakList = new List <MSPeak> {
                                new MSPeak {
                                    XValue = shiftedMz, Height = 1
                                }
                            };
                            for (double k = 1; k < 4; k++)
                            {
                                theoreticalPeakList.Add(new MSPeak {
                                    XValue = shiftedMz + (k * 1.003 / feature.ChargeState), Height = (float)(1.0 - (k / 4))
                                });
                                theoreticalPeakList.Add(new MSPeak {
                                    XValue = shiftedMz - (k * 1.003 / feature.ChargeState), Height = (float)(1.0 - (k / 4))
                                });
                            }

                            // Sort peaks by m/z
                            var sortPeaksQuery = from peak in theoreticalPeakList
                                                 orderby peak.XValue
                                                 select peak;

                            // Create a theoretical Isotopic Profile for DeconTools to search for
                            var isotopicProfile = new IsotopicProfile
                            {
                                MonoIsotopicMass = shiftedMass,
                                MonoPeakMZ       = shiftedMz,
                                ChargeState      = feature.ChargeState,
                                Peaklist         = sortPeaksQuery.ToList()
                            };

                            // Search for the theoretical Isotopic Profile
                            var foundProfile = msFeatureFinder.FindMSFeature(candidatePeaks, isotopicProfile, massToleranceBase, false);

                            /*
                             * It is possible that the set mono pass of the previous theoretical distribution was the right-most peak of the actual distribution
                             * If so, we should be able to shift the theoretical distribution over to the left and find the actual distribution
                             */
                            if (foundProfile == null)
                            {
                                foreach (var msPeak in sortPeaksQuery)
                                {
                                    msPeak.XValue -= (1.003 / feature.ChargeState);
                                }

                                isotopicProfile = new IsotopicProfile
                                {
                                    MonoIsotopicMass = shiftedMass - 1.003,
                                    MonoPeakMZ       = shiftedMz - (1.003 / feature.ChargeState),
                                    ChargeState      = feature.ChargeState,
                                    Peaklist         = sortPeaksQuery.ToList()
                                };

                                foundProfile = msFeatureFinder.FindMSFeature(candidatePeaks, isotopicProfile, massToleranceBase, false);
                            }

                            // Add to results, even if we did not find it.
                            var didFindProfile = foundProfile != null;
                            crossLinkResult.MassShiftResults.KvpList.Add(new KeyValuePair <double, bool>(shiftedMass, didFindProfile));
                        }

                        crossLinkResultList.Add(crossLinkResult);
                    }
                }

                crosslinkCandidatesProcessed++;
                if (DateTime.UtcNow.Subtract(lastProgress).TotalSeconds >= 10)
                {
                    lastProgress = DateTime.UtcNow;
                    var percentComplete = crosslinkCandidatesProcessed / (double)totalCandidatePeptides * 100;

                    Console.WriteLine("Searching isotopic data; " + percentComplete.ToString("0.0") + "% complete");
                }
            }

            return(crossLinkResultList);
        }