/// <summary>
/// Demo entry point: asks <c>SequenceUtil.Last</c> for the last element of a
/// fixed integer array that satisfies an "is odd" predicate.
/// </summary>
static void Main()
{
    int[] numbers = new int[] { 1, 2, 3, 4, 5, 6 };

    // The predicate keeps values whose remainder modulo 2 equals 1.
    int lastOddNumber = SequenceUtil.Last<int>(numbers, (int value) => (value % 2) == 1);
}
/// <summary>
/// Trains a small Embedding + Conv1D sentiment classifier on the IMDB dataset,
/// prints the evaluation accuracy, and writes the model to "model.h5" and its
/// JSON description to "model.json".
/// </summary>
public static void Run()
{
    // Load the IMDb dataset and merge the train/test splits to report the overall shapes.
    var ((trainX, trainY), (testX, testY)) = IMDB.LoadData();

    var allX = np.concatenate(new NDarray[] { trainX, testX }, axis: 0);
    var allY = np.concatenate(new NDarray[] { trainY, testY }, axis: 0);

    Console.WriteLine("Shape of X: " + allX.shape);
    Console.WriteLine("Shape of Y: " + allY.shape);

    // Only the label is printed: the unique-word count itself
    // (np.unique over the stacked sequences) is currently disabled.
    Console.WriteLine("Number of words: ");
    var stacked = np.hstack(new NDarray[] { allX });

    // Reload the dataset, keeping only the most frequent words (previously tried: 5000).
    int vocabularySize = 1000;
    ((trainX, trainY), (testX, testY)) = IMDB.LoadData(num_words: vocabularySize);

    // Bring every review to the same length.
    int reviewLength = 500;
    trainX = SequenceUtil.PadSequences(trainX, maxlen: reviewLength);
    testX = SequenceUtil.PadSequences(testX, maxlen: reviewLength);

    // Build the network layer by layer.
    var network = new Sequential();
    network.Add(new Embedding(vocabularySize, 32, input_length: reviewLength));
    network.Add(new Conv1D(filters: 32, kernel_size: 3, padding: "same", activation: "relu"));
    network.Add(new MaxPooling1D(pool_size: 2));
    network.Add(new Flatten());
    network.Add(new Dense(250, activation: "relu"));
    network.Add(new Dense(1, activation: "sigmoid"));

    var trackedMetrics = new string[] { "accuracy" };
    network.Compile(loss: "binary_crossentropy", optimizer: "adam", metrics: trackedMetrics);
    network.Summary();

    // Fit the model (a single epoch; 10 was used previously).
    var validationSet = new NDarray[] { testX, testY };
    network.Fit(trainX, trainY, validation_data: validationSet, epochs: 1, batch_size: 128, verbose: 2);

    // Final evaluation: the value at index 1 is reported as the accuracy.
    var evaluation = network.Evaluate(testX, testY, verbose: 0);
    var accuracyPercent = evaluation[1] * 100;
    Console.WriteLine("Accuracy: " + accuracyPercent);

    // Persist the model and its JSON description.
    network.Save("model.h5");
    string modelJson = network.ToJson();
    File.WriteAllText("model.json", modelJson);

    // Disabled: network.SaveTensorflowJSFormat("./") failed with
    // "Cannot perform runtime binding on a null reference".
}
/// <summary>
/// Loads the model stored in "best_model_gru.h5", encodes <paramref name="text"/>
/// with the supplied TF-IDF codebook and prints the model's prediction to the console.
/// </summary>
/// <param name="text">Text to classify.</param>
/// <param name="codebook">TF-IDF codebook used to turn the words of the text into weights.</param>
/// <param name="max_news_len">Length the encoded sequence is padded to (passed as <c>maxlen</c>).</param>
public void Predict(string text, Accord.MachineLearning.TFIDF codebook, int max_news_len)
{
    var network = Sequential.LoadModel("best_model_gru.h5");

    // Split the text into words and weight them with the codebook.
    string[] wordSequence = TextUtil.TextToWordSequence(text);
    double[] weights = codebook.Transform(wordSequence);

    // Only non-zero weights are fed to the model.
    double[] nonZeroWeights = Array.FindAll(weights, weight => weight != 0);

    // Shape the weights as a single-row batch and pad it to the expected length.
    NDarray input = np.array(nonZeroWeights);
    int featureCount = input.shape[0];
    input = input.reshape(1, featureCount);
    input = SequenceUtil.PadSequences(input, maxlen: max_news_len, dtype: "double");

    var prediction = network.Predict(input);
    Console.WriteLine(prediction.str);
}
/// <summary>
/// Evaluates the saved GRU model on a ';'-separated CSV test set and prints the
/// resulting loss and accuracy (each multiplied by 100) to the console.
/// </summary>
/// <param name="testCsvPath">Path of the test CSV; labels are read from "Column1" and texts from "Column2".</param>
/// <param name="nb_classes">Number of classes used to one-hot encode the labels.</param>
/// <param name="dictionaryLikeIMDB">Word dictionary used to turn tokens into integer sequences.</param>
/// <param name="max_news_len">Length every sequence is padded to (passed as <c>maxlen</c>).</param>
private void TestNeuralNetwork(string testCsvPath, int nb_classes, Dictionary<string, int> dictionaryLikeIMDB, int max_news_len)
{
    // Read the CSV; the second argument is presumably hasHeaders: false — confirm against Frame.ReadCsv.
    var frame = Frame.ReadCsv(testCsvPath, false, separators: ";");
    var labelValues = frame.Rows.Select(row => row.Value.GetAs<float>("Column1")).ValuesAll.ToList();
    var textValues = frame.Rows.Select(row => row.Value.GetAs<string>("Column2")).ValuesAll.ToList();
    var texts = textValues.ToArray();

    // One-hot encode the labels.
    NDarray labels = np.array(labelValues.ToArray());
    labels = Util.ToCategorical(labels, nb_classes);

    // Tokenize the texts and map every token through the supplied dictionary.
    string[][] tokenized = texts.Tokenize();
    int[][] encoded = FrequencyDictionary.Transform(tokenized, dictionaryLikeIMDB);

    // Wrap each encoded text in an NDarray, then pad all of them to a common length.
    var rows = new List<NDarray>();
    for (int r = 0; r < encoded.Length; r++)
    {
        var row = np.array(encoded[r]);
        rows.Add(row);
    }

    var sequences = np.array(rows);
    NDarray inputs = SequenceUtil.PadSequences(sequences, maxlen: max_news_len);

    // Rebuild the architecture from JSON, then load the checkpointed weights.
    string architectureJson = File.ReadAllText("model.json");
    var network = Sequential.ModelFromJson(architectureJson);
    network.LoadWeight("best_model_gru.h5");

    var trackedMetrics = new string[] { "accuracy" };
    network.Compile(optimizer: "adam", loss: "categorical_crossentropy", metrics: trackedMetrics);
    network.Summary();

    var evaluation = network.Evaluate(inputs, labels, verbose: 0);
    Console.WriteLine("Test loss:" + evaluation[0] * 100);
    Console.WriteLine("Test accuracy:" + evaluation[1] * 100);
}
/// <summary>
/// Loads the model stored in "model.h5", encodes <paramref name="text"/> with the
/// IMDB word index and prints the predicted sentiment ("Negative" or "Positive").
/// </summary>
/// <param name="text">Free text to classify.</param>
/// <remarks>
/// Words that are not present in the IMDB word index are skipped. Previously such a
/// word made the dictionary indexer throw <see cref="KeyNotFoundException"/>, so any
/// input containing a typo or an unseen word crashed the prediction.
/// </remarks>
public static void Predict(string text)
{
    var model = Sequential.LoadModel("model.h5");
    var wordIndex = IMDB.GetWordIndex();

    string[] words = TextUtil.TextToWordSequence(text);

    // Keep only words known to the index; unknown words cannot be encoded.
    // NOTE(review): the indices are used as-is. If the model was trained on
    // IMDB.LoadData(num_words: N), indices may additionally need the dataset's
    // index offset and a cap at N — confirm against the training pipeline.
    float[] tokens = words
        .Where(word => wordIndex.ContainsKey(word))
        .Select(word => (float)wordIndex[word])
        .ToArray();

    // Shape the tokens as a single-row batch and pad to the length used here (500).
    NDarray x = np.array(tokens);
    x = x.reshape(1, x.shape[0]);
    x = SequenceUtil.PadSequences(x, maxlen: 500);

    var y = model.Predict(x);

    // Round the scalar model output: 0 is reported as "Negative", anything else as "Positive".
    var binary = Math.Round(y[0].asscalar<float>());
    string result = binary == 0 ? "Negative" : "Positive";

    Console.WriteLine("Sentiment for \"{0}\": {1}", text, result);
}
/// <summary>
/// Trains a GRU text classifier from a ';'-separated CSV file.
/// </summary>
/// <param name="trainCsvPath">Path of the training CSV; labels are read from "Column1" and texts from "Column2".</param>
/// <param name="num_words">First argument of the Embedding layer (presumably the vocabulary size — confirm).</param>
/// <param name="max_news_len">Length every token sequence is padded to; also passed to the Embedding layer.</param>
/// <param name="nb_classes">Number of classes: used to one-hot encode the labels and as the width of the softmax output layer.</param>
/// <returns>
/// The training history, the trained model, and the word dictionary learned from the training texts.
/// </returns>
/// <remarks>
/// Side effects: writes the model description to "model.json" and registers a checkpoint
/// callback targeting "best_model_gru.h5".
/// </remarks>
private (History, Sequential, Dictionary<string, int>) LearnNeuralNetwork(string trainCsvPath, int num_words, int max_news_len, int nb_classes)
{
    // Read the CSV; the second argument is presumably hasHeaders: false — confirm against Frame.ReadCsv.
    var trainCSV = Frame.ReadCsv(trainCsvPath, false, separators: ";");
    var trainYFloat = trainCSV.Rows.Select(kvp => kvp.Value.GetAs<float>("Column1")).ValuesAll.ToList();
    var trainXString = trainCSV.Rows.Select(kvp => kvp.Value.GetAs<string>("Column2")).ValuesAll.ToList();
    var trainXStringArray = trainXString.ToArray();

    // One-hot encode the labels into nb_classes columns.
    NDarray y_train = np.array(trainYFloat.ToArray());
    y_train = Util.ToCategorical(y_train, nb_classes);

    // Tokenize the texts, learn a word dictionary from them and encode every text with it.
    // (A TF-IDF codebook was tried here previously and is no longer used.)
    string[][] tokens = trainXStringArray.Tokenize();
    var dictionaryLikeIMDB = FrequencyDictionary.Learn(tokens);
    var bow = FrequencyDictionary.Transform(tokens, dictionaryLikeIMDB);

    // Wrap each encoded text in an NDarray, then pad all of them to max_news_len.
    var list = new List<NDarray>();
    foreach (var item in bow)
    {
        var ndarray = np.array(item);
        list.Add(ndarray);
    }

    var sequences = np.array(list);
    NDarray x_train = SequenceUtil.PadSequences(sequences, maxlen: max_news_len);

    var model = new Sequential();

    // Positional arguments are kept exactly as before; the last one is presumably
    // input_length — confirm against the Embedding constructor.
    model.Add(new Embedding(num_words, 32, null, null, null, null, false, max_news_len));
    model.Add(new GRU(138)); // 16 was tried previously

    // The output width must equal the one-hot label width produced by ToCategorical
    // above; a fixed size here breaks training for any other class count.
    model.Add(new Dense(nb_classes, activation: "softmax"));

    model.Compile(optimizer: "adam", loss: "categorical_crossentropy", metrics: new string[] { "accuracy" });
    model.Summary();

    // Checkpoint callback; the positional arguments are presumably
    // (filepath, monitor, verbose, save_best_only) — confirm against ModelCheckpoint.
    var model_gru_save_path = "best_model_gru.h5";
    var checkpoint_callback_gru = new ModelCheckpoint(model_gru_save_path, "val_accuracy", 1, true);
    var callbacks = new List<Callback>() { checkpoint_callback_gru };

    float validation_split = 0.1f;
    var history_gru = model.Fit(x_train, y_train,
        batch_size: 128,
        epochs: 10,
        validation_split: validation_split,
        callbacks: callbacks.ToArray());

    // Save the model description.
    string json = model.ToJson();
    File.WriteAllText("model.json", json);

    return (history_gru, model, dictionaryLikeIMDB);
}
/// <summary>
/// Executes the cross-link search for LC-IMS-TOF data.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Copies the static delta mass and C13/N15 flags from <paramref name="settings"/> into
///    the static members of CrossLinkUtil and echoes the settings to the console.
/// 2. Digests every protein sequence and collects all theoretical cross-links.
/// 3. Orders cross-links and features by mass and sorts the isotopic peaks.
/// 4. For every cross-link, every feature inside the mass tolerance window and every LC scan of
///    that feature, searches the candidate peaks for each mass-shifted isotopic profile.
/// Side effects visible to the caller: <paramref name="peakList"/> is sorted in place, the static
/// CrossLinkUtil members are overwritten, and progress messages are written to the console.
/// <paramref name="featureList"/> is not modified; a sorted copy is used instead.
/// </remarks>
/// <param name="settings">Settings object to control parameters for cross-linking.</param>
/// <param name="proteinSequenceEnumerable">IEnumerable of protein sequences, as a .NET Bio ISequence object.</param>
/// <param name="featureList">List of LC-IMS-MS Features, as LcImsMsFeature.</param>
/// <param name="peakList">List of Isotopic Peaks, as IsotopicPeak. Sorted in place by LC scan, IMS scan, then m/z.</param>
/// <returns>
/// A list of CrossLinkResult objects, one per (cross-link, matching feature, LC scan) combination.
/// Each result carries one (shifted mass, found) entry per shifted mass that was searched.
/// </returns>
public static IList <CrossLinkResult> Execute( CrossLinkSettings settings, IEnumerable <ISequence> proteinSequenceEnumerable, List <LcImsMsFeature> featureList, List <IsotopicPeak> peakList)
{
    var massToleranceBase = settings.MassTolerance;
    var maxMissedCleavages = settings.MaxMissedCleavages;
    var digestionRule = settings.TrypticType;

    // These are static members, so the values stay in effect after this call returns
    CrossLinkUtil.StaticDeltaMass = settings.StaticDeltaMass;
    CrossLinkUtil.UseC13 = settings.UseC13;
    CrossLinkUtil.UseN15 = settings.UseN15;

    // Echo the effective settings to the console
    Console.WriteLine();
    Console.WriteLine("Mass Tolerance: " + massToleranceBase + " ppm");
    Console.WriteLine("Max missed cleavages: " + maxMissedCleavages);
    Console.WriteLine("Digestion rule: " + settings.TrypticType);
    Console.WriteLine("Delta mass uses C13: " + settings.UseC13);
    Console.WriteLine("Delta mass uses N15: " + settings.UseN15);
    Console.WriteLine("Static delta mass addon: " + settings.StaticDeltaMass + " Da");

    // Used for finding Isotopic Profiles in the data
    var msFeatureFinder = new BasicTFF();

    var crossLinkList = new List <CrossLink>();
    var lastProgress = DateTime.UtcNow;
    var proteinsProcessed = 0;

    // Create CrossLink objects from all of the protein sequences
    foreach (var proteinSequence in proteinSequenceEnumerable)
    {
        // Each element of the sequence is cast to a char to build the sequence string
        var proteinSequenceString = new string(proteinSequence.Select((a => (char)a)).ToArray());
        var proteinId = proteinSequence.ID;

        // Get a List of Peptides from the Protein Sequence
        var peptideList = SequenceUtil.DigestProtein(proteinSequenceString, digestionRule, maxMissedCleavages);

        // Find all possible cross links from the peptide list
        var crossLinkEnumerable = CrossLinkUtil.GenerateTheoreticalCrossLinks(peptideList, proteinSequenceString, proteinId);
        crossLinkList.AddRange(crossLinkEnumerable);

        proteinsProcessed++;

        // Report progress at most once every 15 seconds
        if (DateTime.UtcNow.Subtract(lastProgress).TotalSeconds >= 15)
        {
            lastProgress = DateTime.UtcNow;
            Console.WriteLine("Creating cross linked peptide list; " + proteinsProcessed + " proteins processed");
        }
    }

    Console.WriteLine("Sorting cross-linked peptides");

    // Sort the CrossLinks by mass so that the results are ordered in a friendly way
    // (deferred LINQ query: the actual sort happens when the foreach below starts enumerating)
    IEnumerable <CrossLink> orderedCrossLinkEnumerable = crossLinkList.OrderBy(x => x.Mass);

    // Sort Feature by mass so we can use binary search
    // (this reassigns the parameter to a sorted copy; the caller's list keeps its order)
    featureList = featureList.OrderBy(x => x.MassMonoisotopic).ToList();

    // Set up a Feature Comparer and Peak Comparer to use for binary search later on
    var featureComparer = new AnonymousComparer <LcImsMsFeature>((x, y) => x.MassMonoisotopic.CompareTo(y.MassMonoisotopic));
    var peakComparer = new AnonymousComparer <IsotopicPeak>((x, y) => x.ScanLc != y.ScanLc ? x.ScanLc.CompareTo(y.ScanLc) : x.ScanIms != y.ScanIms ? x.ScanIms.CompareTo(y.ScanIms) : x.Mz.CompareTo(y.Mz));

    // Sort the Isotopic Peaks by LC Scan, IMS Scan, and m/z to set them up for binary search later on
    // (in-place sort: the caller's peakList is reordered)
    peakList.Sort(peakComparer);

    var crossLinkResultList = new List <CrossLinkResult>();
    var totalCandidatePeptides = crossLinkList.Count;

    Console.WriteLine("Searching isotopic data vs. " + totalCandidatePeptides.ToString("#,##0") + " candidate cross-linked peptides");
    lastProgress = DateTime.UtcNow;
    var crosslinkCandidatesProcessed = 0;

    // Search the data for the existence of cross-links
    foreach (var crossLink in orderedCrossLinkEnumerable)
    {
        // Calculate mass tolerance to use for binary search
        // (the tolerance setting is scaled by the cross-link mass and divided by PPM_DIVISOR)
        var massTolerance = massToleranceBase * crossLink.Mass / GeneralConstants.PPM_DIVISOR;

        // Probe features that carry only the lower and upper mass bounds of the tolerance window
        var lowFeature = new LcImsMsFeature { MassMonoisotopic = crossLink.Mass - massTolerance };
        var highFeature = new LcImsMsFeature { MassMonoisotopic = crossLink.Mass + massTolerance };

        var lowFeaturePosition = featureList.BinarySearch(lowFeature, featureComparer);
        var highFeaturePosition = featureList.BinarySearch(highFeature, featureComparer);

        // A negative BinarySearch result is the bitwise complement of the insertion index,
        // so ~position turns it into the index of the first element larger than the probe
        lowFeaturePosition = lowFeaturePosition < 0 ? ~lowFeaturePosition : lowFeaturePosition;
        highFeaturePosition = highFeaturePosition < 0 ? ~highFeaturePosition : highFeaturePosition;

        // Iterate over all LC-IMS-MS Features that match the Unmodified cross-link mass
        // (half-open index range: highFeaturePosition itself is excluded)
        for (var i = lowFeaturePosition; i < highFeaturePosition; i++)
        {
            var feature = featureList[i];

            // Search for a mass shift in each of the LC Scans the unmodified cross-link mass was found
            for (var currentScanLc = feature.ScanLcStart; currentScanLc <= feature.ScanLcEnd; currentScanLc++)
            {
                var crossLinkResult = new CrossLinkResult(crossLink, feature, currentScanLc);

                var candidatePeaks = PeakUtil.FindCandidatePeaks(peakList, feature.MzMonoisotopic, currentScanLc, feature.ScanImsRep);
                var massShiftList = crossLink.MassShiftList;
                var shiftedMassList = new List <double>();

                // Calculate the shifted mass values that we want to search for.
                // Only 1 or 2 mass shifts are handled; for any other count the list stays empty
                // and the result is still recorded, just without mass-shift entries.
                switch (massShiftList.Count)
                {
                    case 1:
                    {
                        var firstNewMass = feature.MassMonoisotopic + massShiftList[0];
                        shiftedMassList.Add(firstNewMass);
                    }
                    break;
                    case 2:
                    {
                        // Each shift on its own, plus both shifts combined
                        var firstNewMass = feature.MassMonoisotopic + massShiftList[0];
                        var secondNewMass = feature.MassMonoisotopic + massShiftList[1];
                        var thirdNewMass = feature.MassMonoisotopic + massShiftList[0] + massShiftList[1];

                        shiftedMassList.Add(firstNewMass);
                        shiftedMassList.Add(secondNewMass);
                        shiftedMassList.Add(thirdNewMass);
                    }
                    break;
                }

                // Search for shifted mass values in Isotopic Peaks
                foreach (var shiftedMass in shiftedMassList)
                {
                    var shiftedMz = (shiftedMass / feature.ChargeState) + GeneralConstants.MASS_OF_PROTON;

                    // Create theoretical Isotopic Peaks that will later form a theoretical Isotopic Profile:
                    // a center peak of height 1 plus three peaks on each side with heights 0.75, 0.5 and 0.25.
                    // NOTE(review): 1.003 is presumably the approximate mass spacing between adjacent
                    // isotopic peaks - confirm.
                    var theoreticalPeakList = new List <MSPeak> { new MSPeak { XValue = shiftedMz, Height = 1 } };
                    for (double k = 1; k < 4; k++)
                    {
                        theoreticalPeakList.Add(new MSPeak { XValue = shiftedMz + (k * 1.003 / feature.ChargeState), Height = (float)(1.0 - (k / 4)) });
                        theoreticalPeakList.Add(new MSPeak { XValue = shiftedMz - (k * 1.003 / feature.ChargeState), Height = (float)(1.0 - (k / 4)) });
                    }

                    // Sort peaks by m/z
                    // (deferred query: it is re-evaluated on each of the enumerations below)
                    var sortPeaksQuery = from peak in theoreticalPeakList orderby peak.XValue select peak;

                    // Create a theoretical Isotopic Profile for DeconTools to search for
                    var isotopicProfile = new IsotopicProfile { MonoIsotopicMass = shiftedMass, MonoPeakMZ = shiftedMz, ChargeState = feature.ChargeState, Peaklist = sortPeaksQuery.ToList() };

                    // Search for the theoretical Isotopic Profile
                    var foundProfile = msFeatureFinder.FindMSFeature(candidatePeaks, isotopicProfile, massToleranceBase, false);

                    /*
                     * It is possible that the monoisotopic peak assumed for the previous theoretical distribution
                     * was actually the right-most peak of the actual distribution.
                     * If so, we should be able to shift the theoretical distribution over to the left and find the actual distribution
                     */
                    if (foundProfile == null)
                    {
                        // Shift every theoretical peak left by one isotope spacing.
                        // NOTE(review): this relies on MSPeak being a reference type so that the
                        // change is seen when the query is enumerated again below - confirm.
                        foreach (var msPeak in sortPeaksQuery)
                        {
                            msPeak.XValue -= (1.003 / feature.ChargeState);
                        }

                        isotopicProfile = new IsotopicProfile { MonoIsotopicMass = shiftedMass - 1.003, MonoPeakMZ = shiftedMz - (1.003 / feature.ChargeState), ChargeState = feature.ChargeState, Peaklist = sortPeaksQuery.ToList() };

                        foundProfile = msFeatureFinder.FindMSFeature(candidatePeaks, isotopicProfile, massToleranceBase, false);
                    }

                    // Add to results, even if we did not find it.
                    // The recorded key is the original (unshifted-left) shiftedMass.
                    var didFindProfile = foundProfile != null;
                    crossLinkResult.MassShiftResults.KvpList.Add(new KeyValuePair <double, bool>(shiftedMass, didFindProfile));
                }

                crossLinkResultList.Add(crossLinkResult);
            }
        }

        crosslinkCandidatesProcessed++;

        // Report progress at most once every 10 seconds
        if (DateTime.UtcNow.Subtract(lastProgress).TotalSeconds >= 10)
        {
            lastProgress = DateTime.UtcNow;
            var percentComplete = crosslinkCandidatesProcessed / (double)totalCandidatePeptides * 100;
            Console.WriteLine("Searching isotopic data; " + percentComplete.ToString("0.0") + "% complete");
        }
    }

    return(crossLinkResultList);
}