public void UpdateTrainingQuery()
{
    CloudPakForDataAuthenticator authenticator = new CloudPakForDataAuthenticator(
        url: "https://{cpd_cluster_host}{:port}",
        username: "******",
        password: "******"
        );

    DiscoveryService service = new DiscoveryService("2019-11-22", authenticator);
    service.SetServiceUrl("https://{cpd_cluster_host}{:port}/discovery/{release}/instances/{instance_id}/api");

    var newFilter = "field:1";

    TrainingExample newTrainingExample = new TrainingExample()
    {
        CollectionId = "{collection_id}",
        DocumentId = "{document_id}"
    };

    var result = service.UpdateTrainingQuery(
        projectId: "{project_id}",
        queryId: "{query_id}",
        naturalLanguageQuery: "This is a new example of a query",
        examples: new List<TrainingExample>() { newTrainingExample },
        filter: newFilter
        );
}
public IEnumerator TestUpdateTrainingQuery()
{
    Log.Debug("DiscoveryServiceV2IntegrationTests", "Attempting to UpdateTrainingQuery...");
    TrainingExample trainingExample = new TrainingExample()
    {
        CollectionId = collectionId,
        DocumentId = documentId
    };
    TrainingQuery trainingQueryResponse = null;
    service.UpdateTrainingQuery(
        callback: (DetailedResponse<TrainingQuery> response, IBMError error) =>
        {
            Log.Debug("DiscoveryServiceV2IntegrationTests", "UpdateTrainingQuery result: {0}", response.Response);
            trainingQueryResponse = response.Result;
            queryId = trainingQueryResponse.QueryId;
            Assert.IsNotNull(trainingQueryResponse);
            Assert.IsNull(error);
        },
        projectId: projectId,
        queryId: queryId,
        examples: new List<TrainingExample>() { trainingExample },
        filter: "entities.text:IBM",
        naturalLanguageQuery: "This is a new example of a query"
    );

    while (trainingQueryResponse == null)
    {
        yield return null;
    }
}
public void CreateTrainingQuery()
{
    CloudPakForDataAuthenticator authenticator = new CloudPakForDataAuthenticator(
        url: "https://{cpd_cluster_host}{:port}",
        username: "******",
        password: "******"
        );

    TrainingExample trainingExample = new TrainingExample()
    {
        CollectionId = "{collection_id}",
        DocumentId = "{document_id}"
    };

    DiscoveryService service = new DiscoveryService("2019-11-22", authenticator);
    service.SetServiceUrl("https://{cpd_cluster_host}{:port}/discovery/{release}/instances/{instance_id}/api");

    var result = service.CreateTrainingQuery(
        projectId: "{project_id}",
        examples: new List<TrainingExample>() { trainingExample },
        naturalLanguageQuery: "This is an example of a query"
        );

    Console.WriteLine(result.Response);
}
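The same CreateTrainingQuery call can also target Discovery on IBM Cloud rather than Cloud Pak for Data by swapping the authenticator; a minimal sketch assuming the SDK's IamAuthenticator and an {apikey}/{url} pair from the service credentials:

public void CreateTrainingQueryOnIbmCloud()
{
    //Sketch only: authenticate with an IAM API key instead of Cloud Pak for Data credentials
    IamAuthenticator authenticator = new IamAuthenticator(apikey: "{apikey}");

    DiscoveryService service = new DiscoveryService("2019-11-22", authenticator);
    service.SetServiceUrl("{url}");

    var result = service.CreateTrainingQuery(
        projectId: "{project_id}",
        examples: new List<TrainingExample>()
        {
            new TrainingExample() { CollectionId = "{collection_id}", DocumentId = "{document_id}" }
        },
        naturalLanguageQuery: "This is an example of a query"
        );

    Console.WriteLine(result.Response);
}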
public void Cost_With_Testdata()
{
    TrainingExample[] ex0 = new TrainingExample[]
    {
        new TrainingExample(new double[] { }, new double[] { })
    };
    double expected0 = 0;

    TrainingExample[] ex1 = new TrainingExample[]
    {
        new TrainingExample(new double[] { 4, 3, 5 }, new double[] { 1, 2, 3 })
    };
    double expected1 = Math.Pow(1 - 4 / 2d, 2) + Math.Pow(2 - 3 / 2d, 2) + Math.Pow(3 - 5 / 2d, 2);

    TrainingExample[] ex2 = new TrainingExample[]
    {
        new TrainingExample(new double[] { 4, 3, 5, 3 }, new double[] { 3, 4, 4, 5 }),
        new TrainingExample(new double[] { 2, 3, 4, 2 }, new double[] { 8, 4, 6, 4 }),
        new TrainingExample(new double[] { 1, 5, 2, 3 }, new double[] { 5, 4, 7, 4 })
    };
    double expected2 =
        (Math.Pow(3 - 4 / 2d, 2) + Math.Pow(4 - 3 / 2d, 2) + Math.Pow(4 - 5 / 2d, 2) + Math.Pow(5 - 3 / 2d, 2)) +
        (Math.Pow(8 - 2 / 2d, 2) + Math.Pow(4 - 3 / 2d, 2) + Math.Pow(6 - 4 / 2d, 2) + Math.Pow(4 - 2 / 2d, 2)) +
        (Math.Pow(5 - 1 / 2d, 2) + Math.Pow(4 - 5 / 2d, 2) + Math.Pow(7 - 2 / 2d, 2) + Math.Pow(4 - 3 / 2d, 2));

    TrainingExample[] ex3 = new TrainingExample[]
    {
        new TrainingExample(new double[] { 9.234 }, new double[] { 4.5443 })
    };
    double expected3 = Math.Pow(4.5443 - 9.234 / 2d, 2);

    Assert.Equal(expected0, Cost(ex0, xs => xs.Select(x => x / 2d).ToArray()));
    Assert.Equal(expected1, Cost(ex1, xs => xs.Select(x => x / 2d).ToArray()));
    Assert.Equal(expected2, Cost(ex2, xs => xs.Select(x => x / 2d).ToArray()));
    Assert.Equal(expected3, Cost(ex3, xs => xs.Select(x => x / 2d).ToArray()));
}
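The expected values above are consistent with an unscaled sum-of-squared-errors cost taken over every component of every example. A minimal sketch of such a Cost helper, with the TrainingExample member names (Input, Target) assumed rather than taken from the code under test:

static double Cost(TrainingExample[] examples, Func<double[], double[]> hypothesis)
{
    //Sum of squared differences between each target vector and the hypothesis
    //applied to the corresponding input; no averaging and no 1/2 factor,
    //matching the hand-computed expectations in the test above.
    double total = 0;
    foreach (var example in examples)
    {
        double[] predicted = hypothesis(example.Input);   //e.g. xs => xs.Select(x => x / 2d).ToArray()
        for (int i = 0; i < predicted.Length; i++)
        {
            double diff = example.Target[i] - predicted[i];
            total += diff * diff;
        }
    }
    return total;
}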
public IEnumerator TestGetTrainingQuery()
{
    Log.Debug("DiscoveryServiceV2IntegrationTests", "Attempting to GetTrainingQuery...");
    TrainingExample trainingExample = new TrainingExample()
    {
        CollectionId = collectionId,
        DocumentId = documentId
    };
    TrainingQuery trainingQueryResponse = null;
    service.GetTrainingQuery(
        callback: (DetailedResponse<TrainingQuery> response, IBMError error) =>
        {
            Log.Debug("DiscoveryServiceV2IntegrationTests", "GetTrainingQuery result: {0}", response.Response);
            trainingQueryResponse = response.Result;
            Assert.IsNotNull(trainingQueryResponse);
            Assert.IsNull(error);
        },
        projectId: projectId,
        queryId: queryId
    );

    while (trainingQueryResponse == null)
    {
        yield return null;
    }
}
/// <summary>
/// One hot encodes the REBER strings
/// </summary>
/// <param name="strList">A list of REBER sequences</param>
/// <returns>A sequence of tuples of { input, output }</returns>
public static IEnumerable<TrainingExample[]> GetOneHot(IEnumerable<string> strList)
{
    // build the following item table
    HashSet<int> temp;
    var following = new Dictionary<string, HashSet<int>>();
    foreach (var str in strList)
    {
        var sb = new StringBuilder();
        string prev = null;
        foreach (var ch in str)
        {
            sb.Append(ch);
            var key = sb.ToString();
            if (prev != null)
            {
                if (!following.TryGetValue(prev, out temp))
                {
                    following.Add(prev, temp = new HashSet<int>());
                }
                temp.Add(_ch[ch]);
            }
            prev = key;
        }
    }

    var ret = new List<TrainingExample[]>();
    foreach (var str in strList)
    {
        var sequence = new TrainingExample[str.Length];
        var sb = new StringBuilder();
        for (var i = 0; i < str.Length; i++)
        {
            var ch = str[i];
            sb.Append(ch);
            var input = new float[_ch.Count];
            var output = new float[_ch.Count];
            input[_ch[ch]] = 1f;
            if (following.ContainsKey(sb.ToString()))
            {
                foreach (var item in following[sb.ToString()])
                {
                    output[item] = 1f;
                }
            }
            sequence[i] = new TrainingExample(input, output);
        }
        ret.Add(sequence);
    }
    return ret;
}
public void CreateTrainingExample()
{
    Console.WriteLine(string.Format("\nCalling CreateTrainingExample()..."));

    var trainingExample = new TrainingExample()
    {
        DocumentId = _createdDocumentId,
        Relevance = 1
    };

    var result = _discovery.CreateTrainingExample(_existingEnvironmentId, _createdCollectionId, _createdTrainingQueryId, trainingExample);

    if (result != null)
    {
        Console.WriteLine(JsonConvert.SerializeObject(result, Formatting.Indented));
        _createdTrainingExampleId = result.DocumentId;
    }
    else
    {
        Console.WriteLine("result is null.");
    }
}
public void AddTrainingExample()
{
    TrainingExample newExample = new TrainingExample();

    newExample.input = new double[3 * inputs.Length];
    for (int i = 0; i < inputs.Length; i++)
    {
        newExample.input[3 * i] = inputs[i].position.x;
        newExample.input[3 * i + 1] = inputs[i].position.y;
        newExample.input[3 * i + 2] = inputs[i].position.z;
    }

    newExample.output = new double[outputs.Length];
    for (int i = 0; i < outputs.Length; i++)
    {
        newExample.output[i] = outputs[i];
    }

    //Array.Resize<TrainingExample>(ref trainingExamples, trainingExamples.Length + 1);
    //trainingExamples[trainingExamples.Length - 1] = newExample;
    trainingExamples.Add(newExample);
}
public static void DecimalBinaryExample()
{
    int[] layers = new int[] { 2, 5, 4 };
    FeedforwardNeuralNetwork fnn = new FeedforwardNeuralNetwork(layers, 1.0F, 0.1F);
    DecimalBinaryTestNetwork(fnn);

    Matrix[] expectedOutputs =
    {
        new Matrix(new float[4, 1] { { 1 }, { 0 }, { 0 }, { 0 } }),
        new Matrix(new float[4, 1] { { 0 }, { 1 }, { 0 }, { 0 } }),
        new Matrix(new float[4, 1] { { 0 }, { 0 }, { 1 }, { 0 } }),
        new Matrix(new float[4, 1] { { 0 }, { 0 }, { 0 }, { 1 } })
    };
    Matrix[] inputs =
    {
        new Matrix(new float[2, 1] { { 0 }, { 0 } }),
        new Matrix(new float[2, 1] { { 0 }, { 1 } }),
        new Matrix(new float[2, 1] { { 1 }, { 0 } }),
        new Matrix(new float[2, 1] { { 1 }, { 1 } })
    };

    TrainingExample[] examples = new TrainingExample[4];
    for (int i = 0; i < 4; ++i)
    {
        examples[i] = new TrainingExample(inputs[i], expectedOutputs[i]);
    }

    fnn.TrainEpochs(examples, 1000);
    DecimalBinaryTestNetwork(fnn);
}
public static void LogicGatesExample()
{
    //First layer is input layer
    //Initialize a 2-layer ANN with two inputs and one output
    int[] layers = new int[] { 2, 3, 1 };
    FeedforwardNeuralNetwork fnn = new FeedforwardNeuralNetwork(layers, 1.0F, 0.1F);

    //Train this many cycles
    int numTrainingEpochs = 10000;

    TrainingExample ex1 = new TrainingExample(new Matrix(new float[,] { { 0 }, { 0 } }), new Matrix(new float[,] { { 0 } }));
    TrainingExample ex2 = new TrainingExample(new Matrix(new float[,] { { 0 }, { 1 } }), new Matrix(new float[,] { { 1 } }));
    TrainingExample ex3 = new TrainingExample(new Matrix(new float[,] { { 1 }, { 0 } }), new Matrix(new float[,] { { 1 } }));
    TrainingExample ex4 = new TrainingExample(new Matrix(new float[,] { { 1 }, { 1 } }), new Matrix(new float[,] { { 0 } }));
    TrainingExample[] trainingExamples = { ex1, ex2, ex3, ex4 };

    //Create the random generator once so every epoch gets a fresh shuffle
    Random rand = new Random();
    for (int i = 0; i < numTrainingEpochs; ++i)
    {
        //Shuffle the examples, then train the network on each input/output combination
        trainingExamples = trainingExamples.OrderBy(x => rand.Next()).ToArray();
        for (int j = 0; j < 4; ++j)
        {
            fnn.TrainIteration(trainingExamples[j].input, trainingExamples[j].expectedOutput, 1 - (1.0F * 0.1F / 4));
        }
    }

    //After training, evaluate the network with respect to all possible inputs
    //and print the outputs to the console to see the result of training
    float[,] inputArray = new float[,] { { 0 }, { 0 } };
    Matrix input = new Matrix(inputArray);
    Matrix output = fnn.Evaluate(input);
    Console.WriteLine(output[1, 1]);

    inputArray = new float[,] { { 0 }, { 1 } };
    input = new Matrix(inputArray);
    output = fnn.Evaluate(input);
    Console.WriteLine(output[1, 1]);

    inputArray = new float[,] { { 1 }, { 0 } };
    input = new Matrix(inputArray);
    output = fnn.Evaluate(input);
    Console.WriteLine(output[1, 1]);

    inputArray = new float[,] { { 1 }, { 1 } };
    input = new Matrix(inputArray);
    output = fnn.Evaluate(input);
    Console.WriteLine(output[1, 1]);
}
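The feedforward-network snippets construct TrainingExample from two Matrix values and read its input and expectedOutput fields directly. A minimal sketch of the type those usages imply; the constructor shape and field names come from the calls above, everything else is assumed:

public class TrainingExample
{
    //Column matrix fed into the network, e.g. 2x1 for the logic-gate example
    public Matrix input;
    //Column matrix of desired activations, e.g. 1x1 for a single output neuron
    public Matrix expectedOutput;

    public TrainingExample(Matrix input, Matrix expectedOutput)
    {
        this.input = input;
        this.expectedOutput = expectedOutput;
    }
}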
private static void MNISTTrainNetwork(FeedforwardNeuralNetwork fnn, TrainingExample[] trainingExamples, TrainingExample[] testExamples)
{
    Console.WriteLine("\nBeginning neural network training procedure.\n");
    Console.WriteLine("Enter desired number of epochs.");
    string epochsStr = Console.ReadLine();
    int epochs = Convert.ToInt32(epochsStr);

    do
    {
        Console.WriteLine("Please wait for training cycle to complete...");
        var timer = Stopwatch.StartNew();
        fnn.TrainEpochs(trainingExamples, epochs);
        Console.WriteLine("Training cycle completed in " + timer.Elapsed + ".");

        Console.WriteLine("Test network? Enter Y for yes or anything else for no.");
        string inputStr = Console.ReadLine();
        char input = Convert.ToChar(inputStr);
        if (input == 'Y')
        {
            MNISTTestNetwork(fnn, testExamples);
        }

        Console.WriteLine("Continue training? Enter 0 for no or a valid number of epochs for yes.");
        epochsStr = Console.ReadLine();
        epochs = Convert.ToInt32(epochsStr);
    } while (epochs > 0);
}
public void TrainEpochs(TrainingExample[] examples, int numEpochs)
{
    Console.WriteLine("\n");
    for (int i = 0; i < numEpochs; ++i)
    {
        Console.WriteLine("Beginning epoch " + (i + 1) + " of " + numEpochs + ".");
        TrainDataSet(examples);
    }
}
public static void MNISTTestNetwork(FeedforwardNeuralNetwork fnn, TrainingExample[] testExamples)
{
    try
    {
        Console.WriteLine("\nBeginning neural network test procedure.\n");
        int numSuccesses = 0;
        int numImages = testExamples.Length;
        int expectedNum = 0;

        for (int r = 0; r < numImages; ++r)
        {
            //The expected output is one-hot: row i is 1 for digit i, with row 10 standing in for 0
            for (int i = 1; i <= 10; ++i)
            {
                if (testExamples[r].expectedOutput[i, 1] == 1)
                {
                    expectedNum = i % 10;
                }
            }

            Matrix actualOutput = fnn.Evaluate(testExamples[r].input);
            float largestOutputValue = 0;
            int index = 0;
            for (int j = 1; j <= 10; ++j)
            {
                if (actualOutput[j, 1] > largestOutputValue)
                {
                    largestOutputValue = actualOutput[j, 1];
                    index = j;
                }
            }
            index %= 10;

            if (index == expectedNum)
            {
                ++numSuccesses;
            }
            //Console.WriteLine("Test value: " + expectedNum);
            //Console.WriteLine("Network detected: " + (index));

            if ((r + 1) % 1000 == 0)
            {
                Console.WriteLine((r + 1) / 100 + "00 test images processed. Current success percentage: " + (float)numSuccesses / (r + 1) * 100 + "%");
            }
        }

        Console.WriteLine("\nTest regimen completed. Network was correct for " + (float)numSuccesses / numImages * 100 + "% of " + numImages + " images.\n");
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
    }
}
public TrainingExample CreateTrainingExample(string environmentId, string collectionId, string queryId, TrainingExample body)
{
    try
    {
        var result = DiscoveryRepository.CreateTrainingExample(environmentId, collectionId, queryId, body);
        return result;
    }
    catch (Exception ex)
    {
        Logger.Error("DiscoveryService.CreateTrainingExample failed", this, ex);
    }
    return null;
}
//[UnityTest, Order(103)]
public IEnumerator TestDeleteTrainingQuery()
{
    Log.Debug("DiscoveryServiceV2IntegrationTests", "Attempting to DeleteTrainingQuery...");
    DocumentAccepted addDocumentResponse = null;
    string documentId = "";
    string queryId = "";

    using (FileStream fs = File.OpenRead(addDocumentFile))
    {
        using (MemoryStream ms = new MemoryStream())
        {
            fs.CopyTo(ms);
            service.AddDocument(
                callback: (DetailedResponse<DocumentAccepted> response, IBMError error) =>
                {
                    Log.Debug("DiscoveryServiceV2IntegrationTests", "AddDocument result: {0}", response.Response);
                    addDocumentResponse = response.Result;
                    documentId = addDocumentResponse.DocumentId;
                    Assert.IsNotNull(addDocumentResponse);
                    Assert.IsNotNull(documentId);
                    Assert.IsNull(error);
                },
                projectId: projectId,
                collectionId: collectionId,
                file: ms,
                fileContentType: Utility.GetMimeType(Path.GetExtension(addDocumentFile)),
                filename: Path.GetFileName(addDocumentFile)
            );

            while (addDocumentResponse == null)
            {
                yield return null;
            }
        }
    }

    TrainingExample trainingExample = new TrainingExample()
    {
        CollectionId = collectionId,
        DocumentId = documentId,
        Relevance = 1L
    };
    TrainingQuery trainingQueryResponse = null;
    service.CreateTrainingQuery(
        callback: (DetailedResponse<TrainingQuery> response, IBMError error) =>
        {
            Log.Debug("DiscoveryServiceV2IntegrationTests", "CreateTrainingQuery result: {0}", response.Response);
            trainingQueryResponse = response.Result;
            queryId = trainingQueryResponse.QueryId;
            Assert.IsNotNull(trainingQueryResponse);
            Assert.IsNull(error);
        },
        projectId: projectId,
        examples: new List<TrainingExample>() { trainingExample },
        filter: "entities.text:IBM",
        naturalLanguageQuery: "This is an example of a query"
    );

    while (trainingQueryResponse == null)
    {
        yield return null;
    }

    bool deleteTrainingQueryResponse = false;
    service.DeleteTrainingQuery(
        callback: (DetailedResponse<object> response, IBMError error) =>
        {
            Assert.IsNull(error);
            deleteTrainingQueryResponse = true;
        },
        projectId: projectId,
        queryId: queryId
    );

    while (!deleteTrainingQueryResponse)
    {
        yield return null;
    }

    DeleteDocumentResponse deleteDocumentResponse = null;
    service.DeleteDocument(
        callback: (DetailedResponse<DeleteDocumentResponse> response, IBMError error) =>
        {
            Log.Debug("DiscoveryServiceV2IntegrationTests", "DeleteDocument result: {0}", response.Response);
            deleteDocumentResponse = response.Result;
            Assert.IsNotNull(deleteDocumentResponse);
            Assert.IsNull(error);
        },
        projectId: projectId,
        collectionId: collectionId,
        documentId: documentId
    );

    while (deleteDocumentResponse == null)
    {
        yield return null;
    }
}
/// <summary>
/// Choosing the substitutions for target words. The candidates are ranked according to several features
/// (similarity with the target word, information content reduction, similarity with context words, ...).
/// </summary>
/// <param name="document">The document whose text is to be simplified</param>
/// <param name="substCandidates">All the pairs of target words and collected candidate replacements</param>
/// <param name="contextSize">The size of the context of the target word to be considered for measuring the similarity between candidate words and the target word context</param>
/// <param name="noSubstitutionWords">Stopwords, never to be considered for simplification</param>
/// <param name="similarityTreshold">The threshold for semantic similarity between target word and candidate replacement</param>
/// <param name="icReplacementCandidateTreshold">Information content threshold for replacing the target word</param>
/// <param name="word"></param>
/// <returns>The list of substitutions (tuples of target token and candidate replacement word)</returns>
public List<Tuple<TokenAnnotation, string>> GetSubstitutions(Document document, List<Tuple<TokenAnnotation, List<Tuple<string, double>>>> substCandidates, int contextSize, List<string> noSubstitutionWords, double similarityTreshold, double icReplacementCandidateTreshold, string word = null)
{
    EngMorphology morph = new EngMorphology();
    List<Tuple<TokenAnnotation, string>> substitutions = new List<Tuple<TokenAnnotation, string>>();
    List<string> metrics = new List<string> { "sim", "ic-diff", "context-sim", "lm-bigram-pre", "lm-bigram-post", "lm-trigram-pre", "lm-trigram-post" };
    //List<string> metrics = new List<string> { "sim", "lm-bigram-pre", "lm-bigram-post", /*"ic-diff", "lm-trigram-pre", "lm-trigram-post"*/ };

    substCandidates.ForEach(sc =>
    {
        if (!noSubstitutionWords.Contains(sc.Item1.Text.ToLower()))
        {
            if (word == null || sc.Item1.Text.ToLower() == word)
            {
                Dictionary<string, Dictionary<string, double>> scores = new Dictionary<string, Dictionary<string, double>>();
                var targetToken = sc.Item1;
                var sentence = document.Sentences.Where(s => s.Tokens.Any(t => t.StartPosition == targetToken.StartPosition && t.Text == targetToken.Text)).Single();
                var targetTokenCopy = sentence.Tokens.Where(t => t.StartPosition == targetToken.StartPosition && t.Text == targetToken.Text).Single();
                var preceedingSentencePart = sentence.Text.Substring(0, targetTokenCopy.StartPositionSentence);
                var followingSentencePart = sentence.Text.Substring(targetTokenCopy.StartPositionSentence + targetTokenCopy.Text.Length);
                var targetLemmaIC = InformationContent.GetRelativeInformationContent(targetToken.Lemma.ToLower());
                var targetWordIC = InformationContent.GetRelativeInformationContent(targetToken.Text.ToLower());
                var targetContextTokens = sentence.Tokens.Where(t => Math.Abs(sentence.Tokens.IndexOf(t) - targetTokenCopy.SentenceIndex) > 0 && Math.Abs(sentence.Tokens.IndexOf(t) - targetTokenCopy.SentenceIndex) <= contextSize && t.IsContent()).ToList();
                var targetCtxtSimilarities = targetContextTokens.Select(x => VectorSpace.Similarity(x.Lemma.ToLower(), targetToken.Lemma.ToLower())).Where(x => x >= -1).ToList();
                var targetContextSimilarity = targetCtxtSimilarities.Count > 0 ? targetCtxtSimilarities.Average() : 0;

                if (sc.Item2 != null)
                {
                    sc.Item2.ForEach(candidate =>
                    {
                        try
                        {
                            var candidateLemmaIC = InformationContent.GetRelativeInformationContent(candidate.Item1.ToLower());
                            string key = candidate.Item1 + "<->" + targetToken.POSTag;
                            //var candidateInPOS = EngMorphology.GetForm(candidate.Item1, targetToken.POSTag);
                            //if (!CandidateInPoSLookup.ContainsKey(key)) CandidateInPoSLookup.Add(key, candidateInPOS);
                            var candidateInPOS = CandidateInPoSLookup.ContainsKey(key) ? CandidateInPoSLookup[key] : candidate.Item1;
                            var candidateWordIC = !string.IsNullOrEmpty(candidateInPOS) ? InformationContent.GetRelativeInformationContent(candidateInPOS.ToLower()) : 1;
                            var candidateIC = candidateWordIC == 1 ? candidateLemmaIC : candidateWordIC;
                            var targetIC = targetWordIC == 1 ? targetLemmaIC : targetWordIC;

                            if (!string.IsNullOrEmpty(candidateInPOS) && targetLemmaIC > icReplacementCandidateTreshold && (candidateIC < targetIC /*|| Math.Abs(targetIC - candidateIC) < 0.05*/))
                            {
                                var artificialSentence = preceedingSentencePart + candidateInPOS + followingSentencePart;
                                var artTokens = (new EngPOSTagger()).Annotate(artificialSentence).Select(x => (TokenAnnotation)x).ToList();
                                morph.AnnotateMorphology(artTokens);
                                var candidateToken = artTokens.Where(x => x.StartPositionSentence == targetTokenCopy.StartPositionSentence /*&& x.Text == candidateInPOS*/).Single();
                                var candidateContextSimilarities = targetContextTokens.Select(x => VectorSpace.Similarity(x.Lemma.ToLower(), candidateToken.Lemma.ToLower())).Where(x => x >= -1).ToList();
                                var candidateContextSimilarity = candidateContextSimilarities.Count > 0 ? candidateContextSimilarities.Average() : targetContextSimilarity;

                                // POS-tag compatibility is a second prerequisite
                                bool sameWord = candidate.Item1.Contains(targetToken.Text) || targetToken.Text.Contains(candidate.Item1) || candidate.Item1.Contains(targetToken.Lemma) || targetToken.Lemma.Contains(candidate.Item1) || candidateInPOS.Contains(targetToken.Text) || targetToken.Text.Contains(candidateInPOS) || candidateInPOS.Contains(targetToken.Lemma) || targetToken.Lemma.Contains(candidateInPOS);
                                bool sameAsContext = targetContextTokens.Any(ct => candidate.Item1.Contains(ct.Text) || ct.Text.Contains(candidate.Item1) || candidate.Item1.Contains(ct.Lemma) || ct.Lemma.Contains(candidate.Item1) || candidateInPOS.Contains(ct.Text) || ct.Text.Contains(candidateInPOS) || candidateInPOS.Contains(ct.Lemma) || ct.Lemma.Contains(candidateInPOS));

                                if (candidate.Item2 >= similarityTreshold && (candidateToken.POSTag == targetToken.POSTag) && !sameWord && !sameAsContext)
                                {
                                    if (!scores.ContainsKey(candidateInPOS))
                                    {
                                        scores.Add(candidateInPOS, new Dictionary<string, double>());
                                        scores[candidateInPOS].Add("sim", candidate.Item2);
                                        scores[candidateInPOS].Add("ic-diff", targetIC - candidateIC);
                                        scores[candidateInPOS].Add("context-sim", candidateContextSimilarity);
                                        scores[candidateInPOS].Add("length", candidateInPOS.Length);

                                        var tokenIndex = sentence.Tokens.IndexOf(targetTokenCopy);

                                        // bigram LM
                                        if (tokenIndex > 0)
                                        {
                                            var lmScore = EnglishLanguageModel.Instance.GetBigramLMScore(sentence.Tokens[sentence.Tokens.IndexOf(targetTokenCopy) - 1].Text.ToLower(), candidateInPOS);
                                            scores[candidateInPOS].Add("lm-bigram-pre", lmScore.HasValue ? lmScore.Value : -100);
                                        }
                                        else
                                        {
                                            scores[candidateInPOS].Add("lm-bigram-pre", 0);
                                        }
                                        if (tokenIndex < sentence.Tokens.Count - 1)
                                        {
                                            var lmScore = EnglishLanguageModel.Instance.GetBigramLMScore(candidateInPOS, sentence.Tokens[sentence.Tokens.IndexOf(targetTokenCopy) + 1].Text.ToLower());
                                            scores[candidateInPOS].Add("lm-bigram-post", lmScore.HasValue ? lmScore.Value : -100);
                                        }
                                        else
                                        {
                                            scores[candidateInPOS].Add("lm-bigram-post", 0);
                                        }

                                        // trigram LM
                                        if (tokenIndex > 1)
                                        {
                                            var lmScore = EnglishLanguageModel.Instance.GetTrigramLMScore(sentence.Tokens[sentence.Tokens.IndexOf(targetTokenCopy) - 2].Text.ToLower(), sentence.Tokens[sentence.Tokens.IndexOf(targetTokenCopy) - 1].Text.ToLower(), candidateInPOS);
                                            scores[candidateInPOS].Add("lm-trigram-pre", lmScore.HasValue ? lmScore.Value : -100);
                                        }
                                        else
                                        {
                                            scores[candidateInPOS].Add("lm-trigram-pre", 0);
                                        }
                                        if (tokenIndex < sentence.Tokens.Count - 2)
                                        {
                                            var lmScore = EnglishLanguageModel.Instance.GetTrigramLMScore(candidateInPOS, sentence.Tokens[sentence.Tokens.IndexOf(targetTokenCopy) + 1].Text.ToLower(), sentence.Tokens[sentence.Tokens.IndexOf(targetTokenCopy) + 2].Text.ToLower());
                                            scores[candidateInPOS].Add("lm-trigram-post", lmScore.HasValue ? lmScore.Value : -100);
                                        }
                                        else
                                        {
                                            scores[candidateInPOS].Add("lm-trigram-post", 0);
                                        }
                                    }
                                }
                            }
                        }
                        catch { }
                    });
                }

                LastSubstitutionCandidates = new List<Tuple<TokenAnnotation, List<string>>>();
                LastSubstitutionCandidates.Add(new Tuple<TokenAnnotation, List<string>>(targetToken, scores.Select(x => x.Key).ToList()));

                if (scores.Count > 0)
                {
                    var allRanks = new List<Dictionary<string, int>>();
                    metrics.ForEach(m =>
                    {
                        var featDict = scores.ToDictionary(x => x.Key, x => x.Value[m]);
                        allRanks.Add(TrainingExample.RankExamplesByNumericFeature(featDict, m == "length"));
                    });
                    var allCandidates = scores.Select(x => x.Key).ToList();
                    Dictionary<string, double> averageRankings = allCandidates.ToDictionary(x => x, x => allRanks.Select(r => r[x]).Average());
                    var finalRanking = averageRankings.OrderBy(r => r.Value).ToList();
                    double topScore = finalRanking[0].Value;
                    var equal = new List<string>();
                    finalRanking.ForEach(fr =>
                    {
                        if (fr.Value == topScore)
                        {
                            equal.Add(fr.Key);
                        }
                    });
                    var finalChoice = equal.Where(eq => equal.Where(eq2 => eq2 != eq).All(eq2 => scores[eq]["sim"] >= scores[eq2]["sim"])).First();
                    substitutions.Add(new Tuple<TokenAnnotation, string>(targetToken, finalChoice));
                }
            }
        }
    });
    return substitutions;
}
/// <summary>
/// Ordering the candidates for the ranking task, when the candidates are given. The ordering is somewhat different.
/// </summary>
/// <param name="document">The document whose text is to be simplified</param>
/// <param name="substitutionCandidates">Substitution candidates</param>
/// <param name="target">Target word</param>
/// <param name="contextSize">The size of the context of the target word to be compared semantically with candidate replacements</param>
/// <returns></returns>
public List<string> OrderGivenSubstitutionCandidates(Document document, List<string> substitutionCandidates, string target, int contextSize)
{
    EngMorphology morph = new EngMorphology();
    List<Tuple<TokenAnnotation, string>> substitutions = new List<Tuple<TokenAnnotation, string>>();
    //List<string> metrics = new List<string> { "sim", "ic-diff", "context-sim", "length", "lm-bigram-pre", "lm-bigram-post", "lm-trigram-pre", "lm-trigram-post" };
    List<string> metrics = new List<string> { "context-sim" /*, "lm-bigram-pre", "lm-bigram-post"*/, "ic-diff" /*, "lm-trigram-pre", "lm-trigram-post"*/ };
    //List<string> metrics = new List<string> { "ic-diff" };
    Dictionary<string, string> candidateChanges = new Dictionary<string, string>();

    var targetToken = document.AllTokens.Where(t => t.Text == target).Last();
    var preceedingSentencePart = document.Text.Substring(0, targetToken.StartPositionSentence);
    var followingSentencePart = document.Text.Substring(targetToken.StartPositionSentence + targetToken.Text.Length);
    var targetLemmaIC = InformationContent.GetRelativeInformationContent(targetToken.Lemma.ToLower());
    var targetWordIC = InformationContent.GetRelativeInformationContent(targetToken.Text.ToLower());
    var targetContextTokens = document.AllTokens.Where(t => Math.Abs(document.AllTokens.IndexOf(t) - targetToken.SentenceIndex) > 0 && Math.Abs(document.AllTokens.IndexOf(t) - targetToken.SentenceIndex) <= contextSize && t.IsContent()).ToList();

    Dictionary<string, Dictionary<string, double>> scores = new Dictionary<string, Dictionary<string, double>>();
    substitutionCandidates.ForEach(candidate =>
    {
        var candidateText = candidate;
        if (candidateText.Contains(","))
        {
            var splitCand = candidateText.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries).ToList();
            string selected = string.Empty;
            double maxIC = double.MinValue;
            splitCand.ForEach(sc =>
            {
                var scic = InformationContent.GetRelativeInformationContent(sc.Trim().ToLower());
                if (scic > maxIC)
                {
                    selected = sc.Trim();
                    maxIC = scic;
                }
            });
            candidateText = selected;
        }
        if (candidateText.Trim().Contains(" "))
        {
            var tokens = (new EngPOSTagger()).Annotate(candidateText.Trim()).ToList();
            var contentTokens = tokens.Where(x => ((TokenAnnotation)x).IsContent()).ToList();
            var change = contentTokens.Count > 0 ? ((TokenAnnotation)(contentTokens.Last())).Text.Trim() : ((TokenAnnotation)(tokens.First())).Text.Trim();
            candidateText = change;
        }

        var candidateLemmaIC = InformationContent.GetRelativeInformationContent(candidateText.ToLower());
        var candidateWordIC = !string.IsNullOrEmpty(candidateText) ? InformationContent.GetRelativeInformationContent(candidateText.ToLower()) : 1;
        var candidateIC = candidateWordIC == 1 ? candidateLemmaIC : candidateWordIC;
        var targetIC = targetWordIC == 1 ? targetLemmaIC : targetWordIC;
        var candidateContextSimilarities = targetContextTokens.Select(x => VectorSpace.Similarity(x.Lemma.ToLower(), candidateText.ToLower())).Where(x => x >= -1).ToList();
        var candidateContextSimilarity = candidateContextSimilarities.Count > 0 ? candidateContextSimilarities.Average() : 0;

        scores.Add(candidate, new Dictionary<string, double>());
        var sim = VectorSpace.Similarity(targetToken.Text.ToLower().Trim(), candidateText.ToLower().Trim());
        if (sim < 1)
        {
            scores[candidate].Add("sim", sim);
        }
        scores[candidate].Add("ic-diff", candidateIC);
        scores[candidate].Add("context-sim", candidateContextSimilarity);
        scores[candidate].Add("length", candidateText.Length);

        var tokenIndex = document.AllTokens.IndexOf(targetToken);

        // bigram LM
        if (tokenIndex > 0)
        {
            var lmScore = EnglishLanguageModel.Instance.GetBigramLMScore(document.AllTokens[document.AllTokens.IndexOf(targetToken) - 1].Text.ToLower(), candidateText);
            scores[candidate].Add("lm-bigram-pre", lmScore.HasValue ? lmScore.Value : -100);
        }
        else
        {
            scores[candidate].Add("lm-bigram-pre", 0);
        }
        if (tokenIndex < document.AllTokens.Count - 1)
        {
            var lmScore = EnglishLanguageModel.Instance.GetBigramLMScore(candidateText, document.AllTokens[document.AllTokens.IndexOf(targetToken) + 1].Text.ToLower());
            scores[candidate].Add("lm-bigram-post", lmScore.HasValue ? lmScore.Value : -100);
        }
        else
        {
            scores[candidate].Add("lm-bigram-post", 0);
        }

        // trigram LM
        if (tokenIndex > 1)
        {
            var lmScore = EnglishLanguageModel.Instance.GetTrigramLMScore(document.AllTokens[document.AllTokens.IndexOf(targetToken) - 2].Text.ToLower(), document.AllTokens[document.AllTokens.IndexOf(targetToken) - 1].Text.ToLower(), candidateText);
            scores[candidate].Add("lm-trigram-pre", lmScore.HasValue ? lmScore.Value : -100);
        }
        else
        {
            scores[candidate].Add("lm-trigram-pre", 0);
        }
        if (tokenIndex < document.AllTokens.Count - 2)
        {
            var lmScore = EnglishLanguageModel.Instance.GetTrigramLMScore(candidateText, document.AllTokens[document.AllTokens.IndexOf(targetToken) + 1].Text.ToLower(), document.AllTokens[document.AllTokens.IndexOf(targetToken) + 2].Text.ToLower());
            scores[candidate].Add("lm-trigram-post", lmScore.HasValue ? lmScore.Value : -100);
        }
        else
        {
            scores[candidate].Add("lm-trigram-post", 0);
        }
    });

    LastSubstitutionCandidates = new List<Tuple<TokenAnnotation, List<string>>>();
    LastSubstitutionCandidates.Add(new Tuple<TokenAnnotation, List<string>>(targetToken, scores.Select(x => x.Key).ToList()));

    var allRanks = new List<Dictionary<string, int>>();
    metrics.ForEach(m =>
    {
        var featDict = scores.Where(x => x.Value.ContainsKey(m)).ToDictionary(x => x.Key, x => x.Value[m]);
        allRanks.Add(TrainingExample.RankExamplesByNumericFeature(featDict, m == "length" || m == "ic-diff"));
    });
    var allCandidates = scores.Select(x => x.Key).ToList();
    Dictionary<string, double> averageRankings = allCandidates.ToDictionary(x => x, x => allRanks.Where(y => y.ContainsKey(x)).Select(r => r[x]).Average());
    return averageRankings.OrderBy(r => r.Value).Select(x => x.Key).ToList();
}
private static void displayImage(TrainingExample example)
{
    for (int i = 1; i <= 784; ++i)
    {
        if ((i - 1) % 28 == 0)
        {
            Console.Write("\n");
        }
        Console.Write(example.input[i, 1] + ", ");
    }
    Console.Write("\n");

    string s = "";
    for (int i = 1; i <= 784; ++i)
    {
        if (example.input[i, 1] == 0)
        {
            s += " ";
        }
        else if (example.input[i, 1] < 0.5F)
        {
            s += ".";
        }
        else if (example.input[i, 1] <= 1.0F)
        {
            s += "O";
        }
        if (i % 28 == 0)
        {
            s += "\n";
        }
    }
    Console.WriteLine(s);

    Console.WriteLine("\n\n" + example.expectedOutput[1, 1] + " " + example.expectedOutput[2, 1] + " " +
        example.expectedOutput[3, 1] + " " + example.expectedOutput[4, 1] + " " + example.expectedOutput[5, 1] + " " +
        example.expectedOutput[6, 1] + " " + example.expectedOutput[7, 1] + " " + example.expectedOutput[8, 1] + " " +
        example.expectedOutput[9, 1] + " " + example.expectedOutput[10, 1] + " ");
}
private static TrainingExample[] GetTrainingExamples()
{
    try
    {
        Console.WriteLine("Searching for training datasets.");
        FileStream labelsStream = new FileStream(@"E:\Users\Alexander Weaver\My Documents\Programs\MNIST\train-labels.idx1-ubyte", FileMode.Open);
        FileStream imagesStream = new FileStream(@"E:\Users\Alexander Weaver\My Documents\Programs\MNIST\train-images.idx3-ubyte", FileMode.Open);
        Console.WriteLine("Training datasets found.");

        BinaryReader labelsReader = new BinaryReader(labelsStream);
        BinaryReader imagesReader = new BinaryReader(imagesStream);

        //IDX headers are big-endian, so the image count is assembled byte by byte
        int magic1 = imagesReader.ReadInt32();
        int numImages = (imagesReader.ReadByte() << 24) | (imagesReader.ReadByte() << 16) | (imagesReader.ReadByte() << 8) | (imagesReader.ReadByte());
        int numRows = imagesReader.ReadInt32();
        int numColumns = imagesReader.ReadInt32();
        int magic2 = labelsReader.ReadInt32();
        int numLabels = labelsReader.ReadInt32();

        Console.WriteLine("Populating training examples.");
        TrainingExample[] trainingExamples = new TrainingExample[numImages];
        //List<TrainingExample> trainingExamples = new List<TrainingExample>();
        for (int r = 0; r < numImages; ++r)
        {
            Matrix input = new Matrix(784, 1);
            Matrix expectedOutput = new Matrix(10, 1);
            for (int i = 1; i <= 784; ++i)
            {
                byte b = imagesReader.ReadByte();
                input[i, 1] = (float)b / (float)256;
            }
            int expectedNum = labelsReader.ReadByte();
            //One-hot encode the label; digit 0 is stored in row 10
            if (expectedNum == 0)
            {
                expectedOutput[10, 1] = 1;
            }
            else
            {
                expectedOutput[expectedNum, 1] = 1;
            }
            trainingExamples[r] = new TrainingExample(input, expectedOutput);
            /*if ((expectedNum == 2 || expectedNum == 6))
            {
                trainingExamples.Add(new TrainingExample(input, expectedOutput));
            }*/
        }
        Console.WriteLine("Training examples populated.");
        return trainingExamples.ToArray();
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
        return null;
    }
}
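The IDX files that store MNIST keep their header fields in big-endian order, while BinaryReader.ReadInt32 reads little-endian; that is why the image count above is assembled byte by byte while the remaining header fields are read raw and effectively discarded. A small helper along these lines (a sketch, not part of the original code) would make all of the header reads consistent:

static int ReadBigEndianInt32(BinaryReader reader)
{
    //Reassemble a 32-bit big-endian value from its four bytes
    byte[] bytes = reader.ReadBytes(4);
    return (bytes[0] << 24) | (bytes[1] << 16) | (bytes[2] << 8) | bytes[3];
}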
public void TrainDataSet(TrainingExample[] examples)
{
    Random rand = new Random();
    examples = examples.OrderBy(x => rand.Next()).ToArray();
    for (int i = 0; i < examples.Length; ++i)
    {
        TrainIteration(examples[i].input, examples[i].expectedOutput, 1 - (learningRate * regParameter / examples.Length));

        //If there are over 5000 examples, provide status updates in console
        if (examples.Length >= 5000)
        {
            if (i == 0)
            {
                Console.WriteLine("\n");
            }
            if (i % 1000 == 0 && i != 0)
            {
                Console.WriteLine(i + "/" + examples.Length + " objects trained. Epoch " + Math.Round((double)i / examples.Length, 3) * 100 + "% complete.");
            }
        }
    }
    Console.WriteLine(examples.Length + "/" + examples.Length + " objects trained. Epoch 100% complete.");
    Console.WriteLine("\n");
}
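The third argument passed to TrainIteration, 1 - (learningRate * regParameter / examples.Length), reads like an L2 weight-decay multiplier applied to the weights before each gradient step. Assuming that interpretation, the arithmetic for the logic-gate example above works out as follows:

//Hedged illustration of the decay factor, using the values from LogicGatesExample:
//learningRate = 1.0, regParameter = 0.1, four training examples
float learningRate = 1.0F;
float regParameter = 0.1F;
int exampleCount = 4;
float decay = 1 - (learningRate * regParameter / exampleCount);   //0.975F, i.e. weights shrink by 2.5% per update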