Example #1
0
        /// <summary>
        /// Sample: updates an existing training query through a <c>DiscoveryService</c> that is
        /// authenticated with a <c>CloudPakForDataAuthenticator</c>. Every host, credential and
        /// identifier below is a placeholder to be replaced by the reader.
        /// </summary>
        public void UpdateTrainingQuery()
        {
            var authenticator = new CloudPakForDataAuthenticator(
                url: "https://{cpd_cluster_host}{:port}",
                username: "******",
                password: "******");

            var service = new DiscoveryService("2019-11-22", authenticator);
            service.SetServiceUrl("{https://{cpd_cluster_host}{:port}/discovery/{release}/instances/{instance_id}/api}");

            // The single example document attached to the updated query.
            var example = new TrainingExample();
            example.CollectionId = "{collection_id}";
            example.DocumentId = "{document_id}";

            var examples = new List<TrainingExample>();
            examples.Add(example);

            var result = service.UpdateTrainingQuery(
                projectId: "{project_id}",
                queryId: "{query_id}",
                naturalLanguageQuery: "This is a new example of a query",
                examples: examples,
                filter: "field:1");
        }
Example #2
0
        /// <summary>
        /// Coroutine test: updates the training query identified by <c>queryId</c> with one
        /// example document, then keeps yielding <c>null</c> until the callback has stored a result.
        /// </summary>
        /// <returns>An enumerator that completes once <c>trainingQueryResponse</c> is non-null.</returns>
        public IEnumerator TestUpdateTrainingQuery()
        {
            Log.Debug("DiscoveryServiceV2IntegrationTests", "Attempting to UpdateTrainingQuery...");
            TrainingExample trainingExample = new TrainingExample()
            {
                CollectionId = collectionId,
                DocumentId   = documentId
            };
            TrainingQuery trainingQueryResponse = null;

            service.UpdateTrainingQuery(
                callback: (DetailedResponse <TrainingQuery> response, IBMError error) =>
                {
                    Log.Debug("DiscoveryServiceV2IntegrationTests", "UpdateTrainingQuery result: {0}", response.Response);
                    trainingQueryResponse = response.Result;

                    // Assert before dereferencing, so a missing result is reported as a failed
                    // assertion rather than as a NullReferenceException on the QueryId access.
                    Assert.IsNotNull(trainingQueryResponse);
                    queryId = trainingQueryResponse.QueryId;
                    Assert.IsNull(error);
                },
                projectId: projectId,
                queryId: queryId,
                examples: new List <TrainingExample>()
                {
                    trainingExample
                },
                filter: "entities.text:IBM",
                naturalLanguageQuery: "This is a new example of a query"
                );

            // Wait for the asynchronous callback to deliver the updated query.
            while (trainingQueryResponse == null)
            {
                yield return(null);
            }
        }
Example #3
0
        /// <summary>
        /// Sample: creates a training query with a single example document and prints the raw
        /// response. Every host, credential and identifier below is a placeholder to be replaced
        /// by the reader.
        /// </summary>
        public void CreateTrainingQuery()
        {
            var authenticator = new CloudPakForDataAuthenticator(
                url: "https://{cpd_cluster_host}{:port}",
                username: "******",
                password: "******");

            var service = new DiscoveryService("2019-11-22", authenticator);
            service.SetServiceUrl("{https://{cpd_cluster_host}{:port}/discovery/{release}/instances/{instance_id}/api}");

            // One example document is enough for the sample query.
            var examples = new List<TrainingExample>
            {
                new TrainingExample
                {
                    CollectionId = "{collection_id}",
                    DocumentId = "{document_id}"
                }
            };

            var result = service.CreateTrainingQuery(
                projectId: "{project_id}",
                examples: examples,
                naturalLanguageQuery: "This is an example of a query");

            Console.WriteLine(result.Response);
        }
Example #4
0
        /// <summary>
        /// Checks <c>Cost</c> against hand-computed expectations for four data sets (empty,
        /// single example, three examples, single fractional value). The hypothesis passed to
        /// <c>Cost</c> halves every input, and each expectation is the sum of squared differences
        /// between the second constructor argument and the halved first argument.
        /// </summary>
        public void Cost_With_Testdata()
        {
            // Squared difference between a target value and the halved input value.
            Func<double, double, double> sq = (target, input) => Math.Pow(target - input / 2d, 2);

            // Case 0: no values at all.
            var emptyCase = new[]
            {
                new TrainingExample(new double[] { }, new double[] { })
            };
            double emptyExpected = 0;

            // Case 1: one example with three values.
            var singleCase = new[]
            {
                new TrainingExample(new double[] { 4, 3, 5 }, new double[] { 1, 2, 3 })
            };
            double singleExpected = sq(1, 4) + sq(2, 3) + sq(3, 5);

            // Case 2: three examples with four values each.
            var multiCase = new[]
            {
                new TrainingExample(new double[] { 4, 3, 5, 3 }, new double[] { 3, 4, 4, 5 }),
                new TrainingExample(new double[] { 2, 3, 4, 2 }, new double[] { 8, 4, 6, 4 }),
                new TrainingExample(new double[] { 1, 5, 2, 3 }, new double[] { 5, 4, 7, 4 })
            };
            double multiExpected =
                (sq(3, 4) + sq(4, 3) + sq(4, 5) + sq(5, 3)) +
                (sq(8, 2) + sq(4, 3) + sq(6, 4) + sq(4, 2)) +
                (sq(5, 1) + sq(4, 5) + sq(7, 2) + sq(4, 3));

            // Case 3: a single fractional value.
            var fractionalCase = new[]
            {
                new TrainingExample(new double[] { 9.234 }, new double[] { 4.5443 })
            };
            double fractionalExpected = sq(4.5443, 9.234);

            Assert.Equal(emptyExpected, Cost(emptyCase, xs => xs.Select(x => x / 2d).ToArray()));
            Assert.Equal(singleExpected, Cost(singleCase, xs => xs.Select(x => x / 2d).ToArray()));
            Assert.Equal(multiExpected, Cost(multiCase, xs => xs.Select(x => x / 2d).ToArray()));
            Assert.Equal(fractionalExpected, Cost(fractionalCase, xs => xs.Select(x => x / 2d).ToArray()));
        }
Example #5
0
        /// <summary>
        /// Coroutine test: fetches the training query identified by <c>queryId</c> and keeps
        /// yielding <c>null</c> until the callback has stored a result.
        /// </summary>
        /// <returns>An enumerator that completes once <c>trainingQueryResponse</c> is non-null.</returns>
        public IEnumerator TestGetTrainingQuery()
        {
            Log.Debug("DiscoveryServiceV2IntegrationTests", "Attempting to GetTrainingQuery...");

            // The unused TrainingExample local was removed: GetTrainingQuery only takes the
            // project and query identifiers.
            TrainingQuery trainingQueryResponse = null;

            service.GetTrainingQuery(
                callback: (DetailedResponse <TrainingQuery> response, IBMError error) =>
                {
                    Log.Debug("DiscoveryServiceV2IntegrationTests", "GetTrainingQuery result: {0}", response.Response);
                    trainingQueryResponse = response.Result;
                    Assert.IsNotNull(trainingQueryResponse);
                    Assert.IsNull(error);
                },
                projectId: projectId,
                queryId: queryId
                );

            // Wait for the asynchronous callback to deliver the query.
            while (trainingQueryResponse == null)
            {
                yield return(null);
            }
        }
Example #6
0
        /// <summary>
        /// One hot encodes the REBER strings.
        /// For every character position the input vector has a single 1 at that character's
        /// index in <c>_ch</c>; the output vector has a 1 for every character that was observed,
        /// anywhere in <paramref name="strList"/>, directly after the same prefix.
        /// </summary>
        /// <param name="strList">A list of REBER sequences. It is materialised once, so a lazily evaluated sequence is only enumerated a single time.</param>
        /// <returns>One array of { input, output } training examples per sequence, in input order</returns>
        public static IEnumerable <TrainingExample[]> GetOneHot(IEnumerable <string> strList)
        {
            // The sequences are walked twice (table build, then encoding). Snapshot them so a
            // deferred or one-shot enumerable is not re-evaluated between the two passes.
            var sequences = strList as IList<string> ?? new List<string>(strList);

            // Pass 1: map every prefix to the set of character indices seen right after it.
            var following = new Dictionary<string, HashSet<int>>();
            foreach (var str in sequences)
            {
                var sb = new StringBuilder();
                string prev = null;
                foreach (var ch in str)
                {
                    sb.Append(ch);
                    if (prev != null)
                    {
                        HashSet<int> successors;
                        if (!following.TryGetValue(prev, out successors))
                        {
                            successors = new HashSet<int>();
                            following.Add(prev, successors);
                        }
                        successors.Add(_ch[ch]);
                    }
                    prev = sb.ToString();
                }
            }

            // Pass 2: emit one { input, output } pair per character of every sequence.
            var ret = new List<TrainingExample[]>(sequences.Count);
            foreach (var str in sequences)
            {
                var sequence = new TrainingExample[str.Length];
                var sb       = new StringBuilder();
                for (var i = 0; i < str.Length; i++)
                {
                    var ch = str[i];
                    sb.Append(ch);

                    var input  = new float[_ch.Count];
                    var output = new float[_ch.Count];
                    input[_ch[ch]] = 1f;

                    // Single lookup instead of ContainsKey followed by the indexer.
                    HashSet<int> successors;
                    if (following.TryGetValue(sb.ToString(), out successors))
                    {
                        foreach (var item in successors)
                        {
                            output[item] = 1f;
                        }
                    }
                    sequence[i] = new TrainingExample(input, output);
                }
                ret.Add(sequence);
            }
            return(ret);
        }
Example #7
0
        /// <summary>
        /// Console sample: creates a training example for the previously created document and
        /// training query, prints the result as indented JSON and remembers its document id in
        /// <c>_createdTrainingExampleId</c>. Prints a notice instead when the call returns null.
        /// </summary>
        public void CreateTrainingExample()
        {
            Console.WriteLine("\nCalling CreateTrainingExample()...");

            var example = new TrainingExample();
            example.DocumentId = _createdDocumentId;
            example.Relevance = 1;

            var created = _discovery.CreateTrainingExample(_existingEnvironmentId, _createdCollectionId, _createdTrainingQueryId, example);

            // Nothing to record when the service gave no result back.
            if (created == null)
            {
                Console.WriteLine("result is null.");
                return;
            }

            Console.WriteLine(JsonConvert.SerializeObject(created, Formatting.Indented));
            _createdTrainingExampleId = created.DocumentId;
        }
Example #8
0
    /// <summary>
    /// Captures the current state as one training example: the x/y/z position of every entry
    /// in <c>inputs</c> is flattened into the example's input vector, the values of
    /// <c>outputs</c> are copied into its output vector, and the example is appended to
    /// <c>trainingExamples</c>.
    /// </summary>
    public void AddTrainingExample()
    {
        // Three consecutive slots (x, y, z) per input.
        int inputCount = inputs.Length;
        var flattened = new double[3 * inputCount];
        for (int k = 0; k < inputCount; k++)
        {
            int offset = 3 * k;
            flattened[offset]     = inputs[k].position.x;
            flattened[offset + 1] = inputs[k].position.y;
            flattened[offset + 2] = inputs[k].position.z;
        }

        int outputCount = outputs.Length;
        var targets = new double[outputCount];
        for (int k = 0; k < outputCount; k++)
        {
            targets[k] = outputs[k];
        }

        var example = new TrainingExample();
        example.input = flattened;
        example.output = targets;

        trainingExamples.Add(example);
    }
 /// <summary>
 /// Demo: builds a 2-5-4 feed-forward network and trains it for 1000 epochs to map each
 /// two-bit input column (00, 01, 10, 11) to a four-row column with a single 1 in the row
 /// matching the input's numeric value. <c>DecimalBinaryTestNetwork</c> is called before and
 /// after training.
 /// </summary>
 public static void DecimalBinaryExample()
 {
     var fnn = new FeedforwardNeuralNetwork(new int[] { 2, 5, 4 }, 1.0F, 0.1F);
     DecimalBinaryTestNetwork(fnn);

     // Target columns: row k is 1 for the k-th input pattern.
     Matrix[] expectedOutputs =
     {
         new Matrix(new float[,] { { 1 }, { 0 }, { 0 }, { 0 } }),
         new Matrix(new float[,] { { 0 }, { 1 }, { 0 }, { 0 } }),
         new Matrix(new float[,] { { 0 }, { 0 }, { 1 }, { 0 } }),
         new Matrix(new float[,] { { 0 }, { 0 }, { 0 }, { 1 } })
     };

     // Input columns: the two bits of 0, 1, 2 and 3.
     Matrix[] inputs =
     {
         new Matrix(new float[,] { { 0 }, { 0 } }),
         new Matrix(new float[,] { { 0 }, { 1 } }),
         new Matrix(new float[,] { { 1 }, { 0 } }),
         new Matrix(new float[,] { { 1 }, { 1 } })
     };

     var examples = new TrainingExample[inputs.Length];
     for (int k = 0; k < examples.Length; k++)
     {
         examples[k] = new TrainingExample(inputs[k], expectedOutputs[k]);
     }

     fnn.TrainEpochs(examples, 1000);
     DecimalBinaryTestNetwork(fnn);
 }
        /// <summary>
        /// Demo: trains a 2-3-1 feed-forward network on the four rows of the XOR truth table
        /// (presented in a freshly shuffled order every epoch) and then prints the network's
        /// output for each of the four input combinations.
        /// </summary>
        public static void LogicGatesExample()
        {
            //First layer is input layer
            //Two inputs, three hidden units, one output
            int[] layers = new int[] { 2, 3, 1 };
            FeedforwardNeuralNetwork fnn = new FeedforwardNeuralNetwork(layers, 1.0F, 0.1F);
            //Train this many cycles
            int numTrainingEpochs = 10000;
            TrainingExample ex1 = new TrainingExample(new Matrix(new float[,]{ { 0 },
                                                                               { 0 } }),
                                                      new Matrix(new float[,]{ { 0 } }));

            TrainingExample ex2 = new TrainingExample(new Matrix(new float[,]{ { 0 },
                                                                               { 1 } }),
                                                      new Matrix(new float[,]{ { 1 } }));

            TrainingExample ex3 = new TrainingExample(new Matrix(new float[,]{ { 1 },
                                                                               { 0 } }),
                                                      new Matrix(new float[,]{ { 1 } }));

            TrainingExample ex4 = new TrainingExample(new Matrix(new float[,]{ { 1 },
                                                                               { 1 } }),
                                                      new Matrix(new float[,]{ { 0 } }));

            TrainingExample[] trainingExamples = { ex1, ex2, ex3, ex4 };

            // One generator for the whole run. Constructing a new Random every epoch can, on
            // runtimes that seed the parameterless constructor from the system clock, yield
            // the same sequence for consecutive epochs and therefore the same "shuffle".
            Random rand = new Random();
            for (int i = 0; i < numTrainingEpochs; ++i) {
                //Shuffle, then train once on every example
                trainingExamples = trainingExamples.OrderBy(x => rand.Next()).ToArray();
                for (int j = 0; j < trainingExamples.Length; ++j) {
                    // NOTE(review): the meaning of the third argument is defined by TrainIteration,
                    // which is not visible here; the expression is kept exactly as it was.
                    fnn.TrainIteration(trainingExamples[j].input, trainingExamples[j].expectedOutput, 1 - (1.0F * 0.1F / 4));
                }
            }

            //After training, evaluates the network with respect to all possible inputs
            //Prints their outputs to the console to see the result of training
            float[][,] evaluationInputs =
            {
                new float[,] { { 0 }, { 0 } },
                new float[,] { { 0 }, { 1 } },
                new float[,] { { 1 }, { 0 } },
                new float[,] { { 1 }, { 1 } }
            };
            foreach (float[,] inputArray in evaluationInputs) {
                Matrix input = new Matrix(inputArray);
                Matrix output = fnn.Evaluate(input);
                Console.WriteLine(output[1, 1]);
            }
        }
 /// <summary>
 /// Interactive training loop: asks for a number of epochs, trains the network, optionally
 /// runs <c>MNISTTestNetwork</c>, and repeats for as long as the user enters a positive epoch count.
 /// </summary>
 /// <param name="fnn">The network to train.</param>
 /// <param name="trainingExamples">Examples passed to <c>TrainEpochs</c> on every cycle.</param>
 /// <param name="testExamples">Examples passed to <c>MNISTTestNetwork</c> when the user answers Y.</param>
 private static void MNISTTrainNetwork(FeedforwardNeuralNetwork fnn, TrainingExample[] trainingExamples, TrainingExample[] testExamples)
 {
     Console.WriteLine("\nBeginning neural network training procedure.\n");
     Console.WriteLine("Enter desired number of epochs.");
     string epochsStr = Console.ReadLine();
     // Convert.ToInt32 throws FormatException for non-numeric text; the prompts ask for a valid number.
     int epochs = Convert.ToInt32(epochsStr);
     do {
         Console.WriteLine("Please wait for training cycle to complete...");
         var timer = Stopwatch.StartNew();
         fnn.TrainEpochs(trainingExamples, epochs);
         Console.WriteLine("Training cycle completed in " + timer.Elapsed + ".");
         Console.WriteLine("Test network? Enter Y for yes or anything else for no.");
         string inputStr = Console.ReadLine();
         // Compare the whole line. Convert.ToChar(string) throws for an empty line or for more
         // than one character, which contradicted the "anything else for no" prompt.
         if (inputStr == "Y") {
             MNISTTestNetwork(fnn, testExamples);
         }
         Console.WriteLine("Continue training? Enter 0 for no or a valid number of epochs for yes.");
         epochsStr = Console.ReadLine();
         epochs = Convert.ToInt32(epochsStr);
     } while (epochs > 0);
 }
 /// <summary>
 /// Calls <c>TrainDataSet</c> on <paramref name="examples"/> once per epoch, announcing each
 /// epoch on the console beforehand.
 /// </summary>
 /// <param name="examples">The examples handed to <c>TrainDataSet</c> every epoch.</param>
 /// <param name="numEpochs">How many epochs to run; zero or less runs none.</param>
 public void TrainEpochs(TrainingExample[] examples, int numEpochs)
 {
     Console.WriteLine("\n");

     int completed = 0;
     while (completed < numEpochs) {
         int current = completed + 1;
         Console.WriteLine("Beginning epoch " + current + " of " + numEpochs + ".");
         TrainDataSet(examples);
         completed = current;
     }
 }
        /// <summary>
        /// Evaluates the network on every test example and prints the share of examples whose
        /// strongest output row matches the labelled row. Rows 1..10 are read from column 1 and
        /// mapped to digits with <c>row % 10</c> (so row 10 stands for digit 0). Progress is
        /// printed every 1000 examples. Any exception is caught and written to the console.
        /// </summary>
        /// <param name="fnn">The network to evaluate.</param>
        /// <param name="testExamples">Examples providing <c>input</c> and <c>expectedOutput</c> matrices.</param>
        public static void MNISTTestNetwork(FeedforwardNeuralNetwork fnn, TrainingExample[] testExamples)
        {
            try {
                Console.WriteLine("\nBeginning neural network test procedure.\n");
                int numSuccesses = 0;
                int numImages = testExamples.Length;
                for (int r = 0; r < numImages; ++r) {
                    // Reset for every image: -1 never equals a predicted digit, so an example
                    // without a labelled row can no longer inherit the previous image's label.
                    int expectedNum = -1;
                    for (int i = 1; i <= 10; ++i) {
                        if (testExamples[r].expectedOutput[i, 1] == 1) {
                            expectedNum = i % 10;
                        }
                    }
                    Matrix actualOutput = fnn.Evaluate(testExamples[r].input);

                    // Seed the arg-max with row 1 rather than with 0 / index 0, so the strongest
                    // row is found even when every output is zero or negative.
                    float largestOutputValue = actualOutput[1, 1];
                    int index = 1;
                    for (int j = 2; j <= 10; ++j) {
                        if (actualOutput[j, 1] > largestOutputValue) {
                            largestOutputValue = actualOutput[j, 1];
                            index = j;
                        }
                    }
                    index %= 10;
                    if (index == expectedNum) {
                        ++numSuccesses;
                    }

                    //Console.WriteLine("Test value: " + expectedNum);
                    //Console.WriteLine("Network detected: " + (index));

                    if ((r + 1) % 1000 == 0) {
                        // (r + 1) is a multiple of 1000 here, so this prints the full count.
                        Console.WriteLine((r + 1) / 100 + "00 test images processed.  Current success percentage: " + (float)numSuccesses/(r+1) * 100 + "%");
                    }
                }
                Console.WriteLine("\nTest regimen completed.  Network was correct for " + (float)numSuccesses / numImages * 100 + "% of " + numImages + " images.\n");

            } catch (Exception ex) {
                Console.WriteLine(ex.ToString());
            }
        }
Example #14
0
        /// <summary>
        /// Forwards the request to <c>DiscoveryRepository.CreateTrainingExample</c>.
        /// Any exception is logged through <c>Logger.Error</c> and swallowed.
        /// </summary>
        /// <param name="environmentId">Passed through to the repository.</param>
        /// <param name="collectionId">Passed through to the repository.</param>
        /// <param name="queryId">Passed through to the repository.</param>
        /// <param name="body">The training example to create.</param>
        /// <returns>The repository's result, or <c>null</c> when the call threw.</returns>
        public TrainingExample CreateTrainingExample(string environmentId, string collectionId, string queryId, TrainingExample body)
        {
            TrainingExample created = null;

            try
            {
                created = DiscoveryRepository.CreateTrainingExample(environmentId, collectionId, queryId, body);
            }
            catch (Exception ex)
            {
                Logger.Error("DiscoveryService.CreateTrainingExample failed", this, ex);
            }

            return created;
        }
        /// <summary>
        /// Coroutine test for DeleteTrainingQuery. It runs four asynchronous steps in sequence,
        /// yielding <c>null</c> until each callback has fired: upload a document, create a
        /// training query that references it, delete that training query, and finally delete
        /// the uploaded document.
        /// </summary>
        /// <returns>An enumerator that completes after the document has been deleted.</returns>
        //[UnityTest, Order(103)]
        public IEnumerator TestDeleteTrainingQuery()
        {
            Log.Debug("DiscoveryServiceV2IntegrationTests", "Attempting to DeleteTrainingQuery...");

            DocumentAccepted addDocumentResponse = null;
            // Locals holding the ids created by this test.
            string           documentId          = "";
            string           queryId             = "";

            // Step 1: upload the document the training example will point at.
            using (FileStream fs = File.OpenRead(addDocumentFile))
            {
                using (MemoryStream ms = new MemoryStream())
                {
                    fs.CopyTo(ms);
                    service.AddDocument(
                        callback: (DetailedResponse <DocumentAccepted> response, IBMError error) =>
                        {
                            // Tag aligned with the rest of this method (was a copy-pasted "V1" tag).
                            Log.Debug("DiscoveryServiceV2IntegrationTests", "AddDocument result: {0}", response.Response);
                            addDocumentResponse = response.Result;

                            // Assert before dereferencing so a missing result fails as an assertion.
                            Assert.IsNotNull(addDocumentResponse);
                            documentId = addDocumentResponse.DocumentId;
                            Assert.IsNotNull(documentId);
                            Assert.IsNull(error);
                        },
                        projectId: projectId,
                        collectionId: collectionId,
                        file: ms,
                        fileContentType: Utility.GetMimeType(Path.GetExtension(addDocumentFile)),
                        filename: Path.GetFileName(addDocumentFile)
                        );

                    // Keep both streams open until the upload callback has fired.
                    while (addDocumentResponse == null)
                    {
                        yield return(null);
                    }
                }
            }

            // Step 2: create the training query that will be deleted.
            TrainingExample trainingExample = new TrainingExample()
            {
                CollectionId = collectionId,
                DocumentId   = documentId,
                Relevance    = 1L
            };
            TrainingQuery trainingQueryResponse = null;

            service.CreateTrainingQuery(
                callback: (DetailedResponse <TrainingQuery> response, IBMError error) =>
                {
                    Log.Debug("DiscoveryServiceV2IntegrationTests", "CreateTrainingQuery result: {0}", response.Response);
                    trainingQueryResponse = response.Result;

                    // Assert before dereferencing so a missing result fails as an assertion.
                    Assert.IsNotNull(trainingQueryResponse);
                    queryId = trainingQueryResponse.QueryId;
                    Assert.IsNull(error);
                },
                projectId: projectId,
                examples: new List <TrainingExample>()
                {
                    trainingExample
                },
                filter: "entities.text:IBM",
                naturalLanguageQuery: "This is an example of a query"
                );

            while (trainingQueryResponse == null)
            {
                yield return(null);
            }

            // Step 3: the call under test.
            bool deleteTrainingQueryResponse = false;

            service.DeleteTrainingQuery(
                callback: (DetailedResponse <object> response, IBMError error) =>
                {
                    Assert.IsNull(error);
                    deleteTrainingQueryResponse = true;
                },
                projectId: projectId,
                queryId: queryId
                );

            while (!deleteTrainingQueryResponse)
            {
                yield return(null);
            }

            // Step 4: clean up the uploaded document.
            DeleteDocumentResponse deleteDocumentResponse = null;

            service.DeleteDocument(
                callback: (DetailedResponse <DeleteDocumentResponse> response, IBMError error) =>
                {
                    Log.Debug("DiscoveryServiceV2IntegrationTests", "DeleteDocument result: {0}", response.Response);
                    deleteDocumentResponse = response.Result;
                    Assert.IsNotNull(deleteDocumentResponse);
                    Assert.IsNull(error);
                },
                projectId: projectId,
                collectionId: collectionId,
                documentId: documentId
                );

            while (deleteDocumentResponse == null)
            {
                yield return(null);
            }
        }
Example #16
0
        /// <summary>
        /// Choosing the substitutions for target words. This means we rank the candidates according to several features (similarity with target word, information content reduction, similarity with context words, ...)
        /// </summary>
        /// <param name="document">The document whose text is to be simplified</param>
        /// <param name="substCandidates">All the pairs of target words and collected candidate replacements</param>
        /// <param name="contextSize">The size of the context of the target word to be considered for measuring the similarity between candidate words and target word context</param>
        /// <param name="noSubstitutionWords">Stopwords, never to be considered for simplification</param>
        /// <param name="similarityTreshold">The treshold for semantic similarity between target word and candidate replacement</param>
        /// <param name="icReplacementCandidateTreshold">Information content treshold for replacing the target word</param>
        /// <param name="word"></param>
        /// <returns>The list of substitutions (tuple of target token and candidate replacement word)</returns>
        public List <Tuple <TokenAnnotation, string> > GetSubstitutions(Document document, List <Tuple <TokenAnnotation, List <Tuple <string, double> > > > substCandidates, int contextSize, List <string> noSubstitutionWords, double similarityTreshold, double icReplacementCandidateTreshold, string word = null)
        {
            EngMorphology morph = new EngMorphology();
            List <Tuple <TokenAnnotation, string> > substitutions = new List <Tuple <TokenAnnotation, string> >();
            List <string> metrics = new List <string> {
                "sim", "ic-diff", "context-sim", "lm-bigram-pre", "lm-bigram-post", "lm-trigram-pre", "lm-trigram-post"
            };

            //List<string> metrics = new List<string> { "sim", "lm-bigram-pre", "lm-bigram-post", /*"ic-diff", "lm-trigram-pre", "lm-trigram-post"*/ };

            substCandidates.ForEach(sc => {
                if (!noSubstitutionWords.Contains(sc.Item1.Text.ToLower()))
                {
                    if (word == null || sc.Item1.Text.ToLower() == word)
                    {
                        Dictionary <string, Dictionary <string, double> > scores = new Dictionary <string, Dictionary <string, double> >();

                        var targetToken     = sc.Item1;
                        var sentence        = document.Sentences.Where(s => s.Tokens.Any(t => t.StartPosition == targetToken.StartPosition && t.Text == targetToken.Text)).Single();
                        var targetTokenCopy = sentence.Tokens.Where(t => t.StartPosition == targetToken.StartPosition && t.Text == targetToken.Text).Single();

                        var preceedingSentencePart = sentence.Text.Substring(0, targetTokenCopy.StartPositionSentence);
                        var followingSentencePart  = sentence.Text.Substring(targetTokenCopy.StartPositionSentence + targetTokenCopy.Text.Length);

                        var targetLemmaIC = InformationContent.GetRelativeInformationContent(targetToken.Lemma.ToLower());
                        var targetWordIC  = InformationContent.GetRelativeInformationContent(targetToken.Text.ToLower());

                        var targetContextTokens = sentence.Tokens.Where(t => Math.Abs(sentence.Tokens.IndexOf(t) - targetTokenCopy.SentenceIndex) > 0 && Math.Abs(sentence.Tokens.IndexOf(t) - targetTokenCopy.SentenceIndex) <= contextSize && t.IsContent()).ToList();

                        var targetCtxtSimilarities  = targetContextTokens.Select(x => VectorSpace.Similarity(x.Lemma.ToLower(), targetToken.Lemma.ToLower())).Where(x => x >= -1).ToList();
                        var targetContextSimilarity = targetCtxtSimilarities.Count > 0 ? targetCtxtSimilarities.Average() : 0;


                        if (sc.Item2 != null)
                        {
                            sc.Item2.ForEach(candidate =>
                            {
                                try
                                {
                                    var candidateLemmaIC = InformationContent.GetRelativeInformationContent(candidate.Item1.ToLower());
                                    string key           = candidate.Item1 + "<->" + targetToken.POSTag;

                                    //var candidateInPOS = EngMorphology.GetForm(candidate.Item1, targetToken.POSTag);
                                    //if (!CandidateInPoSLookup.ContainsKey(key)) CandidateInPoSLookup.Add(key, candidateInPOS);

                                    var candidateInPOS  = CandidateInPoSLookup.ContainsKey(key) ? CandidateInPoSLookup[key] : candidate.Item1;
                                    var candidateWordIC = !string.IsNullOrEmpty(candidateInPOS) ? InformationContent.GetRelativeInformationContent(candidateInPOS.ToLower()) : 1;

                                    var candidateIC = candidateWordIC == 1 ? candidateLemmaIC : candidateWordIC;
                                    var targetIC    = targetWordIC == 1 ? targetLemmaIC : targetWordIC;

                                    if (!string.IsNullOrEmpty(candidateInPOS) && targetLemmaIC > icReplacementCandidateTreshold && (candidateIC < targetIC /*|| Math.Abs(targetIC - candidateIC) < 0.05*/))
                                    {
                                        var artificialSentence = preceedingSentencePart + candidateInPOS + followingSentencePart;
                                        var artTokens          = (new EngPOSTagger()).Annotate(artificialSentence).Select(x => (TokenAnnotation)x).ToList();
                                        morph.AnnotateMorphology(artTokens);
                                        var candidateToken = artTokens.Where(x => x.StartPositionSentence == targetTokenCopy.StartPositionSentence /*&& x.Text == candidateInPOS*/).Single();
                                        var candidateContextSimilarities = targetContextTokens.Select(x => VectorSpace.Similarity(x.Lemma.ToLower(), candidateToken.Lemma.ToLower())).Where(x => x >= -1).ToList();

                                        var candidateContextSimilarity = candidateContextSimilarities.Count > 0 ? candidateContextSimilarities.Average() : targetContextSimilarity;
                                        // POS-tag compatibility is a second prerequisite

                                        bool sameWord = candidate.Item1.Contains(targetToken.Text) || targetToken.Text.Contains(candidate.Item1) ||
                                                        candidate.Item1.Contains(targetToken.Lemma) || targetToken.Lemma.Contains(candidate.Item1) ||
                                                        candidateInPOS.Contains(targetToken.Text) || targetToken.Text.Contains(candidateInPOS) ||
                                                        candidateInPOS.Contains(targetToken.Lemma) || targetToken.Lemma.Contains(candidateInPOS);

                                        bool sameAsContext = targetContextTokens.Any(ct =>
                                                                                     candidate.Item1.Contains(ct.Text) || ct.Text.Contains(candidate.Item1) ||
                                                                                     candidate.Item1.Contains(ct.Lemma) || ct.Lemma.Contains(candidate.Item1) ||
                                                                                     candidateInPOS.Contains(ct.Text) || ct.Text.Contains(candidateInPOS) ||
                                                                                     candidateInPOS.Contains(ct.Lemma) || ct.Lemma.Contains(candidateInPOS));

                                        if (candidate.Item2 >= similarityTreshold && (candidateToken.POSTag == targetToken.POSTag) && !sameWord && !sameAsContext)
                                        {
                                            if (!scores.ContainsKey(candidateInPOS))
                                            {
                                                scores.Add(candidateInPOS, new Dictionary <string, double>());
                                                scores[candidateInPOS].Add("sim", candidate.Item2);
                                                scores[candidateInPOS].Add("ic-diff", targetIC - candidateIC);
                                                scores[candidateInPOS].Add("context-sim", candidateContextSimilarity);
                                                scores[candidateInPOS].Add("length", candidateInPOS.Length);

                                                var tokenIndex = sentence.Tokens.IndexOf(targetTokenCopy);

                                                // bigram LM
                                                if (tokenIndex > 0)
                                                {
                                                    var lmScore = EnglishLanguageModel.Instance.GetBigramLMScore(sentence.Tokens[sentence.Tokens.IndexOf(targetTokenCopy) - 1].Text.ToLower(), candidateInPOS);
                                                    scores[candidateInPOS].Add("lm-bigram-pre", lmScore.HasValue ? lmScore.Value : -100);
                                                }
                                                else
                                                {
                                                    scores[candidateInPOS].Add("lm-bigram-pre", 0);
                                                }

                                                if (tokenIndex < sentence.Tokens.Count - 1)
                                                {
                                                    var lmScore = EnglishLanguageModel.Instance.GetBigramLMScore(candidateInPOS, sentence.Tokens[sentence.Tokens.IndexOf(targetTokenCopy) + 1].Text.ToLower());
                                                    scores[candidateInPOS].Add("lm-bigram-post", lmScore.HasValue ? lmScore.Value : -100);
                                                }
                                                else
                                                {
                                                    scores[candidateInPOS].Add("lm-bigram-post", 0);
                                                }

                                                // trigram LM
                                                if (tokenIndex > 1)
                                                {
                                                    var lmScore = EnglishLanguageModel.Instance.GetTrigramLMScore(sentence.Tokens[sentence.Tokens.IndexOf(targetTokenCopy) - 2].Text.ToLower(), sentence.Tokens[sentence.Tokens.IndexOf(targetTokenCopy) - 1].Text.ToLower(), candidateInPOS);
                                                    scores[candidateInPOS].Add("lm-trigram-pre", lmScore.HasValue ? lmScore.Value : -100);
                                                }
                                                else
                                                {
                                                    scores[candidateInPOS].Add("lm-trigram-pre", 0);
                                                }

                                                if (tokenIndex < sentence.Tokens.Count - 2)
                                                {
                                                    var lmScore = EnglishLanguageModel.Instance.GetTrigramLMScore(candidateInPOS, sentence.Tokens[sentence.Tokens.IndexOf(targetTokenCopy) + 1].Text.ToLower(), sentence.Tokens[sentence.Tokens.IndexOf(targetTokenCopy) + 2].Text.ToLower());
                                                    scores[candidateInPOS].Add("lm-trigram-post", lmScore.HasValue ? lmScore.Value : -100);
                                                }
                                                else
                                                {
                                                    scores[candidateInPOS].Add("lm-trigram-post", 0);
                                                }
                                            }
                                        }
                                    }
                                }
                                catch { }
                            });
                        }

                        LastSubstitutionCandidates = new List <Tuple <TokenAnnotation, List <string> > >();
                        LastSubstitutionCandidates.Add(new Tuple <TokenAnnotation, List <string> >(targetToken, scores.Select(x => x.Key).ToList()));

                        if (scores.Count > 0)
                        {
                            var allRanks = new List <Dictionary <string, int> >();
                            metrics.ForEach(m => {
                                var featDict = scores.ToDictionary(x => x.Key, x => x.Value[m]);
                                allRanks.Add(TrainingExample.RankExamplesByNumericFeature(featDict, m == "length"));
                            });

                            var allCandidates = scores.Select(x => x.Key).ToList();
                            Dictionary <string, double> averageRankings = allCandidates.ToDictionary(x => x, x => allRanks.Select(r => r[x]).Average());

                            var finalRanking = averageRankings.OrderBy(r => r.Value).ToList();
                            double topScore  = finalRanking[0].Value;
                            var equal        = new List <string>();
                            finalRanking.ForEach(fr => {
                                if (fr.Value == topScore)
                                {
                                    equal.Add(fr.Key);
                                }
                            });

                            var finalChoice = equal.Where(eq => equal.Where(eq2 => eq2 != eq).All(eq2 => scores[eq]["sim"] >= scores[eq2]["sim"])).First();
                            substitutions.Add(new Tuple <TokenAnnotation, string>(targetToken, finalChoice));
                        }
                    }
                }
            });

            return(substitutions);
        }
Example #17
0
        /// <summary>
        /// Orders the given substitution candidates for the ranking task, when the candidates are supplied by the caller.
        /// Each candidate is scored on several features, ranked separately per feature listed in the local
        /// <c>metrics</c> list, and the candidates are returned ordered by their average rank (best first).
        /// As a side effect, <c>LastSubstitutionCandidates</c> is replaced with the target token and the scored candidates.
        /// </summary>
        /// <param name="document">The document whose text is to be simplified</param>
        /// <param name="substitutionCandidates">Substitution candidates. A candidate may be a comma-separated list of alternatives or a multi-word phrase; it is reduced to a single word for scoring but returned in its original form</param>
        /// <param name="target">Target word; the last token in the document whose text equals this value is used</param>
        /// <param name="contextSize">The size of the context of the target word to be compared semantically with candidate replacements</param>
        /// <returns>The original candidate strings, ordered by ascending average rank</returns>
        /// <exception cref="InvalidOperationException">No token of the document has the text <paramref name="target"/>.</exception>
        /// <exception cref="ArgumentException">The same candidate string occurs more than once in <paramref name="substitutionCandidates"/>.</exception>
        public List <string> OrderGivenSubstitutionCandidates(Document document, List <string> substitutionCandidates, string target, int contextSize)
        {
            // Only these features take part in the ranking. The remaining features ("sim", "length" and the
            // language-model scores) are still computed and stored per candidate, but are not ranked.
            List <string> metrics = new List <string> { "context-sim", "ic-diff" };

            // Read the token list once so every index below refers to the same list.
            var allTokens   = document.AllTokens;
            var targetToken = allTokens.Where(t => t.Text == target).Last();
            var tokenIndex  = allTokens.IndexOf(targetToken);

            // NOTE(review): the distance is measured between a document-level token index and
            // targetToken.SentenceIndex - confirm both live in the same index space.
            var targetContextTokens = allTokens.Where(t =>
            {
                var distance = Math.Abs(allTokens.IndexOf(t) - targetToken.SentenceIndex);
                return distance > 0 && distance <= contextSize && t.IsContent();
            }).ToList();

            // The neighbours of the target are the same for every candidate, so look them up once.
            // A null neighbour means "no such token"; the matching language-model feature is then 0.
            string previousWord       = tokenIndex > 0 ? allTokens[tokenIndex - 1].Text.ToLower() : null;
            string secondPreviousWord = tokenIndex > 1 ? allTokens[tokenIndex - 2].Text.ToLower() : null;
            string nextWord           = tokenIndex < allTokens.Count - 1 ? allTokens[tokenIndex + 1].Text.ToLower() : null;
            string secondNextWord     = tokenIndex < allTokens.Count - 2 ? allTokens[tokenIndex + 2].Text.ToLower() : null;

            string targetText = targetToken.Text.ToLower().Trim();

            Dictionary <string, Dictionary <string, double> > scores = new Dictionary <string, Dictionary <string, double> >();

            foreach (var candidate in substitutionCandidates)
            {
                var candidateText = candidate;

                // Comma-separated alternatives: keep the alternative with the highest information content.
                if (candidateText.Contains(","))
                {
                    string selected = string.Empty;
                    double maxIC    = double.MinValue;
                    foreach (var alternative in candidateText.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries))
                    {
                        var alternativeIC = InformationContent.GetRelativeInformationContent(alternative.Trim().ToLower());
                        if (alternativeIC > maxIC)
                        {
                            selected = alternative.Trim();
                            maxIC    = alternativeIC;
                        }
                    }
                    candidateText = selected;
                }

                // Multi-word phrase: keep its last content word, or its first token if it has no content word.
                if (candidateText.Trim().Contains(" "))
                {
                    var tokens        = (new EngPOSTagger()).Annotate(candidateText.Trim()).ToList();
                    var contentTokens = tokens.Where(x => ((TokenAnnotation)x).IsContent()).ToList();

                    candidateText = contentTokens.Count > 0 ? ((TokenAnnotation)(contentTokens.Last())).Text.Trim() : ((TokenAnnotation)(tokens.First())).Text.Trim();
                }

                var loweredCandidate = candidateText.ToLower();
                var candidateIC      = InformationContent.GetRelativeInformationContent(loweredCandidate);

                // Similarities below -1 are dropped before averaging.
                var candidateContextSimilarities = targetContextTokens.Select(x => VectorSpace.Similarity(x.Lemma.ToLower(), loweredCandidate)).Where(x => x >= -1).ToList();
                var candidateContextSimilarity   = candidateContextSimilarities.Count > 0 ? candidateContextSimilarities.Average() : 0;

                var features = new Dictionary <string, double>();
                scores.Add(candidate, features);

                // "sim" is only recorded when the similarity to the target is below 1.
                var sim = VectorSpace.Similarity(targetText, loweredCandidate.Trim());
                if (sim < 1)
                {
                    features.Add("sim", sim);
                }
                // Despite its name, this feature holds the candidate's own information content.
                features.Add("ic-diff", candidateIC);
                features.Add("context-sim", candidateContextSimilarity);
                features.Add("length", candidateText.Length);

                // bigram LM (a missing model score is stored as -100)
                if (previousWord != null)
                {
                    var lmScore = EnglishLanguageModel.Instance.GetBigramLMScore(previousWord, candidateText);
                    features.Add("lm-bigram-pre", lmScore.HasValue ? lmScore.Value : -100);
                }
                else
                {
                    features.Add("lm-bigram-pre", 0);
                }

                if (nextWord != null)
                {
                    var lmScore = EnglishLanguageModel.Instance.GetBigramLMScore(candidateText, nextWord);
                    features.Add("lm-bigram-post", lmScore.HasValue ? lmScore.Value : -100);
                }
                else
                {
                    // This must be the "post" key: "lm-bigram-pre" has already been added above, and adding
                    // it again would throw whenever the target is the last token of the document.
                    features.Add("lm-bigram-post", 0);
                }

                // trigram LM
                if (secondPreviousWord != null)
                {
                    var lmScore = EnglishLanguageModel.Instance.GetTrigramLMScore(secondPreviousWord, previousWord, candidateText);
                    features.Add("lm-trigram-pre", lmScore.HasValue ? lmScore.Value : -100);
                }
                else
                {
                    features.Add("lm-trigram-pre", 0);
                }

                if (secondNextWord != null)
                {
                    var lmScore = EnglishLanguageModel.Instance.GetTrigramLMScore(candidateText, nextWord, secondNextWord);
                    features.Add("lm-trigram-post", lmScore.HasValue ? lmScore.Value : -100);
                }
                else
                {
                    features.Add("lm-trigram-post", 0);
                }
            }

            var allCandidates = scores.Keys.ToList();

            LastSubstitutionCandidates = new List <Tuple <TokenAnnotation, List <string> > >();
            LastSubstitutionCandidates.Add(new Tuple <TokenAnnotation, List <string> >(targetToken, allCandidates.ToList()));

            // One rank table per metric; candidates lacking a metric are left out of that metric's table.
            var allRanks = new List <Dictionary <string, int> >();
            foreach (var metric in metrics)
            {
                var featDict = scores.Where(x => x.Value.ContainsKey(metric)).ToDictionary(x => x.Key, x => x.Value[metric]);
                allRanks.Add(TrainingExample.RankExamplesByNumericFeature(featDict, metric == "length" || metric == "ic-diff"));
            }

            Dictionary <string, double> averageRankings = allCandidates.ToDictionary(x => x, x => allRanks.Where(y => y.ContainsKey(x)).Select(r => r[x]).Average());

            return(averageRankings.OrderBy(r => r.Value).Select(x => x.Key).ToList());
        }
        /// <summary>
        /// Writes a training example to the console: first the raw input values (28 per row), then a
        /// character rendering of the same values, and finally the ten expected output values.
        /// </summary>
        /// <param name="example">Example whose <c>input</c> rows 1..784 and <c>expectedOutput</c> rows 1..10 (column 1) are printed.</param>
        private static void displayImage(TrainingExample example)
        {
            const int pixelCount = 784;
            const int rowWidth   = 28;

            // Raw values, with a line break before the first value of every row.
            for (int i = 1; i <= pixelCount; ++i) {
                // The parentheses matter: "i - 1 % 28" parses as "i - (1 % 28)", which is only zero for i == 1.
                if ((i - 1) % rowWidth == 0) {
                    Console.Write("\n");
                }
                Console.Write(example.input[i, 1] + ", ");
            }
            Console.Write("\n");

            // Character rendering: blank for 0, '.' below 0.5, 'O' up to 1.0; larger values emit nothing.
            var rendering = new System.Text.StringBuilder();
            for (int i = 1; i <= pixelCount; ++i) {
                var pixel = example.input[i, 1];
                if (pixel == 0) {
                    rendering.Append(" ");
                } else if (pixel < 0.5F) {
                    rendering.Append(".");
                } else if (pixel <= 1.0F) {
                    rendering.Append("O");
                }
                if (i % rowWidth == 0) {
                    rendering.Append("\n");
                }
            }
            Console.WriteLine(rendering.ToString());

            // Expected outputs 1..10 on one line, each followed by a single space.
            var expected = new System.Text.StringBuilder("\n\n");
            for (int k = 1; k <= 10; ++k) {
                expected.Append(example.expectedOutput[k, 1]).Append(" ");
            }
            Console.WriteLine(expected.ToString());
        }
 /// <summary>
 /// Reads the MNIST training images and labels from two hard-coded file paths and turns them into
 /// training examples. Each image becomes a 784x1 input matrix of byte values divided by 256; each
 /// label becomes a 10x1 matrix with a single 1, in row 10 for digit 0 and in row d for digit d otherwise.
 /// Progress messages are written to the console.
 /// </summary>
 /// <returns>One example per image, or <c>null</c> if anything goes wrong (the exception is printed to the console).</returns>
 private static TrainingExample[] GetTrainingExamples()
 {
     try {
         Console.WriteLine("Searching for training datasets.");
         // The files are only read, so request read access explicitly; the streams and readers are
         // disposed on every exit path, including when an exception is caught below.
         using (FileStream labelsStream = new FileStream(@"E:\Users\Alexander Weaver\My Documents\Programs\MNIST\train-labels.idx1-ubyte", FileMode.Open, FileAccess.Read))
         using (FileStream imagesStream = new FileStream(@"E:\Users\Alexander Weaver\My Documents\Programs\MNIST\train-images.idx3-ubyte", FileMode.Open, FileAccess.Read)) {
             Console.WriteLine("Training datasets found.");
             using (BinaryReader labelsReader = new BinaryReader(labelsStream))
             using (BinaryReader imagesReader = new BinaryReader(imagesStream)) {
                 // Image header: magic number, image count, row count, column count (4 bytes each).
                 // Only the image count is used; it is assembled most-significant byte first.
                 imagesReader.ReadInt32();
                 int numImages = (imagesReader.ReadByte() << 24) | (imagesReader.ReadByte() << 16) | (imagesReader.ReadByte() << 8) | (imagesReader.ReadByte());
                 imagesReader.ReadInt32();
                 imagesReader.ReadInt32();
                 // Label header: magic number and label count, both skipped.
                 labelsReader.ReadInt32();
                 labelsReader.ReadInt32();

                 Console.WriteLine("Popluating training examples.");
                 TrainingExample[] trainingExamples = new TrainingExample[numImages];
                 for (int r = 0; r < numImages; ++r) {
                     Matrix input = new Matrix(784, 1);
                     Matrix expectedOutput = new Matrix(10, 1);
                     for (int i = 1; i <= 784; ++i) {
                         byte b = imagesReader.ReadByte();
                         input[i, 1] = (float)b / (float)256;
                     }
                     // Digit 0 is stored in row 10 because the matrix rows are addressed from 1.
                     int expectedNum = labelsReader.ReadByte();
                     if (expectedNum == 0) {
                         expectedOutput[10, 1] = 1;
                     } else {
                         expectedOutput[expectedNum, 1] = 1;
                     }
                     trainingExamples[r] = new TrainingExample(input, expectedOutput);
                 }
                 Console.WriteLine("Training examples populated.");
                 return trainingExamples;
             }
         }
     } catch (Exception ex) {
         // Best effort: report the failure and let the caller deal with a null result.
         Console.WriteLine(ex.ToString());
         return null;
     }
 }
        /// <summary>
        /// Runs one training pass over the given examples in random order, calling
        /// <c>TrainIteration</c> once per example. For sets of 5000 examples or more, a progress line
        /// is printed every 1000 examples; a completion line is always printed at the end.
        /// </summary>
        /// <param name="examples">Examples to train on; the caller's array is not reordered.</param>
        public void TrainDataSet(TrainingExample[] examples)
        {
            Random shuffler = new Random();
            // Shuffle by sorting on a random key.
            TrainingExample[] shuffled = examples.OrderBy(e => shuffler.Next()).ToArray();
            int total = shuffled.Length;
            bool reportProgress = total >= 5000;

            int position = 0;
            foreach (TrainingExample current in shuffled) {
                TrainIteration(current.input, current.expectedOutput, 1 - (learningRate * regParameter / total));

                if (reportProgress) {
                    if (position == 0) {
                        // Blank lines before the first progress report.
                        Console.WriteLine("\n");
                    } else if (position % 1000 == 0) {
                        double percentDone = Math.Round((double)position / total, 3) * 100;
                        Console.WriteLine(position + "/" + total + " objects trained. Epoch " + percentDone + "% complete.");
                    }
                }

                ++position;
            }

            Console.WriteLine(total + "/" + total + " objects trained. Epoch 100% complete.");
            Console.WriteLine("\n");
        }