/// <summary>
        /// Guesses the language a word belongs to by comparing it against every word of every
        /// known language and picking the language that owns the most similar word.
        /// The guess is a heuristic and may be wrong.
        /// </summary>
        /// <param name="referenceWord">The word to classify. It is trimmed and lower-cased before comparison.</param>
        /// <returns>
        /// The language containing the word with the highest similarity to <paramref name="referenceWord"/>.
        /// If no word scores above zero, the first available language is returned.
        /// Returns <c>null</c> when <paramref name="referenceWord"/> is null/blank or no languages are available.
        /// </returns>
        public Language GuessLanguage(string referenceWord)
        {
            if (Languages == null || string.IsNullOrWhiteSpace(referenceWord))
            {
                return null;
            }

            string normalizedWord = referenceWord.Trim().ToLower();

            Language bestLanguage   = null;
            double   bestSimilarity = 0;

            foreach (Language candidate in Languages)
            {
                if (candidate == null)
                {
                    continue;
                }

                // The first usable language is the fallback answer when nothing scores above zero.
                // Seeding it here (instead of indexing Languages[0]) keeps an empty collection from throwing.
                if (bestLanguage == null)
                {
                    bestLanguage = candidate;
                }

                // A language without a word list cannot contribute a match.
                if (candidate.Words == null)
                {
                    continue;
                }

                foreach (Word entry in candidate.Words)
                {
                    double similarity = SimilarityCalculator.CalculateSimilarity(normalizedWord, entry.Notation);

                    // Strictly greater: on ties the language encountered first wins.
                    if (similarity > bestSimilarity)
                    {
                        bestSimilarity = similarity;
                        bestLanguage   = candidate;
                    }
                }
            }

            return bestLanguage;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Smoke test: hashes two integer series into blocks and prints the pairwise block
        /// similarities as a grid, one row per block of the first series.
        /// </summary>
        public void SimpleTest()
        {
            var minHash    = new MinHashAlgorithm(100, 4, 4, 100, 50);
            var comparer   = new SimilarityCalculator();

            // Two runs of consecutive integers with different start values and lengths.
            var firstSeries  = new int[1000];
            var secondSeries = new int[1100];

            for (var offset = 0; offset < firstSeries.Length; offset++)
            {
                firstSeries[offset] = offset + 37;
            }
            for (var offset = 0; offset < secondSeries.Length; offset++)
            {
                secondSeries[offset] = offset + 113;
            }

            var firstBlocks  = minHash.CalculateBlocks(firstSeries).ToList();
            var secondBlocks = minHash.CalculateBlocks(secondSeries).ToList();

            foreach (var rowBlock in firstBlocks)
            {
                foreach (var columnBlock in secondBlocks)
                {
                    // Scale the similarity by 25 and print it as an at-least-two-digit cell.
                    var score = comparer.Calculate(rowBlock, columnBlock);
                    Console.Write((25 * score).ToString("00") + " ");
                }

                Console.WriteLine();
            }
        }
Ejemplo n.º 3
0
    /// <summary>
    /// Click handler of the calculate button: parses the two ids from the entry fields,
    /// computes either the user or the movie similarity (depending on the selected radio button)
    /// and shows the result in the result text box.
    /// </summary>
    /// <param name="sender">Event source; not used.</param>
    /// <param name="e">Event arguments; not used.</param>
    protected void OnCalculateButtonClicked(object sender, EventArgs e)
    {
        // Read the mode first so the evaluation order matches the branch-then-parse flow.
        bool compareUsers = userRadioButton.Active;

        // Both modes build the same inputs; only the calculator method differs.
        var pair       = new Similarity(int.Parse(id1Entry.Text), int.Parse(id2Entry.Text));
        var calculator = new SimilarityCalculator(reviewTargetForQueryEntry.Text);

        float sim;
        if (compareUsers)
        {
            sim = pair.Sim = calculator.CalculateForUser(pair.Id1, pair.Id2);
        }
        else
        {
            sim = pair.Sim = calculator.CalculateForMovie(pair.Id1, pair.Id2);
        }

        // Save (not implemented: the computed pair is currently only displayed)

        resultTextbox.Text = sim.ToString();
    }
Ejemplo n.º 4
0
        /// <summary>
        /// Finds the words of this language that are most similar to the given word.
        /// </summary>
        /// <param name="word">The word to look up.</param>
        /// <param name="maxResultElements">The maximum length of the result array.</param>
        /// <param name="minSimilarity">The minimum required similarity of the result words. 1.0d = must be same word, 0.0d = all words accepted.</param>
        /// <returns>
        /// The notations of the best matching words, most similar first, or an empty array if
        /// nothing was found (or the input is blank, there are no words, or
        /// <paramref name="maxResultElements"/> is less than 1).
        /// </returns>
        public string[] FindSimilarWords(string word, int maxResultElements = 5, double minSimilarity = 0.2d)
        {
            if (string.IsNullOrWhiteSpace(word) || Words == null || Words.Length == 0 || maxResultElements < 1)
            {
                return new string[0];
            }

            // Score every word before truncating. Stopping at the first N words that pass the
            // threshold would return whichever matches happen to come first in Words rather
            // than the N most similar ones.
            return Words
                   .Select(candidate => new
                   {
                       candidate.Notation,
                       Similarity = SimilarityCalculator.CalculateSimilarity(word, candidate.Notation)
                   })
                   .Where(scored => scored.Similarity >= minSimilarity)
                   // OrderByDescending is a stable sort: equally similar words keep their order in Words.
                   .OrderByDescending(scored => scored.Similarity)
                   .Take(maxResultElements)
                   .Select(scored => scored.Notation)
                   .ToArray();
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Console demo entry point: prints the section headings and runs two TF-IDF
        /// string comparisons through <c>SimilarityCalculator.CompareString</c>, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Vocabulary threshold passed to both comparisons.
            const int threshold = 0;

            Console.WriteLine("Levinstine Algorithm");
            Console.WriteLine("----------------------------------");

            // The Levenshtein demo calls are disabled, so only the heading above is printed.
            //Console.WriteLine(LevenshteinDistance.Compute("Payments for Account", "Pyment for Account"));
            //Console.WriteLine(LevenshteinDistance.Compute("Payments for Account", "Balance for Account"));

            Console.WriteLine("TF IDF Cosine Similarity");
            Console.WriteLine("----------------------------------");

            try
            {
                // Create instance of calculator
                SimilarityCalculator sc = new SimilarityCalculator();

                // Any return value of CompareString is not used here.
                sc.CompareString("Payments for Account", "Payment Balance for Account", vocabularyThreshold: threshold);
                sc.CompareString("Payments for Account", "Balance for Account", vocabularyThreshold: threshold);
            }
            catch (Exception e)
            {
                // Top-level demo handler: report the failure and still pause below.
                Console.WriteLine(e.Message);
            }

            // Prompt and wait exactly once, on both the success and the failure path, so the
            // user is never asked for two key presses or left at a silent blocking read.
            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }
        /// <summary>
        /// Analyzes the pairwise similarity of the solutions in the current scope.
        /// On every <c>updateInterval</c>-th application (and on the first one) it computes the
        /// similarity matrix of the sub-scopes, derives the minimum, average and maximum similarity
        /// of each solution to all others, and stores the matrix (as heat map), the averaged values
        /// and two data tables in this operator's result collection. Histories of the heat map and
        /// of the per-solution table are kept when history storing is enabled.
        /// </summary>
        /// <returns>The operation returned by <c>base.Apply()</c>.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when a similarity between two different solutions lies outside the interval [0;1].
        /// </exception>
        public override IOperation Apply()
        {
            int      updateInterval = UpdateIntervalParameter.Value.Value;
            IntValue updateCounter  = UpdateCounterParameter.ActualValue;

            // if counter does not yet exist then initialize it with update interval
            // to make sure the solutions are analyzed on the first application of this operator
            if (updateCounter == null)
            {
                updateCounter = new IntValue(updateInterval);
                UpdateCounterParameter.ActualValue = updateCounter;
            }
            else
            {
                updateCounter.Value++;
            }

            //analyze solutions only every 'updateInterval' times
            // NOTE(review): the check is an exact equality; if the interval is lowered below the
            // current counter value the analysis would not trigger again — confirm this is intended.
            if (updateCounter.Value == updateInterval)
            {
                updateCounter.Value = 0;

                bool max = MaximizationParameter.ActualValue.Value;
                ItemArray <DoubleValue> qualities = QualityParameter.ActualValue;
                bool storeHistory = StoreHistoryParameter.Value.Value;
                int  count        = CurrentScopeParameter.ActualValue.SubScopes.Count;

                // similarities between solutions need at least two solutions
                if (count > 1)
                {
                    // calculate solution similarities
                    var similarityMatrix = SimilarityCalculator.CalculateSolutionCrowdSimilarity(CurrentScopeParameter.ActualValue);

                    // sort similarities by quality
                    // (rows are ordered best quality first: descending when maximizing, ascending otherwise)
                    // NOTE(review): sortedSimilarityMatrix is not read anywhere below in this method;
                    // everything that follows works on the unsorted similarityMatrix.
                    double[][] sortedSimilarityMatrix = null;
                    if (max)
                    {
                        sortedSimilarityMatrix = similarityMatrix
                                                 .Select((x, index) => new { Solutions = x, Quality = qualities[index] })
                                                 .OrderByDescending(x => x.Quality)
                                                 .Select(x => x.Solutions)
                                                 .ToArray();
                    }
                    else
                    {
                        sortedSimilarityMatrix = similarityMatrix
                                                 .Select((x, index) => new { Solutions = x, Quality = qualities[index] })
                                                 .OrderBy(x => x.Quality)
                                                 .Select(x => x.Solutions)
                                                 .ToArray();
                    }

                    // copy the jagged matrix into a rectangular array (the form passed to the heat map below);
                    // the column count is taken from the first row
                    double[,] similarities = new double[similarityMatrix.Length, similarityMatrix[0].Length];
                    for (int i = 0; i < similarityMatrix.Length; i++)
                    {
                        for (int j = 0; j < similarityMatrix[0].Length; j++)
                        {
                            similarities[i, j] = similarityMatrix[i][j];
                        }
                    }

                    // calculate minimum, average and maximum similarities
                    // (assumes the matrix is at least count x count — TODO confirm against CalculateSolutionCrowdSimilarity)
                    double   similarity;
                    double[] minSimilarities = new double[count];
                    double[] avgSimilarities = new double[count];
                    double[] maxSimilarities = new double[count];
                    for (int i = 0; i < count; i++)
                    {
                        // start from the opposite ends of the valid interval [0;1]
                        minSimilarities[i] = 1;
                        avgSimilarities[i] = 0;
                        maxSimilarities[i] = 0;
                        for (int j = 0; j < count; j++)
                        {
                            // skip the comparison of a solution with itself
                            if (i != j)
                            {
                                similarity = similarities[i, j];

                                if ((similarity < 0) || (similarity > 1))
                                {
                                    throw new InvalidOperationException("Solution similarities have to be in the interval [0;1].");
                                }

                                if (minSimilarities[i] > similarity)
                                {
                                    minSimilarities[i] = similarity;
                                }
                                avgSimilarities[i] += similarity;
                                if (maxSimilarities[i] < similarity)
                                {
                                    maxSimilarities[i] = similarity;
                                }
                            }
                        }
                        // count - 1 other solutions contributed to the sum (the diagonal was skipped)
                        avgSimilarities[i] = avgSimilarities[i] / (count - 1);
                    }
                    double avgMinSimilarity = minSimilarities.Average();
                    double avgAvgSimilarity = avgSimilarities.Average();
                    double avgMaxSimilarity = maxSimilarities.Average();

                    // fetch results collection
                    // (created and registered under "<Name> Results" on first use)
                    ResultCollection results;
                    if (!ResultsParameter.ActualValue.ContainsKey(Name + " Results"))
                    {
                        results = new ResultCollection();
                        ResultsParameter.ActualValue.Add(new Result(Name + " Results", results));
                    }
                    else
                    {
                        results = (ResultCollection)ResultsParameter.ActualValue[Name + " Results"].Value;
                    }

                    // store similarities
                    // (the heat map of the current analysis replaces the previous one)
                    HeatMap similaritiesHeatMap = new HeatMap(similarities, "Solution Similarities", 0.0, 1.0);
                    if (!results.ContainsKey("Solution Similarities"))
                    {
                        results.Add(new Result("Solution Similarities", similaritiesHeatMap));
                    }
                    else
                    {
                        results["Solution Similarities"].Value = similaritiesHeatMap;
                    }

                    // store similarities history
                    if (storeHistory)
                    {
                        if (!results.ContainsKey("Solution Similarities History"))
                        {
                            HeatMapHistory history = new HeatMapHistory();
                            history.Add(similaritiesHeatMap);
                            results.Add(new Result("Solution Similarities History", history));
                        }
                        else
                        {
                            ((HeatMapHistory)results["Solution Similarities History"].Value).Add(similaritiesHeatMap);
                        }
                    }

                    // store average minimum, average and maximum similarity
                    // (existing DoubleValue results are updated in place)
                    if (!results.ContainsKey("Average Minimum Solution Similarity"))
                    {
                        results.Add(new Result("Average Minimum Solution Similarity", new DoubleValue(avgMinSimilarity)));
                    }
                    else
                    {
                        ((DoubleValue)results["Average Minimum Solution Similarity"].Value).Value = avgMinSimilarity;
                    }

                    if (!results.ContainsKey("Average Average Solution Similarity"))
                    {
                        results.Add(new Result("Average Average Solution Similarity", new DoubleValue(avgAvgSimilarity)));
                    }
                    else
                    {
                        ((DoubleValue)results["Average Average Solution Similarity"].Value).Value = avgAvgSimilarity;
                    }

                    if (!results.ContainsKey("Average Maximum Solution Similarity"))
                    {
                        results.Add(new Result("Average Maximum Solution Similarity", new DoubleValue(avgMaxSimilarity)));
                    }
                    else
                    {
                        ((DoubleValue)results["Average Maximum Solution Similarity"].Value).Value = avgMaxSimilarity;
                    }

                    // store average minimum, average and maximum solution similarity data table
                    // (one table for the whole run; each analysis appends one value per row)
                    DataTable minAvgMaxSimilarityDataTable;
                    if (!results.ContainsKey("Average Minimum/Average/Maximum Solution Similarity"))
                    {
                        minAvgMaxSimilarityDataTable = new DataTable("Average Minimum/Average/Maximum Solution Similarity");
                        minAvgMaxSimilarityDataTable.VisualProperties.XAxisTitle = "Iteration";
                        minAvgMaxSimilarityDataTable.VisualProperties.YAxisTitle = "Solution Similarity";
                        minAvgMaxSimilarityDataTable.Rows.Add(new DataRow("Average Minimum Solution Similarity", null));
                        minAvgMaxSimilarityDataTable.Rows["Average Minimum Solution Similarity"].VisualProperties.StartIndexZero = true;
                        minAvgMaxSimilarityDataTable.Rows.Add(new DataRow("Average Average Solution Similarity", null));
                        minAvgMaxSimilarityDataTable.Rows["Average Average Solution Similarity"].VisualProperties.StartIndexZero = true;
                        minAvgMaxSimilarityDataTable.Rows.Add(new DataRow("Average Maximum Solution Similarity", null));
                        minAvgMaxSimilarityDataTable.Rows["Average Maximum Solution Similarity"].VisualProperties.StartIndexZero = true;
                        results.Add(new Result("Average Minimum/Average/Maximum Solution Similarity", minAvgMaxSimilarityDataTable));
                    }
                    else
                    {
                        minAvgMaxSimilarityDataTable = (DataTable)results["Average Minimum/Average/Maximum Solution Similarity"].Value;
                    }
                    minAvgMaxSimilarityDataTable.Rows["Average Minimum Solution Similarity"].Values.Add(avgMinSimilarity);
                    minAvgMaxSimilarityDataTable.Rows["Average Average Solution Similarity"].Values.Add(avgAvgSimilarity);
                    minAvgMaxSimilarityDataTable.Rows["Average Maximum Solution Similarity"].Values.Add(avgMaxSimilarity);

                    // store minimum, average, maximum similarities data table
                    // (rebuilt from scratch on every analysis: one point per solution index)
                    DataTable minAvgMaxSimilaritiesDataTable = new DataTable("Minimum/Average/Maximum Solution Similarities");
                    minAvgMaxSimilaritiesDataTable.VisualProperties.XAxisTitle = "Solution Index";
                    minAvgMaxSimilaritiesDataTable.VisualProperties.YAxisTitle = "Solution Similarity";
                    minAvgMaxSimilaritiesDataTable.Rows.Add(new DataRow("Minimum Solution Similarity", null, minSimilarities));
                    minAvgMaxSimilaritiesDataTable.Rows["Minimum Solution Similarity"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
                    minAvgMaxSimilaritiesDataTable.Rows.Add(new DataRow("Average Solution Similarity", null, avgSimilarities));
                    minAvgMaxSimilaritiesDataTable.Rows["Average Solution Similarity"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
                    minAvgMaxSimilaritiesDataTable.Rows.Add(new DataRow("Maximum Solution Similarity", null, maxSimilarities));
                    minAvgMaxSimilaritiesDataTable.Rows["Maximum Solution Similarity"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
                    if (!results.ContainsKey("Minimum/Average/Maximum Solution Similarities"))
                    {
                        results.Add(new Result("Minimum/Average/Maximum Solution Similarities", minAvgMaxSimilaritiesDataTable));
                    }
                    else
                    {
                        results["Minimum/Average/Maximum Solution Similarities"].Value = minAvgMaxSimilaritiesDataTable;
                    }

                    // store minimum, average, maximum similarities history
                    if (storeHistory)
                    {
                        if (!results.ContainsKey("Minimum/Average/Maximum Solution Similarities History"))
                        {
                            DataTableHistory history = new DataTableHistory();
                            history.Add(minAvgMaxSimilaritiesDataTable);
                            results.Add(new Result("Minimum/Average/Maximum Solution Similarities History", history));
                        }
                        else
                        {
                            ((DataTableHistory)results["Minimum/Average/Maximum Solution Similarities History"].Value).Add(minAvgMaxSimilaritiesDataTable);
                        }
                    }
                }
            }
            return(base.Apply());
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Creates a comparer that works with the supplied MinHash algorithm and its own
 /// newly constructed <c>SimilarityCalculator</c>.
 /// </summary>
 /// <param name="algorithm">The MinHash algorithm to use; stored as given, without validation.</param>
 public TokenListComparer(MinHashAlgorithm algorithm)
 {
     _calculator = new SimilarityCalculator();
     _algorithm  = algorithm;
 }
Ejemplo n.º 8
0
        // Chat bot response main entry
        /// <summary>
        /// Determines the chat bot's answer to the current <c>Message</c> by running a cascade of
        /// matchers and returning as soon as one of them succeeds:
        /// greeting/goodbye, full and partial suggestion match, TF-IDF similarity against the stored
        /// intent questions, probable (spelling/typo) match and finally synonym match.
        /// </summary>
        /// <returns>
        /// The matched intent (possibly enriched by entity extraction), or the response intent
        /// carrying a greeting, a list of suggestions or the "no intent matched" message.
        /// </returns>
        public ChatIntent ChatResponseMain()
        {
            // Best similarity score an intent needs to be accepted outright / to be offered as a suggestion.
            const double confidentMatchScore = 0.45;
            const double possibleMatchScore  = 0.23;

            string            noMatchResponse = contentManager.NoIntentMatchedResponse;
            TFIDF             getVocab        = new TFIDF();
            List <ChatIntent> intentListAll   = db.ChatIntent.ToList();

            // The intent with id 0 is reused as the carrier for generated responses; it may be missing.
            ChatIntent responseIntent = intentListAll.FirstOrDefault(x => x.ChatIntentId == 0);

            #region 1.CheckIntentGreetingOrGoodbye
            if (hiBye.Greet())
            {
                return UpdateIntent(Node, contentManager.GreetResponse, responseIntent);
            }
            if (hiBye.GoodBye())
            {
                return UpdateIntent(Node, contentManager.GoodbyeResponse, responseIntent);
            }
            #endregion

            // Candidate intents: children of the current node, excluding the ids 0..2.
            List <ChatIntent> intentList = intentListAll
                                           .Where(intention => intention.ChatIntentId > 2 && intention.ParentId == Node)
                                           .ToList();

            #region 2.CheckIntentFullMatchbySuggestion
            KeyValuePair <int, bool> fullMatch = suggestionMatch.FullSuggestionMatch(intentList);
            if (fullMatch.Value)
            {
                ChatIntent fullMatchIntent = intentList.FirstOrDefault(x => x.ChatIntentId == fullMatch.Key);

                // If the reported id is not among the candidates, fall through to the next matcher
                // instead of dereferencing null.
                if (fullMatchIntent != null)
                {
                    return ResolveEntitiesForIntent(fullMatchIntent);
                }
            }

            KeyValuePair <int, bool> partialMatch = suggestionMatch.PartialSuggestionMatch(intentList);
            if (partialMatch.Value)
            {
                ChatIntent partialMatchIntent = intentList.FirstOrDefault(x => x.ChatIntentId == partialMatch.Key);
                if (partialMatchIntent != null)
                {
                    return ResolveEntitiesForIntent(partialMatchIntent);
                }
            }
            #endregion

            List <string> vocabList = getVocab.GetVocabulary(Message);
            if (vocabList.Count == 0)
            {
                return UpdateIntent(Node, contentManager.NoIntentMatchedResponse, responseIntent);
            }

            // A bare yes/no that matched no suggestion above cannot be mapped to an intent.
            string loweredMessage = Message.ToLower();
            if (loweredMessage == "yes" || loweredMessage == "no")
            {
                return UpdateIntent(Node, contentManager.NoIntentMatchedResponse, responseIntent);
            }

            #region 3.TFIDF Match Process
            SimilarityCalculator      similarityCalculator = new SimilarityCalculator();
            List <ChatIntentQuestion> questionList         = db.ChatIntentQuestion.ToList();

            // Best score per intent over all of its stored questions.
            Dictionary <int, double> scoreDict = new Dictionary <int, double>();
            foreach (ChatIntentQuestion question in questionList)
            {
                double compare = similarityCalculator.CompareString(Message, question.QuestionDesc, 1);

                double bestSoFar;
                if (!scoreDict.TryGetValue(question.ChatIntentId, out bestSoFar))
                {
                    scoreDict.Add(question.ChatIntentId, compare);
                }
                else if (bestSoFar < compare)
                {
                    scoreDict[question.ChatIntentId] = compare;
                }
            }

            if (scoreDict.Any(x => x.Value > confidentMatchScore))
            {
                int        maxScoreChatIntentId = scoreDict.OrderByDescending(x => x.Value).Select(y => y.Key).FirstOrDefault();
                ChatIntent maxIntent            = intentListAll.FirstOrDefault(x => x.ChatIntentId == maxScoreChatIntentId);

                // A question may reference an intent that no longer exists; in that case leave
                // Node untouched and continue with the remaining matchers.
                if (maxIntent != null)
                {
                    Node = maxScoreChatIntentId;
                    return ResolveEntitiesForIntent(maxIntent);
                }
            }
            else if (scoreDict.Any(x => x.Value >= possibleMatchScore))
            {
                // Not confident enough for a direct answer: offer the plausible intents, best first.
                IEnumerable <string> suggestions = scoreDict
                                                   .OrderByDescending(x => x.Value)
                                                   .Where(x => x.Value >= possibleMatchScore)
                                                   .Select(x => x.Key)
                                                   .ToList()
                                                   .Select(match => intentListAll
                                                                    .Where(x => x.ChatIntentId == match)
                                                                    .Select(y => y.IntentDescription)
                                                                    .FirstOrDefault());

                string suggestionMessage = contentManager.IntentPossibleMatchedResponse
                                           + ", " + string.Join(", ", suggestions)
                                           + ", " + contentManager.IntentSuggestionResponse;
                return UpdateIntent(Node, suggestionMessage, responseIntent);
            }
            #endregion

            #region 4.Probable Match Process
            KeyValuePair <string, bool> probableMatchCorrect = zPossibleMatch.ProbableMatchCorrectSpelling(vocabList, intentListAll);
            if (probableMatchCorrect.Value)
            {
                common.LogFailureResponse();
                return UpdateIntent(Node, probableMatchCorrect.Key, responseIntent);
            }

            KeyValuePair <string, bool> probableMatchTypo = zPossibleMatch.ProbableMatchTypoError(vocabList, intentListAll);
            if (probableMatchTypo.Value)
            {
                common.LogFailureResponse();
                return UpdateIntent(Node, probableMatchTypo.Key, responseIntent);
            }
            #endregion

            #region 5.Synonym Match Process
            KeyValuePair <string, bool> synMatch = synonymMatch.SynonymMatch(vocabList, intentListAll);
            if (synMatch.Value)
            {
                common.LogFailureResponse();
                return UpdateIntent(Node, synMatch.Key, responseIntent);
            }
            #endregion

            // Nothing matched: answer with the generic "no intent matched" message.
            if (responseIntent != null)
            {
                responseIntent.ChatIntentId = Node;
                responseIntent.Response     = noMatchResponse;
            }
            else
            {
                responseIntent = new ChatIntent();
                responseIntent.ChatIntentId = Node;
                responseIntent.Response     = "Sorry I did not understand, Please enter one of the suggestions";
            }
            return responseIntent;
        }

        /// <summary>
        /// Returns the final intent for a matched intent: when chat entities are registered for it,
        /// the intent is passed through entity extraction for the current message; otherwise it is
        /// returned unchanged.
        /// </summary>
        /// <param name="intent">The matched intent; must not be null.</param>
        /// <returns>The result of the entity extraction, or <paramref name="intent"/> itself.</returns>
        private ChatIntent ResolveEntitiesForIntent(ChatIntent intent)
        {
            var registeredEntities = (from ent in db.ChatEntity
                                      where ent.ChatIntentId == intent.ChatIntentId
                                      select ent);
            if (!registeredEntities.Any())
            {
                return intent;
            }

            AskMeEntityExtraction extraction = new AskMeEntityExtraction(Message, intent.ChatIntentId, SessionId);
            return extraction.GetEntityforIntentfromNLP(intent);
        }