/// <summary>
        /// Gets the cosine similarity of the keyword score vectors of two documents.
        /// Tf-Idf and Cosine similarity: https://janav.wordpress.com/2013/10/27/tf-idf-and-cosine-similarity/
        /// </summary>
        /// <param name="documentTermsData1">Terms data of the first document, already loaded by the caller.</param>
        /// <param name="documentId2">Identifier of the second document; its terms data is looked up in the estimator's storage.</param>
        /// <param name="tfIdfEstimator">Estimator that provides the document-terms storage and the per-term scores.</param>
        /// <returns>Cosine similarity of the two score vectors, or 0 when the product of their norms is 0.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="documentTermsData1"/> or <paramref name="tfIdfEstimator"/> is null.</exception>
        /// <exception cref="ArgumentException">No document with id <paramref name="documentId2"/> exists in the storage.</exception>
        public static double GetDocumentSimilarityExt(DocumentTermsData documentTermsData1, string documentId2, TfIdfEstimatorExt tfIdfEstimator)
        {
            if (documentTermsData1 == null)
            {
                throw new ArgumentNullException(nameof(documentTermsData1));
            }

            if (tfIdfEstimator == null)
            {
                throw new ArgumentNullException(nameof(tfIdfEstimator));
            }

            DocumentTermsData documentTermsData2 = tfIdfEstimator.Storage.DocumentTermsColl.FindOne(x => x.Document == documentId2);

            // Fail with a descriptive error instead of a NullReferenceException a few lines below.
            if (documentTermsData2 == null)
            {
                throw new ArgumentException($"Document '{documentId2}' was not found in the storage.", nameof(documentId2));
            }

            // Get all keywords from the two documents and make a union of them.
            var keywords = new HashSet <string>();

            foreach (var termData in documentTermsData1.Terms)
            {
                keywords.Add(termData.Term);
            }

            foreach (var termData in documentTermsData2.Terms)
            {
                keywords.Add(termData.Term);
            }

            // Get term scores of every keyword in the union for both documents.
            // Both lists are filled in the same iteration order, so index i refers to the same keyword in each.
            var scores1 = new List <double>(keywords.Count);
            var scores2 = new List <double>(keywords.Count);

            foreach (var keyword in keywords)
            {
                scores1.Add(tfIdfEstimator.GetOneTermInDocument(documentTermsData1.Document, keyword).TermScore);
                scores2.Add(tfIdfEstimator.GetOneTermInDocument(documentId2, keyword).TermScore);
            }

            // Calculate the cosine similarity. CosSimilarity(v1, v2) = dot(v1,v2) / (norm(v1) * norm(v2)) where v1 and v2 are vectors
            double dot          = DotProduct(scores1, scores2);
            double sumSquares1  = 0;
            double sumSquares2  = 0;

            foreach (var score in scores1)
            {
                sumSquares1 += score * score;
            }

            foreach (var score in scores2)
            {
                sumSquares2 += score * score;
            }

            double denominator = Math.Sqrt(sumSquares1) * Math.Sqrt(sumSquares2);

            // A zero denominator means at least one vector has no non-zero scores; report "no similarity" instead of dividing by zero.
            return(denominator == 0 ? 0 : dot / denominator);
        }
        /// <summary>
        /// Returns top N most similar documents to the first document along with its cosine similarities.
        /// This variant scores every document returned by the storage's FindAll().
        /// </summary>
        /// <param name="documentId1">Identifier of the reference document.</param>
        /// <param name="numberOfDocuments">Maximum number of results to return; fewer are returned when fewer documents were scored.</param>
        /// <param name="tfIdfEstimator">Estimator that provides the document-terms storage and the per-term scores.</param>
        /// <returns>List of most similar documents, ordered by score descending; the reference document itself is excluded.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="tfIdfEstimator"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="numberOfDocuments"/> is negative.</exception>
        /// <exception cref="ArgumentException">No document with id <paramref name="documentId1"/> exists in the storage.</exception>
        public static List <DocumentSimilarityScoreData> GetSimilarDocumentsOLD1(string documentId1, int numberOfDocuments, TfIdfEstimatorExt tfIdfEstimator)
        {
            if (tfIdfEstimator == null)
            {
                throw new ArgumentNullException(nameof(tfIdfEstimator));
            }

            if (numberOfDocuments < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(numberOfDocuments), "Number of documents must not be negative.");
            }

            DocumentTermsData documentTermsData1 = tfIdfEstimator.Storage.DocumentTermsColl.FindOne(x => x.Document == documentId1);

            // Fail with a descriptive error instead of a NullReferenceException inside the loop.
            if (documentTermsData1 == null)
            {
                throw new ArgumentException($"Document '{documentId1}' was not found in the storage.", nameof(documentId1));
            }

            IEnumerable <DocumentTermsData> docs = tfIdfEstimator.Storage.DocumentTermsColl.FindAll();//TODO: optimize it!!! - 2020-12-22T10:04:31

            // Get similarity scores of all those documents.
            var docScores = new List <DocumentSimilarityScoreData>();

            foreach (DocumentTermsData documentTermsData in docs)
            {
                if (documentTermsData1.Document == documentTermsData.Document)
                {
                    continue;//skip the reference document itself
                }

                docScores.Add(new DocumentSimilarityScoreData()
                {
                    Document = documentTermsData.Document,
                    Score    = GetDocumentSimilarityExt(documentTermsData1, documentTermsData.Document, tfIdfEstimator)
                });
            }

            // Order them by their scores descending and return top N of them.
            // OrderByDescending does not sort in place, so its result must be consumed;
            // Take() tolerates N being larger than the number of scored documents.
            return(docScores.OrderByDescending(x => x.Score).Take(numberOfDocuments).ToList());
        }
        /// <summary>
        /// Gets the cosine similarity of two documents identified by their ids.
        /// Looks up the first document's terms data in the storage and delegates to the
        /// overload that takes the already-loaded terms data.
        /// </summary>
        /// <param name="documentId1">Identifier of the first document.</param>
        /// <param name="documentId2">Identifier of the second document.</param>
        /// <param name="tfIdfEstimator">Estimator that provides the document-terms storage and the per-term scores.</param>
        /// <returns>Cosine similarity as computed by the delegated overload.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="tfIdfEstimator"/> is null.</exception>
        /// <exception cref="ArgumentException">No document with id <paramref name="documentId1"/> exists in the storage.</exception>
        public static double GetDocumentSimilarityExt(string documentId1, string documentId2, TfIdfEstimatorExt tfIdfEstimator)
        {
            if (tfIdfEstimator == null)
            {
                throw new ArgumentNullException(nameof(tfIdfEstimator));
            }

            DocumentTermsData documentTermsData1 = tfIdfEstimator.Storage.DocumentTermsColl.FindOne(x => x.Document == documentId1);

            // Do not hand a null document to the other overload; report the missing id here, where it is known.
            if (documentTermsData1 == null)
            {
                throw new ArgumentException($"Document '{documentId1}' was not found in the storage.", nameof(documentId1));
            }

            return(GetDocumentSimilarityExt(documentTermsData1, documentId2, tfIdfEstimator));
        }
        /// <summary>
        /// Returns top N most similar documents to the first document along with its cosine similarities.
        /// Candidate documents are pre-selected by a storage query instead of scanning the whole collection.
        /// </summary>
        /// <param name="documentId1">Identifier of the reference document.</param>
        /// <param name="numberOfDocuments">Maximum number of results to return; fewer are returned when fewer documents were scored.</param>
        /// <param name="tfIdfEstimator">Estimator that provides the document-terms storage and the per-term scores.</param>
        /// <returns>List of most similar documents, ordered by score descending; the reference document itself is excluded.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="tfIdfEstimator"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="numberOfDocuments"/> is negative.</exception>
        /// <exception cref="ArgumentException">No document with id <paramref name="documentId1"/> exists in the storage.</exception>
        public static List <DocumentSimilarityScoreData> GetSimilarDocuments(string documentId1, int numberOfDocuments, TfIdfEstimatorExt tfIdfEstimator)
        {
            if (tfIdfEstimator == null)
            {
                throw new ArgumentNullException(nameof(tfIdfEstimator));
            }

            if (numberOfDocuments < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(numberOfDocuments), "Number of documents must not be negative.");
            }

            DocumentTermsData documentTermsData1 = tfIdfEstimator.Storage.DocumentTermsColl.FindOne(x => x.Document == documentId1);

            // Fail with a descriptive error instead of a NullReferenceException inside the query below.
            if (documentTermsData1 == null)
            {
                throw new ArgumentException($"Document '{documentId1}' was not found in the storage.", nameof(documentId1));
            }

            //Example using multi key index https://github.com/mbdavid/LiteDB/blob/master/LiteDB.Tests/Database/MultiKey_Mapper_Tests.cs
            // NOTE(review): presumably selects documents sharing at least one term with the reference document;
            // the exact rule depends on Compare, which is defined elsewhere - confirm.
            IEnumerable <DocumentTermsData> docs = tfIdfEstimator.Storage.DocumentTermsColl.Find(d => d.Terms.Select(z => z.Term).Any(x => Compare(documentTermsData1.Terms, x)));

            // Get similarity scores of all those documents.
            var docScores = new List <DocumentSimilarityScoreData>();

            foreach (DocumentTermsData documentTermsData in docs)
            {
                if (documentTermsData1.Document == documentTermsData.Document)
                {
                    continue;//skip the reference document itself
                }

                docScores.Add(new DocumentSimilarityScoreData()
                {
                    Document = documentTermsData.Document,
                    Score    = GetDocumentSimilarityExt(documentTermsData1, documentTermsData.Document, tfIdfEstimator)
                });
            }

            // Order them by their scores descending and return top N of them.
            // OrderByDescending does not sort in place, so its result must be consumed;
            // Take() tolerates N being larger than the number of scored documents.
            return(docScores.OrderByDescending(x => x.Score).Take(numberOfDocuments).ToList());
        }