// Returns the index of the cluster centroid most similar to <paramref name="obj"/>.
// Similarity is cosine similarity over the vector space when sim == "Cosine",
// otherwise a WordNet-based sentence similarity over the raw content.
// Each centroid is represented by its first grouped document; ties resolve to
// the lowest index.
public static int FindClosestClusterCenter(List<Centroid> clusterCenter, DocumentVector obj, string sim)
{
    float[] similarityMeasure = new float[clusterCenter.Count];

    // Hoisted out of the loop: the WordNet dictionary path and the scorer are
    // loop-invariant (the original re-assigned/re-constructed them per centroid).
    SentenceSimilarity semsim = null;
    if (sim != "Cosine")
    {
        // NOTE(review): hard-coded WordNet install path — consider making this configurable.
        Wnlib.WNCommon.path = "C:\\Program Files\\WordNet\\3.0\\dict\\";
        semsim = new SentenceSimilarity();
    }

    for (int i = 0; i < clusterCenter.Count; i++)
    {
        similarityMeasure[i] = sim == "Cosine"
            ? SimilarityMatrics.FindCosineSimilarity(clusterCenter[i].GroupedDocument[0].VectorSpace, obj.VectorSpace)
            : semsim.GetScore(clusterCenter[i].GroupedDocument[0].Content, obj.Content);
    }

    // Pick the highest-similarity centroid; start at 1 since element 0 is the
    // running maximum already.
    int index = 0;
    float maxValue = similarityMeasure[0];
    for (int i = 1; i < similarityMeasure.Length; i++)
    {
        if (similarityMeasure[i] > maxValue)
        {
            maxValue = similarityMeasure[i];
            index = i;
        }
    }
    return index;
}
/// <summary>
/// Prepares a collection of documents in vector space.
/// </summary>
/// <param name="collection">Document collection/corpus</param>
/// <returns>List of documents represented in vector space</returns>
public static List<DocumentVector> ProcessDocumentCollection(DocumentCollection collection)
{
    distinctTerms = new HashSet<string>();
    documentCollection = collection.DocumentList;

    // Gather every non-stopword token in the corpus; the dimension of the
    // vector space equals the number of distinct terms collected here.
    foreach (string documentContent in collection.DocumentList)
    {
        foreach (string token in r.Split(documentContent))
        {
            if (!StopWordsHandler.IsStotpWord(token))
            {
                distinctTerms.Add(token);
            }
        }
    }

    // Strip punctuation and whitespace artifacts left over from tokenisation.
    string[] junkTokens = { "\"", "\r", "\n", "(", ")", "[", "]", "{", "}", "", ".", " ", "," };
    foreach (string junk in junkTokens)
    {
        distinctTerms.Remove(junk);
    }

    // Build one TF-IDF weight vector per document, one slot per distinct term.
    var documentVectorSpace = new List<DocumentVector>();
    foreach (string document in documentCollection)
    {
        float[] weights = new float[distinctTerms.Count];
        int slot = 0;
        foreach (string term in distinctTerms)
        {
            weights[slot++] = FindTFIDF(document, term);
        }

        var vector = new DocumentVector();
        vector.Content = document;
        vector.VectorSpace = weights;
        documentVectorSpace.Add(vector);
    }
    return documentVectorSpace;
}
// Returns the index of the cluster centroid most similar (by cosine similarity)
// to the given document vector. Each centroid is represented by its first
// grouped document; ties resolve to the lowest index.
private static int FindClosestClusterCenter(List<Centroid> clusterCenter, DocumentVector obj)
{
    // Use the Count property / array Length instead of the LINQ Count()
    // extension (the original called the extension on every loop test).
    float[] similarityMeasure = new float[clusterCenter.Count];
    for (int i = 0; i < clusterCenter.Count; i++)
    {
        similarityMeasure[i] = SimilarityMatrics.FindCosineSimilarity(clusterCenter[i].GroupedDocument[0].VectorSpace, obj.VectorSpace);
    }

    // Scan for the maximum; start at 1 since element 0 is the running maximum.
    int index = 0;
    float maxValue = similarityMeasure[0];
    for (int i = 1; i < similarityMeasure.Length; i++)
    {
        if (similarityMeasure[i] > maxValue)
        {
            maxValue = similarityMeasure[i];
            index = i;
        }
    }
    return index;
}
/// <summary>
/// Prepares a collection of documents in vector space.
/// Distinct terms are deduplicated case-insensitively and restricted to
/// lengths 4..25; each document gets a TF-IDF weight vector plus a parallel
/// array of the terms carrying non-zero weight.
/// </summary>
/// <param name="collection">Document collection/corpus</param>
/// <returns>List of documents represented in vector space</returns>
public static List<DocumentVector> ProcessDocumentCollection(DocumentCollection collection)
{
    distinctTerms = new HashSet<string>();
    documentCollection = collection.DocumentList;

    // 'seen' gives O(1) case-insensitive dedup. The original called the LINQ
    // Contains(term, StringComparer.CurrentCultureIgnoreCase) extension, which
    // enumerates the entire set for every token — accidentally O(n^2).
    // Equivalence note: case-insensitive equality implies equal length, so
    // tracking all seen tokens (not just length-filtered ones) cannot change
    // which terms pass the 4..25 length filter below.
    HashSet<string> seen = new HashSet<string>(StringComparer.CurrentCultureIgnoreCase);
    foreach (string documentContent in collection.DocumentList)
    {
        foreach (string term in r.Split(documentContent))
        {
            if (!StopWordsHandler.IsStotpWord(term) && seen.Add(term))
            {
                if (term.Length >= 4 && term.Length <= 25)
                {
                    distinctTerms.Add(term);
                }
            }
        }
    }

    // Strip punctuation/markup artifacts left over from tokenisation.
    List<string> removeList = new List<string>() { "\"", "\r", "\n", "(", ")", "[", "]", "{", "}", "", ".", " ", ",", "<", ">", "@", ";", "#" };
    foreach (string s in removeList)
    {
        distinctTerms.Remove(s);
    }

    List<DocumentVector> documentVectorSpace = new List<DocumentVector>();
    foreach (string document in documentCollection)
    {
        int count = 0;
        float[] space = new float[distinctTerms.Count];
        string[] keys = new string[distinctTerms.Count]; // slot stays null where TF-IDF is 0
        foreach (string term in distinctTerms)
        {
            space[count] = FindTFIDF(document, term);
            if (space[count] > 0)
            {
                keys[count] = term; // remember which term this non-zero weight belongs to
            }
            count++;
        }

        DocumentVector _documentVector = new DocumentVector();
        _documentVector.Content = document;
        _documentVector.VectorSpace = space;
        _documentVector.keys = keys;
        documentVectorSpace.Add(_documentVector);
    }

    // Bug fix: the original printed distinctTerms.ToString(), which yields the
    // type name ("System.Collections.Generic.HashSet`1[...]"), not the terms.
    Console.WriteLine(string.Join(", ", distinctTerms));
    return documentVectorSpace;
}