Ejemplo n.º 1
0
        /// <summary>
        /// Runs the token pipeline over every document in <c>documentList</c>,
        /// then triggers the collection-level calculations and graph setup.
        /// </summary>
        /// <remarks>
        /// For each document, in this order: <c>Tokeniser.Partition</c> on its
        /// <c>Content</c>, <c>Punctuation_Remover.MarkPunc</c>, <c>Stemmer.Stem</c>,
        /// <c>Stopword_Remover.RemoveStopwords</c> and
        /// <c>Number_Remover.RemoveNumber</c>. Tokens whose <c>WordType</c> is still
        /// <c>DEFAULT</c> are then retagged as <c>REGULAR</c>, the tokens are handed
        /// to the document via <c>SetToken</c>, and <c>CalTF</c> is called.
        /// Once all documents are done, <c>CalIDF</c> is called once, every
        /// document receives <c>CalTFIDF(inverseDocumentFrequency)</c>, and
        /// <c>graph</c> is initialized with an array of all documents.
        /// </remarks>
        public void Process()
        {
            foreach (TFIDF_Document document in documentList.Values)
            {
                // Each stage consumes the output of the previous one, so the
                // order of these calls must not change.
                Token[] stageOutput = new Tokeniser().Partition(document.Content);
                stageOutput = new Punctuation_Remover().MarkPunc(stageOutput);
                stageOutput = Stemmer.Stem(stageOutput);
                stageOutput = new Stopword_Remover().RemoveStopwords(stageOutput);
                stageOutput = new Number_Remover().RemoveNumber(stageOutput);

                // Anything no stage has classified is retagged from DEFAULT to REGULAR.
                for (int index = 0; index < stageOutput.Length; index++)
                {
                    Token current = stageOutput[index];
                    if (current.WordType == Token.WORDTYPE.DEFAULT)
                    {
                        current.WordType = Token.WORDTYPE.REGULAR;
                    }
                }

                document.SetToken(stageOutput);
                document.CalTF();
            }

            // CalIDF is invoked only after every document has gone through CalTF.
            CalIDF();

            foreach (TFIDF_Document document in documentList.Values)
            {
                document.CalTFIDF(inverseDocumentFrequency);
            }

            var allDocuments = documentList.Values.ToArray();
            graph.InitializeGraph(allDocuments);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs the token pipeline over every document in <c>documentList</c>,
        /// then triggers the collection-level calculations and graph setup.
        /// </summary>
        /// <remarks>
        /// For each document, in this order: <c>Tokeniser.Partition</c> on its
        /// <c>Content</c>, <c>Punctuation_Remover.MarkPunc</c>, <c>Stemmer.Stem</c>,
        /// <c>Stopword_Remover.RemoveStopwords</c> and
        /// <c>Number_Remover.RemoveNumber</c>. Tokens whose <c>WordType</c> is still
        /// <c>DEFAULT</c> are then retagged as <c>REGULAR</c>, the tokens are handed
        /// to the document via <c>SetToken</c>, and <c>CalTF</c> is called.
        /// Once all documents are done, <c>CalIDF</c> is called once, every
        /// document receives <c>CalTFIDF(inverseDocumentFrequency)</c>, and
        /// <c>graph</c> is initialized with an array of all documents.
        /// </remarks>
        public void Process()
        {
            foreach (TFIDF_Document document in documentList.Values)
            {
                // Each stage consumes the output of the previous one, so the
                // order of these calls must not change.
                Token[] stageOutput = new Tokeniser().Partition(document.Content);
                stageOutput = new Punctuation_Remover().MarkPunc(stageOutput);
                stageOutput = Stemmer.Stem(stageOutput);
                stageOutput = new Stopword_Remover().RemoveStopwords(stageOutput);
                stageOutput = new Number_Remover().RemoveNumber(stageOutput);

                // Anything no stage has classified is retagged from DEFAULT to REGULAR.
                for (int index = 0; index < stageOutput.Length; index++)
                {
                    Token current = stageOutput[index];
                    if (current.WordType == Token.WORDTYPE.DEFAULT)
                    {
                        current.WordType = Token.WORDTYPE.REGULAR;
                    }
                }

                document.SetToken(stageOutput);
                document.CalTF();
            }

            // CalIDF is invoked only after every document has gone through CalTF.
            CalIDF();

            foreach (TFIDF_Document document in documentList.Values)
            {
                document.CalTFIDF(inverseDocumentFrequency);
            }

            var allDocuments = documentList.Values.ToArray();
            graph.InitializeGraph(allDocuments);
        }