Exemplo n.º 1
        /// <summary>
        /// Builds a document from a raw text record whose fields are introduced by
        /// line-leading markers such as ".I", ".T", ".A", ".W" and ".X".
        /// Populates <c>No</c>, <c>Title</c>, <c>Author</c> and <c>Content</c>, and writes the
        /// parsed document number to the console.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost its braces and some branch
        /// keywords (for example the <c>else</c> between the two <c>Title</c> assignments and
        /// whatever selects between the two <c>ContentString</c> assignments), so the exact
        /// nesting cannot be determined from here. Confirm against the original file.
        /// </remarks>
        /// <param name="text">Raw record text; "\n.I " is prepended to it before it is split into parts.</param>
        /// <param name="stemCode">When equal to 1, the content text is passed through <c>StemmingTool.Stemming</c>.</param>
        public Document(string text, int stemCode)
            // Start from empty defaults; Content gets a single " " entry.
            No = "";
            Title = "";
            Content = new string[1];
            Content[0] = " ";
            // Prepend the ".I" marker, then cut the record at every "\n." so that each
            // part begins with its marker letter. Empty parts are discarded.
            text = "\n.I " + text;
            string[] textSplitedByPart = text.Split(new string[] { "\n." }, StringSplitOptions.RemoveEmptyEntries);
            for (int i = 0 ; i<textSplitedByPart.Count(); i++)

                // "I" part: the second space-separated token becomes the document number.
                if (textSplitedByPart[i][0]=='I')
                    //Console.WriteLine("masuk sini");

                    string[] chunk = textSplitedByPart[i].Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries);
                    // NOTE(review): chunk[1] throws if this part has fewer than two tokens.
                    No = chunk[1];
                    Console.WriteLine("no : " + No);
                // "T" part: title.
                else if (textSplitedByPart[i][0] == 'T')
                    // A part of length 1 holds nothing but the marker letter.
                    if (textSplitedByPart[i].Length!=1)
                        // Skip the marker letter and the character that follows it.
                        Title = textSplitedByPart[i].Substring(2);
                        // NOTE(review): presumably the else branch (marker without title text);
                        // the "else" itself is not visible in this listing.
                        Title = " ";
                    //Console.WriteLine("title : " + Title);
                //else if (textSplitedByPart[i][0]=='A')
                //    Console.WriteLine("masuk ke A");
                //    Author[0] = textSplitedByPart[i].Substring(2);
                //    Console.WriteLine("author : " + Author);
                // "W" part: body text.
                else if (textSplitedByPart[i][0]=='W')
                    // Skip the marker letter and the character that follows it.
                    string strContent = textSplitedByPart[i].Substring(2);
                    strContent = StopwordTool.RemoveStopwords(strContent);
                    // Remove every run of digits that is followed by a space.
                    strContent = Regex.Replace(strContent, @"[0-9]+ ", string.Empty);
                    if (stemCode == 1)
                        // Stemming: reduce each word to its base form.
                        StemmingTool Stemmer2 = new StemmingTool();
                        strContent = Stemmer2.Stemming(strContent);
                    // Split the content into words on single spaces.
                    Content = strContent.Split(' ');
                    //print content
                    //Console.WriteLine("content : ");
                    //for (int j = 0; j < Content.Count(); j++)
                    //    Console.Write(Content[j] + " ");
                    //Console.WriteLine("ga diambil");

            // NOTE(review): the statements below assign No, Title, Author and Content again,
            // this time through the Before/Between/After/getTitleRecurrence helpers, which are
            // not visible here. Whether this is live code or a disabled alternative to the
            // loop above cannot be told from this listing.
            if(!text.Contains("\n.A"))  // the record has no author field
                No = Before(text, "\n.T");
                Title = Between(text, ".T\n", "\n.W");
                Author = null;
                // NOTE(review): presumably the branch for records that do have an author field;
                // the keyword separating it from the lines above is not visible.
                No = Before(text, "\n.T");
                Title = getTitleRecurrence(Between(text, ".T\n", "\n.A"));

                string TextAuthor = Between(text, ".A\n", "\n.W");
                    Author = TextAuthor.Split(new string[] { "\n.A\n" }, StringSplitOptions.None);
                    if(TextAuthor.Contains("\n"))   // more than one author
                        // Split the author text into one entry per line.
                        Author = TextAuthor.Split('\n');
                    else                            // exactly one author
                        Author = new string[1];
                        Author[0] = TextAuthor;

            // NOTE(review): two alternative sources for the content text (up to "\n.X", or
            // everything after ".W\n"); the construct choosing between them is not visible.
            string ContentString;
                ContentString = StopwordTool.RemoveStopwords(Between(text, ".W\n", "\n.X"));
                ContentString = StopwordTool.RemoveStopwords(After(text, ".W\n"));

            // Regex: remove every run of digits that is followed by a space.
            ContentString = Regex.Replace(ContentString, @"[0-9]+ ", string.Empty);

            if(stemCode == 1)
                // Stemming: reduce each word to its base form.
                StemmingTool Stemmer = new StemmingTool();
                ContentString = Stemmer.Stemming(ContentString);

            // Split the content into words on single spaces.
            Content = ContentString.Split(' ');
Exemplo n.º 2
        /// <summary>
        /// Computes a weight for every distinct term of a query.
        /// </summary>
        /// <remarks>
        /// The query is passed through <c>StopwordTool.RemoveStopwords</c>, has every run of
        /// digits that is followed by a space removed, and is stemmed when <c>stemCode</c> is 1.
        /// It is then split on spaces. A term that occurs several times in the query is
        /// weighted once, at its first position; the full term array is still handed to
        /// <c>CalculateTermWeightingQuery</c>.
        /// </remarks>
        /// <param name="q">Raw query text.</param>
        /// <param name="ListDocuments">Documents used to construct the <c>TermWeighting</c> instance.</param>
        /// <returns>One <c>WeightedTermQuery</c> per distinct query term, in order of first occurrence.</returns>
        public static List<WeightedTermQuery> weightingQuery(string q, List<Document> ListDocuments)
        {
            string queryString = StopwordTool.RemoveStopwords(q);

            // Remove every run of digits that is followed by a space.
            queryString = Regex.Replace(queryString, @"[0-9]+ ", string.Empty);

            if (stemCode == 1)
            {
                // Stemming: reduce each word to its base form.
                StemmingTool Stemmer = new StemmingTool();
                queryString = Stemmer.Stemming(queryString);
            }

            string[] qTerm = queryString.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries);
            TermWeighting QW = new TermWeighting(ListDocuments);
            List<WeightedTermQuery> ListQueryWithWeight = new List<WeightedTermQuery>();

            // Terms that have already been weighted. This must live outside the loop:
            // a per-iteration collection is always empty and never filters anything.
            HashSet<string> found = new HashSet<string>();

            for (int i = 0; i < qTerm.Length; i++)
            {
                // Add returns false when the term was seen before, so repeats are skipped.
                if (!found.Add(qTerm[i]))
                {
                    continue;
                }

                // Weight of this term within the query.
                double wTerm = QW.CalculateTermWeightingQuery(qTerm, i, tfQueryCode, idfQueryCode, normQueryCode);
                ListQueryWithWeight.Add(new WeightedTermQuery(qTerm[i], wTerm));
            }

            return ListQueryWithWeight;
        }