Example #1
0
        public virtual void  TestPhraseQueryWithStopAnalyzer()
        {
            // Index a single document through StopAnalyzer, which drops the
            // stop words ("the", "are") from the indexed text at write time.
            RAMDirectory indexDir = new RAMDirectory();
            StopAnalyzer analyzer = new StopAnalyzer();
            IndexWriter  indexWriter = new IndexWriter(indexDir, analyzer, true);

            Document document = new Document();
            document.Add(Field.Text("Field", "the stop words are here"));
            indexWriter.AddDocument(document);
            indexWriter.Close();

            IndexSearcher indexSearcher = new IndexSearcher(indexDir);

            // A phrase made only of surviving (non-stop) terms must match.
            PhraseQuery phrase = new PhraseQuery();
            phrase.Add(new Term("Field", "stop"));
            phrase.Add(new Term("Field", "words"));
            Hits result = indexSearcher.Search(phrase);
            Assert.AreEqual(1, result.Length());

            // currently StopAnalyzer does not leave "holes", so this matches.
            phrase = new PhraseQuery();
            phrase.Add(new Term("Field", "words"));
            phrase.Add(new Term("Field", "here"));
            result = indexSearcher.Search(phrase);
            Assert.AreEqual(1, result.Length());

            indexSearcher.Close();
        }
Example #2
0
        /// <summary>
        /// Maps an analyzer name to a new Lucene analyzer instance.
        /// Unrecognized names fall back to <see cref="Lucene.Net.Analysis.SimpleAnalyzer"/>.
        /// </summary>
        /// <param name="analyzerName">One of: SimpleAnalyzer, StandardAnalyzer,
        /// KeywordAnalyzer, StopAnalyzer, WhitespaceAnalyzer.</param>
        /// <returns>A freshly constructed analyzer; never null.</returns>
        public static Lucene.Net.Analysis.Analyzer GetAnalyzerByName(string analyzerName)
        {
            // Analyzers that take a version argument are pinned to Lucene 3.0 semantics.
            Lucene.Net.Util.Version luceneVersion = Lucene.Net.Util.Version.LUCENE_30;

            switch (analyzerName)
            {
            case "StandardAnalyzer":
                return new Lucene.Net.Analysis.Standard.StandardAnalyzer(luceneVersion);

            case "KeywordAnalyzer":
                return new Lucene.Net.Analysis.KeywordAnalyzer();

            case "StopAnalyzer":
                return new Lucene.Net.Analysis.StopAnalyzer(luceneVersion);

            case "WhitespaceAnalyzer":
                return new Lucene.Net.Analysis.WhitespaceAnalyzer();

            default:
                // Covers both "SimpleAnalyzer" and any unknown name.
                return new Lucene.Net.Analysis.SimpleAnalyzer();
            }
        }
        /// <summary>
        /// Indexes tweets day-by-day, from 2014-05-17 up to today, into a Lucene
        /// index under .\LuceneIndex. Each tweet's text is stemmed and scored
        /// positive/negative by the classifier before being written.
        /// </summary>
        /// <param name="collection">MongoDB collection holding the tweets to index.</param>
        public static void CreateIndex(MongoCollection<TweetItem> collection)
        {
            DateTime dtmFirst = new DateTime(2014, 05, 17, 0, 0, 0);
            DateTime dtmLast = new DateTime(2014, 05, 17, 23, 59, 59);
            FSDirectory dir = FSDirectory.GetDirectory(Environment.CurrentDirectory + "\\LuceneIndex");
            //Lucene.Net.Store.RAMDirectory dir = new RAMDirectory();
            Lucene.Net.Analysis.StopAnalyzer an = new Lucene.Net.Analysis.StopAnalyzer();
            IndexWriter wr = new IndexWriter(dir, an, true);   // true: recreate the index from scratch
            IStemmer stemmer = new EnglishStemmer();

            // FIX: the original constructed a new HttpClient on every day-loop
            // iteration; HttpClient is meant to be reused, and per-iteration
            // instances can exhaust sockets. Created once here instead.
            // NOTE(review): the client is only configured — the sentiment request
            // below is commented out — kept for when that call is re-enabled.
            using (var client = new HttpClient())
            {
                client.BaseAddress = new Uri("http://www.datasciencetoolkit.org/text2sentiment");
                client.DefaultRequestHeaders.Accept.Clear();
                client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));

                while (dtmFirst.Date <= DateTime.Now.Date)
                {
                    // Fetch the tweets for the current one-day window.
                    var query = Query<TweetItem>.Where(t => t.CreationDate >= dtmFirst && t.CreationDate <= dtmLast);
                    List<TweetItem> value = collection.Find(query).ToList();

                    foreach (TweetItem tweet in value)
                    {
                        Document doc = new Document();

                        //SentimentResult res = await GetSentiment(tweet.Text, client);
                        string stemmedtext = PerformStemming(stemmer, NLPToolkit.Tokenizer.TokenizeNow(tweet.Text).ToArray());
                        var scores = classifier.Classify(stemmedtext, DragonHelper.DragonHelper.ExcludeList);
                        string positiveSentiment = scores["Positive"].ToString();
                        string negativeSentiment = scores["Negative"].ToString();

                        // Only the tokenized fields carry term vectors; id/created/user
                        // are stored verbatim and are not searchable.
                        doc.Add(new Field("id", tweet._id.ToString(), Field.Store.YES, Field.Index.NO));
                        doc.Add(new Field("created", tweet.CreationDate.ToString(), Field.Store.YES, Field.Index.NO));
                        doc.Add(new Field("user", tweet.User, Field.Store.YES, Field.Index.NO));
                        doc.Add(new Field("text", stemmedtext, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
                        doc.Add(new Field("possentiment", positiveSentiment, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
                        doc.Add(new Field("negsentiment", negativeSentiment, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));

                        wr.AddDocument(doc);
                    }

                    // Slide the one-day window forward.
                    dtmFirst = dtmFirst.AddDays(1);
                    dtmLast = dtmLast.AddDays(1);
                }
            }

            wr.Optimize();
            wr.Flush();
            wr.Close();
            dir.Close();
        }
Example #4
0
        /// <summary>
        /// Loads every tweet created since 2014-05-17 from MongoDB and builds
        /// a fresh Lucene index of them under .\LuceneIndex.
        /// </summary>
        static void Main(string[] args)
        {
            var connectionString = "mongodb://10.0.0.17/test";
            //var connectionString = "mongodb://localhost/test";

            MongoClient   mongoClient = new MongoClient(connectionString);
            MongoServer   mongoServer = mongoClient.GetServer();
            MongoDatabase db          = mongoServer.GetDatabase("test");
            var           collection  = db.GetCollection <TweetItem>("TweetItems");
            DateTime      dtmFirst    = new DateTime(2014, 05, 17);

            // FIX: the original also computed an upper bound (dtmLast, truncated
            // to the start of the current hour) but never used it in the query
            // below; that dead code has been removed. The query is open-ended.
            var query = Query <TweetItem> .Where(t => t.CreationDate >= dtmFirst);

            List <TweetItem> value = collection.Find(query).ToList();
            FSDirectory      dir   = FSDirectory.GetDirectory(Environment.CurrentDirectory + "\\LuceneIndex");

            //Lucene.Net.Store.RAMDirectory dir = new RAMDirectory();
            Lucene.Net.Analysis.StopAnalyzer an = new Lucene.Net.Analysis.StopAnalyzer();
            IndexWriter wr = new IndexWriter(dir, an, true);   // true: overwrite any existing index

            foreach (TweetItem tweet in value)
            {
                Document doc = new Document();
                // Only "text" is tokenized and carries term vectors; the
                // remaining fields are stored verbatim and not searchable.
                doc.Add(new Field("id", tweet._id.ToString(), Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("created", tweet.CreationDate.ToString(), Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("user", tweet.User, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("text", tweet.Text, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
                wr.AddDocument(doc);
            }
            wr.Optimize();
            wr.Flush();
            wr.Close();
            dir.Close();
        }
Example #5
0
		public virtual void  TestPhraseQueryWithStopAnalyzer()
		{
			// Build a one-document in-memory index through StopAnalyzer,
			// which strips stop words from the indexed text.
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new StopAnalyzer(), true);
			Document d = new Document();
			d.Add(Field.Text("Field", "the stop words are here"));
			writer.AddDocument(d);
			writer.Close();
			
			IndexSearcher s = new IndexSearcher(dir);
			
			// Exact phrase of surviving (non-stop) terms: exactly one hit.
			PhraseQuery q = new PhraseQuery();
			q.Add(new Term("Field", "stop"));
			q.Add(new Term("Field", "words"));
			Assert.AreEqual(1, s.Search(q).Length());
			
			// currently StopAnalyzer does not leave "holes", so this matches.
			q = new PhraseQuery();
			q.Add(new Term("Field", "words"));
			q.Add(new Term("Field", "here"));
			Assert.AreEqual(1, s.Search(q).Length());
			
			s.Close();
		}
Example #6
0
        /// <summary>
        /// Lets the user pick a folder, indexes every file in it into
        /// .\LuceneIndex, then dumps all indexed terms to the console.
        /// </summary>
        private void btnFolder_Click(object sender, EventArgs e)
        {
            FolderBrowserDialog dia = new FolderBrowserDialog();
            DialogResult        res = dia.ShowDialog();

            if (res != System.Windows.Forms.DialogResult.OK)
            {
                return;
            }

            FSDirectory dir = FSDirectory.GetDirectory(Environment.CurrentDirectory + "\\LuceneIndex");

            //Lucene.Net.Store.RAMDirectory dir = new RAMDirectory();
            Lucene.Net.Analysis.StopAnalyzer an = new Lucene.Net.Analysis.StopAnalyzer();
            IndexWriter   wr     = new IndexWriter(dir, an, true);   // true: recreate the index
            DirectoryInfo diMain = new DirectoryInfo(dia.SelectedPath);

            foreach (FileInfo fi in diMain.GetFiles())
            {
                Document doc = new Document();
                doc.Add(new Field("title", fi.Name, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("text", File.ReadAllText(fi.FullName), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
                wr.AddDocument(doc);
            }
            wr.Optimize();
            wr.Flush();
            wr.Close();

            // FIX: the original closed the directory here and only THEN opened
            // an IndexReader on the already-closed directory. The reader is now
            // opened (and closed) before the directory is released.
            IndexReader reader = IndexReader.Open(dir);

            for (int i = 0; i < reader.MaxDoc(); i++)
            {
                if (reader.IsDeleted(i))
                {
                    continue;
                }

                Document doc   = reader.Document(i);
                // NOTE(review): no "docId" field is ever added when indexing
                // above, so this lookup yields null — confirm intended field name.
                String   docId = doc.Get("docId");

                // FIX: guard against a null vector array for documents that
                // carry no term vectors, instead of dereferencing it blindly.
                TermFreqVector[] vectors = reader.GetTermFreqVectors(i);
                if (vectors == null)
                {
                    continue;
                }
                foreach (TermFreqVector vector in vectors)
                {
                    foreach (string term in vector.GetTerms())
                    {
                        Console.WriteLine(term);
                    }
                }
                // do something with docId here...
            }
            reader.Close();
            dir.Close();
            //IndexSearcher search = new IndexSearcher(wr.GetReader());

            //MoreLikeThis mlt = new MoreLikeThis(wr.GetReader());
            //FileInfo fitarget = new FileInfo(@"C:\Users\peacemaker\Desktop\TestNoBitcoin\test.txt");
            //Query query = mlt.Like(fitarget);

            //var hits = search.Search(query, int.MaxValue);
            //foreach (ScoreDoc doc in hits.ScoreDocs)
            //{
            //    textBox1.Text += doc.Score + Environment.NewLine;
            //}
        }