Exemplo n.º 1
0
        /// <summary>
        /// Tokenizes everything available from <paramref name="reader"/> with a
        /// SimpleAnalyzer and prints timing statistics to the console.
        /// </summary>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <param name="verbose">When true, each token's text and offsets are printed.</param>
        /// <param name="bytes">Input size in bytes; only used for the megabytes/hour figure.</param>
        internal static void Test(System.IO.TextReader reader, bool verbose, long bytes)
        {
            Analyzer    analyzer = new SimpleAnalyzer();
            TokenStream stream   = analyzer.TokenStream(null, reader);

            System.DateTime start = System.DateTime.Now;

            int count = 0;

            for (Token t = stream.Next(); t != null; t = stream.Next())
            {
                if (verbose)
                {
                    System.Console.Out.WriteLine("Text=" + t.TermText() + " start=" + t.StartOffset() + " end=" + t.EndOffset());
                }
                count++;
            }

            System.DateTime end = System.DateTime.Now;

            // DateTime.Ticks counts 100-nanosecond intervals; convert the difference to
            // milliseconds so the value matches the units printed below.
            long time = (end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond;

            System.Console.Out.WriteLine(time + " milliseconds to extract " + count + " tokens");
            // Floating-point division: a zero count or zero elapsed time prints NaN/Infinity rather than throwing.
            System.Console.Out.WriteLine((time * 1000.0) / count + " microseconds/token");
            System.Console.Out.WriteLine((bytes * 1000.0 * 60.0 * 60.0) / (time * 1000000.0) + " megabytes/hour");
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds an in-memory index containing NUM_DOCS documents, each with NUM_FIELDS
        /// stored, analyzed fields named "f1", "f2", ...
        /// </summary>
        /// <returns>The RAMDirectory the index was written into.</returns>
        /// <exception cref="System.SystemException">
        /// Wraps any exception raised while the index is being built.
        /// </exception>
        private Directory MakeIndex()
        {
            Directory ramDir = new RAMDirectory();

            try
            {
                System.Random rng         = NewRandom();
                IndexWriter   indexWriter = new IndexWriter(ramDir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

                indexWriter.UseCompoundFile = false;

                int docNum = 1;
                while (docNum <= NUM_DOCS)
                {
                    Document document = new Document();
                    for (int fieldNum = 1; fieldNum <= NUM_FIELDS; ++fieldNum)
                    {
                        // Value = entry selected by field number + '#' + randomly selected entry.
                        string fieldName  = "f" + fieldNum;
                        string fieldValue = data[fieldNum % data.Length] + '#' + data[rng.Next(data.Length)];
                        document.Add(new Field(fieldName, fieldValue, Field.Store.YES, Field.Index.ANALYZED));
                    }
                    indexWriter.AddDocument(document);
                    docNum++;
                }
                indexWriter.Close();
            }
            catch (System.Exception cause)
            {
                throw new System.SystemException("", cause);
            }

            return ramDir;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Creates the analyzer that corresponds to the given name.
        /// </summary>
        /// <param name="analyzerName">
        /// One of "SimpleAnalyzer", "StandardAnalyzer", "KeywordAnalyzer", "StopAnalyzer"
        /// or "WhitespaceAnalyzer". Any other value (including null) yields a SimpleAnalyzer.
        /// </param>
        /// <returns>A newly constructed analyzer instance.</returns>
        public static Lucene.Net.Analysis.Analyzer GetAnalyzerByName(string analyzerName)
        {
            Lucene.Net.Util.Version luceneVersion = Lucene.Net.Util.Version.LUCENE_30;

            switch (analyzerName)
            {
            case "StandardAnalyzer":
                return new Lucene.Net.Analysis.Standard.StandardAnalyzer(luceneVersion);

            case "KeywordAnalyzer":
                return new Lucene.Net.Analysis.KeywordAnalyzer();

            case "StopAnalyzer":
                return new Lucene.Net.Analysis.StopAnalyzer(luceneVersion);

            case "WhitespaceAnalyzer":
                return new Lucene.Net.Analysis.WhitespaceAnalyzer();

            default:
                // "SimpleAnalyzer" and every unrecognized name end up here.
                return new Lucene.Net.Analysis.SimpleAnalyzer();
            }
        }
Exemplo n.º 4
0
		/// <summary>
		/// Creates a RAMDirectory and fills it with NUM_DOCS documents of NUM_FIELDS
		/// stored, analyzed fields each ("f1", "f2", ...).
		/// </summary>
		/// <returns>The directory holding the freshly written index.</returns>
		/// <exception cref="System.SystemException">
		/// Wraps any exception thrown while writing the index.
		/// </exception>
		private Directory MakeIndex()
		{
			Directory target = new RAMDirectory();
			try
			{
				System.Random random = NewRandom();
				IndexWriter iw = new IndexWriter(target, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

				iw.SetUseCompoundFile(false);

				for (int docIndex = 0; docIndex < NUM_DOCS; docIndex++)
				{
					Document current = new Document();
					int fieldNo = 1;
					while (fieldNo <= NUM_FIELDS)
					{
						// Fixed entry picked by field number, then '#', then a random entry.
						string text = data[fieldNo % data.Length] + '#' + data[random.Next(data.Length)];
						current.Add(new Field("f" + fieldNo, text, Field.Store.YES, Field.Index.ANALYZED));
						fieldNo++;
					}
					iw.AddDocument(current);
				}
				iw.Close();
			}
			catch (System.Exception inner)
			{
				throw new System.SystemException("", inner);
			}
			return target;
		}
Exemplo n.º 5
0
		/// <summary>
		/// Builds an in-memory index with NUM_DOCS documents, each carrying NUM_FIELDS
		/// stored, tokenized fields named "f1", "f2", ... The random source is seeded
		/// with BASE_SEED + 42.
		/// </summary>
		/// <returns>The RAMDirectory that received the index.</returns>
		/// <exception cref="System.SystemException">
		/// Wraps any exception raised during index construction.
		/// </exception>
		private static Directory MakeIndex()
		{
			Directory ramDirectory = new RAMDirectory();
			try
			{
				System.Random seeded = new System.Random((System.Int32) (BASE_SEED + 42));
				IndexWriter indexWriter = new IndexWriter(ramDirectory, new SimpleAnalyzer(), true);

				indexWriter.SetUseCompoundFile(false);

				int docNumber = 1;
				while (docNumber <= NUM_DOCS)
				{
					Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
					for (int fieldNumber = 1; fieldNumber <= NUM_FIELDS; ++fieldNumber)
					{
						// Deterministic entry by field number, '#', then a randomly drawn entry.
						string name = "f" + fieldNumber;
						string content = data[fieldNumber % data.Length] + '#' + data[seeded.Next(data.Length)];
						document.Add(new Field(name, content, Field.Store.YES, Field.Index.TOKENIZED));
					}
					indexWriter.AddDocument(document);
					++docNumber;
				}
				indexWriter.Close();
			}
			catch (System.Exception failure)
			{
				throw new System.SystemException("", failure);
			}
			return ramDirectory;
		}
Exemplo n.º 6
0
 /// <summary>
 /// Creates a query parser constructed with "Field" and the supplied analyzer,
 /// with its operator set to DEFAULT_OPERATOR_OR.
 /// </summary>
 /// <param name="a">Analyzer to use; a SimpleAnalyzer is substituted when null.</param>
 /// <returns>The configured parser.</returns>
 public virtual QueryParsers.QueryParser GetParser(Analyzer a)
 {
     Analyzer effective = (a != null) ? a : new SimpleAnalyzer();

     QueryParsers.QueryParser parser = new QueryParsers.QueryParser("Field", effective);
     parser.SetOperator(QueryParsers.QueryParser.DEFAULT_OPERATOR_OR);
     return parser;
 }
Exemplo n.º 7
0
 /// <summary>
 /// Parses <paramref name="query"/> with a parser constructed with "Field" whose
 /// operator is set to DEFAULT_OPERATOR_AND.
 /// </summary>
 /// <param name="query">Query text handed to the parser.</param>
 /// <param name="a">Analyzer to use; a SimpleAnalyzer is substituted when null.</param>
 /// <returns>The result of parsing the query text.</returns>
 public virtual Query GetQueryDOA(System.String query, Analyzer a)
 {
     Analyzer effective = (a != null) ? a : new SimpleAnalyzer();

     QueryParsers.QueryParser parser = new QueryParsers.QueryParser("Field", effective);
     parser.SetOperator(QueryParsers.QueryParser.DEFAULT_OPERATOR_AND);

     Query parsed = parser.Parse(query);
     return parsed;
 }
Exemplo n.º 8
0
		/// <summary>
		/// Builds a Document from the named file and adds it, under the given segment
		/// name, to the directory obtained from FSDirectory.GetDirectory("test", false).
		/// </summary>
		/// <param name="segment">Segment name passed to DocumentWriter.AddDocument.</param>
		/// <param name="fileName">Path of the file the Document is built from.</param>
		public static void IndexDoc(System.String segment, System.String fileName)
		{
			Directory directory = FSDirectory.GetDirectory("test", false);
			try
			{
				Analyzer analyzer = new SimpleAnalyzer();
				DocumentWriter writer = new DocumentWriter(directory, analyzer, Similarity.GetDefault(), 1000);

				System.IO.FileInfo file = new System.IO.FileInfo(fileName);
				Document doc = Lucene.Net.Demo.FileDocument.Document(file);

				writer.AddDocument(segment, doc);
			}
			finally
			{
				// Close the directory even when building or indexing the document throws.
				directory.Close();
			}
		}
Exemplo n.º 9
0
        /// <summary>
        /// Builds a Document from a file located in workDir and adds it, under the given
        /// segment name, to the directory obtained from FSDirectory.GetDirectory(indexDir, false).
        /// </summary>
        /// <param name="segment">Segment name passed to DocumentWriter.AddDocument.</param>
        /// <param name="fileName">Name of the file, relative to workDir.</param>
        private void IndexDoc(System.String segment, System.String fileName)
        {
            Directory directory = FSDirectory.GetDirectory(indexDir, false);

            try
            {
                Analyzer       analyzer = new SimpleAnalyzer();
                DocumentWriter writer   = new DocumentWriter(directory, analyzer, Similarity.GetDefault(), 1000);

                // Path.Combine uses the platform's separator instead of a hard-coded backslash.
                System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(workDir.FullName, fileName));
                Document           doc  = FileDocument.Document(file);

                writer.AddDocument(segment, doc);
            }
            finally
            {
                // Close the directory even when building or indexing the document throws.
                directory.Close();
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Builds a Document from the named file and adds it, under the given segment
        /// name, to the directory obtained from FSDirectory.GetDirectory("test", false).
        /// </summary>
        /// <param name="segment">Segment name passed to DocumentWriter.AddDocument.</param>
        /// <param name="fileName">Path of the file the Document is built from.</param>
        public static void IndexDoc(System.String segment, System.String fileName)
        {
            Directory directory = FSDirectory.GetDirectory("test", false);

            try
            {
                Analyzer       analyzer = new SimpleAnalyzer();
                DocumentWriter writer   = new DocumentWriter(directory, analyzer, Similarity.GetDefault(), 1000);

                System.IO.FileInfo file = new System.IO.FileInfo(fileName);
                Document           doc  = Lucene.Net.Demo.FileDocument.Document(file);

                writer.AddDocument(segment, doc);
            }
            finally
            {
                // Close the directory even when building or indexing the document throws.
                directory.Close();
            }
        }
Exemplo n.º 11
0
		/// <summary>
		/// Tokenizes everything available from <paramref name="reader"/> with a
		/// SimpleAnalyzer and prints timing statistics to the console.
		/// </summary>
		/// <param name="reader">Source of the text to tokenize.</param>
		/// <param name="verbose">When true, each token's text and offsets are printed.</param>
		/// <param name="bytes">Input size in bytes; only used for the megabytes/hour figure.</param>
		internal static void Test(System.IO.TextReader reader, bool verbose, long bytes)
		{
			Analyzer analyzer = new SimpleAnalyzer();
			TokenStream stream = analyzer.TokenStream(null, reader);

			System.DateTime start = System.DateTime.Now;

			int count = 0;
			for (Token t = stream.Next(); t != null; t = stream.Next())
			{
				if (verbose)
				{
					System.Console.Out.WriteLine("Text=" + t.TermText() + " start=" + t.StartOffset() + " end=" + t.EndOffset());
				}
				count++;
			}

			System.DateTime end = System.DateTime.Now;

			// DateTime.Ticks counts 100-nanosecond intervals; convert the difference to
			// milliseconds so the value matches the units printed below.
			long time = (end.Ticks - start.Ticks) / System.TimeSpan.TicksPerMillisecond;
			System.Console.Out.WriteLine(time + " milliseconds to extract " + count + " tokens");
			// Floating-point division: a zero count or zero elapsed time prints NaN/Infinity rather than throwing.
			System.Console.Out.WriteLine((time * 1000.0) / count + " microseconds/token");
			System.Console.Out.WriteLine((bytes * 1000.0 * 60.0 * 60.0) / (time * 1000000.0) + " megabytes/hour");
		}
Exemplo n.º 12
0
		/// <summary>
		/// Builds a Document from a file located in workDir and adds it, under the given
		/// segment name, to the directory obtained from FSDirectory.GetDirectory(indexDir, false).
		/// </summary>
		/// <param name="segment">Segment name passed to DocumentWriter.AddDocument.</param>
		/// <param name="fileName">Name of the file, relative to workDir.</param>
		private void IndexDoc(System.String segment, System.String fileName)
		{
			Directory directory = FSDirectory.GetDirectory(indexDir, false);
			try
			{
				Analyzer analyzer = new SimpleAnalyzer();
				DocumentWriter writer = new DocumentWriter(directory, analyzer, Similarity.GetDefault(), 1000);

				// Path.Combine uses the platform's separator instead of a hard-coded backslash.
				System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(workDir.FullName, fileName));
				Document doc = FileDocument.Document(file);

				writer.AddDocument(segment, doc);
			}
			finally
			{
				// Close the directory even when building or indexing the document throws.
				directory.Close();
			}
		}
Exemplo n.º 13
0
		/// <summary>
		/// Returns a query parser constructed with "Field" and the given analyzer,
		/// with its operator set to DEFAULT_OPERATOR_OR.
		/// </summary>
		/// <param name="a">Analyzer to use; when null a SimpleAnalyzer is created instead.</param>
		/// <returns>The configured parser.</returns>
		public virtual QueryParsers.QueryParser GetParser(Analyzer a)
		{
			Analyzer chosen = a;
			if (chosen == null)
			{
				chosen = new SimpleAnalyzer();
			}

			QueryParsers.QueryParser result = new QueryParsers.QueryParser("Field", chosen);
			result.SetOperator(QueryParsers.QueryParser.DEFAULT_OPERATOR_OR);
			return result;
		}
Exemplo n.º 14
0
		/// <summary>
		/// Parses the query text with a parser constructed with "Field" whose operator
		/// is set to DEFAULT_OPERATOR_AND.
		/// </summary>
		/// <param name="query">Query text handed to the parser.</param>
		/// <param name="a">Analyzer to use; when null a SimpleAnalyzer is created instead.</param>
		/// <returns>The result of parsing the query text.</returns>
		public virtual Query GetQueryDOA(System.String query, Analyzer a)
		{
			Analyzer chosen = a;
			if (chosen == null)
			{
				chosen = new SimpleAnalyzer();
			}

			QueryParsers.QueryParser andParser = new QueryParsers.QueryParser("Field", chosen);
			andParser.SetOperator(QueryParsers.QueryParser.DEFAULT_OPERATOR_AND);
			return andParser.Parse(query);
		}
Exemplo n.º 15
0
		/// <summary>
		/// Returns a query parser built for Version.LUCENE_CURRENT with "field" and the
		/// given analyzer, with DefaultOperator set to OR_OPERATOR.
		/// </summary>
		/// <param name="a">Analyzer to use; when null a SimpleAnalyzer is created instead.</param>
		/// <returns>The configured parser.</returns>
		public virtual QueryParser GetParser(Analyzer a)
		{
			Analyzer effective = (a != null) ? a : new SimpleAnalyzer();

			QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "field", effective)
			{
				DefaultOperator = QueryParser.OR_OPERATOR
			};
			return parser;
		}
Exemplo n.º 16
0
		/// <summary>
		/// Parses the query text with a parser constructed with "field" whose default
		/// operator is set to AND_OPERATOR.
		/// </summary>
		/// <param name="query">Query text handed to the parser.</param>
		/// <param name="a">Analyzer to use; when null a SimpleAnalyzer is created instead.</param>
		/// <returns>The result of parsing the query text.</returns>
		public virtual Query GetQueryDOA(System.String query, Analyzer a)
		{
			Analyzer effective = (a != null) ? a : new SimpleAnalyzer();

			QueryParser andParser = new QueryParser("field", effective);
			andParser.SetDefaultOperator(QueryParser.AND_OPERATOR);

			Query parsed = andParser.Parse(query);
			return parsed;
		}
Exemplo n.º 17
0
		/// <summary>
		/// Parses the query text with a parser built for Version.LUCENE_CURRENT and
		/// "field", with DefaultOperator set to AND_OPERATOR.
		/// </summary>
		/// <param name="query">Query text handed to the parser.</param>
		/// <param name="a">Analyzer to use; when null a SimpleAnalyzer is created instead.</param>
		/// <returns>The result of parsing the query text.</returns>
		public virtual Query GetQueryDOA(System.String query, Analyzer a)
		{
			Analyzer chosen = a;
			if (chosen == null)
			{
				chosen = new SimpleAnalyzer();
			}

			QueryParser andParser = new QueryParser(Version.LUCENE_CURRENT, "field", chosen);
			andParser.DefaultOperator = QueryParser.AND_OPERATOR;
			return andParser.Parse(query);
		}
Exemplo n.º 18
0
		/// <summary>
		/// Returns a query parser constructed with "field" and the given analyzer,
		/// with its default operator set to OR_OPERATOR.
		/// </summary>
		/// <param name="a">Analyzer to use; when null a SimpleAnalyzer is created instead.</param>
		/// <returns>The configured parser.</returns>
		public virtual QueryParser GetParser(Analyzer a)
		{
			Analyzer chosen = a;
			if (chosen == null)
			{
				chosen = new SimpleAnalyzer();
			}

			QueryParser orParser = new QueryParser("field", chosen);
			orParser.SetDefaultOperator(QueryParser.OR_OPERATOR);
			return orParser;
		}
Exemplo n.º 19
0
        /// <summary>
        /// Click handler for the search control. Validates the output location, optionally
        /// continues topic numbering from an existing results file, runs the search,
        /// displays the ranked results and appends them to the results file.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void lblSearch_Click(object sender, EventArgs e)
        {
            // Both a target folder and a file name are required before anything runs.
            if (String.IsNullOrEmpty(txt_Saving.Text))
            {
                MessageBox.Show("Please choose a path to save results.");
                return;
            }
            if (String.IsNullOrEmpty(txt_FileName.Text))
            {
                MessageBox.Show("Please input a file name.");
                return;
            }

            String filePath = String.Format("{0}\\{1}.txt", txt_Saving.Text, txt_FileName.Text);

            if (File.Exists(filePath))
            {
                if (MessageBox.Show("The file is existing, do you want to append text into the file? (Yes: append it, No: Set a new file name.)",
                                    "Confirm", MessageBoxButtons.YesNo) != DialogResult.Yes)
                {
                    // The user prefers to pick a different file name.
                    return;
                }

                // Appending: continue numbering from the topic ID that starts the last line.
                // The file is enumerated only once; an empty file falls back to "000".
                string lastLine = File.ReadLines(filePath).LastOrDefault() ?? "000";
                if (!String.IsNullOrEmpty(lastLine))
                {
                    int lastTopicId;
                    if (!int.TryParse(lastLine.Split(' ')[0], out lastTopicId))
                    {
                        MessageBox.Show("Fail appending! The format in the file is wrong. Please set a new file name.");
                        return;
                    }
                    TopicID = lastTopicId + 1;
                }
            }

            btn_SaveBrowse.Visible = false;
            txt_Saving.ReadOnly    = true;
            txt_FileName.ReadOnly  = true;

            // Use a SimpleAnalyzer when the checkbox is ticked, otherwise the generator's analyzer.
            Lucene.Net.Analysis.Analyzer analyzer;
            if (ckb_Analzer.Checked)
            {
                analyzer = new Lucene.Net.Analysis.SimpleAnalyzer();
            }
            else
            {
                analyzer = generator.analyzer;
            }

            // The measured time covers searcher setup, the search itself and result ranking.
            Stopwatch stopWatch = new Stopwatch();

            stopWatch.Start();
            Searcher searcher = new Searcher(txt_IndexPath.Text, analyzer, generator.writer);

            searcher.CreateSearcher();
            searcher.CreateParser(ddl_Type.SelectedItem.ToString(), ddl_Fields.SelectedItem.ToString());

            // Searching and generate result
            Dictionary <string, float> resultList = searcher.DisplayResults(searcher.SearchIndex(txt_InformationNeeds.Text), generator.collectionList);

            lbl_QueryText.Text   = searcher.query.ToString();
            ResultCollectionList =
                (from c in generator.collectionList
                 join r in resultList on c.DocID equals r.Key
                 select new Collection
            {
                DocID = c.DocID,
                Title = c.Title,
                Author = c.Author,
                Bibliographic = c.Bibliographic,
                Words = c.Words,
                Score = r.Value
            }).OrderByDescending(n => n.Score).ToList();
            stopWatch.Stop();
            long ts = stopWatch.ElapsedMilliseconds;

            lbl_SearchingTime.Text = ts.ToString() + " ms";

            // Set columns to listview
            GenerateListView();
            // Set the total number of documents
            total             = resultList.Count;
            lblTotalDocs.Text = total + " docs";

            // Append one line per result: topic ID, "Q0", document ID, rank, score, run tag.
            using (System.IO.StreamWriter file =
                       new System.IO.StreamWriter(filePath, true))
            {
                // Always terminate lines with LF, independent of the platform default.
                file.NewLine = "\n";

                // The results file is machine-readable, so numbers must not depend on the
                // user's regional settings (e.g. a decimal comma in the score).
                System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

                int rank = 1;
                foreach (Collection c in ResultCollectionList)
                {
                    file.WriteLine(String.Format(invariant, "{0:000} {1} {2} {3} {4} {5}", TopicID, "Q0", c.DocID, rank, c.Score, "9913661_9913351_10032711_RetrievalHero"));
                    rank++;
                }
                TopicID++;
            }
        }