예제 #1
0
        /// <summary>
        /// Builds a Lucene index from the baseball batter data, packages the index
        /// directory as a Zip file, then extracts the Zip and runs three sample
        /// queries against the extracted index, printing the hit counts.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("LUCENE CREATE INDEX - Start");

            // Create Lucene index and Zip output locations.
            // CreateDirectory is a no-op when the directory already exists.
            var indexLocation    = Path.Combine(Environment.CurrentDirectory, "LuceneIndex");
            var indexZipLocation = Path.Combine(Environment.CurrentDirectory, "LuceneIndexZip");

            System.IO.Directory.CreateDirectory(indexLocation);
            System.IO.Directory.CreateDirectory(indexZipLocation);

            var dataStream = GetBaseballData();

            var lines = ReadLines(() => dataStream, Encoding.UTF8);

            // Skip the first header line
            var batters = lines
                          .Skip(1)
                          .Select(v => MLBBaseballBatter.FromCsv(v))
                          .ToList();

            // LUCENE - CREATE THE INDEX
            var luceneVersion = LuceneVersion.LUCENE_48;

            // The directory and analyzer are shared by the writing and the querying
            // phases, so they are disposed only once both phases are done.
            using (var dir = FSDirectory.Open(indexLocation))
            using (var analyzer = new StandardAnalyzer(luceneVersion))
            {
                // OpenMode.CREATE makes the writer start a new index instead of appending.
                var indexConfig = new IndexWriterConfig(luceneVersion, analyzer);

                indexConfig.OpenMode = OpenMode.CREATE;

                // Disposing the writer (even when indexing throws) releases the index write lock.
                using (var writer = new IndexWriter(dir, indexConfig))
                {
                    // Max YearsPlayed per batter ID, computed once up front so the loop
                    // below does a dictionary lookup instead of re-grouping all rows
                    // for every batter.
                    var maxYearsPlayedById = batters
                                             .GroupBy(b => b.ID)
                                             .ToDictionary(g => g.Key, g => g.Max(b => b.YearsPlayed));

                    Console.WriteLine("LUCENE CREATE INDEX - Iterating Data for Index");

                    foreach (var batter in batters)
                    {
                        // 1 when this row carries the batter's maximum YearsPlayed, else 0.
                        // Exact equality is intended: the maximum is one of the row values itself.
                        var isBatterMaxYearsRecord =
                            maxYearsPlayedById[batter.ID] == batter.YearsPlayed ? 1 : 0;

                        writer.AddDocument(CreateBatterDocument(batter, isBatterMaxYearsRecord));
                    }

                    writer.Flush(triggerMerge: true, applyAllDeletes: false);
                    writer.Commit();

                    var numberDocs = writer.NumDocs;

                    Console.WriteLine("LUCENE CREATE INDEX - Number of Docs Written to Index: " + numberDocs);
                }

                Console.WriteLine("LUCENE CREATE INDEX - Index Created");

                // LUCENE - PACKAGE THE INDEX AS ZIP FILE
                var packagePath = Path.Combine(indexZipLocation, "LuceneIndex.zip");

                // Delete the Zip file before proceeding; CreateFromDirectory does not
                // overwrite an existing archive.
                if (File.Exists(packagePath))
                {
                    File.Delete(packagePath);
                }

                ZipFile.CreateFromDirectory(indexLocation, packagePath, CompressionLevel.Optimal, false);
                Console.WriteLine("LUCENE CREATE INDEX - Index Packaged (Zip)");

                // LUCENE - TEST THE INDEX
                // Load the index from Zip file (mimic it loading)
                Console.WriteLine("LUCENE CREATE INDEX - Text the Index from Packaged (Zip)");

                // NOTE(review): the archive is extracted straight into the current directory
                // (overwriting same-named files) and the index is opened from there - confirm
                // that a dedicated folder is not wanted instead.
                ZipFile.ExtractToDirectory(packagePath, Environment.CurrentDirectory, true);

                using (var zipDirectory = FSDirectory.Open(Environment.CurrentDirectory))
                using (var indexReader = DirectoryReader.Open(zipDirectory))
                {
                    var searcher = new IndexSearcher(indexReader);

                    // Simple Query
                    var parser        = new QueryParser(luceneVersion, "FullPlayerName", analyzer);
                    var query         = parser.Parse("Todd");
                    var searchResults = searcher.Search(query, 500); // top 500
                    var hits          = searchResults.ScoreDocs;

                    Console.WriteLine("LUCENE CREATE INDEX - Search for 'Todd': " + hits.Length);

                    // Simple Query - With Filter: name match AND IsBatterMaxYearsRecord == 1
                    var queryRanged = NumericRangeQuery.NewInt32Range("IsBatterMaxYearsRecord", 1, 1, true, true);

                    var andQuery = new BooleanQuery();

                    andQuery.Add(query, Occur.MUST);
                    andQuery.Add(queryRanged, Occur.MUST);

                    var searchResultsWithFilter = searcher.Search(andQuery, 500); // top 500
                    var hitsWithFilter          = searchResultsWithFilter.ScoreDocs;

                    Console.WriteLine("LUCENE CREATE INDEX - Search for 'Todd' with Max Years Filter: " + hitsWithFilter.Length);

                    // Query For Id (exact term match against the untokenized "Id" field)
                    var termHankAaron = new Term("Id", "aaronha01");
                    var termQuery     = new TermQuery(termHankAaron);

                    var searchResultsTermQuery = searcher.Search(termQuery, 50); // top 50
                    var hitsTermQuery          = searchResultsTermQuery.ScoreDocs;

                    Console.WriteLine("LUCENE CREATE INDEX - Search for 'Id = aaronha01': " + hitsTermQuery.Length);
                }
            }
        }

        /// <summary>
        /// Creates the Lucene document for a single batter row.
        /// </summary>
        /// <param name="batter">The batter row to index.</param>
        /// <param name="isBatterMaxYearsRecord">
        /// 1 when the row holds the batter's maximum YearsPlayed, otherwise 0.
        /// </param>
        /// <returns>A document containing the searchable and stored-only fields of the row.</returns>
        private static Document CreateBatterDocument(MLBBaseballBatter batter, int isBatterMaxYearsRecord)
        {
            return new Document
            {
                // Field names map to the MLBBaseballPlayer.cs class
                // that is used in ML.NET models, demos etc.

                // StringField indexes but doesn't tokenize
                new StringField("Id", batter.ID, Field.Store.YES),
                new Int32Field("IsBatterMaxYearsRecord", isBatterMaxYearsRecord, Field.Store.YES),
                // TextField is the one queried through the analyzer by the name search
                new TextField("FullPlayerName", batter.FullPlayerName, Field.Store.YES),
                new StringField("InductedToHallOfFame", batter.InductedToHallOfFame.ToString(), Field.Store.YES),
                new StringField("OnHallOfFameBallot", batter.OnHallOfFameBallot.ToString(), Field.Store.YES),
                new SingleField("YearsPlayed", batter.YearsPlayed, Field.Store.YES),

                // Use StoredField to minimize index storage, since these fields won't be searched on
                new StoredField("AB", batter.AB),
                new StoredField("R", batter.R),
                new StoredField("H", batter.H),
                new StoredField("Doubles", batter.Doubles),
                new StoredField("Triples", batter.Triples),
                new StoredField("HR", batter.HR),
                new StoredField("RBI", batter.RBI),
                new StoredField("SB", batter.SB),
                new StoredField("BattingAverage", batter.BattingAverage),
                new StoredField("SluggingPct", batter.SluggingPct),
                new StoredField("AllStarAppearances", batter.AllStarAppearances),
                new StoredField("MVPs", batter.MVPs),
                new StoredField("TripleCrowns", batter.TripleCrowns),
                new StoredField("GoldGloves", batter.GoldGloves),
                new StoredField("MajorLeaguePlayerOfTheYearAwards", batter.MajorLeaguePlayerOfTheYearAwards),
                new StoredField("TB", batter.TB),
                new StoredField("TotalPlayerAwards", batter.TotalPlayerAwards),
                new StoredField("LastYearPlayed", batter.LastYearPlayed)
            };
        }
예제 #2
0
        /// <summary>
        /// Loads the MLB batters from the local CSV file and returns them sorted by
        /// years played, longest first.
        /// </summary>
        /// <remarks>
        /// In a production system this service would load the batters from distributed
        /// persisted storage, an information retrieval engine (e.g. Azure Search, Lucene),
        /// a relational database etc. No fictitious sample players are appended; the list
        /// comes solely from the CSV file.
        /// </remarks>
        /// <returns>An already-completed task holding the sorted list of batters.</returns>
        public Task <List <MLBBaseballBatter> > GetSampleBaseballData()
        {
            const string battersCsvPath = "Data/MLBBaseballBatters.csv";

            // The first line of the file is skipped (presumably the CSV header row);
            // every remaining line is parsed into a batter.
            var sortedBatters = File.ReadAllLines(battersCsvPath)
                                    .Skip(1)
                                    .Select(row => MLBBaseballBatter.FromCsv(row))
                                    .OrderByDescending(player => player.YearsPlayed)
                                    .ToList();

            // All the work above is synchronous; the result is wrapped in a completed task.
            return Task.FromResult(sortedBatters);
        }