コード例 #1
0
ファイル: BibIndexer.cs プロジェクト: radtek/epresenter
        /// <summary>
        /// Builds a word index over the verse text returned by the query below and
        /// writes it to two binary files located next to the executable.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Despite the method name, the query is restricted to <c>Version = 'KJV'</c>.
        /// </para>
        /// <para>
        /// <c>bibndx.bin</c>: for every word (in sorted order) a 4-byte count followed by
        /// that many 4-byte packed verse pointers. A pointer is laid out as
        /// bits 0-7 = REFVERSE, bits 8-15 = REFCHAPTER, bits 16-23 = OrderNum,
        /// bits 24+ = version tag (1 when the Version column is "RST", otherwise 2).
        /// Each of the three numeric columns is truncated to a single byte.
        /// </para>
        /// <para>
        /// <c>bibwds.bin</c>: for every word a 1-byte length, the word encoded as UTF-16
        /// (that many bytes), and the 4-byte offset of the word's entry in <c>bibndx.bin</c>.
        /// </para>
        /// <para>
        /// Integers are written with <see cref="BitConverter"/>, i.e. in the byte order of
        /// the machine that builds the index.
        /// </para>
        /// </remarks>
        public void IndexAllVersions()
        {
            /////////////////////////////////////////////////////////////////
            // Go through the bible and build a forward index:
            // word -> packed pointers of every verse that contains the word
            Dictionary<string, List<int>> forwardIndex = new Dictionary<string, List<int>>();
            string badchars = "',.?!:;`-\"()[]";

            using (PerformanceTimer pt = new PerformanceTimer("Analyze"))
            using (FBirdTask t = new FBirdTask())
            {
                t.CommandText = "SELECT \"BibleLookUp\".\"OrderNum\", BIBLEVERSES.REFCHAPTER, BIBLEVERSES.REFVERSE, BIBLEVERSES.DATA, Version " +
                                "FROM BIBLEVERSES " +
                                "INNER JOIN \"BibleLookUp\" ON (BIBLEVERSES.REFBOOK=\"BibleLookUp\".\"RefBook\") " +
                                "WHERE (Version = 'KJV')";
                using (PerformanceTimer pt2 = new PerformanceTimer("Analyze - Initial exec"))
                {
                    t.ExecuteReader();
                }

                using (PerformanceTimer pt2 = new PerformanceTimer("Analyze - Data read"))
                {
                    while (t.DR.Read())
                    {
                        // Lower-case the verse text and turn punctuation into separators.
                        string text = t.DR.GetString(3).ToLower();
                        foreach (char c in badchars)
                        {
                            text = text.Replace(c, ' ');
                        }

                        // Distinct, non-empty words of this verse, in order of first appearance.
                        // A verse holds few words, so a linear Contains is adequate here.
                        List<string> localWords = new List<string>();
                        foreach (string word in text.Split(' '))
                        {
                            if (word.Length > 0 && !localWords.Contains(word))
                            {
                                localWords.Add(word);
                            }
                        }

                        if (localWords.Count == 0)
                        {
                            continue;
                        }

                        // The pointer is the same for every word of the row, so build it once.
                        int packedRef = ((byte)t.DR.GetInt16(2))
                                        + ((byte)t.DR.GetInt16(1) << 8)
                                        + ((byte)t.DR.GetInt16(0) << 16);

                        // Adjust for version
                        int versionTag = (t.DR.GetString(4) == "RST") ? 1 : 2;
                        packedRef += versionTag << 24;

                        foreach (string word in localWords)
                        {
                            List<int> versePointers;
                            if (!forwardIndex.TryGetValue(word, out versePointers))
                            {
                                versePointers = new List<int>();
                                forwardIndex[word] = versePointers;
                            }

                            versePointers.Add(packedRef);
                        }
                    }
                }
            }

            /////////////////////////////////////////////////////////////////
            // Sort words
            List<string> sortedWords = new List<string>(forwardIndex.Keys);
            sortedWords.Sort();

            string outputDir = Path.GetDirectoryName(Application.ExecutablePath);

            /////////////////////////////////////////////////////////////////
            // Write indexes to file
            Dictionary<string, int> wordIndex = new Dictionary<string, int>();
            string pthIndex = Path.Combine(outputDir, "bibndx.bin");

            // 'using' guarantees the handle is released even if a write fails.
            using (FileStream fs = new FileStream(pthIndex, FileMode.Create, FileAccess.Write))
            {
                foreach (string key in sortedWords)
                {
                    // Store the current word offset into word map
                    wordIndex.Add(key, (int)fs.Position);

                    List<int> versePointers = forwardIndex[key];

                    // Write the count of results
                    fs.Write(BitConverter.GetBytes(versePointers.Count), 0, 4);

                    // Write out all the result pointers
                    foreach (int pointer in versePointers)
                    {
                        fs.Write(BitConverter.GetBytes(pointer), 0, 4);
                    }
                }
            }

            /////////////////////////////////////////////////////////////////
            // Write word list to file
            string pthWords = Path.Combine(outputDir, "bibwds.bin");

            using (FileStream fs2 = new FileStream(pthWords, FileMode.Create, FileAccess.Write, FileShare.None))
            {
                // Walk the sorted list rather than the dictionary keys so the on-disk
                // order does not depend on Dictionary enumeration order.
                foreach (string word in sortedWords)
                {
                    byte[] bCurWord = Encoding.Unicode.GetBytes(word);

                    // NOTE(review): the length prefix is one byte, so a word longer than
                    // 255 encoded bytes (127 UTF-16 chars) would be written with a wrong length.
                    fs2.WriteByte((byte)bCurWord.Length);
                    fs2.Write(bCurWord, 0, bCurWord.Length);
                    fs2.Write(BitConverter.GetBytes(wordIndex[word]), 0, 4);
                }
            }
        }
コード例 #2
0
ファイル: BibIndexer.cs プロジェクト: radtek/epresenter
        /// <summary>
        /// Looks up the index entries matching every one of the given word fragments.
        /// </summary>
        /// <remarks>
        /// Each fragment is expanded to all keys of <c>wordOffsetTable</c> that contain it
        /// as a substring; the entries stored in <c>bibndx.bin</c> for those words are merged
        /// per fragment, and the per-fragment sets are then intersected.
        /// The index file is read once and cached in <c>msIndex</c>; the shared stream
        /// position is moved during the lookup.
        /// </remarks>
        /// <param name="wordFrags">
        /// Fragments to search for. Duplicate fragments are processed once.
        /// NOTE(review): matching is case-sensitive; the index writer lower-cases words,
        /// so callers presumably pass lower-case fragments - confirm against callers.
        /// </param>
        /// <returns>
        /// The 4-byte index entries common to all fragments, in the order they occur for
        /// the last processed fragment; an empty list when nothing matches or no
        /// fragments are supplied.
        /// </returns>
        public List <int> QueryWordFrags(string[] wordFrags)
        {
            // Ensure init
            if (wordOffsetTable == null)
            {
                InitWordOffsetTable();
            }

            // Match all the partial words: original fragment -> indexed words containing it
            Dictionary<string, List<string>> expandedWords = new Dictionary<string, List<string>>();

            foreach (string wordFrag in wordFrags)
            {
                if (expandedWords.ContainsKey(wordFrag))
                {
                    continue;   // a repeated fragment cannot add any new match
                }

                List<string> matches = new List<string>();
                foreach (string indexedWord in wordOffsetTable.Keys)
                {
                    if (indexedWord.Contains(wordFrag))
                    {
                        matches.Add(indexedWord);
                    }
                }
                expandedWords.Add(wordFrag, matches);
            }

            // Load file into memory. ReadAllBytes reads the complete file and always
            // releases the handle, unlike a single unchecked Stream.Read call.
            if (msIndex == null)
            {
                string pthIndex = Path.Combine(Path.GetDirectoryName(Application.ExecutablePath), "bibndx.bin");
                msIndex = new MemoryStream(File.ReadAllBytes(pthIndex));
            }

            // Collect, per fragment, the distinct entries of all its expanded words
            List<List<int>> fragPrimaries = new List<List<int>>();
            byte[] buffer = new byte[4];

            using (PerformanceTimer pt = new PerformanceTimer("Main"))
            {
                foreach (KeyValuePair<string, List<string>> expansion in expandedWords)
                {
                    List<int> hits = new List<int>();
                    HashSet<int> seen = new HashSet<int>();     // O(1) duplicate check, list keeps order

                    foreach (string exWord in expansion.Value)
                    {
                        msIndex.Position = wordOffsetTable[exWord];     // Seek to position

                        // Get count of results
                        msIndex.Read(buffer, 0, 4);
                        int count = BitConverter.ToInt32(buffer, 0);

                        // Loop through all the keys
                        for (int i = 0; i < count; i++)
                        {
                            msIndex.Read(buffer, 0, 4);
                            int primary = BitConverter.ToInt32(buffer, 0);
                            if (seen.Add(primary))
                            {
                                hits.Add(primary);
                            }
                        }
                    }

                    fragPrimaries.Add(hits);
                }
            }

            // Filter to only overlapping
            List<int> primarys = new List<int>();
            bool first = true;

            using (PerformanceTimer pt = new PerformanceTimer("Filter"))
            {
                foreach (List<int> hits in fragPrimaries)
                {
                    if (first)
                    {
                        // The first fragment seeds the result unfiltered.
                        primarys = new List<int>(hits);
                        first = false;
                        continue;
                    }

                    HashSet<int> previous = new HashSet<int>(primarys);
                    List<int> kept = new List<int>();
                    foreach (int candidate in hits)
                    {
                        if (previous.Contains(candidate))
                        {
                            kept.Add(candidate);
                        }
                    }
                    primarys = kept;
                }
            }

            return primarys;
        }