Ejemplo n.º 1
0
        /// <summary>
        /// Indexes one piece of text under a newly allocated document id.
        /// </summary>
        /// <param name="myText">The text to index.</param>
        /// <param name="expandWithWordNet">Expander passed through to <c>Index</c>.</param>
        /// <returns>
        /// 0 when redundancy checks are enabled and the lower-cased text is already tracked;
        /// otherwise the value returned by <c>Index</c> for the one-entry batch.
        /// </returns>
        private int Insert0(string myText, WordExpander expandWithWordNet)
        {
            checkDbLock();

            if (AssertReducndancyChecks)
            {
                bool alreadyTracked;
                lock (allContentIdPairs)
                {
                    ulong existingId;
                    alreadyTracked = allContentIdPairs.TryGetValue(myText.ToLower(), out existingId);
                }
                if (alreadyTracked)
                {
                    return 0;
                }
            }

            // Build a one-entry batch for the indexer.
            ulong newDocId = IncDocId();
            var singleEntry = new Dictionary<ulong, string> { { newDocId, myText } };
            int indexedCount = Index(singleEntry, expandWithWordNet);

            // The lookup table is only maintained while redundancy checks are switched on.
            if (AssertReducndancyChecks)
            {
                lock (allContentIdPairs)
                {
                    allContentIdPairs.Add(myText.ToLower(), newDocId);
                }
            }

            return indexedCount;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Resolves the query options from <paramref name="templateNode"/> and runs the search through
        /// <c>callDbQueryStatic</c> against <c>SearchSources</c>.
        /// </summary>
        /// <param name="searchTerm1">The term to search for.</param>
        /// <param name="dbgLog">Debug logger handed on to the search.</param>
        /// <param name="OnFalure">Optional fallback invoked when the search result is null or empty.</param>
        /// <param name="templateNode">Node whose "wordnet,synonyms" and "onlyUser" attributes are consulted.</param>
        /// <param name="threshold">Score threshold handed on to the search.</param>
        /// <param name="expandWithWordNet">Default for WordNet expansion; overridden by the node when it parses.</param>
        /// <param name="expandOnNoHits">Not used by this method.</param>
        /// <param name="reliablity">Receives the reliability reported by the search.</param>
        /// <returns>The search result, or the fallback's value when the result is empty and a fallback was given.</returns>
        public string callDbQuery0(string searchTerm1, OutputDelegate dbgLog, Func <Unifiable> OnFalure, XmlNode templateNode, float threshold, bool expandWithWordNet, bool expandOnNoHits, out float reliablity)
        {
            checkDbLock();

            // An explicit setting on the node wins over the caller's default.
            bool nodeSetting;
            if (StaticXMLUtils.TryParseBool(templateNode, "wordnet,synonyms", out nodeSetting))
            {
                expandWithWordNet = nodeSetting;
            }

            WordExpander wordNetExpander;
            if (expandWithWordNet)
            {
                wordNetExpander = WordNetExpand;
            }
            else
            {
                wordNetExpander = NoWordNetExpander;
            }

            // Optionally restrict answers to those mentioning the current user,
            // as in "what is my favorite color?".
            string userFilter = "";
            string onlyUserStr = StaticXMLUtils.GetAttribValue(templateNode, "onlyUser", "false").ToLower();
            if (onlyUserStr.Equals("true"))
            {
                userFilter = TripleStoreProxy.Entify(TheBot.BotUserID);
            }

            string res = callDbQueryStatic(SearchSources, searchTerm1, dbgLog, templateNode, threshold,
                                           out reliablity, userFilter, wordNetExpander);

            bool cameUpEmpty = string.IsNullOrEmpty(res);
            if (cameUpEmpty && OnFalure != null)
            {
                return OnFalure();
            }
            return res;
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Runs a search while holding <c>dbLock</c> and wraps each hit in a <c>OneSearchResult</c>.
 /// Expansion on zero hits is requested exactly when an expander is supplied.
 /// </summary>
 /// <param name="searchTerm">The term to search for.</param>
 /// <param name="expandWithWordNet">Expander to use; may be null.</param>
 /// <returns>The hits, or an empty array when the search throws (the exception is logged).</returns>
 public ICollection <ISearchResult> Search(string searchTerm, WordExpander expandWithWordNet)
 {
     lock (dbLock)
     {
         try
         {
             Object[] hitIds;
             string[] hitTexts;
             float[]  hitScores;
             Search(searchTerm, out hitIds, out hitTexts, out hitScores, expandWithWordNet, expandWithWordNet != null);

             var collected = new List <ISearchResult>();
             int index = 0;
             while (index < hitIds.Length)
             {
                 collected.Add(new OneSearchResult(hitIds[index], hitTexts[index], hitScores[index]));
                 index++;
             }
             return collected;
         }
         catch (Exception e)
         {
             writeToLog("ERROR Search {0}", e);
             return new ISearchResult[0];
         }
     }
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Indexes <paramref name="myText"/> as a single new document, provided <c>MayPush</c> allows it.
        /// </summary>
        /// <param name="myText">The text to index.</param>
        /// <param name="expandWordnet">Node whose "wordnet,synonyms" setting selects the expander.</param>
        /// <returns>-1 when <c>MayPush</c> returns null; otherwise the value returned by <c>Index</c>.</returns>
        public int callDbPush(string myText, XmlNode expandWordnet)
        {
            if (MayPush(myText, expandWordnet) == null)
            {
                return -1;
            }

            // WordNet expansion is the default; the node may switch it on or off explicitly.
            WordExpander expandWithWordNet = WordNetExpand;
            bool wantExpansion;
            if (StaticXMLUtils.TryParseBool(expandWordnet, "wordnet,synonyms", out wantExpansion))
            {
                if (wantExpansion)
                {
                    expandWithWordNet = WordNetExpand;
                }
                else
                {
                    expandWithWordNet = NoWordNetExpander;
                }
            }

            ulong newDocId = IncDocId();
            var singleEntry = new Dictionary<ulong, string> { { newDocId, myText } };

            int indexedCount = Index(singleEntry, expandWithWordNet);
            writeToLog("Indexed {0} docs.", indexedCount);
            return indexedCount;
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Stores the collaborators on the instance, makes the instance its own entity filter,
 /// and registers the default exclusions.
 /// </summary>
 /// <param name="englishFactiodStore">Engine kept in <c>EnglishFactiodStore</c>.</param>
 /// <param name="theBot">Bot kept in <c>TheBot</c>.</param>
 /// <param name="expander">Expander kept in <c>WordNetExpand</c>.</param>
 public TripleStoreFromEnglish(IEnglishFactiodEngine englishFactiodStore, AltBot theBot, WordExpander expander)
 {
     // Assignment order is kept as-is in case any of these members has setter logic.
     this.EntityFilter = this;
     this.TheBot = theBot;
     this.EnglishFactiodStore = englishFactiodStore;
     this.WordNetExpand = expander;

     this.AddDefaultExclusions();
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Produces at most one result: the text from <c>GetTextResult</c>, scored by <c>DefaultReliablity</c>.
        /// </summary>
        /// <param name="searchTerm1">The term to look up.</param>
        /// <param name="wordNetExpanderOnNoHits">Not used by this implementation.</param>
        /// <returns>
        /// <c>NO_RESULTS</c> when there is no text or the reliability is zero or negative;
        /// otherwise a one-element array labelled with this type's name and the search term.
        /// </returns>
        public virtual ICollection <ISearchResult> Search(string searchTerm1, WordExpander wordNetExpanderOnNoHits)
        {
            string answerText = GetTextResult(searchTerm1);

            if (!IsNullOrEmpty(answerText))
            {
                float score = DefaultReliablity(searchTerm1, answerText);

                // Written as a negated "<=" so a NaN score is treated exactly as before.
                if (!(score <= 0.0))
                {
                    string label = GetType().Name + ": " + searchTerm1;
                    return new[] { new OneSearchResult(label, answerText, score) };
                }
            }

            return NO_RESULTS;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Replaces the best match for <paramref name="searchQuery"/>: the top-ranked hit, if there is
        /// one, is deleted from the index and <paramref name="myText"/> is then inserted via <c>Insert0</c>.
        /// </summary>
        /// <param name="searchQuery">Query identifying the document to replace.</param>
        /// <param name="myText">Replacement text to index.</param>
        /// <param name="expandWithWordNet">Expander passed through to <c>Insert0</c>.</param>
        /// <returns>The value returned by <c>Insert0</c> for the replacement text.</returns>
        internal int Update0(string searchQuery, string myText, WordExpander expandWithWordNet)
        {
            checkDbLock();

            writeToLog("LUCENE:Replacing best \"{0}\"...", searchQuery);
            IndexSearcher indexSearcher = new IndexSearcher(_directory);

            try
            {
                QueryParser queryParser = new QueryParser(_fieldName, _analyzer);
                Query       query       = queryParser.Parse(searchQuery);
                Hits        hits        = indexSearcher.Search(query);

                if (hits.Length() > 0)
                {
                    // DeleteDocument takes the index-internal document number. Passing the literal 0
                    // removed whichever document happened to be number 0 in the index; the best hit's
                    // document number is hits.Id(0).
                    indexSearcher.GetIndexReader().DeleteDocument(hits.Id(0));
                }
            }
            finally
            {
                // Always release the searcher, even when parsing or searching throws.
                indexSearcher.Close();
            }

            return Insert0(myText, expandWithWordNet);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Reads a text file line by line and indexes its content in batches.
        /// Blank lines and lines starting with '#' are skipped, and at most 80000 lines are read.
        /// </summary>
        /// <param name="filename">File to load; resolved through <c>HostSystem</c>.</param>
        /// <param name="expandWithWordNet">Expander passed through to <c>Index</c>.</param>
        /// <returns>
        /// The number of lines handed to <c>Index</c> across all batches, or 0 when the file is missing.
        /// </returns>
        internal long LoadFileByLines0(string filename, WordExpander expandWithWordNet)
        {
            checkDbLock();

            if (!HostSystem.FileExists(filename))
            {
                writeToLog(" LoadFileByLines cannot find file '{0}'", filename);
                return 0;
            }

            const long maxLines  = 80000;
            const long batchSize = 1000;

            long   totals           = 0;
            long   linecount        = 0;
            string absoluteFileName = HostSystem.GetAbsolutePath(filename);
            Dictionary <ulong, string> contentIdPairs = new Dictionary <ulong, string>();

            // The using block guarantees the reader is released even if indexing throws mid-file.
            using (System.IO.TextReader tr = new StreamReader(absoluteFileName))
            {
                string line;
                while ((linecount < maxLines) && ((line = tr.ReadLine()) != null))
                {
                    linecount++;
                    if (linecount % batchSize == 0)
                    {
                        // Flush what has accumulated so far before handling the current line.
                        writeToLog("Lucene learn {0}", linecount);
                        int numIndexedb = Index(contentIdPairs, expandWithWordNet);
                        writeToLog("Indexed {0} lines.", numIndexedb);

                        // Count the lines in this batch. The previous code added the cumulative
                        // line counter here, which inflated the total on every flush.
                        totals += contentIdPairs.Count;

                        contentIdPairs = new Dictionary <ulong, string>();
                    }

                    line = line.Trim();
                    if (line.Length != 0 && line[0] != '#')
                    {
                        contentIdPairs.Add(IncDocId(), line);
                    }
                }
            }

            // Index whatever is left after the last full batch, and include it in the total.
            int numIndexed = Index(contentIdPairs, expandWithWordNet);
            writeToLog("Indexed {0} lines.", numIndexed);
            totals += contentIdPairs.Count;

            writeToLog("Last Line Mlearn {0}", linecount);
            return totals;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Adds one Lucene document per dictionary entry. Each document carries the raw text, the
        /// expander's output for that text, and the entry's id as a non-analyzed field.
        /// The index is created when it does not exist yet and is optimized before the writer is closed.
        /// </summary>
        /// <param name="txtIdPairToBeIndexed">Document id to text pairs to add.</param>
        /// <param name="expandWithWordNet">Expander for the hyponym field; null falls back to <c>NoWordNetExpander</c>.</param>
        /// <returns>The writer's <c>DocCount()</c> after the documents were added.</returns>
        internal int Index0(Dictionary <ulong, string> txtIdPairToBeIndexed, WordExpander expandWithWordNet)
        {
            checkDbLock();

            bool createIndex = !IndexReader.IndexExists(_directory);

            // The fallback does not depend on the entry, so resolve it once up front.
            WordExpander expander = expandWithWordNet ?? NoWordNetExpander;

            IndexWriter indexWriter = new IndexWriter(_directory, _analyzer, createIndex);
            try
            {
                indexWriter.SetUseCompoundFile(false);

                foreach (KeyValuePair <ulong, string> entry in txtIdPairToBeIndexed)
                {
                    string text = entry.Value;

                    Document document = new Document();
                    // Raw text
                    document.Add(new Field(_fieldName, text, Field.Store.YES, Field.Index.TOKENIZED));
                    // Undisambiguated hyponyms of nouns in text
                    string wn_hypo = expander(text, true);
                    document.Add(new Field(HYPO_FIELD_NAME, wn_hypo, Field.Store.YES, Field.Index.TOKENIZED));
                    // The doc ID
                    document.Add(new Field(DOC_ID_FIELD_NAME, entry.Key.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                    indexWriter.AddDocument(document);
                }

                // NOTE(review): DocCount() looks like an index-wide count rather than the number
                // added by this call - confirm against what callers expect.
                int numIndexed = indexWriter.DocCount();

                indexWriter.Optimize();
                return numIndexed;
            }
            finally
            {
                // Close the writer on every path; previously an exception while adding
                // documents left it open.
                indexWriter.Close();
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Queries every source in <paramref name="searchables"/> and builds a reply from the
        /// top-scoring hits whose text contains <paramref name="userFilter"/>.
        /// </summary>
        /// <param name="searchables">The sources to query; each one is searched once.</param>
        /// <param name="searchTerm1">The term to search for.</param>
        /// <param name="dbgLog">Debug logger; null means <c>TextFilter.DEVNULL</c>.</param>
        /// <param name="templateNode">Node supplying the optional "max" (default "1") and "threshold" attributes.</param>
        /// <param name="threshold">Minimum score; replaced by the node's "threshold" attribute when that parses as a float.</param>
        /// <param name="reliablity">Receives the top score when a reply is produced, otherwise 0.</param>
        /// <param name="userFilter">Substring each accepted hit must contain; null or empty accepts every hit.</param>
        /// <param name="wordNetExpander">Expander handed to each source's search.</param>
        /// <returns>
        /// The space-joined text of up to "max" top-scoring hits; null when no hit reaches the threshold;
        /// <c>Unifiable.Empty</c> when an exception occurs (the exception is reported through the debug logger).
        /// </returns>
        public string callDbQueryStatic(ICollection <IDocSearch> searchables, string searchTerm1, OutputDelegate dbgLog,
                                        XmlNode templateNode, float threshold,
                                        out float reliablity, string userFilter, WordExpander wordNetExpander)
        {
            reliablity = 0.0f;
            try
            {
                // if dbgLog == null then use /dev/null logger
                dbgLog = dbgLog ?? TextFilter.DEVNULL;
                // string.Contains(null) throws, so treat a missing filter as "no filter".
                userFilter = userFilter ?? "";

                string maxReplyStr = StaticXMLUtils.GetAttribValue(templateNode, "max", "1").ToLower();
                int    maxReply    = Int16.Parse(maxReplyStr);

                string thresholdStr = StaticXMLUtils.GetAttribValue(templateNode, "threshold", null);
                if (!string.IsNullOrEmpty(thresholdStr))
                {
                    float parsed;
                    if (float.TryParse(thresholdStr, out parsed))
                    {
                        threshold = parsed;
                    }
                }

                var results = new List <ISearchResult>();
                dbgLog("Searching for the term \"{0}\"...", searchTerm1);
                foreach (var ss in searchables)
                {
                    // Query the source itself. The loop used to call this instance's Search for every
                    // entry, which ignored the other sources and duplicated this one's hits.
                    results.AddRange(ss.Search(searchTerm1, wordNetExpander));
                }

                int numHits = results.Count;
                dbgLog("Number of hits == {0}.", numHits);

                // First pass: count hits that pass filter and threshold, and find the best score.
                float topScore = 0;
                int   goodHits = 0;
                for (int i = 0; i < numHits; ++i)
                {
                    dbgLog("{0}) Doc-id: {1}; Content: \"{2}\" with score {3}.", i + 1, results[i].ID, results[i].Text, results[i].Score);
                    if (results[i].Text.Contains(userFilter))
                    {
                        if (results[i].Score >= threshold)
                        {
                            goodHits++;
                        }
                        if (results[i].Score > topScore)
                        {
                            topScore = results[i].Score;
                        }
                    }
                }

                if ((goodHits <= 0) || (topScore < threshold))
                {
                    return(null);
                }

                // Second pass: should be weighted, but for now only the highest-scoring hits are returned.
                string reply       = "";
                int    numReturned = 0;
                if (goodHits < maxReply)
                {
                    maxReply = goodHits;
                }
                for (int i = 0; ((i < numHits) && (numReturned < maxReply)); i++)
                {
                    if (results[i].Text.Contains(userFilter) && (results[i].Score >= topScore))
                    {
                        reply = reply + " " + results[i].Text;
                        numReturned++;
                        reliablity = topScore;
                    }
                }
                Unifiable converseMemo = reply.Trim();
                dbgLog(" reply = {0}", reply);
                return(converseMemo);
            }
            catch (Exception e)
            {
                // Keep the best-effort contract (empty result on failure) but stop hiding the cause.
                if (dbgLog != null)
                {
                    dbgLog("ERROR callDbQueryStatic {0}", e);
                }
                return(Unifiable.Empty);
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Searches the index for the given term. When the plain query finds nothing and expansion is
        /// requested, the term is expanded and searched again across the text and hyponym fields.
        /// </summary>
        /// <param name="searchTerm">The search term as a string that the caller wants to search for within the
        /// index as referenced by this object.</param>
        /// <param name="ids">Receives the matching Lucene <c>Document</c> objects, one per hit.</param>
        /// <param name="results">Receives the stored text of each hit.</param>
        /// <param name="scores">Receives the score of each hit.</param>
        /// <param name="expandWithWordNet">Expander applied to the term for the fallback query; when null no
        /// fallback query is run.</param>
        /// <param name="expandOnNoHits">Whether to run the expanded fallback query when there are no hits.</param>
        /// <returns>The number of hits placed in the out arrays; 0 when no database is present.</returns>
        internal int Search(string searchTerm, out Object[] ids, out string[] results, out float[] scores, WordExpander expandWithWordNet, bool expandOnNoHits)
        {
            checkDbLock();
            if (!IsDbPresent)
            {
                ids     = new Document[0];
                results = new string[0];
                scores  = new float[0];
                return(0);
            }
            IndexSearcher indexSearcher = new IndexSearcher(_directory);

            try
            {
                QueryParser queryParser = new QueryParser(_fieldName, _analyzer);
                Query       query       = queryParser.Parse(searchTerm);
                Hits        hits        = indexSearcher.Search(query);

                // Fall back to the expanded query only when an expander was actually supplied;
                // invoking a null expander here used to throw a NullReferenceException.
                if (hits.Length() == 0 && expandOnNoHits && expandWithWordNet != null)
                {
                    MultiFieldQueryParser queryParserWN = new MultiFieldQueryParser(
                        new string[] { _fieldName, MyLuceneIndexer.HYPO_FIELD_NAME },
                        _analyzer);
                    string hypo_expand = expandWithWordNet(searchTerm, false);
                    Query  queryWN     = queryParserWN.Parse(hypo_expand);
                    hits = indexSearcher.Search(queryWN);
                }

                // Copy whichever hit list applies into the out arrays.
                int numHits = hits.Length();
                ids     = new Document[numHits];
                results = new string[numHits];
                scores  = new float[numHits];

                for (int i = 0; i < numHits; ++i)
                {
                    Document hdoc = hits.Doc(i);
                    ids[i]     = hdoc;
                    results[i] = hdoc.Get(_fieldName);
                    scores[i]  = hits.Score(i);
                }
            }
            finally
            {
                // Release the searcher on every path.
                indexSearcher.Close();
            }
            return(ids.Length);
        }/*
Ejemplo n.º 12
0
 /// <summary>
 /// Inserts one piece of text by running <c>Insert0</c> through <c>EnsureLockedDatabase</c>.
 /// </summary>
 /// <param name="myText">The text to insert.</param>
 /// <param name="templateNode">Not used by this method.</param>
 /// <param name="expandWithWordNet">Expander passed through to <c>Insert0</c>.</param>
 /// <returns>The value produced by <c>Insert0</c>.</returns>
 public int InsertFactiod(string myText, XmlNode templateNode, WordExpander expandWithWordNet)
 {
     int inserted = EnsureLockedDatabase(() =>
     {
         return Insert0(myText, expandWithWordNet);
     });
     return inserted;
 }
Ejemplo n.º 13
0
 /// <summary>
 /// Indexes a batch of content by running <c>Index0</c> through <c>EnsureLockedDatabase</c>.
 /// Every entry pairs a unique ulong identifier (the key) with the string to be indexed (the value),
 /// so callers can later act on a document by its id.
 /// NOTE(review): earlier documentation states that stop words (the, this, at, ...) are not indexed;
 /// that presumably depends on the configured analyzer - confirm.
 /// </summary>
 /// <param name="txtIdPairToBeIndexed">Dictionary mapping each unique document id to its text.</param>
 /// <param name="expandWithWordNet">Expander passed through to <c>Index0</c>.</param>
 /// <returns>The value produced by <c>Index0</c>.</returns>
 public int Index(Dictionary <ulong, string> txtIdPairToBeIndexed, WordExpander expandWithWordNet)
 {
     int indexed = EnsureLockedDatabase(() =>
     {
         return Index0(txtIdPairToBeIndexed, expandWithWordNet);
     });
     return indexed;
 }
Ejemplo n.º 14
0
 /// <summary>
 /// Forwards the insert request unchanged to <c>assertTo</c>.
 /// </summary>
 /// <param name="myText">The text to insert.</param>
 /// <param name="templateNode">Node forwarded as-is.</param>
 /// <param name="WordNetExpand">Expander forwarded as-is.</param>
 /// <returns>The value returned by <c>assertTo.InsertFactiod</c>.</returns>
 public int InsertFactiod(string myText, XmlNode templateNode, WordExpander WordNetExpand)
 {
     int inserted = assertTo.InsertFactiod(myText, templateNode, WordNetExpand);
     return inserted;
 }