public StaticQueryable (string index_name, string index_path, bool read_only_mode) : base (index_path, read_only_mode)
{
	Logger.Log.Debug ("Initializing static queryable: {0}", index_path);

	if (Directory.Exists (Path.Combine (index_path, "TextCache"))) {
		try {
			text_cache = new TextCache (index_path, true);
		} catch (UnauthorizedAccessException) {
			Logger.Log.Warn ("Unable to purge static queryable text cache in {0}. Will run without it.", index_path);
		}
	}
}
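// Illustrative sketch (assumed caller, not part of this file): a static
// queryable is constructed over a prebuilt on-disk index directory, and
// read_only_mode = true keeps the underlying Lucene index unmodified.
// The index name and path below are hypothetical.
//
//     StaticQueryable queryable =
//             new StaticQueryable ("manpages", "/var/cache/beagle/manpages-index", true);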
private IEnumerable GetTextLinks (Uri uri, TextCache text_cache)
{
	if (text_cache == null)
		yield break;

	IList<string> links = text_cache.GetLinks (uri);
	if (links == null)
		yield break;

	foreach (string link in links)
		yield return Property.NewKeyword ("TextLinks", link);
}
private ICollection DoLowLevelRDFQuery (Query query, PropertyType pred_type, string predicate, string field_value, TextCache text_cache)
{
	Stopwatch total, a, b, c, d, e, f;

	total = new Stopwatch ();
	a = new Stopwatch ();
	b = new Stopwatch ();
	c = new Stopwatch ();
	d = new Stopwatch ();
	e = new Stopwatch ();
	f = new Stopwatch ();

	total.Start ();
	a.Start ();

	// Assemble all of the parts into a bunch of Lucene queries

	ArrayList primary_required_part_queries;
	ArrayList secondary_required_part_queries;

	LNS.BooleanQuery primary_prohibited_part_query;
	LNS.BooleanQuery secondary_prohibited_part_query;

	AndHitFilter all_hit_filters;

	ArrayList term_list;

	term_list = AssembleQuery (query,
			null,
			null,
			out primary_required_part_queries,
			out secondary_required_part_queries,
			out primary_prohibited_part_query,
			out secondary_prohibited_part_query,
			out all_hit_filters);

	a.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Building queries took {1}", IndexName, a);

	// If we have no required parts, give up.
	if (primary_required_part_queries == null)
		return null;

	b.Start ();

	//
	// Now that we have all of these nice queries, let's execute them!
	//

	// Create the searchers that we will need.

	IndexReader primary_reader;
	LNS.IndexSearcher primary_searcher;
	IndexReader secondary_reader;
	LNS.IndexSearcher secondary_searcher;

	if (! BuildSearchers (out primary_reader, out primary_searcher, out secondary_reader, out secondary_searcher))
		return null;

	b.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Readers/searchers built in {1}", IndexName, b);

	// Build whitelists and blacklists for search subsets.
	c.Start ();

	// Possibly create our whitelists from the search subset.
	LuceneBitArray primary_whitelist, secondary_whitelist;
	CreateQueryWhitelists (null,
			primary_searcher,
			secondary_searcher,
			primary_prohibited_part_query,
			secondary_prohibited_part_query,
			out primary_whitelist,
			out secondary_whitelist);

	c.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Whitelists and blacklists built in {1}", IndexName, c);

	// Now run the low level queries against our indexes.
	d.Start ();

	BetterBitArray primary_matches = null;

	if (primary_required_part_queries != null) {
		if (secondary_searcher != null)
			primary_matches = DoRequiredQueries_TwoIndex (primary_searcher,
					secondary_searcher,
					primary_required_part_queries,
					secondary_required_part_queries,
					primary_whitelist,
					secondary_whitelist);
		else
			primary_matches = DoRequiredQueries (primary_searcher,
					primary_required_part_queries,
					primary_whitelist);
	}

	d.Stop ();
	if (Debug)
		Logger.Log.Debug ("###### {0}: Low-level queries finished in {1} and returned {2} matches", IndexName, d, primary_matches.TrueCount);

	e.Start ();

	int count = 0;
	Document doc;

	ArrayList hits = new ArrayList (primary_matches.TrueCount);

	TermDocs secondary_term_docs = null;
	if (secondary_searcher != null)
		secondary_term_docs = secondary_searcher.Reader.TermDocs ();

	FieldSelector fields = null;
	if (predicate != null)
		fields = new MapFieldSelector (new string[] { "Uri", "Timestamp", PropertyToFieldName (pred_type, predicate) });

	for (int match_index = primary_matches.GetNextTrueIndex (0);
	     match_index < primary_matches.Count;
	     match_index = primary_matches.GetNextTrueIndex (++ match_index)) {

		count++;

		// If we have a HitFilter, apply it.
		// RDF FIXME: Ignore Hit Filter for now

		// If predicate was not specified but object was specified,
		// then figure out the right predicate
		if (predicate == null && field_value != null) {
			Hit hit = new Hit ();

			doc = primary_searcher.Doc (match_index);
			hit.Uri = GetUriFromDocument (doc);
			hit.Timestamp = StringFu.StringToDateTime (doc.Get ("Timestamp"));

			bool found_matching_predicate = false;

			foreach (Field field in doc.Fields ()) {
				if (! FieldIsPredicate (field, field_value))
					continue;

				Property prop = new Property ();
				prop.Type = pred_type;
				prop.Key = predicate;
				prop.Value = field_value;
				hit.AddProperty (prop);

				found_matching_predicate = true;
			}

			// Now get the matching predicate from the secondary index
			if (secondary_searcher == null) {
				doc = null;
			} else {
				Term term = new Term ("Uri", doc.Get ("Uri"));
				secondary_term_docs.Seek (term);
				if (secondary_term_docs.Next ())
					doc = secondary_searcher.Doc (secondary_term_docs.Doc ());
			}

			if (doc != null) {
				foreach (Field field in doc.Fields ()) {
					if (! FieldIsPredicate (field, field_value))
						continue;

					Property prop = new Property ();
					prop.Type = pred_type;
					prop.Key = predicate;
					prop.Value = field_value;
					hit.AddProperty (prop);

					found_matching_predicate = true;
				}
			}

			if (! found_matching_predicate) {
				// No matching predicate found.
				// This means some unstored field matched the query.
				// FIXME: Add a synthetic property #text
				hit.AddProperty (Property.New ("#text", field_value));
			}

			hits.Add (hit);
		} else if (predicate == "TextLinks") {
			// Special treatment: TextLinks is not stored but can be queried
			doc = primary_searcher.Doc (match_index, fields_timestamp_uri);
			Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs, fields);

			if (field_value != null) {
				hit.AddProperty (Property.New ("TextLinks", field_value));
			} else {
				foreach (Property text_link_property in GetTextLinks (hit.Uri, text_cache))
					hit.AddProperty (text_link_property);
			}

			hits.Add (hit);
		} else {
			doc = primary_searcher.Doc (match_index, fields);
			Hit hit = CreateHit (doc, secondary_reader, secondary_term_docs, fields);

			foreach (Property prop in hit.Properties) {
				if (prop.Key == predicate)
					prop.Value = field_value;
			}

			hits.Add (hit);
		}
	}

	e.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Query results generated in {1}", IndexName, e);

	//
	// Finally, we clean up after ourselves.
	//

	f.Start ();
	CloseSearchers (primary_reader, primary_searcher, secondary_reader, secondary_searcher);
	f.Stop ();
	if (Debug)
		Log.Debug ("###### {0}: Readers/searchers released in {1}", IndexName, f);

	total.Stop ();

	if (Debug) {
		Log.Debug ("###### {0}: Query time breakdown:", IndexName);
		Log.Debug ("###### {0}:   Build queries  {1,6} ({2:0.0}%)", IndexName, a, 100 * a.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}:   Got readers    {1,6} ({2:0.0}%)", IndexName, b, 100 * b.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}:   Whitelists     {1,6} ({2:0.0}%)", IndexName, c, 100 * c.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}:   Queries        {1,6} ({2:0.0}%)", IndexName, d, 100 * d.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}:   Gen'd Results  {1,6} ({2:0.0}%)", IndexName, e, 100 * e.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}:   Reader cleanup {1,6} ({2:0.0}%)", IndexName, f, 100 * f.ElapsedTime / total.ElapsedTime);
		Log.Debug ("###### {0}:   TOTAL          {1,6}", IndexName, total);

		Logger.Log.Debug ("###### {0}: Total query run in {1}", IndexName, total);
	}

	return hits;
}
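// A minimal sketch of the match-iteration idiom used above, assuming only
// the BetterBitArray behavior visible in this method: GetNextTrueIndex
// returns the index of the first set bit at or after its argument, so the
// loop visits each matching Lucene document number exactly once.
//
//     for (int i = matches.GetNextTrueIndex (0);
//          i < matches.Count;
//          i = matches.GetNextTrueIndex (++ i)) {
//             // i is the document number of a primary-index match
//     }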
///////// RDF fu ///////////////////////////////////////////////

// Returns a collection of Uris
// HitFilter and UriFilter are ignored for now.
// They will come into play in the final FetchDocument part.
// FIXME: Should RDFQuery do any query mapping using backend_query_part_hook?
// I think it should not. QueryPart hooks are for human beings; RDF is for software.
public ICollection DoRDFQuery (Query _query, TextCache text_cache)
{
	RDFQuery query = (RDFQuery) _query;

	string subject, predicate, _object;
	PropertyType pred_type;

	subject = query.SubjectString;
	predicate = query.Predicate;
	pred_type = query.PredicateType;
	_object = query.Object;

	if (Debug)
		Logger.Log.Debug ("###### {0}: Starting low-level queries '{1}' : '{4}:{2}' = '{3}'", IndexName, subject, predicate, _object, pred_type);

	// ******** 8 cases **********

	// Return all uris
	if (subject == String.Empty && predicate == String.Empty && _object == String.Empty) {
		ICollection hits = GetAllHitsByUri ().Values;
		foreach (Hit hit in hits)
			foreach (Property text_link_property in GetTextLinks (hit.Uri, text_cache))
				hit.AddProperty (text_link_property);
		return hits;
	}

	// Normal query
	if (subject == String.Empty && predicate == String.Empty && _object != String.Empty) {
		QueryPart_Text part = new QueryPart_Text ();
		part.Text = _object;
		part.SearchFullText = false; // We only search properties in RDF query

		query.AddPart (part);
		return DoLowLevelRDFQuery (query, pred_type, predicate, _object, text_cache);
	}

	// Return uris for all documents with this property
	if (subject == String.Empty && predicate != String.Empty && _object == String.Empty) {
		string field_name = PropertyToFieldName (pred_type, predicate);

		QueryPart_Property part = new QueryPart_Property ();
		part.Type = PropertyType.Internal;
		part.Key = "Properties";
		part.Value = field_name;

		query.AddPart (part);
		return DoLowLevelRDFQuery (query, pred_type, predicate, null, text_cache);
	}

	// Property query
	if (subject == String.Empty && predicate != String.Empty && _object != String.Empty) {
		QueryPart_Property part = new QueryPart_Property ();
		part.Type = pred_type;
		part.Key = predicate;
		part.Value = _object;

		query.AddPart (part);
		return DoLowLevelRDFQuery (query, pred_type, predicate, _object, text_cache);
	}

	// Return if the URI exists
	if (subject != String.Empty && predicate == String.Empty && _object == String.Empty) {
		QueryPart_Uri part = new QueryPart_Uri ();
		part.Uri = new Uri (subject, true); // better be URI!

		query.AddPart (part);
		// FIXME: Which properties to return in the hit? All or none?
		return DoLowLevelRDFQuery (query, pred_type, predicate, null, text_cache);
	}

	// Normal query in the document with this URI
	if (subject != String.Empty && predicate == String.Empty && _object != String.Empty) {
		QueryPart_Uri uri_part = new QueryPart_Uri ();
		uri_part.Uri = new Uri (subject, true); // better be URI!
		query.AddPart (uri_part);

		QueryPart_Text part = new QueryPart_Text ();
		part.Text = _object;
		part.SearchFullText = false; // We only search properties in RDF query

		query.AddPart (part);
		return DoLowLevelRDFQuery (query, pred_type, predicate, _object, text_cache);
	}

	// Return URI if the document with this URI contains this property
	if (subject != String.Empty && predicate != String.Empty && _object == String.Empty) {
		ArrayList returned_uris = new ArrayList (1);

		ArrayList uri_list = new ArrayList (1);
		uri_list.Add (new Uri (subject, true));

		string field_name = PropertyToFieldName (pred_type, predicate);
		FieldSelector fields = new MapFieldSelector (new string[] { "Uri", "Timestamp", field_name });

		ICollection hits = GetHitsForUris (uri_list, fields);
		if (predicate == "TextLinks") {
			foreach (Hit hit in hits)
				foreach (Property text_link_property in GetTextLinks (hit.Uri, text_cache))
					hit.AddProperty (text_link_property);
		}

		return hits;
	}

	// Property query in the document with this URI
	if (subject != String.Empty && predicate != String.Empty && _object != String.Empty) {
		QueryPart_Uri uri_part = new QueryPart_Uri ();
		uri_part.Uri = new Uri (subject, true); // better be URI!
		query.AddPart (uri_part);

		QueryPart_Property part = new QueryPart_Property ();
		part.Type = pred_type;
		part.Key = predicate;
		part.Value = _object;

		query.AddPart (part);
		return DoLowLevelRDFQuery (query, pred_type, predicate, _object, text_cache);
	}

	throw new Exception ("Never reached");
}
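// Summary of the eight (subject, predicate, object) cases dispatched above,
// where "-" marks an empty component:
//
//     s  p  o    behavior
//     -  -  -    return every hit in the index
//     -  -  o    text query over properties for 'o'
//     -  p  -    return hits carrying property 'p'
//     -  p  o    property query 'p' = 'o'
//     s  -  -    return the hit for URI 's', if it exists
//     s  -  o    text query for 'o' restricted to document 's'
//     s  p  -    return document 's' with the values of property 'p'
//     s  p  o    property query 'p' = 'o' restricted to document 's'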
/* Returns false if content can't/needn't be indexed.
 * If AlreadyFiltered, then we don't return a filter but return true.
 */
static public bool FilterIndexable (Indexable indexable, TextCache text_cache, out Filter filter)
{
	filter = null;
	ICollection filters = null;

	if (indexable.Filtering == IndexableFiltering.AlreadyFiltered)
		return true;

	if (! ShouldWeFilterThis (indexable))
		return false;

	string path = null;

	// First, figure out which filter we should use to deal with
	// the indexable.

	// If a specific mime type is specified, try to index as that type.
	if (indexable.MimeType != null)
		filters = CreateFiltersFromMimeType (indexable.MimeType);

	if (indexable.ContentUri.IsFile) {
		path = indexable.ContentUri.LocalPath;

		// Otherwise, set the mime type for a directory,
		// or sniff it from the file.
		if (indexable.MimeType == null) {
			if (Directory.Exists (path)) {
				indexable.MimeType = "inode/directory";
				indexable.NoContent = true;
			} else if (File.Exists (path)) {
				indexable.MimeType = XdgMime.GetMimeType (path);
			} else {
				Log.Warn ("Unable to filter {0}. {1} not found.", indexable.DisplayUri, path);
				return false;
			}
		}

		// Set the timestamp to the last write time, if it isn't
		// set by the backend.
		if (! indexable.ValidTimestamp && indexable.IsNonTransient)
			indexable.Timestamp = FileSystem.GetLastWriteTimeUtc (path);

		// Check the timestamp to make sure the file hasn't
		// disappeared from underneath us.
		if (! FileSystem.ExistsByDateTime (indexable.Timestamp)) {
			Log.Warn ("Unable to filter {0}. {1} appears to have disappeared from underneath us", indexable.DisplayUri, path);
			return false;
		}

		if (filters == null || filters.Count == 0)
			filters = CreateFiltersFromIndexable (indexable);
	}

	// We don't know how to filter this, so there is nothing else to do.
	// (filters can still be null here if the indexable is not a file and
	// no mime type was given, so guard against that as well.)
	if (filters == null || filters.Count == 0) {
		if (! indexable.NoContent)
			Logger.Log.Debug ("No filter for {0} ({1}) [{2}]", indexable.DisplayUri, path, indexable.MimeType);
		return false;
	}

	foreach (Filter candidate_filter in filters) {
		if (Debug)
			Logger.Log.Debug ("Testing filter: {0}", candidate_filter);

		// Hook up the snippet writer.
		if (candidate_filter.SnippetMode && text_cache != null) {
			if (candidate_filter.OriginalIsText && indexable.IsNonTransient) {
				text_cache.MarkAsSelfCached (indexable.Uri);
			} else if (indexable.CacheContent) {
				TextWriter writer = text_cache.GetWriter (indexable.Uri);
				candidate_filter.AttachSnippetWriter (writer);
			}
		}

		// Set the indexable on the filter.
		candidate_filter.Indexable = indexable;

		// Open the filter, copy the file's properties to the indexable,
		// and hook up the TextReaders.
		bool successful_open = false;
		TextReader text_reader;
		Stream binary_stream;

		if (path != null)
			successful_open = candidate_filter.Open (path);
		else if ((text_reader = indexable.GetTextReader ()) != null)
			successful_open = candidate_filter.Open (text_reader);
		else if ((binary_stream = indexable.GetBinaryStream ()) != null)
			successful_open = candidate_filter.Open (binary_stream);

		if (successful_open) {
			// Set FileType
			indexable.AddProperty (Property.NewKeyword ("beagle:FileType", candidate_filter.FileType));

			indexable.SetTextReader (candidate_filter.GetTextReader ());
			indexable.SetHotTextReader (candidate_filter.GetHotTextReader ());
#if ENABLE_RDF_ADAPTER
			indexable.Links = candidate_filter.Links;
#endif

			if (Debug)
				Logger.Log.Debug ("Successfully filtered {0} with {1}", path, candidate_filter);

			filter = candidate_filter;
			return true;
		} else {
			Log.Warn ("Error in filtering {0} with {1}, falling back", path, candidate_filter);
			candidate_filter.Cleanup ();
		}
	}

	if (Debug)
		Logger.Log.Debug ("None of the matching filters could process the file: {0}", path);

	return false;
}
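// Typical call pattern (an illustrative sketch; the indexing loop that
// drives this method is assumed, not shown in this file): on success the
// caller reads the filtered content through the indexable's readers, and
// the returned filter is presumed to need a Cleanup () call once this
// indexable has been processed.
//
//     Filter filter;
//     if (FilterIndexable (indexable, text_cache, out filter)) {
//             TextReader reader = indexable.GetTextReader ();
//             // ... index the filtered content ...
//             // filter.Cleanup (); // assumed caller responsibility
//     }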