public override void Initialize(Microsoft.Office.Server.Search.Connector.IConnectionContext context)
        {
            //
            // Translates the crawled URL into a UNC path and, when that path exists on disk,
            // records which entity ("MyFolder" or "MyFile") and which identity it refers to.
            // When the path is neither an existing directory nor an existing file, the entity
            // and identity members are left untouched.
            //
            Uri crawlUri = context.Path;

            this.lobSystemInstance = this.lobSystem.GetLobSystemInstances()[0].Value;

            //
            // Rebuild the URL as \\host\path, then decode spaces and other characters in
            // file names that were encoded by search.
            //
            string encodedPath = string.Concat(@"\\", crawlUri.Host, crawlUri.AbsolutePath.Replace('/', '\\'));
            string uncPath = SPHttpUtility.UrlPathDecode(encodedPath, false);

            //
            // Decide which entity the path maps to. Directories are checked first; the file
            // check only runs when the path is not a directory.
            //
            string entityName;
            if (Directory.Exists(uncPath))
            {
                // A folder: entity MyFolder, so that the id enumerator, associations,
                // specific finder and stream accessors are called for it.
                entityName = "MyFolder";
            }
            else if (File.Exists(uncPath))
            {
                // A file: entity MyFile, so that findSpecific is called for it.
                entityName = "MyFile";
            }
            else
            {
                // Nothing on disk at this path; nothing further to populate.
                return;
            }

            //
            // Both kinds of item use the decoded UNC path itself as their identity.
            //
            this.entity = this.Catalog.GetEntity("MyFileConnector", entityName);
            this.identity = new Microsoft.BusinessData.Runtime.Identity(uncPath);
        }
Example #2
0
        /// <summary>
        /// This method does the processing of incoming URLs. The intent here is to receive a URL via this context
        /// object, parse it to determine what metadata object it refers to, and then populate members of this class
        /// with this information.
        /// </summary>
        /// <param name="context">
        /// Supplies the connection context. The context contains (most importantly) the URL of the item being crawled,
        /// and also other information about the current crawl.
        /// </param>
        /// <exception cref="Microsoft.BusinessData.Runtime.RuntimeException">
        /// Thrown when the URL has no path segment naming an entity type, or when that segment is neither
        /// "MyFolder" nor "MyFile" (compared case-insensitively).
        /// </exception>
        public override void Initialize(
            Microsoft.Office.Server.Search.Connector.IConnectionContext context)
        {
            Uri sourceUri = context.Path;

            //
            // A URL can point to a LobSystem, a LobSystemInstance, an Entity, or a specific instance of an Entity.
            // A specific instance of an Entity for MyFileConnector would be, of course, an actual file or folder on
            // disk. The other BCS metadata objects exist to provide you more flexibility in designing your connector
            // by providing slightly different crawling behaviors (which we'll detail below).
            //
            // Your job in this method is to parse the URL and decide what it's pointing to. Then, populate the
            // appropriate class members to indicate what it's pointing to.
            //
            // To indicate that the supplied URL refers to a(n):         Populate only these class members:
            //
            //                                         LobSystem         this.LobSystem
            //
            //                                 LobSystemInstance         this.LobSystem
            //                                                           this.LobSystemInstance
            //
            //                                            Entity         this.LobSystem
            //                                                           this.LobSystemInstance
            //                                                           this.Entity
            //
            //                                   Entity instance         this.LobSystem
            //                                                           this.LobSystemInstance
            //                                                           this.Entity
            //                                                           this.Identity
            //
            // What happens when these BCS metadata objects are crawled? Obviously, if you've got an entity instance,
            // you're crawling an actual item in your repository. But what about the others? In general, there's no real
            // data or metadata associated with any of them until you get down to an actual Entity instance (i.e., a
            // file or folder on disk, in the case of this connector). So the only real thing that happens when these
            // objects are crawled is that they emit their children, which will then subsequently be crawled. So here's
            // what the "children" of each of these metadata objects are:
            //
            //  Metadata object type    Children that are emitted when crawled
            //
            //  LobSystem               All LobSystemInstances in the model file that are marked with the
            //                          'ShowInSearchUI' property.
            //
            //  LobSystemInstance       If at least one Entity defined in this LobSystemInstance has a Finder method
            //                          with the 'RootFinder' property set, the LobSystemInstance will emit all Entities
            //                          that have Finder methods marked with the 'RootFinder' property. If an Entity
            //                          does not have a RootFinder, it will not be emitted.
            //
            //                          If no Entities in the LobSystemInstance have a Finder marked with the
            //                          'RootFinder' property, the LobSystemInstance will emit any Entities that have
            //                          both IdEnumerator and SpecificFinder methods defined in the model file. Note: If
            //                          you define only one of the two (either IdEnumerator or SpecificFinder) in this
            //                          case, an exception will be thrown at crawl time. You must define both to be
            //                          crawled in this manner.
            //
            // (Up to this point, with the previous two metadata objects, we had not yet called into the custom
            // connector shim. Instead, we were able to figure out what to emit from the model file alone. Starting with
            // the Entity, we will be calling into the custom connector shim, and also the full and incremental crawl
            // behavior can be different...)
            //
            // Metadata object type     Children that are emitted when crawled
            //
            // Entity                   Full crawl:
            //                          If the Entity has a Finder with the 'RootFinder' property set, that method is
            //                          called. Whatever entity instances your shim emits are the child items of the
            //                          Entity. If there is no Finder with the 'RootFinder' property set, but there
            //                          is an IdEnumerator defined for the entity, that method will be called, and
            //                          whatever entity instances your shim emits are the child items of the Entity.
            //
            //                          Incremental crawl:
            //                          If the Entity is the source for any AssociationNavigator methods that are marked
            //                          with the 'DirectoryLink' property, the behavior is the same as in the full
            //                          crawl. If there are no AssociationNavigator methods where the Entity is the
            //                          source, marked with the 'DirectoryLink' property, and the Entity has both a
            //                          ChangedIdEnumerator and DeletedIdEnumerator defined, then both of those methods
            //                          will be called in an incremental crawl. If none of the above is true, the
            //                          incremental crawl behavior is the same as full crawl.
            //
            // (Now, for an actual Entity instance (e.g., a file or folder on disk, in this case), it will be helpful to
            // define the concept of 'container'-type Entities. A 'container' type Entity is an Entity that is defined
            // in your model file as the SourceEntity of any AssociationNavigator marked with the 'DirectoryLink'
            // property.)
            //
            // Metadata object type     Children that are emitted when crawled
            //
            // Container-type           Full crawl:
            //  Entity instance         The association navigators for which the Entity type is the SourceEntity are
            //                          called.
            //
            //                          Incremental crawl:
            //                          This is the same as in a full crawl, unless several things are configured to
            //                          enable your shim to more intelligently decide what to emit. Here's what needs
            //                          to be configured to enable this behavior:
            //
            //                          1.  The SpecificFinder of the Entity has to return a field containing the number
            //                              of deleted direct child items in that particular container and this field
            //                              must be identified in the model file by defining the 'DeletedCountField'
            //                              property on the SpecificFinder method instance, where its value must be the
            //                              name of the field that returns the delete count.
            //
            //                          2.  The AssociationNavigator for which this Entity is the SourceEntity must
            //                              include in its return type descriptor a last modified time DateTime field
            //                              and this field must be identified in the model file by defining the
            //                              'LastModifiedTimeStamp' property on the AssociationNavigator method
            //                              instance, where its value must be the name of the field that returns the
            //                              last modified DateTime.
            //
            //                          3.  The AssociationNavigator for which this Entity is the SourceEntity must
            //                              have an input filter defined, and that input filter must have the
            //                              'CrawlStartTime' string property defined:
            //
            //                                  <FilterDescriptor Name="LastModifiedFilter" Type="Input">
            //                                      <Properties>
            //                                          <Property Name="CrawlStartTime" Type="String">x</Property>
            //                                      </Properties>
            //                                  </FilterDescriptor>
            //
            //                          4.  The AssociationNavigator for which this Entity is the SourceEntity must
            //                              take a DateTime input parameter, and that parameter must be associated with
            //                              the filter described in #3:
            //
            //                                  <Parameter Name="lastModifiedTime" Direction="In">
            //                                      <TypeDescriptor
            //                                          Name="lastModifiedTime"
            //                                          TypeName="System.DateTime"
            //                                          AssociatedFilter="LastModifiedFilter" />
            //                                  </Parameter>
            //
            //                          If this is the case, then the association navigator is called, and your shim
            //                          will be provided the last modified time to determine what items to return.
            //
            // Non-container-type       Full and incremental crawl behavior is the same. The SpecificFinder of the
            //  Entity instance         non-container Entity is called. You can also enable caching behavior, if the
            //                          method that emitted this Entity instance has the 'UseClientCachingForSearch'
            //                          property defined on its method instance. If caching is enabled in this way,
            //                          the SpecificFinder is not called, and only the data returned by the method
            //                          that originally emitted this Entity instance (i.e., an AssociationNavigator
            //                          or IdEnumerator, etc.) will be used to index the item. Note: If you return a
            //                          security descriptor in the return type descriptor, identified by the
            //                          'WindowsSecurityDescriptorField' defined on the method instance, then items
            //                          will *not* be cached, regardless of if you set the 'UseClientCachingForSearch'
            //                          property or not. The reason is that the SharePoint Search crawler has a limited
            //                          size for caching an individual item, and security descriptors can regularly
            //                          exceed that size.
            //

            //
            // As we mentioned above, we expect that every URL will resolve to a valid folder or file once we
            // translate it. Because of this, we won't really need to crawl any of the BCS metadata objects (LobSystem,
            // LobSystemInstance or Entity) - we'll only be crawling Entity instances. So based on the above, we know
            // that this method must end up populating all four LobUri properties.
            //

            //
            // We already populated this.lobSystem in the constructor. Now here's this.lobSystemInstance.
            // LobSystemInstances are statically defined in your model file. In this sample, we've only defined one.
            //
            this.lobSystemInstance = this.lobSystem.GetLobSystemInstances()[0].Value;

            //
            // Read the segment array once and reuse it below, instead of re-querying the property for the entity
            // type lookup and again on every iteration of the path-building loop.
            //
            string[] segments = sourceUri.Segments;

            //
            // Next, we need to figure out which Entity the URL refers to so we can populate this.entity. The entity
            // type is carried in the first path segment after the host (segments[0] is the leading "/"). A URL with
            // no such segment cannot be mapped to an entity, so reject it with a descriptive error rather than
            // letting the array access below fail with an index-out-of-range error.
            //
            if (segments.Length < 2)
            {
                throw new Microsoft.BusinessData.Runtime.RuntimeException(String.Format(
                                                                              "No entity type specified in URL {0}",
                                                                              sourceUri.ToString()));
            }

            String entityType = segments[1].Replace("/", "");

            if (entityType.Equals("MyFolder", StringComparison.OrdinalIgnoreCase))
            {
                this.entity = this.Catalog.GetEntity("MyFileConnector", "MyFolder");
            }
            else if (entityType.Equals("MyFile", StringComparison.OrdinalIgnoreCase))
            {
                this.entity = this.Catalog.GetEntity("MyFileConnector", "MyFile");
            }
            else
            {
                throw new Microsoft.BusinessData.Runtime.RuntimeException(String.Format(
                                                                              "Invalid entity type {0} specified in URL {1}",
                                                                              entityType,
                                                                              sourceUri.ToString()));
            }

            //
            // Finally, populate this.identity. Generally, the 'identity' is whatever your repository needs to uniquely
            // identify an Entity instance. In the case of our repository, an NTFS file system, that would conveniently
            // just be the path to the file. So reconstruct the URL as a UNC path, minus the first segment.
            //
            StringBuilder path = new StringBuilder(@"\\");

            path.Append(sourceUri.Host);
            path.Append(@"\");

            // Start at index 2: segments[0] is the root "/" and segments[1] is the entity type handled above.
            for (int i = 2; i < segments.Length; ++i)
            {
                path.Append(SPHttpUtility.UrlPathDecode(segments[i].Replace('/', '\\'), false));
            }

            this.identity = new Microsoft.BusinessData.Runtime.Identity(path.ToString());
        }