/// <summary>
/// Creates a site from the given name, navigation, format and content.
/// </summary>
/// <param name="name">Value assigned to the Name property.</param>
/// <param name="navi">Value assigned to the Navigation property.</param>
/// <param name="format">Value assigned to the Format property.</param>
/// <param name="content">Value assigned to the Content property.</param>
public Site( string name, Navigation navi, IFormat format, DataContent content )
{
    // The arguments are stored as passed; no validation is performed here.
    Name = name;
    Navigation = navi;
    Format = format;
    Content = content;
}
/// <summary>
/// Creates a navigation based on an existing one, passing the existing
/// document type and URIs through the given transformation rules.
/// </summary>
/// <param name="navi">Navigation whose DocumentType and Uris are handed to the rules.</param>
/// <param name="rules">Rules applied via ApplyTo to produce the new property values.</param>
public Navigation( Navigation navi, params TransformAction[] rules )
{
    DocumentType = rules.ApplyTo<DocumentType>( () => navi.DocumentType );
    Uris = rules.ApplyTo<IArray<NavigatorUrl>>( () => navi.Uris );

    // Kept as the last statement: CreateUrisHashCode presumably reads Uris -
    // verify before reordering.
    UrisHashCode = CreateUrisHashCode();
}
/// <summary>
/// Creates a validation result from the given table, locator, navigation,
/// document location and format.
/// </summary>
/// <param name="result">Value assigned to the Result property.</param>
/// <param name="datumLocator">Value assigned to the DatumLocator property.</param>
/// <param name="modifiedNavigation">
/// Value assigned to the Navigation property; when null, Navigation.Empty is used instead.
/// </param>
/// <param name="documentLocation">Value assigned to the DocumentLocation property.</param>
/// <param name="modifiedFormat">Value assigned to the Format property.</param>
public LocatorValidationResult(
    DataTable result,
    ParameterizedDatumLocator datumLocator,
    Navigation modifiedNavigation,
    string documentLocation,
    IFormat modifiedFormat )
{
    Result = result;
    DatumLocator = datumLocator;

    // A missing navigation is replaced by the empty navigation.
    Navigation = modifiedNavigation ?? Navigation.Empty;

    DocumentLocation = documentLocation;
    Format = modifiedFormat;
}
/// <summary>
/// Resolves the given navigation to a URI.
/// </summary>
/// <param name="navigation">Navigation to resolve.</param>
/// <returns>
/// The result of the wildcard lookup when it is not null; otherwise the
/// result of NavigateToFinalSite for the navigation's URIs.
/// </returns>
public Uri Navigate( Navigation navigation )
{
    var wildcardMatch = TryNavigateWithWildcards( navigation );
    if ( wildcardMatch == null )
    {
        // No wildcard result: fall back to the regular navigation.
        return NavigateToFinalSite( navigation.Uris );
    }

    return wildcardMatch;
}
/// <summary>
/// Loads the named HTML document from the "Recognition/Html" folder below TestDataRoot.
/// </summary>
/// <param name="name">File name appended to the "Recognition/Html" folder.</param>
/// <returns>The Content of the document handle returned by the browser.</returns>
protected IHtmlDocument LoadDocument( string name )
{
    string path = Path.Combine( TestDataRoot, "Recognition", "Html", name );

    var request = new NavigatorUrl( UriType.Request, path );
    var navigation = new Navigation( DocumentType.Html, request );

    // The browser result is cast to HtmlDocumentHandle; any other type makes the cast throw.
    var handle = (HtmlDocumentHandle)myBrowser.GetDocument( navigation );
    return handle.Content;
}
/// <summary>
/// Navigates to the document described by the given navigation and loads it.
/// </summary>
/// <param name="navi">Navigation passed to the navigator; its DocumentType selects the loader.</param>
/// <returns>The document produced by the loader for the navigated URI.</returns>
public IDocument GetDocument( Navigation navi )
{
    var target = myNavigator.Navigate( navi );
    myLogger.Info( "Url from navigator: {0}", target );

    // The loader is created only after navigation has completed and been logged.
    return DocumentLoaderFactory.Create( navi.DocumentType ).Load( target );
}
/// <summary>
/// Resolves the given navigation to a URI, consulting the cache first.
/// </summary>
/// <param name="navigation">Navigation to resolve; also used as the cache key.</param>
/// <returns>
/// The cached URI when the cache returns one; otherwise the URI returned by
/// the cache after adding the freshly navigated URI.
/// </returns>
public Uri Navigate( Navigation navigation )
{
    var cached = myCache.TryGet( navigation );
    if ( cached != null )
    {
        return cached;
    }

    // Cache miss: navigate, then return whatever URI the cache reports for the new entry.
    var resolved = myNavigator.Navigate( navigation );
    return myCache.Add( navigation, resolved );
}
/// <summary>
/// Adds the document specified by the given URL to the cache, using the given navigation as key.
/// </summary>
/// <param name="key">Navigation used as key for the new cache entry.</param>
/// <param name="document">URL of the document to add.</param>
/// <returns>The Uri of the newly created cache entry.</returns>
internal Uri Add( Navigation key, Uri document )
{
    var newEntry = CreateCacheEntry( key, document );

    // Shrinking is given the new entry and runs before the entry is added to the index.
    ShrinkCacheIfRequired( newEntry );

    myIndex.Add( newEntry );
    myIndex.Store( myIndexFile );

    return newEntry.Uri;
}
/// <summary>
/// Returns the document for the given navigation.
/// </summary>
/// <param name="navi">Navigation describing the document type and URIs.</param>
/// <returns>
/// The wildcard navigation result when it is not null; otherwise an
/// HtmlDocumentHandle for DocumentType.Html or a TextDocument for DocumentType.Text.
/// </returns>
/// <exception cref="NotSupportedException">
/// The document type is neither DocumentType.Html nor DocumentType.Text.
/// </exception>
public IDocument GetDocument( Navigation navi )
{
    var wildcardDocument = TryNavigateWithWildcards( navi );
    if ( wildcardDocument != null )
    {
        return wildcardDocument;
    }

    if ( navi.DocumentType == DocumentType.Html )
    {
        return new HtmlDocumentHandle( LoadDocument( navi.Uris ) );
    }

    if ( navi.DocumentType == DocumentType.Text )
    {
        return new TextDocument( DownloadFile( navi.Uris ) );
    }

    throw new NotSupportedException( "DocumentType: " + navi.DocumentType );
}
/// <summary>
/// Looks up the cache entry for the given navigation.
/// </summary>
/// <param name="key">Navigation whose UrisHashCode identifies the entry in the index.</param>
/// <returns>
/// The Uri of the entry when one exists and is not expired; otherwise null.
/// An expired entry is removed from the index before null is returned.
/// </returns>
internal Uri TryGet( Navigation key )
{
    var hit = myIndex.TryGet( key.UrisHashCode );
    if ( hit == null )
    {
        return null;
    }

    if ( !hit.IsExpired )
    {
        // found and live time of entry not expired
        myLogger.Info( "DocumentCache CacheHit for {0}", key );
        return hit.Uri;
    }

    // found but live time of entry expired
    myIndex.Remove( key.UrisHashCode );
    return null;
}
/// <summary>
/// Creates the cache entry for the given document.
/// </summary>
/// <param name="key">Navigation whose UrisHashCode identifies the entry.</param>
/// <param name="document">URI of the document to cache.</param>
/// <returns>
/// A CacheEntryBase pointing at the document itself when it is a file URI;
/// otherwise a ValueCacheEntry pointing at a ".dat" copy downloaded into the cache folder.
/// </returns>
private CacheEntryBase CreateCacheEntry( Navigation key, Uri document )
{
    var expiresAt = DateTime.Now.Add( Settings.MaxEntryLiveTime );

    if ( !document.IsFile )
    {
        // Non-file documents are downloaded to "<hash>.dat" in the cache folder,
        // and the entry refers to that downloaded file.
        var localCopy = Path.Combine( myCacheFolder, key.UrisHashCode + ".dat" );
        WebUtil.DownloadTo( document, localCopy );

        return new ValueCacheEntry( key.UrisHashCode, expiresAt, new Uri( localCopy ) );
    }

    // File documents are referenced in place; nothing is downloaded.
    return new CacheEntryBase( key.UrisHashCode, expiresAt, document );
}
/// <summary>
/// Tries to resolve a single file URL containing a "/*/" directory wildcard,
/// e.g. "/xyz/*/file.txt", to an existing file.
/// </summary>
/// <param name="navi">Navigation to resolve; only navigations with exactly one URL are handled.</param>
/// <returns>
/// The URI of the first file found below one of the sub-directories of the
/// wildcard's root; null when the navigation has not exactly one URL, the URL
/// is neither a file nor a UNC URI, or the URL contains no "/*/" pattern.
/// </returns>
/// <exception cref="Exception">
/// The URL contains a wildcard pattern but no sub-directory contains the requested file.
/// </exception>
/// <remarks>
/// Exceptions raised by Directory.GetDirectories (e.g. when the root directory
/// does not exist) are not caught here and propagate to the caller.
/// </remarks>
private Uri TryNavigateWithWildcards( Navigation navi )
{
    if ( navi.Uris.Count != 1 )
    {
        // we can only handle single urls
        return null;
    }

    var url = navi.Uris[ 0 ];
    Uri uri = new Uri( url.UrlString );
    if ( !uri.IsFile && !uri.IsUnc )
    {
        // we cannot handle e.g. http now
        return null;
    }

    // currently we only handle "/xyz/*/file.txt"
    // The marker is a fixed, non-linguistic token, so it is searched ordinally
    // instead of with the culture-sensitive default of IndexOf( string ).
    int pos = url.UrlString.IndexOf( "/*/", StringComparison.Ordinal );
    if ( pos <= 0 )
    {
        // no pattern found (a pattern at position 0 would leave no root directory)
        return null;
    }

    string root = url.UrlString.Substring( 0, pos );
    // skip the three characters of the "/*/" marker
    string file = url.UrlString.Substring( pos + 3 );

    // now try everything with "or"
    // first path which returns s.th. wins
    foreach ( string dir in Directory.GetDirectories( root, "*" ) )
    {
        string candidate = Path.Combine( dir, file );
        if ( File.Exists( candidate ) )
        {
            return new Uri( candidate );
        }
    }

    // so in this case we got a pattern navigation url but we were not able
    // to navigate to that url --> throw an exception
    // (type and message are kept as they were so existing callers keep working)
    throw new Exception( "Failed to navigate to the document" );
}