Ejemplo n.º 1
0
        /// <summary>
        /// Extracts links from the contents of a SWF document. The buffer is written to a
        /// temporary .swf file, converted to a temporary .htm file by the converter, and the
        /// resulting HTML is handed to the HTML parser. Both temporary files are removed
        /// before the method returns, and the whole operation runs while holding the mutex.
        /// </summary>
        /// <param name="content">The contents of the SWF document.</param>
        /// <param name="contentUrl">The url of the SWF document.</param>
        /// <returns>
        /// An <see cref="ArrayList"/> of <see cref="InternetUrlToIndex"/> objects, one for
        /// each link found in the content, or <c>null</c> if the conversion fails or an
        /// error occurs while processing the document (errors are logged, not rethrown).
        /// </returns>
        /// <exception cref="ArgumentNullException">If the input buffer is null or empty.</exception>
        public override ArrayList ExtractLinks(byte[] content, ref InternetUrlToCrawl contentUrl)
        {
            if ((content == null) || (content.Length == 0))
            {
                throw new ArgumentNullException("content", "The input buffer cannot be empty or null.");
            }

            ArrayList links = null;

            try
            {
                mutex.WaitOne();
                // A fresh GUID per call keeps the temporary file names unique.
                string baseName    = globals.AppWorkPath + Guid.NewGuid().ToString();
                string swfFileName = baseName + ".swf";
                string htmFileName = baseName + ".htm";
                try
                {
                    // Store the swf file; the using block closes the stream on every path.
                    using (FileStream swf = new FileStream(swfFileName, FileMode.Create))
                    {
                        swf.Write(content, 0, content.Length);
                    }
                    // Convert it to html and parse the result only if the conversion succeeded.
                    if (converter.ConvertSwfFile(swfFileName, htmFileName))
                    {
                        string html;
                        using (StreamReader htm = new StreamReader(htmFileName, encoding))
                        {
                            html = htm.ReadToEnd();
                        }
                        links = parser.ExtractLinks(ref html, ref contentUrl);
                    }
                }
                catch (Exception ex)
                {
                    if (globals.Settings.LogLevel <= CWLogLevel.LogInfo)
                    {
                        globals.FileLog.LogInfo("SwfParser failed to extract links from " + contentUrl.Url + ": " + ex.ToString());
                    }
                }
                finally
                {
                    // Each file is removed independently so that a failure to delete one
                    // neither leaks the other nor hides links that were already extracted.
                    DeleteTempFile(swfFileName);
                    DeleteTempFile(htmFileName);
                }
            }
            catch (Exception ex)
            {
                if (globals.Settings.LogLevel <= CWLogLevel.LogWarning)
                {
                    globals.FileLog.LogWarning("SwfParser failed to extract links from " + contentUrl.Url + ": " + ex.Message);
                }
            }
            finally
            {
                // NOTE(review): the forced collection is kept from the original code; it is
                // presumably there to reclaim memory used by the converter - confirm it is
                // still needed before removing it.
                GC.Collect();
                mutex.ReleaseMutex();
            }

            // Notify listeners that processing of this url has finished, whether or not
            // any links were extracted.
            OnExtractLinksComplete(new ParserEventArgs(contentUrl.Url));
            return links;
        }

        /// <summary>
        /// Deletes a temporary file on a best-effort basis: a failure is logged as a
        /// warning and never propagated to the caller.
        /// </summary>
        /// <param name="path">The full path of the temporary file to delete.</param>
        private void DeleteTempFile(string path)
        {
            try
            {
                File.Delete(path);
            }
            catch (Exception ex)
            {
                if (globals.Settings.LogLevel <= CWLogLevel.LogWarning)
                {
                    globals.FileLog.LogWarning("SwfParser failed to delete temporary file " + path + ": " + ex.Message);
                }
            }
        }