Example #1
0
        /// <summary>
        /// Extracts plain text from a downloaded response by writing the response bytes to a
        /// temporary file and reading that file back through a <see cref="FilterReader"/>.
        /// </summary>
        /// <param name="crawler">The crawler running this step; not read by this method.</param>
        /// <param name="propertyBag">
        /// Supplies the status code, content type, step URI and response bytes, and receives
        /// the resulting <c>Title</c> and <c>Text</c>.
        /// </param>
        public void Process(Crawler crawler, PropertyBag propertyBag)
        {
            // Only successful responses are processed.
            if (propertyBag.StatusCode != HttpStatusCode.OK)
            {
                return;
            }

            // A content type without a mapped extension is skipped.
            string fileExtension = MapContentTypeToExtension(propertyBag.ContentType);
            if (fileExtension.IsNullOrEmpty())
            {
                return;
            }

            propertyBag.Title = propertyBag.Step.Uri.PathAndQuery;

            using (TempFile scratchFile = new TempFile())
            {
                // Append the mapped extension to the temporary file name before writing.
                scratchFile.FileName = scratchFile.FileName + "." + fileExtension;
                File.WriteAllBytes(scratchFile.FileName, propertyBag.Response);

                using (FilterReader textExtractor = new FilterReader(scratchFile.FileName))
                {
                    propertyBag.Text = textExtractor.ReadToEnd().Trim();
                }
            }
        }
		/// <summary>
		/// Extracts plain text from a downloaded response by copying the response stream to a
		/// temporary file and reading that file back through a <see cref="FilterReader"/>.
		/// </summary>
		/// <param name="crawler">The crawler running this step; not read by this method.</param>
		/// <param name="propertyBag">
		/// Supplies the status code, content type, step URI and response stream, and receives
		/// the resulting <c>Title</c> and <c>Text</c>.
		/// </param>
		public void Process(Crawler crawler, PropertyBag propertyBag)
		{
			// Only successful responses are processed.
			if (propertyBag.StatusCode != HttpStatusCode.OK)
			{
				return;
			}

			// A content type without a mapped extension is skipped.
			string fileExtension = MapContentTypeToExtension(propertyBag.ContentType);
			if (fileExtension.IsNullOrEmpty())
			{
				return;
			}

			propertyBag.Title = propertyBag.Step.Uri.PathAndQuery;

			using (TempFile scratchFile = new TempFile())
			{
				// Append the mapped extension to the temporary file name before writing.
				scratchFile.FileName = scratchFile.FileName + "." + fileExtension;

				// The output file is opened first, then the response stream is copied into it.
				using (FileStream output = new FileStream(scratchFile.FileName, FileMode.Create, FileAccess.Write, FileShare.Read, 0x1000))
				{
					using (Stream responseStream = propertyBag.GetResponse())
					{
						responseStream.CopyToStream(output);
					}
				}

				using (FilterReader textExtractor = new FilterReader(scratchFile.FileName))
				{
					propertyBag.Text = textExtractor.ReadToEnd().Trim();
				}
			}
		}
Example #3
0
 /// <summary>
 /// Shows the open-file dialog and, when the user confirms, loads the text extracted
 /// from the chosen file into the text box and updates the status label.
 /// </summary>
 /// <param name="sender">The control that raised the event.</param>
 /// <param name="e">The event data.</param>
 private void btnBrowse_Click(object sender, EventArgs e)
 {
     // Nothing to do unless the dialog was confirmed.
     if (openFileDialog1.ShowDialog() != DialogResult.OK)
     {
         return;
     }

     using (TextReader extractor = new FilterReader(openFileDialog1.FileName))
     {
         textBox1.Text = extractor.ReadToEnd();
         label1.Text = "Text loaded from " + openFileDialog1.FileName;
     }
 }
 /// <summary>
 /// Writes the response bytes to a temporary file carrying <c>Extension</c>, reads the
 /// file back through a <see cref="FilterReader"/> and stores the trimmed text in
 /// <c>data.FilteredContent</c>.
 /// </summary>
 /// <param name="data">The crawl data holding the response stream and receiving the filtered content.</param>
 /// <returns><c>true</c> when the text was extracted; <c>false</c> when any exception occurred.</returns>
 public override bool Process(CrawlData data)
 {
     bool succeeded;
     try
     {
         using (var scratchFile = new TempFile())
         {
             scratchFile.FileName = scratchFile.FileName + "." + Extension;

             string targetPath = scratchFile.FileName;
             var responseBytes = data.ResponseStream.ToArray();
             File.WriteAllBytes(targetPath, responseBytes);

             using (var textExtractor = new FilterReader(scratchFile.FileName))
             {
                 string extracted = textExtractor.ReadToEnd();
                 data.FilteredContent = extracted.Trim();
             }
         }
         succeeded = true;
     }
     catch
     {
         // Any failure is reported to the caller as an unsuccessful step.
         succeeded = false;
     }
     return succeeded;
 }
Example #5
0
 /// <summary>
 /// Extract the contents of the given file as plain text.
 /// </summary>
 /// <param name="filePath">The physical path of the file that contains the text to be extracted.</param>
 /// <returns>
 /// The extracted text, or an empty string when the file's extension is not listed in
 /// <c>AllowedExtensions</c> or the extraction fails with an <see cref="ArgumentException"/>.
 /// </returns>
 public string ExtractTextFromFile(string filePath)
 {
     string extractedText = String.Empty;
     string[] allowedExtensionsArray = this.AllowedExtensions.Split(new[]{','}, StringSplitOptions.RemoveEmptyEntries);

     // File extensions are compared case-insensitively so that e.g. ".DOC" matches an
     // allowed ".doc"; an ordinal comparison is used because extensions are identifiers,
     // not linguistic text.
     if (allowedExtensionsArray.Contains(Path.GetExtension(filePath), StringComparer.OrdinalIgnoreCase))
     {
         try
         {
             using (FilterReader filterReader = new FilterReader(filePath))
             {
                 extractedText = filterReader.ReadToEnd();
             }
         }
         catch (ArgumentException ex)
         {
             // An argument exception usually happens when the IFilter for the file could not be found.
             // This is a non-critical error, so we're just logging it.
             Logger.Error(string.Format("Unable to extract text for {0}.", filePath), ex);
         }
     }
     return extractedText;
 }
        /// <summary>
        /// Replaces all non-custom parts in the database with the parts read from the given file.
        /// </summary>
        /// <remarks>
        /// The text extracted from the file is split on tab characters and consumed in groups
        /// of four fields; the first three fields of each group are used as the part code,
        /// description and price. Parts that cannot be parsed or inserted are collected and
        /// reported to the user in a message box.
        /// </remarks>
        /// <param name="partListPath">Path of the part list file to import.</param>
        private void ImportPartList(string partListPath)
        {
            // Read the file before touching the database, so an unreadable file does not
            // leave the non-custom parts deleted with nothing to replace them.
            string text;
            using (FilterReader reader = new FilterReader(partListPath))
            {
                text = reader.ReadToEnd();
            }

            this._DB.Part.DeleteAllOnSubmit(this._DB.Part.Where(o => !o.IsCustom));
            this._DB.SubmitChanges();

            string[] parts = text.Split('\t');

            // Each part occupies four consecutive fields; a trailing incomplete group is ignored.
            int partCount = parts.Length / 4;

            System.Text.StringBuilder failedParts = new System.Text.StringBuilder();
            for (int i = 0; i < partCount; i++)
            {
                try
                {
                    Part part = new Part
                    {
                        IsCustom = false,
                        Code = parts[4 * i],
                        Description = parts[4 * i + 1],
                        Price = Math.Round(Double.Parse(parts[4 * i + 2]), 2)
                    };
                    this._DB.Part.InsertOnSubmit(part);
                    this._DB.SubmitChanges();
                }
                catch (Exception)
                {
                    // Best-effort import: remember the failing row and continue with the next one.
                    // NOTE(review): if SubmitChanges itself failed, the rejected part may still be
                    // pending in the data context and affect later submits - confirm.
                    failedParts.Append(parts[4 * i])
                               .Append("\t")
                               .Append(parts[4 * i + 1])
                               .Append("\t")
                               .Append(parts[4 * i + 2])
                               .Append(System.Environment.NewLine);
                }
            }

            if (failedParts.Length > 0)
            {
                MessageBox.Show("The following parts were unable to be added:" + System.Environment.NewLine
                                    + failedParts + System.Environment.NewLine
                                    + "Please fix the input file and re-import, or add these parts manually.");
            }
        }
Example #7
0
    /// <summary>
    /// Extracts the text of a document file and adds it to the Lucene document index,
    /// together with the document's path and the owning candidate's ID.
    /// </summary>
    /// <remarks>
    /// Indexing is best-effort: any failure is written to the trace output and is not
    /// propagated to the caller.
    /// </remarks>
    /// <param name="fileName">file name only: such as: abc.doc</param>
    /// <param name="candidateID">ID of the candidate the document belongs to; stored in the "candidateID" field.</param>
    public static void IndexingDocumentFile(string fileName, int candidateID)
    {
        try
        {
            string indexFileLocation = WebConfig.DocumentIndexPhysicalPath;
            Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, false);
            //create an analyzer to process the text
            Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
            //create the index writer with the directory and analyzer defined.
            Lucene.Net.Index.IndexWriter indexWriter = new Lucene.Net.Index.IndexWriter(dir, analyzer, !IsIndexExists(indexFileLocation));
            try
            {
                // Only the text extraction needs the reader, so it is released right after reading.
                string content;
                using (TextReader reader = new FilterReader(WebConfig.CVDocumentPhysicalPath + fileName))
                {
                    content = reader.ReadToEnd();
                }

                Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
                doc.Add(new Lucene.Net.Documents.Field("content",
                                                       content,
                                                       Lucene.Net.Documents.Field.Store.YES,
                                                       Lucene.Net.Documents.Field.Index.TOKENIZED,
                                                       Lucene.Net.Documents.Field.TermVector.YES));
                doc.Add(new Lucene.Net.Documents.Field("path",
                                                       WebConfig.CVDocumentAbsolutePath + fileName,
                                                       Lucene.Net.Documents.Field.Store.YES,
                                                       Lucene.Net.Documents.Field.Index.TOKENIZED,
                                                       Lucene.Net.Documents.Field.TermVector.YES));
                doc.Add(new Lucene.Net.Documents.Field("candidateID",
                                                       candidateID.ToString(),
                                                       Lucene.Net.Documents.Field.Store.YES,
                                                       Lucene.Net.Documents.Field.Index.TOKENIZED,
                                                       Lucene.Net.Documents.Field.TermVector.YES));

                //write the document to the index
                indexWriter.AddDocument(doc);
                //optimize the index
                indexWriter.Optimize();
            }
            finally
            {
                // Always close the writer, even when extraction or indexing fails, so the
                // writer's resources are not left open for later indexing calls.
                indexWriter.Close();
            }
        }
        catch (Exception ex)
        {
            // Keep the original best-effort contract (no exception reaches the caller),
            // but leave a trace of the failure instead of discarding it silently.
            System.Diagnostics.Trace.TraceError("Failed to index document '{0}' for candidate {1}: {2}", fileName, candidateID, ex);
        }
    }