/// <summary>
/// Extracts the text of a successfully downloaded response by writing the response
/// bytes to a temporary file and reading that file back through a <see cref="FilterReader"/>.
/// </summary>
/// <param name="crawler">The crawler passed by the caller; not used by this method.</param>
/// <param name="propertyBag">
/// Supplies the status code, content type and response bytes; receives the
/// <c>Title</c> and <c>Text</c> values.
/// </param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    bool isOk = propertyBag.StatusCode == HttpStatusCode.OK;
    if (!isOk)
    {
        return;
    }

    // Content types without a mapped extension are left untouched.
    string fileExtension = MapContentTypeToExtension(propertyBag.ContentType);
    if (fileExtension.IsNullOrEmpty())
    {
        return;
    }

    propertyBag.Title = propertyBag.Step.Uri.PathAndQuery;

    using (TempFile scratchFile = new TempFile())
    {
        // Presumably FilterReader selects its filter from the file extension - confirm.
        scratchFile.FileName = scratchFile.FileName + "." + fileExtension;
        File.WriteAllBytes(scratchFile.FileName, propertyBag.Response);

        using (FilterReader textExtractor = new FilterReader(scratchFile.FileName))
        {
            propertyBag.Text = textExtractor.ReadToEnd().Trim();
        }
    }
}
/// <summary>
/// Extracts the text of a successfully downloaded response by copying the response
/// stream into a temporary file and reading that file back through a <see cref="FilterReader"/>.
/// </summary>
/// <param name="crawler">The crawler passed by the caller; not used by this method.</param>
/// <param name="propertyBag">
/// Supplies the status code, content type and response stream; receives the
/// <c>Title</c> and <c>Text</c> values.
/// </param>
public void Process(Crawler crawler, PropertyBag propertyBag)
{
    bool isOk = propertyBag.StatusCode == HttpStatusCode.OK;
    if (!isOk)
    {
        return;
    }

    // Content types without a mapped extension are left untouched.
    string fileExtension = MapContentTypeToExtension(propertyBag.ContentType);
    if (fileExtension.IsNullOrEmpty())
    {
        return;
    }

    propertyBag.Title = propertyBag.Step.Uri.PathAndQuery;

    using (TempFile scratchFile = new TempFile())
    {
        // Presumably FilterReader selects its filter from the file extension - confirm.
        scratchFile.FileName = scratchFile.FileName + "." + fileExtension;

        // The output file is opened before the response stream is requested, and both
        // are closed before the file is reopened for text extraction.
        using (FileStream output = new FileStream(scratchFile.FileName, FileMode.Create, FileAccess.Write, FileShare.Read, 0x1000))
        {
            using (Stream responseStream = propertyBag.GetResponse())
            {
                responseStream.CopyToStream(output);
            }
        }

        using (FilterReader textExtractor = new FilterReader(scratchFile.FileName))
        {
            propertyBag.Text = textExtractor.ReadToEnd().Trim();
        }
    }
}
/// <summary>
/// Shows the open-file dialog and, when the user confirms a file, loads its text
/// through a <see cref="FilterReader"/> into the text box and updates the status label.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data; not used.</param>
private void btnBrowse_Click(object sender, EventArgs e)
{
    // Nothing to do unless the dialog was confirmed.
    if (openFileDialog1.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    using (TextReader source = new FilterReader(openFileDialog1.FileName))
    {
        textBox1.Text = source.ReadToEnd();
        label1.Text = "Text loaded from " + openFileDialog1.FileName;
    }
}
/// <summary>
/// Writes the crawled response to a temporary file, extracts its text through a
/// <see cref="FilterReader"/> and stores the trimmed result in
/// <c>data.FilteredContent</c>.
/// </summary>
/// <param name="data">The crawl data holding the response stream and receiving the filtered content.</param>
/// <returns><c>true</c> when extraction completed; <c>false</c> when any exception was thrown.</returns>
public override bool Process(CrawlData data)
{
    try
    {
        using (var scratchFile = new TempFile())
        {
            scratchFile.FileName = scratchFile.FileName + "." + Extension;

            string scratchPath = scratchFile.FileName;
            byte[] payload = data.ResponseStream.ToArray();
            File.WriteAllBytes(scratchPath, payload);

            using (var textExtractor = new FilterReader(scratchFile.FileName))
            {
                string extracted = textExtractor.ReadToEnd();
                data.FilteredContent = extracted.Trim();
            }
        }
    }
    catch
    {
        // Any failure is reported to the caller as "not processed".
        return false;
    }

    return true;
}
/// <summary>
/// Extract the contents of the given file as plain text.
/// </summary>
/// <remarks>
/// Text is only extracted when the file's extension (as returned by
/// <see cref="Path.GetExtension(string)"/>) appears in the comma-separated
/// <c>AllowedExtensions</c> list. The comparison ignores case, so <c>.DOC</c>
/// matches an allowed <c>.doc</c>.
/// </remarks>
/// <param name="filePath">The physical path of the file that contains the text to be extracted.</param>
/// <returns>
/// The extracted text, or an empty string when the extension is not allowed or an
/// <see cref="ArgumentException"/> was thrown while reading the file.
/// </returns>
public string ExtractTextFromFile(string filePath)
{
    string[] allowedExtensionsArray =
        this.AllowedExtensions.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

    // An ordinal, case-insensitive match keeps files such as "REPORT.DOC" from being
    // skipped when the list contains ".doc".
    string extension = Path.GetExtension(filePath);
    if (!allowedExtensionsArray.Contains(extension, StringComparer.OrdinalIgnoreCase))
    {
        return String.Empty;
    }

    string extractedText = String.Empty;
    try
    {
        using (FilterReader filterReader = new FilterReader(filePath))
        {
            extractedText = filterReader.ReadToEnd();
        }
    }
    catch (ArgumentException ex)
    {
        // An argument exception usually happens when the IFilter for the file could not be found.
        // This is a non-critical error, so we're just logging it.
        Logger.Error(string.Format("Unable to extract text for {0}.", filePath), ex);
    }

    return extractedText;
}
/// <summary>
/// Replaces all non-custom parts in the database with the parts read from the given file.
/// </summary>
/// <remarks>
/// Existing non-custom parts are deleted first. The file text is then split on tab
/// characters and consumed in groups of four fields; the first three fields of each
/// group are used as code, description and price (rounded to two decimals). Each part
/// is submitted individually, and parts that fail are listed in a message box.
/// </remarks>
/// <param name="partListPath">Path of the part list file to import.</param>
private void ImportPartList(string partListPath)
{
    this._DB.Part.DeleteAllOnSubmit(this._DB.Part.Where(o => !o.IsCustom));
    this._DB.SubmitChanges();

    // Dispose the reader as soon as the text has been read so the file is not left open.
    string text;
    using (FilterReader reader = new FilterReader(partListPath))
    {
        text = reader.ReadToEnd();
    }

    string[] parts = text.Split('\t');
    int recordCount = parts.Length / 4;
    string failedParts = "";

    for (int i = 0; i < recordCount; i++)
    {
        int offset = 4 * i;
        try
        {
            Part part = new Part
            {
                IsCustom = false,
                Code = parts[offset],
                Description = parts[offset + 1],
                Price = Math.Round(Double.Parse(parts[offset + 2]), 2)
            };
            this._DB.Part.InsertOnSubmit(part);
            this._DB.SubmitChanges();
        }
        catch (Exception)
        {
            // Best effort: remember the failing record and carry on with the next one.
            // NOTE(review): if _DB is a LINQ to SQL DataContext, a part whose submit failed
            // may stay in the pending change set and affect later submits - confirm.
            failedParts += parts[offset] + "\t" + parts[offset + 1] + "\t" + parts[offset + 2] + System.Environment.NewLine;
        }
    }

    if (!string.IsNullOrEmpty(failedParts))
    {
        MessageBox.Show("The following parts were unable to be added:" + System.Environment.NewLine + failedParts + System.Environment.NewLine + "Please fix the input file and re-import, or add these parts manually.");
    }
}
/// <summary>
/// Extracts the text of a document through a <see cref="FilterReader"/> and adds it to
/// the Lucene index located at <c>WebConfig.DocumentIndexPhysicalPath</c>, together with
/// the document path and the candidate id.
/// </summary>
/// <remarks>
/// Failures are not propagated to the caller; they are written to the trace listeners.
/// </remarks>
/// <param name="fileName">file name only: such as: abc.doc</param>
/// <param name="candidateID">Id of the candidate stored in the "candidateID" field of the indexed document.</param>
public static void IndexingDocumentFile(string fileName, int candidateID)
{
    try
    {
        string indexFileLocation = WebConfig.DocumentIndexPhysicalPath;
        Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, false);

        //create an analyzer to process the text
        Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();

        //create the index writer with the directory and analyzer defined.
        Lucene.Net.Index.IndexWriter indexWriter =
            new Lucene.Net.Index.IndexWriter(dir, analyzer, !IsIndexExists(indexFileLocation));
        try
        {
            Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();

            using (TextReader reader = new FilterReader(WebConfig.CVDocumentPhysicalPath + fileName))
            {
                Lucene.Net.Documents.Field fldContent = new Lucene.Net.Documents.Field(
                    "content",
                    reader.ReadToEnd(),
                    Lucene.Net.Documents.Field.Store.YES,
                    Lucene.Net.Documents.Field.Index.TOKENIZED,
                    Lucene.Net.Documents.Field.TermVector.YES);
                Lucene.Net.Documents.Field fldPath = new Lucene.Net.Documents.Field(
                    "path",
                    WebConfig.CVDocumentAbsolutePath + fileName,
                    Lucene.Net.Documents.Field.Store.YES,
                    Lucene.Net.Documents.Field.Index.TOKENIZED,
                    Lucene.Net.Documents.Field.TermVector.YES);
                Lucene.Net.Documents.Field fldCandidateID = new Lucene.Net.Documents.Field(
                    "candidateID",
                    candidateID.ToString(),
                    Lucene.Net.Documents.Field.Store.YES,
                    Lucene.Net.Documents.Field.Index.TOKENIZED,
                    Lucene.Net.Documents.Field.TermVector.YES);

                doc.Add(fldContent);
                doc.Add(fldPath);
                doc.Add(fldCandidateID);
            }

            //write the document to the index
            indexWriter.AddDocument(doc);

            //optimize the index; the writer is closed below
            indexWriter.Optimize();
        }
        finally
        {
            // Close the writer even when extraction or indexing fails, so it is not
            // left open (and, presumably, the index is not left locked).
            indexWriter.Close();
        }
    }
    catch (Exception ex)
    {
        // Indexing stays best-effort for callers, but the failure is no longer silent.
        System.Diagnostics.Trace.TraceError(
            "Failed to index document '{0}' for candidate {1}: {2}", fileName, candidateID, ex);
    }
}