/// <summary>Builds a <see cref="File"/> entity describing the file at the given absolute path.</summary>
/// <param name="absolutePath">Absolute path of the file.</param>
/// <exception cref="System.NotSupportedException">
/// Thrown when the file information cannot be obtained for any reason (for example an invalid path
/// or a security error). The original exception is available through <c>InnerException</c>.
/// </exception>
/// <returns>The <see cref="File"/> entity populated with name, extension, directory and full name.</returns>
private static File GetScanFileByPath(string absolutePath)
{
    try
    {
        var fileInfo = new FileInfo(absolutePath);
        var scanFile = new File
        {
            Filename = fileInfo.Name,
            Extension = fileInfo.Extension,
            Path = fileInfo.DirectoryName,
            FullName = fileInfo.FullName
        };

        return scanFile;
    }
    catch (System.Exception ex)
    {
        // Chain the caught exception itself (not ex.InnerException, which is usually null)
        // so that the real cause and its stack trace are not lost.
        throw new System.NotSupportedException(ex.Message, ex);
    }
}
/// <summary>
/// Reads every line from the reader, lower-cases it, extracts the words with a regular expression
/// and, depending on <paramref name="type"/>, also records the position of each occurrence.
/// </summary>
/// <param name="scanFile">Scan file entity the words are attributed to.</param>
/// <param name="stream">Text reader positioned at the start of the text to scan.</param>
/// <param name="type">
/// Type of result; positions are recorded only for <see cref="TypeResult.CompositionOfWords"/>.
/// </param>
/// <exception cref="FileNotFoundException">Specified pathname does not exist (as originally documented; this method does not open the file itself, so presumably raised by the caller's reader - TODO confirm).</exception>
/// <exception cref="DirectoryNotFoundException">Directory cannot be found (as originally documented - TODO confirm, see above).</exception>
/// <exception cref="System.NotSupportedException">Specified file does not support reading or a security error is detected (as originally documented - TODO confirm, see above).</exception>
/// <returns>
/// A <see cref="ScanResult"/> holding the collected words and compositions. Lines are processed in
/// parallel, so the order of items in both lists is not guaranteed.
/// </returns>
private static ScanResult ParseFile(File scanFile, TextReader stream, TypeResult type)
{
    var wordPattern = new Regex(@"[^\W_\d]([^\W_\d]|[-’'](?=[^\W_\d]))*([^\W_\d]|['’])?");
    var wordsGate = new object();
    var compositionsGate = new object();
    var collectedWords = new List<Word>();
    var collectedCompositions = new List<Composition>();

    // Read the whole input first, keyed by 1-based line number.
    var numberedLines = new Dictionary<int, string>();
    var lineNumber = 0;
    for (var text = stream.ReadLine(); text != null; text = stream.ReadLine())
    {
        lineNumber++;
        numberedLines.Add(lineNumber, text.ToLower());
    }

    Parallel.ForEach(
        numberedLines,
        entry =>
        {
            foreach (Match match in wordPattern.Matches(entry.Value))
            {
                var scanWord = GetOrCreateScanWord(wordsGate, collectedWords, scanFile, match.Value);
                if (type != TypeResult.CompositionOfWords)
                {
                    continue;
                }

                // Match.Index is zero-based; the column passed on is one-based.
                AddWordToCompositions(compositionsGate, collectedCompositions, scanWord, entry.Key, match.Index + 1);
            }
        });

    return new ScanResult { Words = collectedWords, Compositions = collectedCompositions };
}
/// <summary>Collects the unique words found in the text supplied by the reader.</summary>
/// <param name="scanFile">Scan file entity the words are attributed to.</param>
/// <param name="stream">Stream reader for the text file.</param>
/// <exception cref="FileNotFoundException">Specified pathname does not exist.</exception>
/// <exception cref="DirectoryNotFoundException">Directory cannot be found.</exception>
/// <exception cref="System.NotSupportedException">Specified file does not support reading or a security error is detected.</exception>
/// <returns>Unsorted collection of words in the file.</returns>
public List<Word> ParseUnigueWordsInFile(File scanFile, StreamReader stream)
{
    // Delegates to ParseFile in unique-words mode and exposes only the word list.
    var scanResult = ParseFile(scanFile, stream, TypeResult.OnlyUniqueWordsInFile);
    return scanResult.Words;
}
/// <summary>
/// Looks up the word entity whose text equals <paramref name="wordText"/>; increments its counter
/// when found, otherwise creates a new entity with a count of one and adds it to the collection.
/// </summary>
/// <param name="wordsLocker">Object locked while the collection is searched and modified.</param>
/// <param name="fileWords">Existing words to search and, if needed, extend.</param>
/// <param name="scanFile">File containing this word.</param>
/// <param name="wordText">The word string.</param>
/// <returns>The existing or newly created <see cref="Word"/> entity.</returns>
private static Word GetOrCreateScanWord(object wordsLocker, ICollection<Word> fileWords, File scanFile, string wordText)
{
    lock (wordsLocker)
    {
        var existing = fileWords.FirstOrDefault(candidate => candidate.TheWord == wordText);

        if (Equals(existing, default(Word)))
        {
            // Not seen before: register a fresh entry with an initial count of one.
            var created = new Word { File = scanFile, TheWord = wordText, Count = 1 };
            fileWords.Add(created);
            return created;
        }

        existing.Count++;
        return existing;
    }
}
/// <summary>Collects every word occurrence, with its position, from the text supplied by the reader.</summary>
/// <param name="scanFile">Scan file entity the words are attributed to.</param>
/// <param name="stream">Stream reader for the text file.</param>
/// <exception cref="FileNotFoundException">Specified pathname does not exist.</exception>
/// <exception cref="DirectoryNotFoundException">Directory cannot be found.</exception>
/// <exception cref="System.NotSupportedException">Specified file does not support reading or a security error is detected.</exception>
/// <returns>Unsorted collection of word compositions in the file.</returns>
public List<Composition> ParseAllWordsInFile(File scanFile, StreamReader stream)
{
    // Delegates to ParseFile in composition mode and exposes only the composition list.
    var scanResult = ParseFile(scanFile, stream, TypeResult.CompositionOfWords);
    return scanResult.Compositions;
}