/// <summary>
/// Entry point: parses the configured data file, analyses it and writes the
/// resulting report to "report.txt" and "report.xlsx".
/// </summary>
/// <param name="args">Command-line arguments (not read by this method).</param>
static void Main(string[] args)
{
    var parser = new CsvParser();
    var config = BuildConfiguration();

    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

    var dataFilePath = config["data-file-path"];
    var encoding = EncodingDetector.GetEncoding(dataFilePath);
    var data = parser.ParseFile(dataFilePath, encoding);

    // A parse error is reported on the console; processing still continues.
    if (parser.ErrorMessage != null)
    {
        Console.WriteLine("Parsing failed");
        Console.WriteLine(parser.ErrorMessage);
    }

    var advancedStatThreshold = Convert.ToUInt32(config["min-number-for-adv-stat"]);
    var report = DataAnalyser.Analyze(data, advancedStatThreshold);

    using (var textOutput = new FileStream("report.txt", FileMode.Create))
    {
        TextReporter.ToStream(textOutput, report);
    }

    var excelReporter = new ExcelReport.ExcelReporter();
    using (var excelOutput = new FileStream("report.xlsx", FileMode.Create))
    {
        excelReporter.ToStream(excelOutput, report);
    }
}
/// <summary>
/// Imports movies from text dump files into the given collection.
/// </summary>
/// <param name="filesToRead">
/// Candidate file paths. A path is skipped when the file does not exist or
/// the path does not contain ".txt".
/// </param>
/// <param name="movieCollection">Collection that receives every parsed movie.</param>
/// <returns>The same <paramref name="movieCollection"/> instance that was passed in.</returns>
public MovieCollection ImportMoviesFromDumpFiles(string[] filesToRead, MovieCollection movieCollection)
{
    foreach (var txtFile in filesToRead)
    {
        // NOTE(review): Contains(".txt") also matches names such as "a.txt.bak";
        // kept as-is so the set of accepted files does not change.
        if (!File.Exists(txtFile) || !txtFile.Contains(".txt"))
        {
            continue;
        }

        var fileName = Path.GetFileNameWithoutExtension(txtFile);
        var encoding = EncodingDetector.DetectTextFileEncoding(txtFile);

        // The using block releases the file handle even if reading throws.
        string moviesString;
        using (var reader = new StreamReader(txtFile, encoding))
        {
            moviesString = reader.ReadToEnd();
        }

        // Drop the column header line of the dump.
        moviesString = moviesString.Replace("last-modified |ext |size|name|location", string.Empty);
        var moviesLines = moviesString.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);

        OnProgressUpdate?.Invoke(ImportProgressType.NewFile, fileName, moviesLines.Length);

        foreach (var line in moviesLines)
        {
            if (line != "done" && !string.IsNullOrWhiteSpace(line))
            {
                var movie = FileHelper.GetMovieFromLine(line, fileName);
                if (movie != null)
                {
                    movieCollection.AddMovie(movie);
                }
            }

            // Progress is reported for every line, including skipped ones.
            OnProgressUpdate?.Invoke(ImportProgressType.NewLine, string.Empty, 0);
        }
    }

    return movieCollection;
}
/// <summary>
/// Validates the file at <c>FullPath</c>: it must be detected as "UTF-8" and
/// start with the bytes EF BB BF. Updates <c>State</c>, <c>Encode</c> and
/// <c>_confidence</c> along the way.
/// </summary>
/// <returns>
/// <c>true</c> when the file is detected as UTF-8 and starts with the BOM
/// (even if the cue match check then marks it invalid); otherwise <c>false</c>.
/// </returns>
private bool CheckCUE()
{
    State = FileState.ValidFile;
    Encode = EncodingDetector.GetEncoding(FullPath, out _confidence);

    if (Encode != "UTF-8")
    {
        State = FileState.InValidEncode;
        return false;
    }

    using (var cueStream = File.OpenRead(FullPath))
    {
        var head = new byte[3];
        cueStream.Read(head, 0, 3);

        bool startsWithBom = head[0] == 0xEF && head[1] == 0xBB && head[2] == 0xBF;
        if (startsWithBom)
        {
            // The match check runs while the file is still open, as before.
            if (!CueCurer.CueMatchCheck(this))
            {
                State = FileState.InValidCue;
            }

            return true;
        }
    }

    State = FileState.NonUTF8WBOM;
    return false;
}
/// <summary>
/// Runs encoding detection over files found under <c>_tempDir</c> (stopping
/// once more than 10 files were processed) and prints a summary.
/// </summary>
/// <param name="encDetectorOptions">Options forwarded to <c>EncodingDetector.Detect</c>.</param>
private void RunTest(EncodingDetector.Options encDetectorOptions)
{
    int processed = 0;
    int detectedCount = 0;

    foreach (string filePath in Directory.EnumerateFiles(_tempDir, "*.*", SearchOption.AllDirectories))
    {
        var sampleBytes = Utils.ReadFileContentSample(filePath);
        Encoding detected = EncodingDetector.Detect(sampleBytes, encDetectorOptions);

        processed++;
        if (detected != null)
        {
            detectedCount++;
        }

        WriteToConsole($"{Path.GetFileName(filePath)}: {detected}");

        if (processed > 10)
        {
            break;
        }
    }

    Console.WriteLine($"Found Encoding in:{detectedCount} out of {processed}");
}
/// <summary>
/// Loads the description of the base objects from a file.
/// </summary>
/// <param name="pathToFile">Path to the file with the base objects.</param>
/// <returns>The full text of the file.</returns>
private string LoadBaseTechObjectsDescription(string pathToFile)
{
    // Dispose the reader so the file handle is released deterministically.
    using (var reader = new StreamReader(pathToFile,
        EncodingDetector.DetectFileEncoding(pathToFile)))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Checks that <c>EncodingDetector.IsXmlEncoded</c> returns true for the sample input.
/// </summary>
public void IsXmlEncoded_ValidInput_ReturnsTrue()
{
    // Arrange
    const string message = "MSH.1>MSH.2>";

    // Act
    var isXml = EncodingDetector.IsXmlEncoded(message);

    // Assert
    Assert.IsTrue(isXml);
}
/// <summary>
/// Opens <paramref name="fileName"/> with its detected encoding and forwards
/// it to the reader-based <c>IncludeFile</c> overload.
/// </summary>
/// <param name="fileName">Path of the file to include.</param>
/// <param name="tokens">Token list passed through to the reader-based overload.</param>
void IncludeFile(string fileName, List<TokenExpression> tokens)
{
    var fileReader = new StreamReader(fileName, EncodingDetector.DetectEncoding(fileName));

    // NOTE(review): the reader is handed to the overload and not disposed here;
    // confirm that the callee owns and closes it.
    var wrappedReader = new MyTextReader2(fileReader, fileName);
    IncludeFile(wrappedReader, tokens, fileName);
}
/// <summary>
/// Checks that <c>EncodingDetector.AssertXmlEncoded</c> does not throw for the sample input.
/// </summary>
public void AssertXmlEncoded_ValidInput_NoExceptionThrown()
{
    // Arrange
    const string message = "MSH.1>MSH.2>";

    // Act / Assert
    Assert.DoesNotThrow(() => EncodingDetector.AssertXmlEncoded(message));
}
/// <summary>
/// Loads a Lua script from a file and executes it.
/// </summary>
/// <param name="path">Path to the script file.</param>
private void LoadScript(string path)
{
    string script;

    // The using block closes the file even when reading throws.
    using (var reader = new StreamReader(path,
        EncodingDetector.DetectFileEncoding(path)))
    {
        script = reader.ReadToEnd();
    }

    lua.DoString(script);
}
/// <summary>
/// Checks that <c>EncodingDetector.AssertEr7Encoded</c> throws
/// <see cref="InvalidOperationException"/> for the sample input, whose second
/// segment starts with "UNKNOWN|".
/// </summary>
public void AssertEr7Encoded_4thCharacterOfgivenSegmentNotDelimiter_ThrowsInvalidOperationException()
{
    // Arrange
    const string message = "MSH|^~\\&||\rUNKNOWN|";

    // Act / Assert
    Assert.Throws<InvalidOperationException>(() => EncodingDetector.AssertEr7Encoded(message));
}
/// <summary>
/// Prints, for every file in the "[Encode Sample]" folder, whether its
/// detected encoding is "UTF-8" together with the detection confidence.
/// </summary>
public void IsUTF8Test()
{
    string[] sampleFiles = Directory.GetFiles(@"..\..\[Encode Sample]");

    for (int i = 0; i < sampleFiles.Length; i++)
    {
        string samplePath = sampleFiles[i];

        float confidence;
        string detected = EncodingDetector.GetEncoding(samplePath, out confidence);
        bool isUtf8 = detected == "UTF-8";

        Console.WriteLine($"{Path.GetFileName(samplePath)}: {isUtf8} confidence: {confidence:F3}");
    }
}
/// <summary>
/// Checks that <c>EncodingDetector.IsEr7Encoded</c> returns true for a
/// three-segment (MSH / PID / PV1) message.
/// </summary>
/// <remarks>
/// NOTE(review): the method name says "InvalidInput" while the assertion
/// expects true — confirm the intended name.
/// </remarks>
public void IsEr7Encoded_InvalidInput_ReturnsTrue()
{
    // Arrange
    const string mshSegment = "MSH|^~\\&|LABGL1||DMCRES||19951002185200||ADT^A01|LABGL1199510021852632|P|2.2\r";
    const string pidSegment = "PID|||T12345||TEST^PATIENT^P||19601002|M||||||||||123456\r";
    const string pv1Segment = "PV1|||NER|||||||GSU||||||||E||||||||||||||||||||||||||19951002174900|19951006\r";
    const string message = mshSegment + pidSegment + pv1Segment;

    // Act
    var isEr7 = EncodingDetector.IsEr7Encoded(message);

    // Assert
    Assert.IsTrue(isEr7);
}
/// <summary>
/// Encodes the shared test text with the given code page and checks that
/// <c>EncodingDetector.GetEncoding</c> reports the same code page.
/// </summary>
/// <param name="codepage">Code page identifier used to encode the test text.</param>
public void TestDetectingCodepages(int codepage)
{
    // Arrange
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    Encoding sourceEncoding = Encoding.GetEncoding(codepage);
    var encodedText = Common.CreateStreamFromText(Common.TestResultsDataString, sourceEncoding);

    // Act
    var detected = EncodingDetector.GetEncoding(encodedText);

    // Assert
    Assert.Equal(codepage, detected.CodePage);
}
/// <summary>
/// Checks that <c>EncodingDetector.AssertEr7Encoded</c> does not throw for a
/// three-segment (MSH / PID / PV1) message.
/// </summary>
public void AssertEr7Encoded_ValidInput_NoExceptionThrown()
{
    // Arrange
    const string mshSegment = "MSH|^~\\&|LABGL1||DMCRES||19951002185200||ADT^A01|LABGL1199510021852632|P|2.2\r";
    const string pidSegment = "PID|||T12345||TEST^PATIENT^P||19601002|M||||||||||123456\r";
    const string pv1Segment = "PV1|||NER|||||||GSU||||||||E||||||||||||||||||||||||||19951002174900|19951006\r";
    const string message = mshSegment + pidSegment + pv1Segment;

    // Act / Assert
    Assert.DoesNotThrow(() => EncodingDetector.AssertEr7Encoded(message));
}
/// <summary>
/// Writes ASCII text into a memory stream and checks that
/// <c>TryFindEncoding</c> returns null for it.
/// </summary>
public void TestDetectNoPreamble()
{
    const string sampleText = "Controversial opinion: White House Down is a better movie than Olympus has Fallen, ikr";

    using (var buffer = new MemoryStream())
    using (var asciiWriter = new StreamWriter(buffer, Encoding.ASCII))
    {
        // Arrange
        asciiWriter.Write(sampleText);
        asciiWriter.Flush();
        buffer.Position = 0;
        var detector = new EncodingDetector();

        // Act
        var detected = detector.TryFindEncoding(buffer);

        // Assert
        detected.Should().Be(null, "because there's no preamble to detect ASCII and the detector shall not make any guesses and give up");
    }
}
/// <summary>
/// Prints the detected encoding and confidence for every file in the
/// "[Encode Sample]" folder and then in the "[Encoding All Star]" folder.
/// </summary>
public void EncodeTest()
{
    float confindence;
    string[] sampleFolders =
    {
        @"..\..\[Encode Sample]",
        @"..\..\[Encoding All Star]",
    };

    foreach (string folder in sampleFolders)
    {
        foreach (string samplePath in Directory.GetFiles(folder))
        {
            Console.WriteLine($"{Path.GetFileName(samplePath)}: {EncodingDetector.GetEncoding(samplePath, out confindence)} ({confindence:F3})");
        }
    }
}
/// <summary>
/// Reads shared.lua into the matching model's line list for later manipulation.
/// </summary>
/// <param name="projName">Project name used to look up the model.</param>
/// <param name="pathToSharedFile">Path to the shared.lua file.</param>
private void ReadModelSharedFileToList(string projName, string pathToSharedFile)
{
    IProjectModel matchingModel = interprojectExchange.Models
        .Where(candidate => candidate.ProjectName == projName)
        .FirstOrDefault();

    // Nothing to fill when no model carries this project name.
    if (matchingModel == null)
    {
        return;
    }

    var sharedFileEncoding = EncodingDetector.DetectFileEncoding(pathToSharedFile);
    string[] sharedLines = File.ReadAllLines(pathToSharedFile, sharedFileEncoding);
    matchingModel.SharedFileAsStringList = sharedLines.ToList();
}
/// <summary>
/// Runs the encoding detector on a file and prints the result to the console.
/// Any exception is caught and its message is printed instead.
/// </summary>
/// <param name="fileName">File name that <c>Common.GetFilePath</c> resolves to a full path.</param>
public static void ExtractEncodingByBOM(string fileName)
{
    //ExStart:ExtractEncodingByBOM
    try
    {
        EncodingDetector detector = new EncodingDetector(Encoding.GetEncoding(1251));

        //get file actual path
        String filePath = Common.GetFilePath(fileName);

        // Dispose the stream so the file handle is not left open.
        using (Stream stream = new FileStream(filePath, FileMode.Open))
        {
            Console.WriteLine(detector.Detect(stream));
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    //ExEnd:ExtractEncodingByBOM
}
/// <summary>
/// Writes text with the given encoding into a memory stream and checks that
/// <c>TryFindEncoding</c> returns that same encoding.
/// </summary>
/// <param name="encoding">Encoding under test; it must have a non-empty preamble.</param>
public void TestDetect([ValueSource(nameof(EncodingsWithPreamble))] Encoding encoding)
{
    const string sampleText = "Controversial opinion: White House Down is a better movie than Olympus has Fallen, ikr";

    byte[] bom = encoding.GetPreamble();
    bom.Length.Should().BeGreaterOrEqualTo(1, "because this test doesn't make much sense if the encoding has no preamble");

    using (var buffer = new MemoryStream())
    using (var encodedWriter = new StreamWriter(buffer, encoding))
    {
        // Arrange
        encodedWriter.Write(sampleText);
        encodedWriter.Flush();
        buffer.Position = 0;
        var detector = new EncodingDetector();

        // Act
        var detected = detector.TryFindEncoding(buffer);

        // Assert
        detected.Should().Be(encoding);
    }
}
/// <summary>
/// Loads the file at <c>FileName</c>: a first pass settles <c>FileEncoding</c>,
/// a second pass reads the content through <c>LoadContent</c>.
/// </summary>
/// <returns>
/// <c>true</c> when the content was loaded; <c>false</c> when the file does not
/// exist or an <see cref="IOException"/> occurred (the message is traced).
/// </returns>
public bool Load()
{
    if (!File.Exists(FileName))
    {
        return false;
    }

    try
    {
        Sections.Clear();

        // Pass 1: read up to the first non-empty line to decide the encoding.
        using (var probe = new StreamReader(FileName, FileEncoding, true))
        {
            string firstLine = null;
            while (string.IsNullOrEmpty(firstLine) && !probe.EndOfStream)
            {
                firstLine = probe.ReadLine();
            }

            bool alreadyUtf16 = FileEncoding.Equals(Encoding.Unicode)
                || FileEncoding.Equals(Encoding.BigEndianUnicode);

            if (!alreadyUtf16 && EncodingDetector.IsUnicode(firstLine))
            {
                FileEncoding = Encoding.Unicode;
            }
            else
            {
                // Otherwise keep whatever encoding the reader ended up using.
                FileEncoding = probe.CurrentEncoding;
            }
        }

        // Pass 2: read the content with the settled encoding.
        using (var contentReader = new StreamReader(FileName, FileEncoding, true))
        {
            LoadContent(contentReader);
        }

        return true;
    }
    catch (IOException ex)
    {
        System.Diagnostics.Trace.TraceError(ex.Message);
        return false;
    }
}
/// <summary>
/// Detects the encoding of an uploaded file and returns it as a JSON list
/// with a single entry.
/// </summary>
/// <param name="fileName">Name of the file, appended to the uploads folder path.</param>
/// <returns>
/// JSON list holding the detected encoding, or "File Format not supported"
/// when any exception occurs.
/// </returns>
public ActionResult ExtractDocumentEndocing([FromBody] string fileName)
{
    List<string> extractedText = new List<string>();

    // NOTE(review): this instance is never used; kept in case construction has
    // a side effect this method relies on — confirm and remove.
    ExtractorFactory factory = new ExtractorFactory();

    // NOTE(review): fileName comes from the request body and is concatenated
    // into the path unchecked ("..\" segments are not rejected) — confirm that
    // callers validate it.
    string filePath = Server.MapPath("../App_Data//Uploads//" + fileName);

    try
    {
        EncodingDetector detector = new EncodingDetector(Encoding.GetEncoding(1251));

        // Dispose the stream so the uploaded file is not left open.
        using (Stream stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        {
            extractedText.Add(detector.Detect(stream).ToString());
        }
    }
    catch (Exception)
    {
        extractedText.Add("File Format not supported");
    }

    return Json(extractedText, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Reads the shared file of an alternative project, runs it in Lua and
/// refreshes the model's copy of the file lines.
/// </summary>
/// <param name="pathToProjectsDir">Path to the directory with the projects.</param>
/// <param name="projName">Project name.</param>
public void LoadAdvancedProjectSharedLuaData(string pathToProjectsDir,
    string projName)
{
    string pathToSharedFile = Path.Combine(pathToProjectsDir, projName,
        signalsFile);

    // A missing shared file is skipped silently.
    if (!File.Exists(pathToSharedFile))
    {
        return;
    }

    string sharedInfo;

    // The using block closes the file even when reading throws.
    using (var reader = new StreamReader(pathToSharedFile,
        EncodingDetector.DetectFileEncoding(pathToSharedFile)))
    {
        sharedInfo = reader.ReadToEnd();
    }

    lua.DoString(sharedInfo);
    // Function defined in Lua
    lua.DoString("init_advanced_project_shared_lua()");

    ReadModelSharedFileToList(projName, pathToSharedFile);
}
/// <summary>
/// Detects the encoding of the sample bytes in <paramref name="dto"/> and
/// updates the shared counters, timing the work under a per-thread key.
/// </summary>
/// <param name="dto">Carries the sample bytes to analyse.</param>
private void DetectEncodingSyncAction(DetectEncodingFileDto dto)
{
    string timerKey = "DetectEncodingSyncAction_" + Thread.CurrentThread.ManagedThreadId;
    StopWatch.Start(timerKey);

    Encoding detected = EncodingDetector.Detect(dto.SampleBytes, EncodingDetector.Options.MLang);
    bool wasDetected = detected != null;

    // The counters are only modified while holding lockObj.
    lock (lockObj)
    {
        _totalFiles++;
        if (wasDetected)
        {
            _numFoundEncodings++;
        }
    }

    StopWatch.Stop(timerKey);
}
/// <summary>
/// Rewrites a file in place when it is detected as <c>FileRecoder.win1252</c>:
/// every byte at or above 0x80 is replaced by its <c>FileRecoder.charmap1</c>
/// sequence, all other bytes are copied unchanged.
/// </summary>
/// <param name="context">Supplies the path of the file through <c>FullFileName</c>.</param>
/// <returns>Always <c>null</c>.</returns>
public static Event? Recode(FileContext context)
{
    byte[] content = File.ReadAllBytes(context.FullFileName);
    Encoding encoding = EncodingDetector.Detect(content);
    if (encoding != FileRecoder.win1252)
    {
        return null;
    }

    // Compute the exact output size. The buffer is wrapped in a fixed-capacity
    // MemoryStream below, so an estimate that is too small would make the
    // writes throw NotSupportedException.
    int length = 0;
    foreach (byte b in content)
    {
        length += b >= 0x80 ? FileRecoder.charmap1[b - 0x80].Length : 1;
    }

    // Reuse the shared buffer when it is already large enough.
    // NOTE(review): this static buffer is shared state; concurrent calls would
    // overwrite each other — confirm callers are single-threaded.
    if (FileRecoder.resultContent.Length < length)
    {
        FileRecoder.resultContent = new byte[length];
    }

    using (MemoryStream mem = new(FileRecoder.resultContent))
    {
        foreach (byte b in content)
        {
            if (b >= 0x80)
            {
                byte[] replacement = FileRecoder.charmap1[b - 0x80];
                mem.Write(replacement, 0, replacement.Length);
            }
            else
            {
                mem.WriteByte(b);
            }
        }

        length = (int)mem.Position;
    }

    using FileStream stream = File.OpenWrite(context.FullFileName);
    stream.Write(FileRecoder.resultContent, 0, length);

    // File.OpenWrite does not truncate; cut the file so no stale bytes remain
    // if the new content is shorter than the old one.
    stream.SetLength(length);
    return null;
}
/// <summary>
/// Loads the project's devices.lua file and runs it in Lua; shows an error
/// message when the file does not exist.
/// </summary>
/// <param name="pathToProjectsDir">Path to the directory with the projects.</param>
/// <param name="projName">Project name.</param>
private void LoadDevicesFile(string pathToProjectsDir, string projName)
{
    string pathToDevices = Path.Combine(pathToProjectsDir, projName,
        deviceDescriptionFile);

    if (File.Exists(pathToDevices))
    {
        string devicesInfo;

        // The using block closes the file even when reading throws.
        using (var reader = new StreamReader(pathToDevices,
            EncodingDetector.DetectFileEncoding(pathToDevices)))
        {
            devicesInfo = reader.ReadToEnd();
        }

        lua.DoString(devicesInfo);
        // Function defined in Lua
        lua.DoString("system.init_dev_names()");
    }
    else
    {
        form.ShowErrorMessage($"Не найден файл main.devices.lua " +
            $"проекта \"{projName}\"");
    }
}
/// <summary>
/// Reads the PLC information from main.io.lua and runs it in Lua; shows an
/// error dialog when the file does not exist.
/// </summary>
/// <param name="pathToProjectsDir">Path to the folder with the projects.</param>
/// <param name="projName">Project name.</param>
private void LoadMainIOData(string pathToProjectsDir, string projName)
{
    string pathToIOFile = Path.Combine(pathToProjectsDir, projName,
        devicesAndPLCFile);

    if (File.Exists(pathToIOFile))
    {
        string ioInfo;

        // The using block closes the file even when reading throws.
        using (var reader = new StreamReader(pathToIOFile,
            EncodingDetector.DetectFileEncoding(pathToIOFile)))
        {
            ioInfo = reader.ReadToEnd();
        }

        lua.DoString(ioInfo);
        // Function defined in Lua
        lua.DoString("init_io_file()");
    }
    else
    {
        MessageBox.Show($"Не найден файл main.io.lua проекта" +
            $" \"{projName}\"", "Ошибка", MessageBoxButtons.OK,
            MessageBoxIcon.Error);
    }
}
/// <summary>
/// Loads the description of the base objects from the system files folder.
/// When the file is missing it is first created from the
/// "SysBaseObjectsDescriptionPattern" resource and the user is notified.
/// </summary>
/// <returns>The full text of the description file.</returns>
private string LoadBaseTechObjectsDescription()
{
    var fileName = "sys_base_objects_description.lua";
    var pathToFile = Path.Combine(
        ProjectManager.GetInstance().SystemFilesPath, fileName);

    if (!File.Exists(pathToFile))
    {
        string template = EasyEPlanner.Properties.Resources
            .ResourceManager
            .GetString("SysBaseObjectsDescriptionPattern");
        File.WriteAllText(pathToFile, template,
            EncodingDetector.UTF8);
        MessageBox.Show("Файл с описанием базовых объектов не найден." +
            " Будет создан пустой файл (без описания).", "Ошибка",
            MessageBoxButtons.OK, MessageBoxIcon.Error);
    }

    // Dispose the reader so the file handle is released deterministically.
    using (var reader = new StreamReader(pathToFile,
        EncodingDetector.DetectFileEncoding(pathToFile)))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Writes a text report for a document: detected encoding, metadata (when a
/// metadata extractor is available) and the parsed text content.
/// </summary>
/// <param name="fileName">Name of the input file; its extension selects the extraction path.</param>
/// <param name="folderName">Folder name forwarded to <c>ProcessTask</c>.</param>
/// <returns>
/// The response produced by <c>ProcessTask</c>, or a response with status code
/// 500 carrying the exception details when an exception reaches this method.
/// </returns>
private async Task<Response> ParseFileText(string fileName, string folderName)
{
    try
    {
        return await ProcessTask(fileName, folderName, ".txt", false, "", delegate(string inFilePath, string outPath, string zipOutFolder)
        {
            EncodingDetector detector = new EncodingDetector(Encoding.GetEncoding(1251));

            if (!Directory.Exists(zipOutFolder))
            {
                Directory.CreateDirectory(zipOutFolder);
            }

            // Section 1: detected encoding. WriteAllText creates or overwrites the report.
            using (Stream stream = new FileStream(inFilePath, FileMode.Open))
            {
                System.IO.File.WriteAllText(outPath, "Encoding: " + detector.Detect(stream, true) + Environment.NewLine);
            }

            // Section 2: metadata, only when an extractor exists for this file.
            ExtractorFactory factory = new ExtractorFactory();
            MetadataExtractor metadataExtractor = factory.CreateMetadataExtractor(inFilePath);
            if (metadataExtractor != null)
            {
                MetadataCollection metadataCollection = metadataExtractor.ExtractMetadata(inFilePath);
                System.IO.File.AppendAllText(outPath, Environment.NewLine + "Metadata:" + Environment.NewLine);
                foreach (string key in metadataCollection.Keys)
                {
                    System.IO.File.AppendAllText(outPath, string.Format("{0} = {1}", key, metadataCollection[key]) + Environment.NewLine);
                }
            }

            // Section 3: parsed content.
            System.IO.File.AppendAllText(outPath, Environment.NewLine + "Parsed content:" + Environment.NewLine);

            // Invariant lower-casing keeps the extension lookup independent of
            // the current culture.
            string fileExt = Path.GetExtension(fileName).Substring(1).ToLowerInvariant();
            if (GetFormatType(fileExt) == FormatType.Excel)
            {
                CellsTextExtractor extractor = new CellsTextExtractor(inFilePath);
                extractor.ExtractMode = ExtractMode.Standard;
                for (int sheetIndex = 0; sheetIndex < extractor.SheetCount; sheetIndex++)
                {
                    // Label each sheet with its own 1-based number rather than
                    // the total sheet count.
                    System.IO.File.AppendAllText(outPath, Environment.NewLine + "Sheet # " + (sheetIndex + 1) + Environment.NewLine);
                    System.IO.File.AppendAllText(outPath, extractor.ExtractSheet(sheetIndex));
                }
            }
            else
            {
                // Prefer the formatted extractor; fall back to the plain one.
                TextExtractor textExtractor = factory.CreateFormattedTextExtractor(inFilePath);
                if (textExtractor == null)
                {
                    textExtractor = factory.CreateTextExtractor(inFilePath);
                }

                System.IO.File.AppendAllText(outPath, textExtractor.ExtractAll());
            }
        });
    }
    catch (Exception exc)
    {
        return new Response
        {
            FileName = fileName,
            FolderName = folderName,
            OutputType = "txt",
            Status = exc.Message,
            StatusCode = 500,
            Text = exc.ToString()
        };
    }
}
/// <summary>
/// Checks that <c>EncodingDetector.IsXmlEncoded</c> returns false for the supplied input.
/// </summary>
/// <param name="input">Input supplied by the test case.</param>
public void IsXmlEncoded_InvalidInput_ReturnsFalse(string input)
{
    // Act
    var isXml = EncodingDetector.IsXmlEncoded(input);

    // Assert
    Assert.IsFalse(isXml);
}
/// <summary>
/// Checks that <c>EncodingDetector.AssertXmlEncoded</c> throws
/// <see cref="ArgumentException"/> for the supplied input.
/// </summary>
/// <param name="input">Input supplied by the test case.</param>
public void AssertXmlEncoded_InValidInput_ThrowsArgumentException(string input)
{
    // Act / Assert
    Assert.Throws<ArgumentException>(() => EncodingDetector.AssertXmlEncoded(input));
}