/// <summary>
/// Parses data from a file and builds it into a collection of <see cref="Document"/> objects.
/// </summary>
/// <param name="file">The load file to parse.</param>
/// <param name="encoding">The text encoding used to read the file.</param>
/// <returns>A <see cref="DocumentCollection"/> built from the parsed records.</returns>
public DocumentCollection Import(FileInfo file, Encoding encoding)
{
    List<string[]> records = parser.Parse(file, encoding);
    List<Document> documentList = builder.Build(records);
    DocumentCollection documents = new DocumentCollection(documentList);
    return documents;
}
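// A minimal usage sketch of Import. The containing "Importer" type, the sample
// path, and the code page are illustrative assumptions, not from the source.
var importer = new Importer();                  // hypothetical class exposing Import as above
DocumentCollection imported = importer.Import(
    new FileInfo(@"X:\VOL001\infile.dat"),      // hypothetical load file
    Encoding.GetEncoding(1252));                // Windows-1252 is common for Concordance DAT files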
public void Exporters_DatExporter_FromCsvTest()
{
    // Arrange
    var mockReader = new Mock<TextReader>();
    var mockWriter = new Mock<TextWriter>();
    int calls = 0;
    mockReader
        .Setup(r => r.ReadLine())
        .Returns(() => datLines[calls])
        .Callback(() => calls++);
    List<string> output = new List<string>();
    mockWriter
        .Setup(w => w.WriteLine(It.IsAny<string>()))
        .Callback((string s) => output.Add(s));
    FileInfo infile = new FileInfo(@"X:\VOL001\infile.dat");
    bool hasHeader = true;
    string keyColName = "DOCID";
    string parentColName = "BEGATT";
    string childColName = String.Empty;
    string childColDelim = ";";
    RepresentativeBuilder repSetting = new RepresentativeBuilder("NATIVE", Representative.FileType.Native);
    List<RepresentativeBuilder> reps = new List<RepresentativeBuilder>();
    reps.Add(repSetting);
    var builder = new DatBuilder();
    IParser parser = new DatParser(Delimiters.CONCORDANCE);
    builder.ChildColumnName = childColName;
    builder.ChildSeparator = childColDelim;
    builder.RepresentativeBuilders = reps;
    builder.PathPrefix = infile.Directory.FullName;
    builder.HasHeader = hasHeader;
    builder.ParentColumnName = parentColName;
    builder.KeyColumnName = keyColName;

    // Act
    List<string[]> records = parser.Parse(mockReader.Object);
    List<Document> documents = builder.Build(records);
    DocumentCollection docs = new DocumentCollection(documents);
    string[] fields = new string[] { "DOCID", "BEGATT", "VOLUME", "NATIVE" };
    var exporter = DatExporter.Builder
        .Start(mockWriter.Object, fields)
        .SetDelimiters(Delimiters.PIPE_CARET)
        .Build();
    exporter.Export(docs);

    // Assert
    Assert.AreEqual("^DOCID^|^BEGATT^|^VOLUME^|^NATIVE^", output[0]);
    Assert.AreEqual("^DOC000001^|^DOC000001^|^VOL001^|^X:\\VOL001\\NATIVE\\0001\\DOC000001.XLSX^", output[1]);
    Assert.AreEqual("^DOC000002^|^DOC000001^|^VOL001^|^^", output[2]);
}
public DocumentCollection GetDocs()
{
    var datLines = new List<string>(new string[] {
        "þDOCIDþþBEGATTþþVOLUMEþþDOCTYPEþþNATIVEþ",
        "þDOC000001þþDOC000001þþVOL001þþEMAILþþX:\\VOL001\\NATIVE\\0001\\DOC000001.XLSXþ",
        "þDOC000002þþDOC000001þþVOL001þþPDFþþþ",
        null,
        null
    });
    var optLines = new List<string[]> {
        new string[] { "DOC000001", "VOL001", "X:\\VOL001\\IMAGES\\0001\\DOC000001.jpg", "Y", "", "", "1" },
        new string[] { "DOC000002", "VOL001", "X:\\VOL001\\IMAGES\\0001\\DOC000002.tif", "Y", "", "", "2" },
        new string[] { "DOC000003", "VOL001", "X:\\VOL001\\IMAGES\\0001\\DOC000003.tif", "", "", "", "" }
    };
    var mockReader = new Mock<TextReader>();
    int calls = 0;
    mockReader
        .Setup(r => r.ReadLine())
        .Returns(() => datLines[calls])
        .Callback(() => calls++);
    FileInfo infile = new FileInfo(@"X:\VOL001\infile.dat");
    bool hasHeader = true;
    string keyColName = "DOCID";
    string parentColName = "BEGATT";
    string childColName = String.Empty;
    string childColDelim = ";";
    RepresentativeBuilder repSetting = new RepresentativeBuilder("NATIVE", Representative.FileType.Native);
    List<RepresentativeBuilder> reps = new List<RepresentativeBuilder>();
    reps.Add(repSetting);
    var builder = new DatBuilder();
    IParser parser = new DatParser(Delimiters.CONCORDANCE);
    List<string[]> records = parser.Parse(mockReader.Object);
    builder.HasHeader = hasHeader;
    builder.KeyColumnName = keyColName;
    builder.ParentColumnName = parentColName;
    builder.ChildColumnName = childColName;
    builder.ChildSeparator = childColDelim;
    builder.RepresentativeBuilders = reps;
    builder.PathPrefix = infile.Directory.FullName;
    List<Document> documents = builder.Build(records);
    var docs = new DocumentCollection(documents);
    var optBuilder = new OptBuilder();
    optBuilder.PathPrefix = String.Empty;
    optBuilder.TextBuilder = null;
    List<Document> optDocs = optBuilder.Build(optLines);
    docs.AddRange(optDocs);
    docs[1].SetParent(docs[0]);
    return docs;
}
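// For reference, the optLines rows above follow the standard Opticon (OPT)
// column layout: image key, volume name, image path, document break ("Y" marks
// the first page of a document), box break, folder break, and page count.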
public void Exporters_OptExporter_FromCsvTest()
{
    // Arrange
    var mockReader = new Mock<TextReader>();
    var mockWriter = new Mock<TextWriter>();
    int calls = 0;
    mockReader
        .Setup(r => r.ReadLine())
        .Returns(() => datLines[calls])
        .Callback(() => calls++);
    List<string> output = new List<string>();
    mockWriter
        .Setup(w => w.WriteLine(It.IsAny<string>()))
        .Callback((string s) => output.Add(s));
    FileInfo infile = new FileInfo(@"X:\VOL001\infile.dat");
    bool hasHeader = true;
    string keyColName = "DOCID";
    string parentColName = "BEGATT";
    string childColName = String.Empty;
    string childColDelim = ";";
    string vol = "TEST001";
    RepresentativeBuilder repSetting = new RepresentativeBuilder("NATIVE", Representative.FileType.Native);
    List<RepresentativeBuilder> reps = new List<RepresentativeBuilder>();
    reps.Add(repSetting);
    var builder = new DatBuilder();
    IParser parser = new DatParser(Delimiters.CONCORDANCE);
    builder.HasHeader = hasHeader;
    builder.KeyColumnName = keyColName;
    builder.ParentColumnName = parentColName;
    builder.ChildColumnName = childColName;
    builder.ChildSeparator = childColDelim;
    builder.RepresentativeBuilders = reps;
    builder.PathPrefix = infile.Directory.FullName;

    // Act
    List<string[]> records = parser.Parse(mockReader.Object);
    List<Document> documents = builder.Build(records);
    DocumentCollection docs = new DocumentCollection(documents);
    var exporter = OptExporter.Builder
        .Start(mockWriter.Object)
        .SetVolumeName(vol)
        .Build();
    exporter.Export(docs);

    // Assert
    // The DAT fixture builds only NATIVE representatives, so there are no
    // images for the OPT exporter to write.
    Assert.AreEqual(0, output.Count);
}
public void TestSetup()
{
    datLines = new List<string>(new string[] {
        "þDOCIDþþBEGATTþþVOLUMEþþNATIVEþ",
        "þDOC000001þþDOC000001þþVOL001þþX:\\VOL001\\NATIVE\\0001\\DOC000001.XLSXþ",
        "þDOC000002þþDOC000001þþVOL001þþþ",
        null,
        null
    });
    var mockReader = new Mock<TextReader>();
    int calls = 0;
    mockReader
        .Setup(r => r.ReadLine())
        .Returns(() => datLines[calls])
        .Callback(() => calls++);
    FileInfo infile = new FileInfo(@"X:\VOL001\infile.dat");
    bool hasHeader = true;
    string keyColName = "DOCID";
    string parentColName = "BEGATT";
    string childColName = String.Empty;
    string childColDelim = ";";
    RepresentativeBuilder repSetting = new RepresentativeBuilder("NATIVE", Representative.FileType.Native);
    List<RepresentativeBuilder> reps = new List<RepresentativeBuilder>();
    reps.Add(repSetting);
    var builder = new DatBuilder();
    IParser parser = new DatParser(Delimiters.CONCORDANCE);
    List<string[]> records = parser.Parse(mockReader.Object);
    builder.HasHeader = hasHeader;
    builder.KeyColumnName = keyColName;
    builder.ParentColumnName = parentColName;
    builder.ChildColumnName = childColName;
    builder.ChildSeparator = childColDelim;
    builder.RepresentativeBuilders = reps;
    builder.PathPrefix = infile.Directory.FullName;
    List<Document> documents = builder.Build(records);
    docs = new DocumentCollection(documents);
}
static void ReplaceDatImages(string srcDatPath, string destDatPath, string replacementList)
{
    List<string> tempPaths = new List<string>();
    try
    {
        using (DatReader dat = new DatReader(File.OpenRead(srcDatPath)))
        {
            DatBuilder builder = new DatBuilder(dat);
            using (StreamReader sr = File.OpenText(replacementList))
            {
                while (!sr.EndOfStream)
                {
                    var line = sr.ReadLine().Trim();
                    // Skip blank lines and comments
                    if (line.Length == 0 || line.StartsWith("#"))
                    {
                        continue;
                    }

                    var lineSplit = line.Split(' ', 2, StringSplitOptions.RemoveEmptyEntries);
                    if (lineSplit.Length != 2)
                    {
                        throw new InvalidDataException($"Invalid line \"{line}\".");
                    }
                    if (!int.TryParse(lineSplit[0], out var imageIndex))
                    {
                        throw new InvalidDataException($"Invalid index on line \"{line}\".");
                    }

                    // Preserve the level and buffer bases from the entry being replaced
                    byte level = 1;
                    ushort bufferBase = 0;
                    ushort paletteBufferBase = 0;
                    if (imageIndex < dat.EntriesCount)
                    {
                        using (MemoryStream ms = new MemoryStream(dat.GetData(imageIndex)))
                        {
                            TxmHeader txm = new TxmHeader();
                            txm.Read(new BinaryReader(ms));
                            level = (byte)(txm.Misc & 0x0f);
                            bufferBase = txm.ImageBufferBase;
                            paletteBufferBase = txm.ClutBufferBase;
                        }
                    }

                    // Convert the replacement image to TXM in a temp file
                    string tempPath = Path.GetTempFileName();
                    tempPaths.Add(tempPath);
                    using (FileStream fs = File.Create(tempPath))
                    {
                        TxmConversion.ConvertImageToTxm(lineSplit[1], fs, level, bufferBase, paletteBufferBase);
                    }

                    builder.ReplacementEntries.Add(new DatBuilder.ReplacementEntry
                    {
                        Index = imageIndex,
                        SourceFile = tempPath
                    });
                }
            }

            using (FileStream fs = File.Create(destDatPath))
            {
                builder.Build(fs);
            }
        }
    }
    finally
    {
        foreach (var path in tempPaths)
        {
            File.Delete(path);
        }
    }
}
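// Replacement list format, as consumed by ReplaceDatImages above: one
// "<entry index> <image path>" pair per line; blank lines and lines starting
// with '#' are ignored. The file names below are hypothetical:
//
//   # swap in the fixed title texture
//   3 textures/title_fixed.png
//   12 textures/font_fixed.png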
public void Transformers_Transformer_Test()
{
    // Arrange
    List<Transformation> edits = new List<Transformation>();
    var builder = new DatBuilder();
    builder.HasHeader = true;
    builder.KeyColumnName = "DOCID";
    builder.ParentColumnName = "BEGATT";
    builder.ChildColumnName = "";
    builder.ChildSeparator = ";";
    builder.RepresentativeBuilders = new RepresentativeBuilder[] {
        new RepresentativeBuilder("NATIVE", Representative.FileType.Native)
    };
    builder.PathPrefix = String.Empty;

    // edit 1: replace "VOL" with "TEST" in the VOLUME field
    string fieldName = "VOLUME";
    Regex find = new Regex("VOL");
    string replace = "TEST";
    string altDest = null;
    string prepend = null;
    string append = null;
    string join = null;
    Regex filterText = null;
    string filterField = null;
    DirectoryInfo dir = null;
    edits.Add(MetaDataTransformation.Builder
        .Start(fieldName, find, replace, filterField, filterText)
        .SetAltDestinationField(altDest)
        .SetPrependField(prepend)
        .SetAppendField(append)
        .SetJoinDelimiter(join)
        .SetPrependDir(dir)
        .Build());

    // edit 2: prepend the VOLUME value to TEST3
    fieldName = "TEST3";
    find = null;
    replace = String.Empty;
    prepend = "VOLUME";
    edits.Add(MetaDataTransformation.Builder
        .Start(fieldName, find, replace, filterField, filterText)
        .SetAltDestinationField(altDest)
        .SetPrependField(prepend)
        .SetAppendField(append)
        .SetJoinDelimiter(join)
        .SetPrependDir(dir)
        .Build());

    // edit 3: append VOLUME to TEST1 with a "-" join, only where TEST1 matches /a/
    fieldName = "TEST1";
    prepend = String.Empty;
    append = "VOLUME";
    filterText = new Regex("a");
    filterField = "TEST1";
    join = "-";
    edits.Add(MetaDataTransformation.Builder
        .Start(fieldName, find, replace, filterField, filterText)
        .SetAltDestinationField(altDest)
        .SetPrependField(prepend)
        .SetAppendField(append)
        .SetJoinDelimiter(join)
        .SetPrependDir(dir)
        .Build());

    // edit 4: where TEST1 matches /j/, write the TEST2 value to the alternate destination VOLUME
    fieldName = "TEST2";
    append = String.Empty;
    altDest = "VOLUME";
    filterText = new Regex("j");
    edits.Add(MetaDataTransformation.Builder
        .Start(fieldName, find, replace, filterField, filterText)
        .SetAltDestinationField(altDest)
        .SetPrependField(prepend)
        .SetAppendField(append)
        .SetJoinDelimiter(join)
        .SetPrependDir(dir)
        .Build());

    // edit 5: replace trailing E's in TEST2 with "x"
    altDest = String.Empty;
    filterText = null;
    find = new Regex("E+$");
    replace = "x";
    edits.Add(MetaDataTransformation.Builder
        .Start(fieldName, find, replace, filterField, filterText)
        .SetAltDestinationField(altDest)
        .SetPrependField(prepend)
        .SetAppendField(append)
        .SetJoinDelimiter(join)
        .SetPrependDir(dir)
        .Build());

    // edit 6: strip the X:\VOL001\ prefix from native representative paths
    edits.Add(RepresentativeTransformation.Builder
        .Start(Representative.FileType.Native, new Regex("X:\\\\VOL001\\\\"), String.Empty)
        .Build());

    // Act
    // records is a parsed-record fixture presumably prepared elsewhere in the test class
    List<Document> documents = builder.Build(records);
    DocumentCollection docs = new DocumentCollection(documents);
    Transformer transformer = new Transformer();
    transformer.Transform(docs, edits.ToArray());

    // Assert
    Assert.AreEqual("TEST001", docs[0].Metadata["VOLUME"]);
    Assert.AreEqual("TEST001k", docs[0].Metadata["TEST3"]);
    Assert.AreEqual("a-TEST001", docs[0].Metadata["TEST1"]);
    Assert.AreEqual("b", docs[1].Metadata["TEST1"]);
    Assert.AreEqual("DOOR", docs[9].Metadata["VOLUME"]);
    Assert.AreEqual("DOOR", docs[9].Metadata["TEST2"]);
    Assert.AreEqual("BUCKLx", docs[2].Metadata["TEST2"]);
    Assert.AreEqual("SHOx", docs[4].Metadata["TEST2"]);
    Assert.AreEqual("THRx", docs[5].Metadata["TEST2"]);
    Assert.AreEqual("THx", docs[8].Metadata["TEST2"]);
    Assert.AreEqual(Representative.FileType.Native, docs[0].Representatives.First().Type);
    Assert.AreEqual("NATIVE\\0001\\DOC000001.XLSX", docs[0].Representatives.First().Files.First().Value);
}
public void Write(Stream stream)
{
    Encoding shiftJisEncoding = Encoding.GetEncoding(932);
    List<string> tempFiles = new List<string>();
    try
    {
        ushort[] charRanges = new ushort[] { 0x0, 0x889f, 0x989f }; // General, Kanji 1, Kanji 2
        uint[] charCounts = new uint[charRanges.Length];

        // Build mapping from Unicode to Shift-JIS. Note this assumes every
        // character encodes to two bytes; a single-byte character would not
        // survive the (byte0 << 8) | byte1 packing below.
        List<Tuple<char, ushort>> encodedMapping = new List<Tuple<char, ushort>>();
        char[] chArray = new char[1];
        foreach (var ch in Characters)
        {
            chArray[0] = ch;
            byte[] encodedBytes = shiftJisEncoding.GetBytes(chArray);
            ushort encoded = (ushort)((encodedBytes[0] << 8) | encodedBytes[1]);
            encodedMapping.Add(new Tuple<char, ushort>(ch, encoded));
        }
        encodedMapping.Sort((x, y) => x.Item2.CompareTo(y.Item2));

        string infoFsPath = Path.GetTempFileName();
        tempFiles.Add(infoFsPath);
        int currentRange = -1;
        FileStream currentRangeFs = null;
        BinaryWriter currentRangeBw = null;
        DatBuilder datBuilder = new DatBuilder();
        datBuilder.ReplacementEntries.Add(new DatBuilder.ReplacementEntry
        {
            Index = 0,
            SourceFile = infoFsPath
        });
        using (FileStream infoFs = File.Create(infoFsPath))
        {
            BinaryWriter infoBw = new BinaryWriter(infoFs);
            infoBw.Write(new byte[4 + 4 * charCounts.Length]); // Dummy header, rewritten at the end
            try
            {
                foreach (var pair in encodedMapping)
                {
                    // Advance range if char matches next range's start
                    if (currentRange < charRanges.Length - 1 && pair.Item2 >= charRanges[currentRange + 1])
                    {
                        string path = Path.GetTempFileName();
                        tempFiles.Add(path);
                        if (currentRangeFs != null)
                        {
                            currentRangeFs.Close();
                        }
                        currentRangeFs = File.Create(path);
                        currentRangeBw = new BinaryWriter(currentRangeFs);
                        ++currentRange;
                        datBuilder.ReplacementEntries.Add(new DatBuilder.ReplacementEntry
                        {
                            Index = currentRange + 1,
                            SourceFile = path
                        });
                    }
                    infoBw.Write(pair.Item2);
                    currentRangeBw.Write(this[pair.Item1]);
                    ++charCounts[currentRange];
                }
            }
            finally
            {
                if (currentRangeFs != null)
                {
                    currentRangeFs.Close();
                }
            }

            uint length = (uint)infoFs.Length;
            // Pad to nearest 16 bytes
            var paddedLength = (length + 15) & ~15;
            infoBw.Write(new byte[paddedLength - length]);

            // Rewrite the header with the real length and per-range counts
            infoFs.Seek(0, SeekOrigin.Begin);
            infoBw.Write(length);
            foreach (var count in charCounts)
            {
                infoBw.Write(count);
            }
        }
        datBuilder.Build(stream);
    }
    finally
    {
        foreach (var file in tempFiles)
        {
            File.Delete(file);
        }
    }
}
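// Layout of entry 0 (the info block), as produced by Write above:
//
//   uint32    unpadded length of the info block
//   uint32[N] character count for each of the N ranges (here N = 3)
//   uint16[]  Shift-JIS codes for all characters, sorted ascending
//   byte[]    zero padding up to a 16-byte boundary
//
// Entries 1..N then hold the per-range glyph data written via this[char].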