/// <summary>
/// Copies the original file (<c>_origFilePath</c>) to <c>tempUpdFilePath</c>,
/// inserting the collected comments, new tag definitions and new format
/// definitions next to the matching structural tags, then calls
/// <c>RenameUpdFiles()</c>.
/// </summary>
/// <remarks>
/// The parser and the writer are released in a <c>finally</c> block so that a
/// failure while parsing or writing does not leave the files locked.
/// <c>RenameUpdFiles()</c> is only reached when the copy completed.
/// </remarks>
private void UpdateFile()
{
    // Reader over the original file.
    StreamFeeder origFFeeder = new StreamFeeder(_origFilePath);
    TagParser origFParser = new TagParser(origFFeeder);
    bool isWriterOpen = false;

    try
    {
        // Writer for the updated copy.
        FileHelper.CheckPath(tempUpdFilePath, true);
        _writer = new StreamWriter(tempUpdFilePath);
        isWriterOpen = true;

        NormalizeFileComments();

        bool isCmtFound = false;
        bool isTagFound = false;
        bool isFmtFound = false;

        while (origFParser.Next())
        {
            // ---- content emitted BEFORE the current tag ----

            // End of header: add whole fmt-defs / tag-defs sections if the
            // original had none, and the file-level comment reference.
            if (origFParser.TagID == Tags.HeaderEnd)
            {
                if (!isFmtFound && _headerNewFmts.Count > 0)
                {
                    WriteToFile(string.Format("{0}>", TagDirectory.FmtDefsStart));
                    WriteFmtsToFile();
                    WriteToFile(TagDirectory.FmtDefsEnd);
                }

                if (!isTagFound && _headerNewTags.Count > 0)
                {
                    WriteToFile(string.Format("{0}>", TagDirectory.TagDefsStart));
                    WriteTagsToFile();
                    WriteToFile(TagDirectory.TagDefsEnd);
                }

                if (_isFileCommentNew)
                {
                    WriteToFile(string.Format("{0} {1}=\"{2}\" />", TagDirectory.FileCmtStart, TagDirectory.AttrID, _origFileCommentID));
                }
            }

            // End of doc-info reached without a comments section: add one.
            if (origFParser.TagID == Tags.DocInfoEnd && _headerComments.Count > 0 && !isCmtFound)
            {
                WriteToFile(string.Format("{0}>", TagDirectory.CmtsStart));
                WriteCmtsToFile();
                WriteToFile(TagDirectory.CmtsEnd);
                isCmtFound = true;
            }

            // File start reached without any comments written yet: add a
            // complete doc-info block in front of the file tag.
            if (origFParser.TagID == Tags.FileStart && _headerComments.Count > 0 && !isCmtFound)
            {
                WriteToFile(string.Format("{0} {1}=\"{2}\">", TagDirectory.DocInfoStart, TagDirectory.AttrXmlNs, TagDirectory.AttrXmlNsValue));
                WriteToFile(string.Format("{0}>", TagDirectory.CmtsStart));
                WriteCmtsToFile();
                WriteToFile(TagDirectory.CmtsEnd);
                WriteToFile(TagDirectory.DocInfoEnd);

                // Mark the comments as written so that another file start tag
                // (or a later doc-info end) does not emit them a second time.
                isCmtFound = true;
            }

            // Existing fmt-defs section: append the new fmts before it closes.
            if (origFParser.TagID == Tags.FmtDefsEnd && _headerNewFmts.Count > 0)
            {
                WriteFmtsToFile();
                isFmtFound = true;
            }

            // ---- the current piece of the original is always copied ----
            WriteToFile(origFParser.ParsedText);

            // ---- content emitted AFTER the current tag ----

            // Existing comments section: add the comments right after it opens.
            if (origFParser.TagID == Tags.CmtsStart && _headerComments.Count > 0)
            {
                WriteCmtsToFile();
                isCmtFound = true;
            }

            // Existing tag-defs section: add the new tags right after it opens.
            if (origFParser.TagID == Tags.TagDefsStart && _headerNewTags.Count > 0)
            {
                WriteTagsToFile();
                isTagFound = true;
            }
        }
    }
    finally
    {
        origFParser.Dispose();
        if (isWriterOpen)
        {
            _writer.Dispose();
        }
    }

    RenameUpdFiles();
}
/// <summary>
/// Rebuilds the merged file in <c>tempWriteFilePath</c>: everything outside
/// trans-units and comment definitions is copied from the original file, while
/// each trans-unit is taken from the split file that the split-info file
/// assigns it to. Comment, tag and fmt-def information met in the split files
/// is collected through the <c>Add*</c> helpers. Ends with <c>RenameFiles()</c>.
/// </summary>
/// <exception cref="FileNotFoundException">
/// The split-info file names a split file that is empty/missing.
/// </exception>
/// <exception cref="InvalidDataException">
/// A split file's hash differs from <c>_fileOrigHash</c>, or no trans-unit
/// could be read from the current split file for a trans-unit of the original.
/// </exception>
/// <remarks>
/// All parsers and the writer are released in a <c>finally</c> block, so the
/// exceptions above no longer leave the input and temp files open.
/// </remarks>
private void DoMerge()
{
    string fileName = "";
    bool isTUFound = false;
    bool isNewTagFound = false;
    bool anyVal = false; // throw-away out value for TryGetValue lookups
    int tuNumber = 0;
    int tuCount = 0;
    int wordCount = 0;

    TagParser origFParser = null;
    TagParser splitFParser = null;
    bool isInfoParserOpen = false;
    bool isWriterOpen = false;

    _mergedFName = new List<string>();
    _origFileCommentID = Guid.NewGuid().ToString();

    try
    {
        // Parser for the split-info file.
        _infoFFeeder = new StreamFeeder(_infoFilePath);
        _infoFParser = new TagParser(_infoFFeeder);
        isInfoParserOpen = true;

        // Writer for the merged file.
        FileHelper.CheckPath(tempWriteFilePath, true);
        _writer = new StreamWriter(tempWriteFilePath);
        isWriterOpen = true;

        // Reader over the original file.
        StreamFeeder origFFeeder = new StreamFeeder(_origFilePath);
        origFParser = new TagParser(origFFeeder);

        // Reader over the first split file.
        string splitFName = Path.GetFileName(_inFiles[0]);
        StreamFeeder splitFFeeder = new StreamFeeder(string.Format(@"{0}\{1}", _inFilesPath, splitFName));
        splitFParser = new TagParser(splitFFeeder);
        _splitFFmts = new Dictionary<string, string>();
        _mergedFName.Add(splitFName);

        while (origFParser.Next())
        {
            if (!origFParser.IsInTransUnit && !origFParser.IsInCmtDef)
            {
                // Everything except trans-units and comment definitions is
                // copied straight from the original.
                WriteToFile(origFParser.ParsedText);

                // File-level comment id in the header.
                if (origFParser.IsInFileCmtClosed && !string.IsNullOrEmpty(origFParser.FileCmtIDAttr))
                {
                    _origFileCommentID = origFParser.FileCmtIDAttr;
                }

                // Tag id in the header.
                if (origFParser.TagID == Tags.TagStart && !string.IsNullOrEmpty(origFParser.TagIDAttr))
                {
                    AddTagIDToList(origFParser.TagIDAttr);
                }

                // Fmt-def id in the header (result of TryParse is ignored on
                // purpose, as in the original code).
                if (origFParser.TagID == Tags.FmtDefStart && !string.IsNullOrEmpty(origFParser.FmtDefIDAttr))
                {
                    int.TryParse(origFParser.FmtDefIDAttr, out _origFileFmtID);
                }

                // Fmt-defs that were just written to the merged file.
                if (origFParser.IsInFmtDef)
                {
                    AddFmtDefOrigToDict(origFParser.FmtDefIDAttr, origFParser.ParsedText);
                }
            }
            else if (origFParser.TagID == Tags.TransUnitStart)
            {
                // All trans-units of the current split file consumed (or none
                // read yet): take the next entry from the split-info file.
                if (tuCount == 0 || tuNumber >= tuCount)
                {
                    fileName = getFileNameFromSplitInfo();
                    tuCount = getTUCountFromSplitInfo();
                    wordCount = getWordCountFromSplitInfo();
                    tuNumber = 0;

                    // Entries with no words do not switch files; the current
                    // split parser simply keeps being read.
                    if (wordCount > 0)
                    {
                        splitFName = "";

                        if (string.IsNullOrEmpty(fileName) || !File.Exists(getInFilePath(fileName)))
                        {
                            throw new FileNotFoundException(string.Format(Properties.StringResource.errSplitFileNotFound, fileName));
                        }

                        if (getInFileHash(fileName) != _fileOrigHash)
                        {
                            throw new InvalidDataException(Properties.StringResource.errCanNotMerge);
                        }

                        // Open the next split file.
                        splitFName = fileName;
                        splitFParser.Dispose();
                        splitFParser = null; // avoid a second Dispose if re-opening fails
                        splitFFeeder = new StreamFeeder(string.Format(@"{0}\{1}", _inFilesPath, splitFName));
                        splitFParser = new TagParser(splitFFeeder);
                        _splitFFmts = new Dictionary<string, string>();

                        if (!_mergedFName.Contains(splitFName))
                        {
                            _mergedFName.Add(splitFName);
                        }
                    }
                }

                isTUFound = false;
                isNewTagFound = false;

                if (splitFName.Length > 0)
                {
                    // Read the split file up to and including the end of its
                    // next trans-unit, harvesting header/doc-info data met on
                    // the way.
                    while (splitFParser.Next())
                    {
                        if (splitFParser.IsInTransUnit)
                        {
                            // Trans-unit content comes from the split file.
                            WriteToFile(splitFParser.ParsedText);

                            // Segment-level comment ids in the body.
                            if (splitFParser.IsInMrkComm && splitFParser.TagID == Tags.MrkStart && !string.IsNullOrEmpty(splitFParser.MrkCommCIDAttr))
                            {
                                AddCommentIDToList(splitFParser.MrkCommCIDAttr);
                            }

                            if (splitFParser.TagID == Tags.TransUnitEnd)
                            {
                                isTUFound = true;
                                break;
                            }
                        }
                        // Comment definitions in doc-info.
                        else if (splitFParser.IsInCmtDef && !string.IsNullOrEmpty(splitFParser.CmtDefIDAttr))
                        {
                            AddCommentToDict(splitFParser.CmtDefIDAttr, splitFParser.ParsedText, (splitFParser.TagID == Tags.CmtDefStart));
                        }
                        // File-level comment ids in the header.
                        else if (splitFParser.IsInFileCmtClosed && !string.IsNullOrEmpty(splitFParser.FileCmtIDAttr))
                        {
                            AddHdrCommentIDToList(splitFParser.FileCmtIDAttr);
                        }
                        // Tags in the header: only tags whose id is not in
                        // _fileTagIDs are collected.
                        else if (splitFParser.IsInTag)
                        {
                            if (splitFParser.TagID == Tags.TagStart)
                            {
                                isNewTagFound = !_fileTagIDs.TryGetValue(splitFParser.TagIDAttr, out anyVal);
                            }

                            if (isNewTagFound)
                            {
                                string pText = splitFParser.ParsedText;
                                if (splitFParser.IsInFmtClosed)
                                {
                                    // Re-point the format tag at the id returned by FindFmtDef.
                                    pText = string.Format("{0} {1}=\"{2}\"/>", TagDirectory.FmtStart, TagDirectory.AttrID, FindFmtDef(splitFParser.FmtIDAttr));
                                }

                                AddTagToDict(splitFParser.TagIDAttr, pText, (splitFParser.TagID == Tags.TagStart));
                            }

                            if (splitFParser.TagID == Tags.TagEnd)
                            {
                                isNewTagFound = false;
                            }
                        }
                        // Fmt-defs in the header.
                        else if (splitFParser.IsInFmtDef && !string.IsNullOrEmpty(splitFParser.FmtDefIDAttr))
                        {
                            AddFmtDefToDict(splitFParser.FmtDefIDAttr, splitFParser.ParsedText);
                        }
                    }
                }

                if (!isTUFound)
                {
                    throw new InvalidDataException(string.Format(Properties.StringResource.errTUNotFound, origFParser.TransUnitIDAttr, splitFName, tuNumber));
                }

                tuNumber++;
            }

            // Report current operation progress.
            ProgressMerge(origFParser.Progress());
        }
    }
    finally
    {
        if (origFParser != null)
        {
            origFParser.Dispose();
        }

        if (splitFParser != null)
        {
            splitFParser.Dispose();
        }

        if (isInfoParserOpen)
        {
            _infoFParser.Dispose();
        }

        if (isWriterOpen)
        {
            _writer.Dispose();
        }
    }

    RenameFiles();
}
/// <summary>
/// Parses the file at <c>_fPath</c>, sums the word counts of the segments that
/// <c>isGroupWordsCountable</c> accepts, and stores in <c>_wordsMax</c> that
/// total divided by <paramref name="partsNumber"/>, rounded up.
/// </summary>
/// <param name="partsNumber">
/// Divisor for the total word count. The integer division below throws
/// <see cref="DivideByZeroException"/> when this is 0.
/// </param>
/// <exception cref="InvalidDataException">
/// No translatable mrk text inside a seg-source was found in the body.
/// </exception>
private void SetWordsCount(int partsNumber)
{
    int wordsInFile = 0;
    bool isMrkFound = false;

    // Word count per mrk id; reset at every seg-defs end tag.
    Dictionary<string, int> segmentCountWords = new Dictionary<string, int>();

    StreamFeeder feeder = new StreamFeeder(_fPath);
    TagParser tagParser = new TagParser(feeder);
    try
    {
        while (tagParser.Next())
        {
            // Only the body is of interest.
            if (tagParser.IsInBody)
            {
                if (!tagParser.IsTag)
                {
                    // TEXT: count the words of translatable source mrk text.
                    if (tagParser.IsInMrkText && tagParser.IsInSegSource && tagParser.isUnitTranslatable)
                    {
                        string mrkID = tagParser.MrkMIDAttr;
                        if (mrkID.Length > 0)
                        {
                            AddCountToDict(mrkID, TextHelper.GetWordsCount(tagParser.ParsedText, tagParser.SourceLangAttr), ref segmentCountWords);
                        }

                        isMrkFound = true;
                    }
                }
                else if (tagParser.TagID == Tags.SegStart)
                {
                    // TAG: a segment definition; add its words to the total
                    // when its attributes satisfy isGroupWordsCountable.
                    string segID = tagParser.ParsedText.AttributeValue(TagDirectory.AttrID) ?? "";
                    if (segID.Length > 0
                        && isGroupWordsCountable(
                            tagParser.ParsedText.AttributeValue(TagDirectory.AttrSegConf),
                            tagParser.ParsedText.AttributeValue(TagDirectory.AttrSegPerc),
                            tagParser.ParsedText.AttributeValue(TagDirectory.AttrSegLocked)))
                    {
                        int segWords;
                        if (segmentCountWords.TryGetValue(segID, out segWords))
                        {
                            wordsInFile += segWords;
                        }
                    }
                }
                else if (tagParser.TagID == Tags.SegDefsEnd)
                {
                    // Start a fresh per-segment dictionary.
                    segmentCountWords = new Dictionary<string, int>();
                }
            }

            // Report current operation progress.
            ProgressSplit(tagParser.Progress(), 1);
        }
    }
    finally
    {
        // Release the parser even when parsing or counting throws.
        tagParser.Dispose();
    }

    // No mrk text at all: the file is not pre-processed.
    if (!isMrkFound)
    {
        throw new InvalidDataException(Properties.StringResource.errFileUnexpectedStructure);
    }

    // Words per part, rounded up.
    _wordsMax = wordsInFile / partsNumber;
    if (wordsInFile % partsNumber > 0)
    {
        _wordsMax += 1;
    }
}
/// <summary>
/// Rewrites a split file into <c>tempUpdFilePath</c>, adding the collected
/// header tags right after the tag-defs start tag, then calls
/// <c>RenameUpdFiles()</c>.
/// </summary>
/// <param name="isLastFileEmpty">
/// <c>true</c>: the last split file is folded into the previous one. The
/// output counter is decremented, the body content of the last file is
/// appended before the previous file's body end tag, and the last file is
/// deleted. Tags already recorded in <c>_prevFileTags</c> are not written again.
/// <c>false</c>: the current split file is updated as usual.
/// </param>
private void UpdateFile(bool isLastFileEmpty)
{
    bool anyVal = false; // throw-away out value for TryGetValue lookups
    Dictionary<string, bool> addedTags = new Dictionary<string, bool>();

    if (isLastFileEmpty)
    {
        // Step back so that currFileOut refers to the previous split file.
        _outFCount--;
    }

    // Reader over the split file being updated.
    StreamFeeder splitFFeeder = new StreamFeeder(currFileOut);
    bool isWriterOpen = false;

    try
    {
        TagParser splitFParser = new TagParser(splitFFeeder);

        // Writer for the updated copy.
        FileHelper.CheckPath(tempUpdFilePath, true);
        _writer = new StreamWriter(tempUpdFilePath);
        isWriterOpen = true;

        while (splitFParser.Next())
        {
            // Just before the body closes: append the body content of the
            // last file, then remove that file.
            if (isLastFileEmpty && splitFParser.TagID == Tags.BodyEnd)
            {
                _outFCount++; // currFileOut now refers to the last file

                StreamFeeder lastFFeeder = new StreamFeeder(currFileOut);
                try
                {
                    TagParser lastFParser = new TagParser(lastFFeeder);
                    while (lastFParser.Next())
                    {
                        if (lastFParser.IsInBody && lastFParser.TagID != Tags.BodyStart && lastFParser.TagID != Tags.BodyEnd)
                        {
                            WriteToFile(lastFParser.ParsedText);
                        }
                    }
                }
                finally
                {
                    // Release the reader before the file is deleted.
                    lastFFeeder.Dispose();
                }

                File.Delete(currFileOut);
                _outFCount--;
            }

            // The current piece of the split file is always copied.
            WriteToFile(splitFParser.ParsedText);

            // Right after the tag-defs start tag: write the header tags that
            // belong in this file.
            if (splitFParser.TagID == Tags.TagDefsStart)
            {
                addedTags = new Dictionary<string, bool>();

                foreach (KeyValuePair<string, string> tag in _headerTags)
                {
                    // A tag with recorded sub-references is written only when
                    // every referenced trans-unit is in _splitFileTUs.
                    bool isTagToWrite = true;
                    string tagSubs;
                    if (_headerTagSubs != null && _headerTagSubs.TryGetValue(tag.Key, out tagSubs))
                    {
                        foreach (string tagTU in tagSubs.Split('|'))
                        {
                            if (!_splitFileTUs.TryGetValue(tagTU, out anyVal))
                            {
                                isTagToWrite = false;
                                break;
                            }
                        }
                    }

                    if (!isTagToWrite)
                    {
                        continue;
                    }

                    // When folding the last file into the previous one, skip
                    // tags already recorded for the previous file.
                    bool isAlreadyInPrevFile = isLastFileEmpty
                        && _prevFileTags != null
                        && _prevFileTags.TryGetValue(tag.Key, out anyVal);

                    if (!isAlreadyInPrevFile)
                    {
                        addedTags.Add(tag.Key, false);
                        WriteToFile(tag.Value);
                    }
                }
            }
        }

        // Remember which tags were added to this file.
        _prevFileTags = addedTags;
    }
    finally
    {
        splitFFeeder.Dispose();
        if (isWriterOpen)
        {
            _writer.Dispose();
        }
    }

    RenameUpdFiles();
}
/// <summary>
/// Parses the source file at <c>_fPath</c> piece by piece and writes it to the
/// current split output file, calling <c>WriteToNewFile()</c> at a body-level
/// group / trans-unit start once the split criterion is met (a target segment
/// id when the criterion is <c>SegmentNumbers</c>, otherwise
/// <c>countWords &gt;= _wordsMax</c>). Per-file trans-unit and word counts are
/// written through <c>_infoWriter</c>.
/// </summary>
/// <remarks>
/// Header tag definitions are not written during this pass; they are collected
/// with <c>AddTagToDict</c> / <c>AddTagSubToDict</c> and added afterwards by
/// <c>UpdateFile(bool)</c>. <c>ValidateFileAfterParsing</c> is called after
/// parsing and, per the inline comment, throws for an invalid file.
/// NOTE(review): the parser and writer are only disposed on the normal path;
/// an exception inside the loop leaves them open.
/// </remarks>
private void DoSplit()
{
    // Words counted so far for the split file currently being written.
    int countWords = 0;
    string mrkID = "";
    // True until the first piece that falls into the OTHER branch and is
    // neither a file nor an xliff start tag has been stored as the declaration.
    bool isStart = true;
    string segID;
    // int segIDNum;
    // Passed by ref to SetSegToSplit; a non-empty value takes part in the
    // SegmentNumbers split condition below.
    string segToSplit = "";
    // Words Count dictionary for every segment
    Dictionary <string, int> segmentCountWords = new Dictionary <string, int>();
    // Trans-unit ids referenced by <x> tags met since the last split.
    List <string> xTagTUs = new List <string>();
    // Trans-unit ids written to the current split file (values are unused).
    _splitFileTUs = new Dictionary <string, bool>();
    // generate file hash
    // (a local variable, despite the field-style underscore prefix)
    string _fileHash = FileHelper.SHA1HashFile(_fPath);
    // Init writer
    _outFCount = 1;
    FileHelper.CheckPath(currFileOut, true);
    FileHelper.CheckPath(_outPath, true);
    _writer = new StreamWriter(currFileOut);
    _infoWriter = new SplitInfoWriter(_fPath, _fileHash, _outPath);
    _infoWriter.WriteFileTag(Path.GetFileName(currFileOut));
    StreamFeeder feeder = new StreamFeeder(_fPath);
    TagParser tagParser = new TagParser(feeder);
    // Set when a second file start tag is met; also stops the loop.
    bool isFileCorrupt = false;
    // Cleared as soon as any piece inside the body is parsed.
    bool isFileEmpty = true;
    // Set when translatable mrk text inside a seg-source is found.
    bool isMrkFound = false;
    while (tagParser.Next() && !isFileCorrupt)
    {
        #region BODY LOGIC
        if (tagParser.IsInBody)
        {
            // if TEXT found -- count segment words and save in dictionary
            if (!tagParser.IsTag)
            {
                if (tagParser.IsInMrkText && tagParser.IsInSegSource && tagParser.isUnitTranslatable)
                {
                    mrkID = tagParser.MrkMIDAttr;
                    if (mrkID.Length > 0)
                    {
                        // AddCountToDict(mrkID, TextHelper.GetWordsCountEng(tagParser.ParsedText), ref segmentCountWords);
                        AddCountToDict(mrkID, TextHelper.GetWordsCount(tagParser.ParsedText, tagParser.SourceLangAttr), ref segmentCountWords);
                    }
                    isMrkFound = true;
                }
            }
            // if TAG found
            else
            {
                // get info about segments, count words in group
                // that satisfy the conditions
                if (tagParser.TagID == Tags.SegStart)
                {
                    segID = tagParser.ParsedText.AttributeValue(TagDirectory.AttrID) ?? "";
                    if (segID.Length > 0)
                    {
                        if (_options.Criterion == SplitOptions.SplitType.SegmentNumbers)
                        {
                            SetSegToSplit(segID, ref segToSplit);
                        }
                        if (isGroupWordsCountable(tagParser.ParsedText.AttributeValue(TagDirectory.AttrSegConf), tagParser.ParsedText.AttributeValue(TagDirectory.AttrSegPerc), tagParser.ParsedText.AttributeValue(TagDirectory.AttrSegLocked)))
                        {
                            if (segmentCountWords.ContainsKey(segID))
                            {
                                countWords += segmentCountWords[segID];
                            }
                        }
                    }
                }
                // clear the Words Count dictionary for every segment
                else if (tagParser.TagID == Tags.SegDefsEnd)
                {
                    segmentCountWords = new Dictionary <string, int>();
                }
                // we are at groups edge - check for words count >= max words count
                // to start new file
                else if ((tagParser.TagID == Tags.GroupStart || tagParser.TagID == Tags.TransUnitStart) && tagParser.isBodyParent)
                {
                    // Split only when every trans-unit id referenced by an <x> tag
                    // since the last split is already in the current file.
                    if (xTagTUs.Count == 0 || xTagTUs.Intersect(_splitFileTUs.Keys).Count() == xTagTUs.Count)
                    {
                        if ((_options.Criterion == SplitOptions.SplitType.SegmentNumbers && segToSplit.Length > 0 && !_segmentIDsFound.Contains(segToSplit)) || (_options.Criterion != SplitOptions.SplitType.SegmentNumbers && countWords >= _wordsMax))
                        {
                            // write to split info file
                            _infoWriter.WriteTransUnitsCountTag(_splitFileTUs.Count);
                            _infoWriter.WriteWordsCountTag(countWords);
                            AddSplitSegmentIDs(segToSplit);
                            // presumably closes the current output and opens the next one
                            // (helper not visible here) -- TODO confirm
                            WriteToNewFile();
                            countWords = 0;
                            segToSplit = "";
                            // clear TUs log lists
                            _splitFileTUs = new Dictionary <string, bool>();
                            xTagTUs = new List <string>();
                        }
                    }
                }
                // get trans-unit ids in <x> tag
                else if (tagParser.TagID == Tags.XStart && !string.IsNullOrEmpty(tagParser.XTagXIDAttr))
                {
                    // the xid attribute is treated as a space-separated id list
                    string[] xTUs = tagParser.XTagXIDAttr.Split(' ');
                    foreach (string xTU in xTUs)
                    {
                        if (!xTagTUs.Contains(xTU))
                        {
                            xTagTUs.Add(xTU);
                        }
                    }
                }
                // get any trans-unit id and write it to splitinfo file & list of trans-units of one split file
                // (a separate "if", not part of the chain above: it runs after the
                // split check, so the id lands in the file that receives the unit)
                if (tagParser.TagID == Tags.TransUnitStart)
                {
                    if (!_splitFileTUs.ContainsKey(tagParser.TransUnitIDAttr))
                    {
                        _splitFileTUs.Add(tagParser.TransUnitIDAttr, false);
                    }
                }
            }
            isFileEmpty = false;
        }
        #endregion
        #region HEADER LOGIC (<tag id=""...> optimization only)
        else if (tagParser.IsInHeader)
        {
            // we are handling tags separately
            if (tagParser.IsInTag)
            {
                AddTagToDict(tagParser.TagIDAttr, tagParser.ParsedText, (tagParser.TagID == Tags.TagStart));
                if (tagParser.TagID == Tags.TagSubStart && !string.IsNullOrEmpty(tagParser.TagSubXIDAttr))
                {
                    AddTagSubToDict(tagParser.TagIDAttr, tagParser.TagSubXIDAttr);
                }
            }
            // just write header to write it to all the split files
            else if (!tagParser.IsInReference)
            {
                _header.Append(tagParser.ParsedText);
            }
        }
        #endregion
        #region DOCINFO LOGIC
        else if (tagParser.IsInDocInfo)
        {
            _docinfo.Append(tagParser.ParsedText);
        }
        #endregion
        #region OTHER
        else
        {
            if (tagParser.TagID == Tags.FileStart)
            {
                // a second file start tag marks the input as corrupt
                if (!string.IsNullOrEmpty(_fileTag))
                {
                    isFileCorrupt = true;
                }
                _fileTag = tagParser.ParsedText;
            }
            else if (tagParser.TagID == Tags.XliffStart)
            {
                _xliffTag = tagParser.ParsedText;
            }
            // XML Declaration
            else if (isStart)
            {
                _xliffDeclar = tagParser.ParsedText;
                isStart = false;
            }
        }
        #endregion
        // if not <tag> tag - always write to file
        // if not reference - optimized split files
        if (!tagParser.IsInTag && !tagParser.IsInReference)
        {
            WriteToFile(tagParser.ParsedText);
        }
        // report current operation progress
        ProgressSplit(tagParser.Progress(), 2);
    }
    tagParser.Dispose();
    _writer.Dispose();
    // write to split info file
    _infoWriter.WriteTransUnitsCountTag(_splitFileTUs.Count);
    _infoWriter.WriteWordsCountTag(countWords);
    _infoWriter.CloseWrite(_outFCount.HasValue ? _outFCount.Value : 0);
    AddSplitSegmentIDs(segToSplit);
    // validate file (throws exception)
    this.ValidateFileAfterParsing(isFileCorrupt, isMrkFound, isFileEmpty);
    // last file is empty
    // (no words counted since the last split: fold it into the previous file)
    if (countWords == 0)
    {
        UpdateFile(true);
    }
    // read & write split file again to add tags into <tag_defs>
    else if (_headerTags.Count > 0)
    {
        UpdateFile(false);
    }
}