/// <summary>
/// Tell whether one segment file is a valid one.
/// </summary>
/// <remarks>
/// The file is read through <c>ReadAllData</c>; an <see cref="InvalidDataException"/>
/// raised while reading is written to the trace output and reported as "not valid".
/// The parsed segments are then checked for the required head/tail silence,
/// strictly increasing start times and, when end times are present, that no
/// segment starts after it ends.
/// </remarks>
/// <param name="filePath">Segment file to test.</param>
/// <param name="setting">Setting.</param>
/// <returns>True if valid, otherwise false.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="filePath"/> is null or empty, or <paramref name="setting"/> is null.
/// </exception>
public static bool IsValid(string filePath, SegmentSetting setting)
{
    if (string.IsNullOrEmpty(filePath))
    {
        throw new ArgumentNullException("filePath");
    }

    if (setting == null)
    {
        throw new ArgumentNullException("setting");
    }

    Collection<WaveSegment> segments = null;
    try
    {
        segments = ReadAllData(filePath, setting);
    }
    catch (InvalidDataException ide)
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Invalid data found for alignment file [{0}], for {1}",
            filePath, ide.Message);
        System.Diagnostics.Trace.WriteLine(message);
        return false;
    }

    if (segments.Count == 0 && (setting.HasHeadSilence || setting.HasTailSilence))
    {
        // A required head/tail silence cannot exist in an empty file. Without
        // this guard the indexers below would throw ArgumentOutOfRangeException
        // when only one of the two silences is required.
        return false;
    }

    if (segments.Count < 3 && setting.HasHeadSilence && setting.HasTailSilence)
    {
        // Should start and end with silence tags, with at least one other
        // segment in between.
        return false;
    }

    if ((setting.HasHeadSilence && !segments[0].IsSilencePhone) ||
        (setting.HasTailSilence && !segments[segments.Count - 1].IsSilencePhone))
    {
        // Should start/end with silence tags.
        return false;
    }

    for (int i = 1; i < segments.Count; i++)
    {
        if (segments[i - 1].StartTime >= segments[i].StartTime)
        {
            // The start time of a segment must be earlier than that of the
            // segment following it.
            return false;
        }
    }

    for (int i = 0; setting.HasEndTime && i < segments.Count; i++)
    {
        if (segments[i].StartTime > segments[i].EndTime)
        {
            // A segment must not start after it ends.
            return false;
        }
    }

    return true;
}
/// <summary>
/// Save this segment data into TextWriter.
/// </summary>
/// <remarks>
/// Each wave segment is written on its own line, formatted by
/// <c>WaveSegment.ToString</c> with the <c>HasEndTime</c> flag of
/// <paramref name="setting"/>.
/// </remarks>
/// <param name="tw">Text writer to write the segments.</param>
/// <param name="setting">Setting.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="tw"/> or <paramref name="setting"/> is null.
/// </exception>
public void Save(TextWriter tw, SegmentSetting setting)
{
    if (tw == null)
    {
        throw new ArgumentNullException("tw");
    }

    // Validate up front so a missing setting is reported as an argument error
    // instead of a NullReferenceException in the middle of the write loop.
    if (setting == null)
    {
        throw new ArgumentNullException("setting");
    }

    foreach (WaveSegment ws in _waveSegments)
    {
        tw.WriteLine(ws.ToString(setting.HasEndTime));
    }
}
/// <summary>
/// Load segment data from text reader stream.
/// </summary>
/// <remarks>
/// Empty lines are skipped and a line consisting of a single "." ends the
/// section. Every other line is split on spaces and tabs:
/// without end time the fields are (timestamp) (label) [confidence score],
/// with end time they are (timestamp) (timestamp) (label).
/// The confidence score is only read when there is no end time and the line
/// has exactly three fields.
/// After reading, duplicated silences are removed through
/// <c>RemoveDuplicatedSilence</c>; when end times are not present in the data,
/// each segment's end time is set to the start time of the following segment.
/// </remarks>
/// <param name="tr">Text reader to read segment from.</param>
/// <param name="setting">Setting.</param>
/// <returns>Wave segment collection.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="tr"/> or <paramref name="setting"/> is null.
/// </exception>
/// <exception cref="InvalidDataException">
/// A line has too few fields, a numeric field cannot be parsed, a segment
/// starts after it ends (end-time mode), or a segment starts after the
/// segment following it.
/// </exception>
public static Collection<WaveSegment> ReadAllData(TextReader tr, SegmentSetting setting)
{
    if (tr == null)
    {
        throw new ArgumentNullException("tr");
    }

    if (setting == null)
    {
        throw new ArgumentNullException("setting");
    }

    Collection<WaveSegment> segs = new Collection<WaveSegment>();
    string line = null;
    while ((line = tr.ReadLine()) != null)
    {
        if (string.IsNullOrEmpty(line))
        {
            continue;
        }

        if (line == ".")
        {
            // end of section
            break;
        }

        string[] items = line.Split(new char[] { ' ', '\t' },
            StringSplitOptions.RemoveEmptyEntries);

        if (items.Length < 2 && !setting.HasEndTime)
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "The normal segment line of alignment file shoud be (timestamp) (label) [confidence score]. But '{0}' is found.",
                line);
            throw new InvalidDataException(message);
        }

        if (items.Length < 3 && setting.HasEndTime)
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "The normal segment line of alignment file should be (timestamp) (timestamp) (label) [confidence score]. But '{0}' is found.",
                line);
            throw new InvalidDataException(message);
        }

        WaveSegment seg = new WaveSegment();
        seg.StartTime = ParseSegmentNumber(items[0], line);
        if (!setting.HasEndTime)
        {
            if (items.Length == 3)
            {
                seg.Confidence = ParseSegmentNumber(items[2], line);
            }
        }
        else
        {
            seg.EndTime = ParseSegmentNumber(items[1], line);
        }

        seg.Label = Phoneme.ToOffline(items[setting.HasEndTime ? 2 : 1]);
        segs.Add(seg);
    }

    RemoveDuplicatedSilence(segs);

    for (int i = 0; i < segs.Count; i++)
    {
        bool hasNext = i < segs.Count - 1;

        if (!setting.HasEndTime)
        {
            // No end time in the data: a segment ends where the next one
            // starts. The last segment has no successor, so it is left as is.
            if (hasNext)
            {
                segs[i].EndTime = segs[i + 1].StartTime;
            }
        }
        else if (segs[i].StartTime > segs[i].EndTime)
        {
            // Checked for every segment, including the last one.
            string message = string.Format(CultureInfo.InvariantCulture,
                "The start time of the {0}(th) segment [{1}] must not be later the end time of it.",
                i, segs[i].Label);
            throw new InvalidDataException(message);
        }

        if (hasNext && segs[i].StartTime > segs[i + 1].StartTime)
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "The start time of the {0}(th) segment [{1}] must not be later than the start time of the following segment [{2}].",
                i, segs[i].Label, segs[i + 1].Label);
            throw new InvalidDataException(message);
        }
    }

    return segs;
}

/// <summary>
/// Parse one numeric field of a segment line using the invariant culture.
/// </summary>
/// <param name="text">Field text to parse.</param>
/// <param name="line">Whole line the field comes from, used in the error message.</param>
/// <returns>Parsed value.</returns>
/// <exception cref="InvalidDataException">The field is not a valid number.</exception>
private static float ParseSegmentNumber(string text, string line)
{
    // TryParse reports every parse failure through its return value, so both
    // malformed and (on runtimes that reject them) out-of-range numbers end
    // up as InvalidDataException. The number styles are the ones float.Parse
    // applies by default.
    float value;
    if (!float.TryParse(text, NumberStyles.Float | NumberStyles.AllowThousands,
        CultureInfo.InvariantCulture, out value))
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "Malformed line found as '{0}'", line);
        throw new InvalidDataException(message);
    }

    return value;
}
/// <summary>
/// Save this segment data into file.
/// </summary>
/// <remarks>
/// The folder of the target file is prepared through
/// <c>Helper.EnsureFolderExistForFile</c>, then the file is written with
/// ASCII encoding, replacing any existing content.
/// </remarks>
/// <param name="filePath">Target file to save.</param>
/// <param name="setting">Setting.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="filePath"/> is null or empty, or <paramref name="setting"/> is null.
/// </exception>
public void Save(string filePath, SegmentSetting setting)
{
    if (string.IsNullOrEmpty(filePath))
    {
        throw new ArgumentNullException("filePath");
    }

    // Reject a missing setting before the file is opened: the writer below is
    // created with append == false, so an existing file would already have
    // been overwritten by the time the failure surfaced.
    if (setting == null)
    {
        throw new ArgumentNullException("setting");
    }

    Helper.EnsureFolderExistForFile(filePath);
    using (StreamWriter sw = new StreamWriter(filePath, false, Encoding.ASCII))
    {
        Save(sw, setting);
    }
}
/// <summary>
/// Initialize this instance from a text stream of segments.
/// </summary>
/// <remarks>
/// The segments are read with <c>SegmentFile.ReadAllData</c>, stored as the
/// current wave segments, and <c>UpdateNonSilenceWaveSegments</c> is called
/// afterwards.
/// </remarks>
/// <param name="tr">Segment text stream.</param>
/// <param name="setting">Setting.</param>
public void Load(TextReader tr, SegmentSetting setting)
{
    Collection<WaveSegment> loadedSegments = SegmentFile.ReadAllData(tr, setting);

    _waveSegments = loadedSegments;
    UpdateNonSilenceWaveSegments();
}
/// <summary>
/// Initialize this instance from a segment file on disk.
/// </summary>
/// <remarks>
/// The file path is remembered, the file content is loaded through the
/// text-reader overload, and the result is then required to be non-empty
/// and, when the setting asks for a tail silence, to end with a silence phone.
/// </remarks>
/// <param name="filePath">Segment file.</param>
/// <param name="setting">Setting.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="filePath"/> is null or empty, or <paramref name="setting"/> is null.
/// </exception>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
/// <exception cref="InvalidDataException">
/// The file content is invalid, the file has no segments, or the required
/// tail silence is missing.
/// </exception>
public void Load(string filePath, SegmentSetting setting)
{
    if (string.IsNullOrEmpty(filePath))
    {
        throw new ArgumentNullException("filePath");
    }

    if (setting == null)
    {
        throw new ArgumentNullException("setting");
    }

    if (!File.Exists(filePath))
    {
        throw new FileNotFoundException(filePath);
    }

    _filePath = filePath;

    try
    {
        using (TextReader reader = new StreamReader(filePath))
        {
            Load(reader, setting);
        }
    }
    catch (InvalidDataException inner)
    {
        // Wrap the parsing failure so that the error names the offending file.
        throw new InvalidDataException(
            string.Format(CultureInfo.InvariantCulture,
                "Failed to load alignment file [{0}].", filePath),
            inner);
    }

    if (WaveSegments.Count == 0)
    {
        throw new InvalidDataException(
            string.Format(CultureInfo.InvariantCulture,
                "Empty alignment file found at [{0}].", filePath));
    }

    if (!setting.HasTailSilence)
    {
        return;
    }

    int lastIndex = WaveSegments.Count - 1;
    if (!Phoneme.IsSilencePhone(WaveSegments[lastIndex].Label))
    {
        throw new InvalidDataException(
            string.Format(CultureInfo.InvariantCulture,
                "Last segment [{0}] in file [{1}] should be [{2}].",
                WaveSegments[lastIndex].Label,
                filePath,
                Phoneme.ToOffline(Phoneme.SilencePhone)));
    }
}
/// <summary>
/// Initialize this instance from a segment file, with the setting given as
/// individual flags.
/// </summary>
/// <remarks>
/// The flags are packed into a new <c>SegmentSetting</c> and the call is
/// forwarded to the overload taking a setting object.
/// </remarks>
/// <param name="filePath">Segment file.</param>
/// <param name="fHasEndTime">Value for the <c>HasEndTime</c> flag of the setting.</param>
/// <param name="fHasHeadSilence">Value for the <c>HasHeadSilence</c> flag of the setting.</param>
/// <param name="fHasTailSilence">Value for the <c>HasTailSilence</c> flag of the setting.</param>
public void Load(string filePath, bool fHasEndTime, bool fHasHeadSilence, bool fHasTailSilence)
{
    SegmentSetting flags = new SegmentSetting();
    flags.HasEndTime = fHasEndTime;
    flags.HasHeadSilence = fHasHeadSilence;
    flags.HasTailSilence = fHasTailSilence;

    Load(filePath, flags);
}
/// <summary>
/// Load segment data from a text file.
/// </summary>
/// <remarks>
/// Opens the file with a <see cref="StreamReader"/> and delegates to the
/// text-reader overload. An <see cref="InvalidDataException"/> raised while
/// reading is wrapped in a new one whose message names the file.
/// </remarks>
/// <param name="filePath">Segment file path.</param>
/// <param name="setting">Setting.</param>
/// <returns>Wave segment collection.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="filePath"/> is null or empty, or <paramref name="setting"/> is null.
/// </exception>
/// <exception cref="InvalidDataException">The file content is invalid.</exception>
public static Collection<WaveSegment> ReadAllData(string filePath, SegmentSetting setting)
{
    if (string.IsNullOrEmpty(filePath))
    {
        throw new ArgumentNullException("filePath");
    }

    if (setting == null)
    {
        throw new ArgumentNullException("setting");
    }

    try
    {
        using (StreamReader reader = new StreamReader(filePath))
        {
            Collection<WaveSegment> segments = SegmentFile.ReadAllData(reader, setting);
            return segments;
        }
    }
    catch (InvalidDataException inner)
    {
        // Keep the original failure as the inner exception.
        throw new InvalidDataException(
            string.Format(CultureInfo.InvariantCulture,
                "Failed to load alignment file [{0}].", filePath),
            inner);
    }
}