예제 #1
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Gets a collection of Mark chunk words, keyed by audio offset. An understanding
        /// of how SA handles annotations, glosses and references is important to understanding
        /// what's going on in this method.
        /// </summary>
        /// <returns>The words read from mark segments, or null when there are none.</returns>
        /// ------------------------------------------------------------------------------------
        private SortedDictionary <uint, AudioDocWords> GetMarkInfo()
        {
            SortedDictionary <uint, AudioDocWords> markWords = new SortedDictionary <uint, AudioDocWords>();

            ResetSegmentEnumerators();

            uint   segOffset;
            uint   segLength;
            bool   isBookmark;
            string glossText;
            string posText;
            string refText;

            // Each mark segment marks one word boundary in the audio document.
            while (ReadMarkSegment(out segOffset, out segLength, out glossText, out posText, out refText, out isBookmark))
            {
                AudioDocWords docWord = new AudioDocWords();
                docWord.AudioLength = segLength;

                // The word's annotations live in the private m_words dictionary of
                // AudioDocWords, so they must be stored via reflection.
                SortedDictionary <AnnotationType, string> annotations =
                    ReflectionHelper.GetField(docWord, "m_words") as
                    SortedDictionary <AnnotationType, string>;

                if (annotations != null)
                {
                    annotations[AnnotationType.Gloss]     = glossText;
                    annotations[AnnotationType.Reference] = refText;
                }

                markWords[segOffset] = docWord;
            }

            // Callers treat a null return as "no mark segments present".
            return (markWords.Count > 0 ? markWords : null);
        }
예제 #2
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// When there are no mark segments added to the audio document in SA to indicate word
        /// boundaries, the assumption is that all segments belong to a single word. Therefore
        /// all segments found for the specified annotation type will be combined into a single
        /// word in the specified AudioDocWords object.
        /// </summary>
        /// <param name="atype">Annotation type whose segments are combined.</param>
        /// <param name="adw">Word object receiving the combined text; ignored when null.</param>
        /// ------------------------------------------------------------------------------------
        private void BuildSingleAnnotationWord(AnnotationType atype, AudioDocWords adw)
        {
            if (adw == null)
            {
                return;
            }

            uint          segOffset;
            uint          segLength;
            string        segText;
            StringBuilder wordBldr = new StringBuilder();

            // Concatenate every segment of this annotation type into one word.
            while (ReadSegment((int)atype, out segOffset, out segLength, out segText))
            {
                wordBldr.Append(segText);
            }

            // Nothing to store when the transcription for this type is empty.
            if (wordBldr.Length == 0)
            {
                return;
            }

            // The annotations live in the private m_words dictionary of
            // AudioDocWords, so the combined word is stored via reflection.
            SortedDictionary <AnnotationType, string> annotations =
                ReflectionHelper.GetField(adw, "m_words") as
                SortedDictionary <AnnotationType, string>;

            if (annotations != null)
            {
                annotations[atype] = wordBldr.ToString();
            }
        }
예제 #3
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Gets a collection of the words in the audio document, keyed by audio offset.
        /// Returns null when the document contains no annotation text at all.
        /// </summary>
        /// ------------------------------------------------------------------------------------
        public SortedDictionary <uint, AudioDocWords> GetWords()
        {
            SortedDictionary <uint, AudioDocWords> words = GetMarkInfo();

            ResetSegmentEnumerators();

            if (words != null)
            {
                BuildAnnotationWords(AnnotationType.Phonetic, words);
                BuildAnnotationWords(AnnotationType.Phonemic, words);
                BuildAnnotationWords(AnnotationType.Tone, words);
                BuildAnnotationWords(AnnotationType.Orthographic, words);
                return(words);
            }

            // At this point we know there were no mark segments added to the audio
            // document in SA to indicate word boundaries. Therefore combine all the
            // existing segments into single words for each annotation type. This
            // should fix JIRA issue SPM-404.
            words    = new SortedDictionary <uint, AudioDocWords>();
            words[0] = new AudioDocWords();

            BuildSingleAnnotationWord(AnnotationType.Phonetic, words[0]);
            BuildSingleAnnotationWord(AnnotationType.Phonemic, words[0]);
            BuildSingleAnnotationWord(AnnotationType.Tone, words[0]);
            BuildSingleAnnotationWord(AnnotationType.Orthographic, words[0]);

            SortedDictionary <AnnotationType, string> wrds =
                ReflectionHelper.GetField(words[0], "m_words") as
                SortedDictionary <AnnotationType, string>;

            if (wrds == null)
            {
                return(null);
            }

            // Return the collection only when at least one annotation type produced
            // text; otherwise return null. Use TryGetValue rather than the indexer
            // because BuildSingleAnnotationWord skips annotation types with no
            // segments, so a key may be absent and the indexer would throw a
            // KeyNotFoundException.
            AnnotationType[] atypes = new AnnotationType[] { AnnotationType.Phonetic,
                AnnotationType.Phonemic, AnnotationType.Tone, AnnotationType.Orthographic };

            foreach (AnnotationType atype in atypes)
            {
                string text;
                if (wrds.TryGetValue(atype, out text) && !string.IsNullOrEmpty(text))
                {
                    return(words);
                }
            }

            return(null);
        }
예제 #4
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Builds the word from segments in the specified annotation type. An understanding
        /// of how SA handles annotations, glosses and references is important to understanding
        /// what's going on in this method.
        /// </summary>
        /// <param name="atype">Annotation type whose segments are combined into words.</param>
        /// <param name="words">Collection of words keyed by audio offset. The offsets mark
        /// word boundaries; segments are appended to a word until a segment's offset matches
        /// the next word's offset in this collection.</param>
        /// ------------------------------------------------------------------------------------
        private void BuildAnnotationWords(AnnotationType atype,
                                          SortedDictionary <uint, AudioDocWords> words)
        {
            uint          offset;
            uint          length;
            uint          firstSegOffset = 0;
            uint          lengthSum      = 0;
            string        segment;
            StringBuilder bldr    = new StringBuilder();
            AudioDocWords prevAdw = null;

            // Read all the segments for the annotation type.
            while (ReadSegment((int)atype, out offset, out length, out segment))
            {
                AudioDocWords currWord;

                // We'll only use lengthSum and firstSegOffset in the case when the first
                // word's offset isn't the same as the offset of the first phonetic segment.
                lengthSum += length;
                if (bldr.Length == 0)
                {
                    // Remember where the word currently being built started.
                    firstSegOffset = offset;
                }

                // When the offset for the current segment matches one already in the
                // collection of words we know we've come to the beginning of the next
                // word (or the first word if the string builder is empty).
                if (words.TryGetValue(offset, out currWord))
                {
                    // If we have a word that's been constructed, save it and reset the
                    // builder to accept the next word coming down the pike.
                    if (bldr.Length > 0)
                    {
                        // This should only happen when the first word's offset is not the same as
                        // the first phonetic segment's offset. When that happens, we need to add
                        // a word at the beginning of the collection to accomodate the fact that
                        // the audio file contains one or more phonetic segments at the beginning
                        // of the transcription that do not belong to a word.
                        if (prevAdw == null)
                        {
                            prevAdw               = new AudioDocWords();
                            prevAdw.AudioLength   = lengthSum;
                            words[firstSegOffset] = prevAdw;
                        }

                        // The word's annotations live in the private m_words dictionary
                        // of AudioDocWords, so they must be stored via reflection.
                        SortedDictionary <AnnotationType, string> wrds =
                            ReflectionHelper.GetField(prevAdw, "m_words") as
                            SortedDictionary <AnnotationType, string>;

                        if (wrds != null)
                        {
                            // kNullSegment placeholders are stripped from the stored word.
                            wrds[atype] = bldr.ToString().Replace(kNullSegment, string.Empty);
                        }

                        bldr.Length = 0;
                    }

                    // Save a reference to the AudioDocWords object so we can
                    // store in it the word we're just beginning to construct.
                    prevAdw = currWord;
                }

                bldr.Append(segment);
            }

            // Make sure to save the last word constructed.
            if (bldr.Length > 0 && prevAdw != null)
            {
                SortedDictionary <AnnotationType, string> wrds =
                    ReflectionHelper.GetField(prevAdw, "m_words") as
                    SortedDictionary <AnnotationType, string>;

                if (wrds != null)
                {
                    wrds[atype] = bldr.ToString().Replace(kNullSegment, string.Empty);
                }
            }
        }