Example #1
0
        /// <summary>
        /// Translates this WrittenCharacter into a CharacterDescriptor.
        /// Every stroke is distilled into substrokes, and each substroke's direction
        /// and length are stored sequentially in the descriptor's arrays. Which stroke
        /// a substroke belonged to is not recorded.
        /// The CharacterDescriptor can be used against StrokesRepository to find the closest matches.
        /// </summary>
        /// <remarks>
        /// If the character has more substrokes than the descriptor's arrays can hold,
        /// the surplus substrokes are dropped (truncated) instead of overrunning the arrays.
        /// StrokeCount always reports the full number of written strokes, while
        /// SubStrokeCount reports only the substrokes actually stored.
        /// </remarks>
        /// <returns>A CharacterDescriptor translated from this WrittenCharacter.</returns>
        public CharacterDescriptor BuildCharacterDescriptor()
        {
            int strokeCount    = this.strokeList.Count;
            int subStrokeCount = 0;

            CharacterDescriptor descriptor = new CharacterDescriptor();

            double[] directions = descriptor.Directions;
            double[] lengths    = descriptor.Lengths;

            // Both arrays are filled in lock-step, so the usable capacity is the smaller of the two.
            int capacity = directions.Length < lengths.Length ? directions.Length : lengths.Length;

            // Walk the written strokes and flatten their substrokes into the descriptor.
            foreach (WrittenStroke nextStroke in strokeList)
            {
                if (subStrokeCount >= capacity)
                {
                    // Out of substroke slots: truncate all remaining stroke information.
                    break;
                }

                // The bounding-box width and height of the character are handed to the
                // stroke when asking for its substrokes.
                var subStrokes = nextStroke.GetSubStrokes(RightX - LeftX, BottomY - TopY);
                foreach (var subStroke in subStrokes)
                {
                    if (subStrokeCount >= capacity)
                    {
                        // Don't run off the end of the arrays; drop the rest of this stroke.
                        break;
                    }

                    directions[subStrokeCount] = subStroke.Direction;
                    lengths[subStrokeCount]    = subStroke.Length;
                    ++subStrokeCount;
                }
            }

            descriptor.StrokeCount    = strokeCount;
            descriptor.SubStrokeCount = subStrokeCount;

            return descriptor;
        }
Example #2
0
        /// <summary>
        /// Reads one character record from the strokeDataStream field and stores it
        /// in the supplied CharacterDescriptor.
        /// </summary>
        /// <remarks>
        /// Record layout as consumed here: a 16-bit character, one byte of character type,
        /// one byte of stroke count, then for every stroke a one-byte substroke count
        /// followed by that many (direction, length) pairs decoded by StrokesIO.
        /// No bounds check is made against the descriptor's arrays.
        /// </remarks>
        /// <param name="loadInto">The CharacterDescriptor instance that receives the data.</param>
        private void loadNextCharacterDataFromStream(CharacterDescriptor loadInto)
        {
            // Record header: character (two bytes), then type, then stroke count (one byte each).
            // The stroke count could be deduced from the stream position, but the stream
            // was not originally ordered by stroke count, so it is stored explicitly.
            char character     = (char)strokeDataStream.ReadInt16();
            int  characterType = strokeDataStream.ReadByte();
            int  strokeCount   = strokeDataStream.ReadByte();

            double[] directions = loadInto.Directions;
            double[] lengths    = loadInto.Lengths;

            // Next free slot in the flattened direction/length arrays.
            int writeIndex = 0;

            for (int stroke = 0; stroke < strokeCount; ++stroke)
            {
                // Each stroke starts with the number of substrokes it contains...
                for (int remaining = strokeDataStream.ReadByte(); remaining > 0; --remaining)
                {
                    // ...followed by one direction/length pair per substroke.
                    // Both values are read before either is stored.
                    double direction = StrokesIO.ReadDirection(strokeDataStream);
                    double length    = StrokesIO.ReadLength(strokeDataStream);

                    directions[writeIndex] = direction;
                    lengths[writeIndex]    = length;
                    ++writeIndex;
                }
            }

            // Publish the header values and the total substroke count on the descriptor.
            loadInto.Character      = character;
            loadInto.CharacterType  = characterType;
            loadInto.StrokeCount    = strokeCount;
            loadInto.SubStrokeCount = writeIndex;
        }
Example #3
0
        /// <summary>
        /// Creates a matcher for the given input character, stores the search options,
        /// and initialises the score matrix.
        /// </summary>
        /// <param name="character">The input character we want matches for.</param>
        /// <param name="searchTraditional">True if traditional characters should be included in results.</param>
        /// <param name="searchSimplified">True if simplified characters should be included in results.</param>
        /// <param name="looseness">Matching looseness, 0-1.</param>
        /// <param name="numMatches">Number of matches to return.</param>
        /// <param name="strokesDataSource">The data source.</param>
        public StrokesMatcher(CharacterDescriptor character,
                                bool searchTraditional,
                                bool searchSimplified,
                                double looseness,
                                int numMatches,
                                StrokesDataSource strokesDataSource)
        {
            // Caller-supplied search configuration.
            this.inputCharacter    = character;
            this.searchTraditional = searchTraditional;
            this.searchSimplified  = searchSimplified;
            this.looseness         = looseness;
            this.strokesDataSource = strokesDataSource;

            // Internal working state: a scratch descriptor, the result
            // collector sized to numMatches, and the running flag.
            this.compareTo = new CharacterDescriptor();
            this.matches   = new CharacterMatchCollector(numMatches);
            this.running   = true;

            // Runs last so that every field above is already assigned.
            this.initScoreMatrix();
        }
Example #4
0
        /// <summary>
        /// Builds a matcher that looks up candidates for <paramref name="character"/>
        /// in the supplied data source.
        /// </summary>
        /// <param name="character">The input character we want matches for.</param>
        /// <param name="searchTraditional">True if traditional characters should be included in results.</param>
        /// <param name="searchSimplified">True if simplified characters should be included in results.</param>
        /// <param name="looseness">Matching looseness, 0-1.</param>
        /// <param name="numMatches">Number of matches to return.</param>
        /// <param name="strokesDataSource">The data source.</param>
        public StrokesMatcher(CharacterDescriptor character,
                              bool searchTraditional,
                              bool searchSimplified,
                              double looseness,
                              int numMatches,
                              StrokesDataSource strokesDataSource)
        {
            // Objects the matcher allocates for itself.
            this.matches   = new CharacterMatchCollector(numMatches);
            this.compareTo = new CharacterDescriptor();

            // Inputs and options handed in by the caller.
            this.strokesDataSource = strokesDataSource;
            this.inputCharacter    = character;
            this.looseness         = looseness;
            this.searchSimplified  = searchSimplified;
            this.searchTraditional = searchTraditional;

            this.running = true;

            // Called only after all fields are populated.
            this.initScoreMatrix();
        }
Example #5
0
        /// <summary>
        /// Loads the next character record from the data stream into the given
        /// CharacterDescriptor. The caller's instance is filled in rather than a new one
        /// allocated, because this method may be called thousands of times per lookup
        /// and the descriptor can simply be reused.
        /// </summary>
        /// <remarks>
        /// NOTE(review): when a stroke-count group turns out to be empty, this method appears
        /// to return true without touching <paramref name="descriptor"/> - confirm callers cope.
        /// </remarks>
        /// <param name="descriptor">The descriptor to read stroke data into.</param>
        /// <returns>
        /// True if the scan can continue (normally: another character was loaded);
        /// false once there are no more character types to read.
        /// </returns>
        public bool LoadNextCharacterStrokeData(CharacterDescriptor descriptor)
        {
            // Phase 1: the previous call finished a character type (e.g. traditional),
            // so jump to where the next type begins.
            if (this.skipToNextTypePosition)
            {
                bool hasAnotherType = this.positionsIter.MoveNext();
                if (!hasAnotherType)
                {
                    // No more character types. We're done.
                    return false;
                }

                long target = positionsIter.Current;
                long delta  = target - this.position;
                strokeDataStream.BaseStream.Position += delta;

                this.position               = target;
                this.skipToNextTypePosition = false;
            }

            // Phase 2: the previous call finished a stroke-count group, so read the
            // size of the next group and remember where it ends.
            if (this.loadNextStrokeCount)
            {
                // The size prefix is an int (4 bytes); account for it before reading it.
                this.position += 4;

                int groupLength = this.strokeDataStream.ReadInt32();
                this.endOfStrokeCount    = this.position + groupLength;
                this.loadNextStrokeCount = false;
            }

            // Phase 3: if the current group still has characters, load the next one.
            if (this.position < this.endOfStrokeCount)
            {
                loadNextCharacterDataFromStream(descriptor);

                // Bytes consumed by the record just read:
                int headerBytes    = 4;                             // 2 for the character, 1 for its type, 1 for the stroke count
                int perStrokeBytes = descriptor.StrokeCount;        // 1 substroke-count byte per stroke
                int subStrokeBytes = 4 * descriptor.SubStrokeCount; // 4 per substroke (2 direction, 2 length)

                this.position += headerBytes + perStrokeBytes + subStrokeBytes;
            }

            // Phase 4: nothing more to arrange unless we are exactly at the end of the group.
            if (this.position != this.endOfStrokeCount)
            {
                return true;
            }

            this.loadNextStrokeCount = true;

            if (this.strokeCount != this.maxStrokes)
            {
                // More stroke counts remain for this character type.
                this.strokeCount++;
            }
            else
            {
                // That was the last stroke count we check for this character type;
                // the next request skips ahead to the next type.
                this.skipToNextTypePosition = true;
                this.strokeCount            = this.minStrokes; // reset
            }

            return true;
        }
Example #6
0
        /// <summary>
        /// Reads the next character's stroke data from the stream into
        /// <paramref name="descriptor"/>. The descriptor is supplied by the caller so that
        /// one instance can be reused across the potentially thousands of calls made
        /// per input lookup, avoiding needless heap allocations.
        /// </summary>
        /// <remarks>
        /// NOTE(review): if a stroke-count group has zero length, the method seems to return
        /// true while leaving <paramref name="descriptor"/> unchanged - verify against callers.
        /// </remarks>
        /// <param name="descriptor">The descriptor to read stroke data into.</param>
        /// <returns>
        /// False when there are no further character types to read; true otherwise
        /// (normally meaning another character's data was loaded).
        /// </returns>
        public bool LoadNextCharacterStrokeData(CharacterDescriptor descriptor)
        {
            if (this.skipToNextTypePosition)
            {
                // One character type (i.e. traditional) is finished;
                // advance to the start position of the next type, if any.
                if (this.positionsIter.MoveNext() == false)
                {
                    return false;
                }

                long typeStart    = positionsIter.Current;
                long bytesToSkip  = typeStart - this.position;
                strokeDataStream.BaseStream.Position += bytesToSkip;

                this.skipToNextTypePosition = false;
                this.position = typeStart;
            }

            if (this.loadNextStrokeCount)
            {
                // All characters of one stroke count have been read. The next group is
                // prefixed by a 4-byte int holding its size, so advance past that prefix...
                this.position += 4;

                // ...then read it and record where the new group's characters end.
                int sizeOfGroup = this.strokeDataStream.ReadInt32();
                this.endOfStrokeCount = this.position + sizeOfGroup;
                this.loadNextStrokeCount = false;
            }

            bool groupHasMoreCharacters = this.position < this.endOfStrokeCount;
            if (groupHasMoreCharacters)
            {
                loadNextCharacterDataFromStream(descriptor);

                // Advance by the size of the record just read:
                //   4 bytes of header (2 character + 1 type + 1 stroke count),
                //   1 byte per stroke (its substroke count),
                //   4 bytes per substroke (2 direction + 2 length).
                int recordSize = 4
                                 + descriptor.StrokeCount
                                 + (4 * descriptor.SubStrokeCount);
                this.position += recordSize;
            }

            bool atEndOfGroup = this.position == this.endOfStrokeCount;
            if (atEndOfGroup)
            {
                // The next call must start by reading a new stroke-count group.
                this.loadNextStrokeCount = true;

                if (this.strokeCount == this.maxStrokes)
                {
                    // Last stroke count for this character type: on the next
                    // request skip to the next type and restart the counter.
                    this.skipToNextTypePosition = true;
                    this.strokeCount = this.minStrokes;
                }
                else
                {
                    this.strokeCount++;
                }
            }

            return true;
        }
Example #7
0
        /// <summary>
        /// Helper that decodes the next character record from the strokeDataStream field,
        /// as formatted by a strokes data file, into the given CharacterDescriptor.
        /// </summary>
        /// <remarks>
        /// Substroke data format: [substroke count per stroke]([direction][length])+,
        /// i.e. one direction/length pair for each substroke. All pairs are written
        /// consecutively into the descriptor's arrays without a capacity check.
        /// </remarks>
        /// <param name="loadInto">The CharacterDescriptor instance to load data into.</param>
        private void loadNextCharacterDataFromStream(CharacterDescriptor loadInto)
        {
            // The character occupies the first two bytes of the record.
            char character = (char)strokeDataStream.ReadInt16();

            // One byte each for the character type and the number of strokes.
            // (The stroke count is deducible from the stream position, but the
            // stream was not originally ordered by stroke count.)
            int characterType = strokeDataStream.ReadByte();
            int strokeCount = strokeDataStream.ReadByte();

            double[] directions = loadInto.Directions;
            double[] lengths = loadInto.Lengths;

            int totalSubStrokes = 0;
            int strokesLeft = strokeCount;

            while (strokesLeft > 0)
            {
                // Number of substrokes that make up this stroke.
                int subStrokesInStroke = strokeDataStream.ReadByte();

                for (int k = 0; k < subStrokesInStroke; k++)
                {
                    // Read the whole pair first, then store it.
                    double direction = StrokesIO.ReadDirection(strokeDataStream);
                    double length = StrokesIO.ReadLength(strokeDataStream);

                    directions[totalSubStrokes] = direction;
                    lengths[totalSubStrokes] = length;
                    totalSubStrokes++;
                }

                strokesLeft--;
            }

            loadInto.Character = character;
            loadInto.CharacterType = characterType;
            loadInto.StrokeCount = strokeCount;
            loadInto.SubStrokeCount = totalSubStrokes;
        }