コード例 #1
0
ファイル: AdaptItKBReader.cs プロジェクト: sillsdev/WorldPad
        /// <summary>
        /// Adds a source/target pair to the AdaptIt knowledge base file unless the pair is
        /// already known, either in the in-memory lookup or in the XML document itself.
        /// </summary>
        /// <remarks>
        /// The source phrase is first trimmed of delimiter characters; the number of words left
        /// in it selects the MAP element the pair is stored under. Before the modified document
        /// is written, the existing knowledge base file is copied to "&lt;file&gt;.bak".
        /// </remarks>
        /// <param name="strSourceWord">Source word or phrase (at most 10 words after trimming).</param>
        /// <param name="strTargetWord">Target word or phrase to associate with the source.</param>
        /// <exception cref="ApplicationException">
        /// The source phrase has more than 10 words, the KB element is missing and could not be
        /// created, or the knowledge base contains invalid data.
        /// </exception>
        public void AddEntryPair(string strSourceWord, string strTargetWord)
        {
            // first get the map for the number of words in the source string (e.g. "ke picche" would be map=2)
            strSourceWord = strSourceWord.Trim((m_caDelimitersForward != null) ? m_caDelimitersForward : caSplitChars);
            int nMapValue = strSourceWord.Split(caSplitChars, StringSplitOptions.RemoveEmptyEntries).Length;

            if (nMapValue > 10)
            {
                throw new ApplicationException("Cannot have a source phrase with more than 10 words!");
            }

            // NOTE: a map created here is only used for the lookup just below; it is not stored
            //  back into m_mapOfMaps.
            Dictionary <string, string> mapLookup;

            if (!m_mapOfMaps.TryGetValue(nMapValue, out mapLookup))
            {
                mapLookup = new Dictionary <string, string>();
            }

            // first see if this pair is already there
            string strTargetWordsInMap = null;

            if (mapLookup.TryGetValue(strSourceWord, out strTargetWordsInMap))
            {
                // the stored value may be null (Load stores null for a source word that has no
                //  target entries), so only search inside it when there is something to search.
                if ((strTargetWordsInMap == strTargetWord) ||
                    ((strTargetWordsInMap != null) && (strTargetWordsInMap.IndexOf(String.Format("%{0}%", strTargetWord)) != -1)))
                {
                    return;                        // already there
                }
            }

            // otherwise, we need to add it.
#if !NotUseSchemaGeneratedClass
            try
            {
                XmlDocument         doc;
                XPathNavigator      navigator;
                XmlNamespaceManager manager;
                GetXmlDocument(out doc, out navigator, out manager);

                if (doc.DocumentElement != null)
                {
                    XmlNode nodeKbNode = doc.DocumentElement.SelectSingleNode(XPathToKB, manager);
                    if (nodeKbNode == null)
                    {
                        // no KB element, so create one. CreateElement only makes a detached node;
                        //  it has to be appended before it can be selected.
                        // NOTE(review): this assumes XPathToKB is usable as an element name -- confirm.
                        doc.DocumentElement.AppendChild(doc.CreateElement(XPathToKB));
                        nodeKbNode = doc.DocumentElement.SelectSingleNode(XPathToKB, manager);
                        if (nodeKbNode == null)
                        {
                            // fail with a meaningful message rather than a NullReferenceException below
                            throw new ApplicationException("The AdaptIt knowledge base has no KB element and one could not be created!");
                        }
                    }

                    // see if the proper map entry is present (so we can add it, if not)
                    string  strMapSelect = XPathToSpecificMAP(nMapValue);
                    XmlNode nodeMapEntry = nodeKbNode.SelectSingleNode(strMapSelect, manager);
                    if (nodeMapEntry == null)
                    {
                        // if not, then add it.
                        // xpathnavs are easier to use to add child elements
                        XPathNavigator xpnMap = nodeKbNode.CreateNavigator();
                        xpnMap.AppendChild(String.Format("<MAP mn=\"{0}\"/>", nMapValue));

                        // now try it again
                        nodeMapEntry = nodeKbNode.SelectSingleNode(strMapSelect, manager);
                    }

                    // see if the source word exists (so we can add it if not)
                    string  strSourceWordSelect = XPathToSpecificTU(strSourceWord);
                    XmlNode nodeSourceWordEntry = nodeMapEntry.SelectSingleNode(strSourceWordSelect, manager);
                    if (nodeSourceWordEntry == null)
                    {
                        // add it. The word goes into an XML attribute value, so characters such as
                        //  '"', '&' and '<' must be escaped or the fragment will not parse.
                        XPathNavigator xpnSourceWord = nodeMapEntry.CreateNavigator();
                        xpnSourceWord.AppendChild(String.Format("<TU f=\"0\" k=\"{0}\"/>",
                            System.Security.SecurityElement.Escape(strSourceWord)));

                        // now try it again
                        nodeSourceWordEntry = nodeMapEntry.SelectSingleNode(strSourceWordSelect, manager);
                    }

                    // the target word shouldn't exist (or we wouldn't be here... unless it was *just* added
                    //  but to avoid doing two loads... just be sure we're not adding it twice here
                    string  strTargetWordSelect = XPathToSpecificRS(strTargetWord);
                    XmlNode nodeTargetWordEntry = nodeSourceWordEntry.SelectSingleNode(strTargetWordSelect, manager);
                    if (nodeTargetWordEntry != null)
                    {
                        return;                         // nothing to do, because it's already in there.
                    }

                    // add it (escaped for the same reason as the source word above).
                    XPathNavigator xpnTargetWord = nodeSourceWordEntry.CreateNavigator();
                    xpnTargetWord.AppendChild(String.Format("<RS n=\"1\" a=\"{0}\"/>",
                        System.Security.SecurityElement.Escape(strTargetWord)));

                    // keep a backup of the previous version before overwriting the file
                    File.Copy(m_strKnowledgeBaseFileSpec, m_strKnowledgeBaseFileSpec + ".bak", true);

                    // 'using' guarantees the file handle is released even if Save throws
                    using (XmlTextWriter writer = new XmlTextWriter(m_strKnowledgeBaseFileSpec, Encoding.UTF8))
                    {
                        writer.Formatting = Formatting.Indented;
                        doc.Save(writer);
                    }
                }
            }
            catch (System.Data.DataException ex)
            {
                if (ex.Message == "A child row has multiple parents.")
                {
                    // this happens when the knowledge base has invalid data in it (e.g. when there is two
                    //  canonically equivalent words in different records). This is technically a bug in
                    //  AdaptIt.
                    throw new ApplicationException("The AdaptIt knowledge base has invalid data in it! Contact [email protected]", ex);
                }

                throw;      // rethrow without resetting the stack trace
            }
#else
            AdaptItKnowledgeBase aikb = new AdaptItKnowledgeBase();
            aikb.ReadXml(m_strKnowledgeBaseFileSpec);

            // make sure there's a KB record (if AI created it, there will be, but why not allow users to create
            //  AI KBs without AI...)
            AdaptItKnowledgeBase.KBRow aKBRow = null;
            if (aikb.KB.Count == 0)
            {
                aKBRow = aikb.KB.AddKBRow("4", null, null, "1");
            }
            else
            {
                aKBRow = aikb.KB[0];
            }

            // get the proper MAP element
            AdaptItKnowledgeBase.MAPRow aMAPRow = aikb.MAP.FindBymn(nMapValue.ToString());
            if (aMAPRow == null)
            {
                // have to add it
                System.Diagnostics.Debug.Assert(aKBRow != null);
                aMAPRow = aikb.MAP.AddMAPRow(nMapValue.ToString(), aKBRow);
            }

            // get the proper Source Word element
            AdaptItKnowledgeBase.TURow aTURow = aikb.TU.FindByk(strSourceWord);
            if (aTURow == null)
            {
                System.Diagnostics.Debug.Assert(aMAPRow != null);
                aTURow = aikb.TU.AddTURow("0", strSourceWord, aMAPRow);
            }

            // see if the particular target word entry is there...
            System.Diagnostics.Debug.Assert(aTURow != null);
            foreach (AdaptItKnowledgeBase.RSRow aRSRow in aTURow.GetRSRows())
            {
                if (aRSRow.a == strTargetWord)
                {
                    return;
                }
            }

            // otherwise, add a new RS element for this word
            aikb.RS.AddRSRow("1", strTargetWord, aTURow);
            File.Copy(m_strKnowledgeBaseFileSpec, m_strKnowledgeBaseFileSpec + ".bak", true);
            aikb.WriteXml(m_strKnowledgeBaseFileSpec);
#endif
        }
コード例 #2
0
ファイル: AdaptItKBReader.cs プロジェクト: sillsdev/WorldPad
        /// <summary>
        /// Reloads the punctuation delimiters from the project file and the word maps from the
        /// knowledge base file, but only for whichever of the two files has a newer modification
        /// time than the one recorded at the previous load.
        /// </summary>
        /// <returns>
        /// true if either the delimiters or the knowledge base maps were (re)loaded; otherwise false.
        /// </returns>
        /// <exception cref="ApplicationException">
        /// The knowledge base could not be read or contains invalid data.
        /// </exception>
        protected virtual bool Load()
        {
            System.Diagnostics.Debug.Assert(!String.IsNullOrEmpty(m_strProjectFileSpec));
            System.Diagnostics.Debug.Assert(!String.IsNullOrEmpty(m_strKnowledgeBaseFileSpec));

            // see if the project file timestamp has changed (in which case, we should
            //  reload the punctuation just in case it changed);
            DateTime timeModified = DateTime.Now;             // don't care really, but have to initialize it.

            if (!DoesFileExist(m_strProjectFileSpec, ref timeModified))
            {
                EncConverters.ThrowError(ErrStatus.CantOpenReadMap, m_strProjectFileSpec);
            }

            bool bSomethingChanged = false;

            // if we have "old" data
            if (timeModified > m_timeModifiedProj)
            {
                // get the punctuation out of the project file.
                string strProjectFileContents = null;
                using (StreamReader sr = File.OpenText(m_strProjectFileSpec))
                {
                    strProjectFileContents = sr.ReadToEnd();
                }

                // NOTE(review): if a punctuation marker is absent from the project file, IndexOf
                //  returns -1 and the offsets computed below do not point at a punctuation value
                //  -- confirm that project files always contain these markers.
                if (m_bLegacy)                  // legacy project file does it differently
                {
                    int nIndex = strProjectFileContents.IndexOf(cstrAdaptItPunctuationPairsLegacy) + cstrAdaptItPunctuationPairsLegacy.Length;
                    int nEnd   = strProjectFileContents.IndexOfAny(caSplitChars, nIndex);
                    if (nEnd == -1)
                    {
                        nEnd = strProjectFileContents.Length;       // no terminator: value runs to the end of the file
                    }

                    string strPunctuation = strProjectFileContents.Substring(nIndex, nEnd - nIndex);
                    InitializeDelimitersLegacy(strPunctuation, out m_caDelimitersForward, out m_caDelimitersReverse);
                }
                else                    // NonRoman version
                {
                    int nIndex = strProjectFileContents.IndexOf(cstrAdaptItPunctuationPairsNRSource) + cstrAdaptItPunctuationPairsNRSource.Length;
                    int nEnd   = strProjectFileContents.IndexOf('\n', nIndex);
                    if (nEnd == -1)
                    {
                        nEnd = strProjectFileContents.Length;       // last line without a trailing newline
                    }

                    this.m_caDelimitersForward = ReturnDelimiters(strProjectFileContents, nIndex, nEnd - nIndex);

                    // the target punctuation is searched for after the source punctuation
                    nIndex = strProjectFileContents.IndexOf(cstrAdaptItPunctuationPairsNRTarget, nIndex) + cstrAdaptItPunctuationPairsNRTarget.Length;
                    nEnd   = strProjectFileContents.IndexOf('\n', nIndex);
                    if (nEnd == -1)
                    {
                        nEnd = strProjectFileContents.Length;       // last line without a trailing newline
                    }

                    this.m_caDelimitersReverse = ReturnDelimiters(strProjectFileContents, nIndex, nEnd - nIndex);
                }

                m_timeModifiedProj = timeModified;
                bSomethingChanged  = true;
            }

            // next check on the knowledge base... make sure it's there and get the last time it was modified
            timeModified = DateTime.Now;             // don't care really, but have to initialize it.
            if (!DoesFileExist(m_strKnowledgeBaseFileSpec, ref timeModified))
            {
                EncConverters.ThrowError(ErrStatus.CantOpenReadMap, m_strKnowledgeBaseFileSpec);
            }

            // if it has been modified or it's not already loaded...
            if (timeModified > m_timeModifiedKB)
            {
                m_mapOfMaps.Clear();
                m_mapOfReversalMaps = null;

#if !NotUseSchemaGeneratedClass
                // Since AdaptIt will make different records for two words which are canonically
                //  equivalent, if we use the class object to read it in via ReadXml, that will throw
                //  an exception in such a case. So see if using XmlDocument is any less restrictive
                try
                {
                    XmlDocument         doc;
                    XPathNavigator      navigator;
                    XmlNamespaceManager manager;
                    GetXmlDocument(out doc, out navigator, out manager);

                    XPathNodeIterator xpMapIterator = navigator.Select(XPathToMAP, manager);

                    List <string> astrTargetWords = new List <string>();
                    while (xpMapIterator.MoveNext())
                    {
                        // get the map number so we can make different maps for different size phrases
                        string strMapNum = xpMapIterator.Current.GetAttribute("mn", navigator.NamespaceURI);
                        int    nMapNum   = System.Convert.ToInt32(strMapNum, 10);
                        Dictionary <string, string> mapWords = new Dictionary <string, string>();
                        m_mapOfMaps.Add(nMapNum, mapWords);

                        XPathNodeIterator xpSourceWords = xpMapIterator.Current.Select(XPathToTU, manager);
                        while (xpSourceWords.MoveNext())
                        {
                            XPathNodeIterator xpTargetWords = xpSourceWords.Current.Select(XPathToRS, manager);

                            // the list is reused for every source word
                            astrTargetWords.Clear();
                            while (xpTargetWords.MoveNext())
                            {
                                string strTargetWord = xpTargetWords.Current.GetAttribute("a", navigator.NamespaceURI);
                                astrTargetWords.Add(strTargetWord);
                            }

                            // if there are multiple target words for this form, then return it in Ample-like
                            //  %2%target1%target% format
                            // (stays null when the source word has no target entries at all)
                            string strTargetWordFull = null;
                            if (astrTargetWords.Count > 1)
                            {
                                strTargetWordFull = String.Format("%{0}%", astrTargetWords.Count);
                                foreach (string strTargetWord in astrTargetWords)
                                {
                                    strTargetWordFull += String.Format("{0}%", strTargetWord);
                                }
                            }
                            else if (astrTargetWords.Count == 1)
                            {
                                strTargetWordFull = astrTargetWords[0];
                                if (strTargetWordFull == "<Not In KB>")
                                {
                                    continue;                                       // skip this one so we *don't* get a match later on.
                                }
                            }

                            string strSourceWord = xpSourceWords.Current.GetAttribute("k", navigator.NamespaceURI);
                            System.Diagnostics.Debug.Assert(!mapWords.ContainsKey(strSourceWord), String.Format("The Knowledge Base has two different source records which are canonically equivalent! See if you can merge the two KB entries for word that look like, '{0}'", strSourceWord));
                            mapWords[strSourceWord] = strTargetWordFull;
                        }
                    }
                }
                catch (System.Data.DataException ex)
                {
                    if (ex.Message == "A child row has multiple parents.")
                    {
                        // this happens when the knowledge base has invalid data in it (e.g. when there is two
                        //  canonically equivalent words in different records). This is technically a bug in
                        //  AdaptIt.
                        throw new ApplicationException("The AdaptIt knowledge base has invalid data in it! Contact [email protected]", ex);
                    }

                    throw;      // rethrow without resetting the stack trace
                }
                catch (Exception ex)
                {
                    throw new ApplicationException("Unable to open the AdaptIt knowledge base. Contact [email protected]", ex);
                }
#else
                AdaptItKnowledgeBase aikb = new AdaptItKnowledgeBase();
                try
                {
                    aikb.ReadXml(m_strKnowledgeBaseFileSpec);
                    if (aikb.KB.Count > 0)
                    {
                        AdaptItKnowledgeBase.KBRow aKBRow = aikb.KB[0];
                        foreach (AdaptItKnowledgeBase.MAPRow aMapRow in aKBRow.GetMAPRows())
                        {
                            foreach (AdaptItKnowledgeBase.TURow aTURow in aMapRow.GetTURows())
                            {
                                string strValue = null;
                                AdaptItKnowledgeBase.RSRow[] aRSRows = aTURow.GetRSRows();
                                if (aRSRows.Length > 1)
                                {
                                    // if there is more than one mapping, then make it %count%val1%val2%...
                                    //  so people can use the Word Pick macro to choose it
                                    strValue = String.Format("%{0}%", aRSRows.Length);
                                    foreach (AdaptItKnowledgeBase.RSRow aRSRow in aRSRows)
                                    {
                                        strValue += String.Format("{0}%", aRSRow.a);
                                    }
                                }
                                else if (aRSRows.Length == 1)
                                {
                                    AdaptItKnowledgeBase.RSRow aRSRow = aRSRows[0];
                                    if (aRSRow.a == "<Not In KB>")
                                    {
                                        continue;                                           // skip this one so we *don't* get a match later on.
                                    }
                                    else
                                    {
                                        strValue = aRSRow.a;
                                    }
                                }

                                m_mapLookup[aTURow.k] = strValue;
                            }
                        }
                    }
                }
                catch (System.Data.DataException ex)
                {
                    if (ex.Message == "A child row has multiple parents.")
                    {
                        // this happens when the knowledge base has invalid data in it (e.g. when there is two
                        //  canonically equivalent words in different records). This is technically a bug in
                        //  AdaptIt.
                        throw new ApplicationException("The AdaptIt knowledge base has invalid data in it! Contact [email protected]", ex);
                    }

                    throw;      // rethrow without resetting the stack trace
                }
#endif

                // keep track of the modified date, so we can detect a new version to reload
                m_timeModifiedKB  = timeModified;
                bSomethingChanged = true;
            }

            return(bSomethingChanged);              // indicate whether the data was reinitialized or not
        }