/// <summary>
        /// Builds the normalised TF-IDF feature vector of <paramref name="example"/> and stores
        /// it in <c>example.X</c>.
        /// </summary>
        /// <param name="example">Example whose token counts (<c>Tokens</c>) are turned into a vector.</param>
        /// <param name="voc">Vocabulary supplying the vector dimension, the position of each word
        /// and the number of examples each word occurs in.</param>
        /// <param name="exampleCount">Total number of examples (documents) in the corpus.</param>
        private static void BuildExample(TextExample example, Vocabulary voc, int exampleCount)
        {
            int          dimension = voc.Count;
            SparseVector vector    = new SparseVector(dimension);

            foreach (string word in example.Tokens.Keys)
            {
                int pos = voc.GetWordPosition(word);
                if (pos == Constants.KEY_NOT_FOUND)
                {
                    // words outside the vocabulary do not contribute to the vector
                    continue;
                }

                // phi i(x) = tfi log(idfi) /k
                // tfi:     number of occurrences of the term i in the document x
                // idfi:    the ratio between the total number of documents and the
                //              number of documents containing the term
                // k:       normalisation constant ensuring that ||phi|| = 1
                //
                // The ratio has to be computed in floating point: dividing two integers
                // would truncate it (e.g. 10 / 4 == 2) before the logarithm is taken.
                double docFrequency = voc.WordExampleOccurMap[word];
                if (docFrequency <= 0)
                {
                    // a term recorded as occurring in no example has no defined idf;
                    // skip it instead of dividing by zero
                    continue;
                }

                double phi = example.Tokens[word] * Math.Log(exampleCount / docFrequency);
                vector.Components.Add(pos, phi);
            }

            // division by k: scale the vector (see the formula above)
            vector.Normalize();
            example.X = vector;
        }
Beispiel #2
0
        /// <summary>
        /// Reads the given XML file and indexes every text example it contains by the
        /// morphnames found in the analyses of its words.
        /// </summary>
        /// <remarks>
        /// Failures to read or to process the file are reported to the user in a message box,
        /// after which the method returns. Nothing is rolled back in that case: examples that
        /// were indexed before the failure stay in the maps.
        /// </remarks>
        /// <param name="strTbxAnaXmlFilename">Path of the XML file to load.</param>
        protected void LoadXmlFile(string strTbxAnaXmlFilename)
        {
            string strAnaXmlFileContents = null;
            try
            {
                strAnaXmlFileContents = File.ReadAllText(strTbxAnaXmlFilename);
            }
            catch (Exception ex)
            {
                string strError = String.Format("Unable to read the XML file:{0}{0}'{1}'{0}{0}cause: {2}",
                    Environment.NewLine, strTbxAnaXmlFilename, ex.Message);

                if (ex.InnerException != null)
                    strError += String.Format("{0}{0}cause: {1}", Environment.NewLine, ex.InnerException.Message);

                MessageBox.Show(strError, cstrCaption);
                return;
            }

            #if !UseDataSet
            XmlDocument doc = new XmlDocument();
            try
            {
                doc.LoadXml(strAnaXmlFileContents);
                XPathNavigator navigator = doc.CreateNavigator();
                XPathNodeIterator nodeTextExamples = navigator.Select("/shoebox/t");

                // compile the relative expressions once; they are evaluated for every node
                XPathExpression xpeTextExampleValue = navigator.Compile("@value");
                XPathExpression xpeWords = navigator.Compile("w");
                XPathExpression xpeAnalysis = navigator.Compile("a");
                XPathExpression xpeCategory = navigator.Compile("cat");

                while (nodeTextExamples.MoveNext())
                {
                    string strTextExampleValue = GetExpressionValue(nodeTextExamples, xpeTextExampleValue);
                    string strTextExampleXML = nodeTextExamples.Current.OuterXml;
                    TextExample te = new TextExample(strTextExampleValue, strTextExampleXML);

                    XPathNodeIterator nodeWords = nodeTextExamples.Current.Select(xpeWords);
                    while (nodeWords.MoveNext())
                    {
                        // the word's own text is read with the same "@value" expression as the text example's
                        string strWordValue = GetExpressionValue(nodeWords, xpeTextExampleValue);
                        string strAnalysis = GetExpressionValue(nodeWords, xpeAnalysis);
                        string strCategory = GetExpressionValue(nodeWords, xpeCategory);
                        string strWordXML = nodeWords.Current.OuterXml;

                        WordExample we = new WordExample(strWordValue, strAnalysis, strCategory, strWordXML);
                        te.Add(we);

                        // probably expand this, but for now, get an indexing mechanism to these examples
                        IndexAnalysisMorphnames(strAnalysis, te);
                    }
                }
            }
            catch (Exception ex)
            {
                string strError = String.Format("Unable to Serialize the XML file:{0}{0}'{1}'{0}{0}cause: {2}",
                    Environment.NewLine, strTbxAnaXmlFilename, ex.Message);

                if (ex.InnerException != null)
                    strError += String.Format("{0}{0}cause: {1}", Environment.NewLine, ex.InnerException.Message);

                strError += String.Format("{0}{0}It looks like it contains something unexpected...{0}send the file '{1}' to [email protected] along with this error message",
                    Environment.NewLine, strTbxAnaXmlFilename);

                MessageBox.Show(strError, cstrCaption);
                return;
            }
            #else
            // the typed data set only recognises the elements when the schema namespace is present,
            //  so add it to the root element if the file doesn't declare it
            if (strAnaXmlFileContents.IndexOf("xmlns=\"http://www.sil.org/computing/schemas/AnaTextExamples.xsd\"") == -1)
            {
                strAnaXmlFileContents = strAnaXmlFileContents.Replace(
                    "shoebox type=\"CarlaStudioANA\"",
                    "shoebox type=\"CarlaStudioANA\" xmlns=\"http://www.sil.org/computing/schemas/AnaTextExamples.xsd\"");
            }

            AnaTextExamplesXML xmlFile = new AnaTextExamplesXML();
            try
            {
                // the stream is only needed while the data set is being filled
                using (MemoryStream ms = new MemoryStream(Encoding.UTF8.GetBytes(strAnaXmlFileContents)))
                {
                    xmlFile.ReadXml(ms);
                }
            }
            catch (Exception ex)
            {
                string strError = String.Format("Unable to Serialize the XML file:{0}{0}'{1}'{0}{0}cause: {2}",
                    Environment.NewLine, strTbxAnaXmlFilename, ex.Message);

                if (ex.InnerException != null)
                    strError += String.Format("{0}{0}cause: {1}", Environment.NewLine, ex.InnerException.Message);

                strError += String.Format("{0}{0}It looks like it contains something unexpected...{0}send the file '{1}' to [email protected] along with this error message",
                    Environment.NewLine, strTbxAnaXmlFilename);

                MessageBox.Show(strError, cstrCaption);
                return;
            }
            /*
            AnaTextExamplesXML.shoeboxRow aSbxRow = null;
            if (xmlFileCorpus != null)
            {
                System.Diagnostics.Debug.Assert(xmlFileCorpus.shoebox.Count > 0);
                aSbxRow = xmlFileCorpus.shoebox[0];
            }
            */
            foreach (AnaTextExamplesXML.tRow aTRow in xmlFile.t)
            {
                /*
                if (xmlFileCorpus != null)
                    xmlFileCorpus.t.AddtRow(aTRow.value, aSbxRow);
                */
                TextExample te = new TextExample(aTRow.value, aTRow.value);
                foreach (AnaTextExamplesXML.wRow aWRow in aTRow.GetwRows())
                {
                    /*
                    if (xmlFileCorpus != null)
                        xmlFileCorpus.w.AddwRow(aWRow.value, aWRow.a, aWRow.d, aWRow.cat, aWRow.u,
                            ((aWRow.IspNull()) ? null : aWRow.p),
                            ((aWRow.IsnNull()) ? null : aWRow.n),
                            ((aWRow.IsfNull()) ? null : aWRow.f), aTRow);
                    */
                    string strAnalysis = aWRow.a;
                    WordExample we = new WordExample(aWRow.value, strAnalysis, aWRow.cat);
                    te.Add(we);

                    // probably expand this, but for now, get an indexing mechanism to these examples
                    IndexAnalysisMorphnames(strAnalysis, te);
                }
            }
            #endif
        }

        /// <summary>
        /// Splits an analysis string into its morphnames and registers each of them for
        /// <paramref name="te"/>: the token's type is recorded in <c>m_mapTokensToType</c> and the
        /// text example is added to the token's list via <c>AddMapToTeList</c>.
        /// </summary>
        /// <param name="strAnalysis">Analysis content of one word; nothing is indexed when it is
        /// null or empty.</param>
        /// <param name="te">Text example that contains the word.</param>
        private void IndexAnalysisMorphnames(string strAnalysis, TextExample te)
        {
            // a word without analysis content has nothing to index (and looking at its first
            //  character below would throw)
            if (String.IsNullOrEmpty(strAnalysis))
                return;

            if (strAnalysis[0] == '%')
            {
                // drop the leading '%'-delimited prefix. IndexOf throws when the start index lies
                //  beyond the end of the string, so only search when position 2 can exist
                int nIndex = (strAnalysis.Length >= 2) ? strAnalysis.IndexOf('%', 2) : -1;
                System.Diagnostics.Debug.Assert(nIndex != -1);

                // if no closing '%' was found (nIndex == -1), this keeps the whole string
                strAnalysis = strAnalysis.Substring(nIndex + 1);
            }

            // split the analysis content to get the list of morphnames
            string[] astrAmbiguities = strAnalysis.Split(caSplitAmbsChars, StringSplitOptions.RemoveEmptyEntries);
            foreach (string strAmbiguity in astrAmbiguities)
            {
                // the first token of each ambiguity is typed cstrPOS, every following one cstrMorphname
                string strType = cstrPOS;
                string[] astrMorphnames = strAmbiguity.Split(caSplitChars, StringSplitOptions.RemoveEmptyEntries);
                foreach (string strMorphname in astrMorphnames)
                {
                    // save the type of the token in one map (so we can trigger tooltips on the full list)
                    m_mapTokensToType[strMorphname] = strType;
                    strType = cstrMorphname;

                    // put the list of TextExamples into another map keyed on the token (but it may
                    //  already exist)
                    AddMapToTeList(strMorphname, te);
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Creates a read-only, single-line <see cref="RichTextBox"/> showing the text of the given
        /// example, sized to fit that text in the examples font.
        /// </summary>
        /// <param name="te">Text example to display; it is also stored in the control's <c>Tag</c>.</param>
        /// <returns>The configured control, with <c>rtb_MouseUp</c> attached to its <c>MouseUp</c> event.</returns>
        protected RichTextBox InitializeRTB(TextExample te)
        {
            RichTextBox box = new RichTextBox
            {
                Multiline = false,
                Text = te.Text,
                ReadOnly = true,
                AutoWordSelection = true,
                BorderStyle = BorderStyle.FixedSingle,
                Font = m_fontExamples,
                WordWrap = false
            };

            // the Graphics object used for measuring is created on first use and then kept
            if (m_g == null)
            {
                m_g = Graphics.FromHwnd(this.Handle);
            }

            // measured text size, enlarged by the padding and indent factors
            Size measured = TextRenderer.MeasureText(m_g, te.Text, m_fontExamples);
            measured.Height += m_nPadFactor;
            measured.Width += m_nIndentFactor;

            box.Size = measured;
            box.Margin = m_paddingRTF;
            box.SelectionIndent = m_nIndentFactor;

            box.Tag = te;
            box.MouseUp += rtb_MouseUp;
            return box;
        }
Beispiel #4
0
        /// <summary>
        /// Registers <paramref name="te"/> under <paramref name="strMorphname"/> in
        /// <c>m_mapTextExamples</c>, creating the list for that key when it does not exist yet.
        /// A text example already present in the list is not added a second time.
        /// </summary>
        /// <param name="strMorphname">Token used as the key of the map.</param>
        /// <param name="te">Text example to associate with the token.</param>
        protected void AddMapToTeList(string strMorphname, TextExample te)
        {
            List<TextExample> examples;
            if (m_mapTextExamples.TryGetValue(strMorphname, out examples))
            {
                // already registered for this token: nothing to do
                if (examples.Contains(te))
                {
                    return;
                }
            }
            else
            {
                examples = new List<TextExample>();
            }

            examples.Add(te);
            m_mapTextExamples[strMorphname] = examples;
        }