Beispiel #1
0
		/// <summary>
		/// Read input data from database
		/// </summary>
		/// <param name="smp">
		/// <returns></returns>

		List<CompoundStructureActivityData> ReadData(
			SasMapParms smp)
		{
			MetaColumn activityMc = smp.EndpointMc;
			QueryColumn keyCriteriaQc = smp.KeyCriteriaQc;

			AssertMx.IsNotNull(activityMc, "mc");
			AssertMx.IsNotNull(keyCriteriaQc, "keyCriteriaQc");

			MetaTable mt, mt2;
			MetaColumn mc2 = null;

			Query q = new Query();
			mt = activityMc.MetaTable;
			QueryTable qt = new QueryTable(mt);
			if (mt.SummarizedExists && !mt.UseSummarizedData)
			{ // retrieve summarized data if exists 
				mt2 = MetaTableCollection.Get(mt.Name + MetaTable.SummarySuffix);
				if (mt2 != null)
				{
					mc2 = mt2.GetMetaColumnByName(activityMc.Name);
					if (mc2 == null) mc2 = mt2.GetMetaColumnByLabel(activityMc.Label);
				}

				if (mc2 != null) // same column available in summarized?
				{
					mt = mt2;
					activityMc = mc2;
				}
			}

			SMP.KeyCriteriaQc.CopyCriteriaToQueryKeyCritera(q);
			q.KeyCriteriaDisplay = SMP.KeyCriteriaQc.CriteriaDisplay;

			qt.SelectKeyOnly();
			QueryColumn qc = qt.GetQueryColumnByName(activityMc.Name);
			qc.Selected = true;
			q.AddQueryTable(qt);

			QueryEngine qe = new QueryEngine();
			List<string> keyList = qe.ExecuteQuery(q); // note that keylist may be empty if single-step query

			HashSet<string> keySet = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

			List<CompoundStructureActivityData> data = new List<CompoundStructureActivityData>();

			int rowCount = 0;
			while (true)
			{
				object[] vo = qe.NextRow();
				if (vo == null) break;
				CompoundStructureActivityData cd = new CompoundStructureActivityData();
				string cid = (string)vo[0];
				cd.Cid = cid;
				keySet.Add(cid); // accumulate keys

				object val = vo[2];
				if (NullValue.IsNull(val)) continue;
				if (val is double)
					cd.Activity = (double)val;
				else if (val is Int32)
					cd.Activity = (Int32)val;

				else if (val is NumberMx)
				{
					NumberMx nex = val as NumberMx;
					cd.Activity = nex.Value;
				}

				else if (val is QualifiedNumber)
				{
					QualifiedNumber qn = val as QualifiedNumber;
					cd.Activity = qn.NumberValue;
					//if (qn.Qualifier != null && qn.Qualifier != "" && qn.Qualifier != "=")
					//	continue; // (don't want to do this since may filter out good data (e.g. IC50 <0.0001))
				}

				else continue;

				if (cd.Activity == NullValue.NullNumber) continue;

				data.Add(cd);
				rowCount++;
			}

			// Retrieve structures

			keyList = new List<string>(keySet);
			Dictionary<string, MoleculeMx> csDict = MoleculeUtil.SelectMoleculesForCidList(keyList, qt.MetaTable); // get the structures in a single step

			// Add structures and build/store fingerprints to data

			DebugLog.Message("========== Fingerprints ============");

			foreach (CompoundStructureActivityData cd in data)
			{

				if (!csDict.ContainsKey(cd.Cid) || csDict[cd.Cid] == null) continue;

				if (cd.Cid == "111" || cd.Cid == "222") csDict = csDict; // debug

				MoleculeMx cs = csDict[cd.Cid];
				cd.Structure = cs;

				FingerprintType fpType = FingerprintType.Circular;
				int fpSubtype = -1;

				if (SMP.SimilarityType == SimilaritySearchType.ECFP4) // some issue with ECFP4?
				{
					fpType = FingerprintType.Circular;
					fpSubtype = CircularFingerprintType.ECFP4;
				}

				else if (SMP.SimilarityType == SimilaritySearchType.Normal)
				{
					fpType = FingerprintType.MACCS;
				}

				cd.BitsetFingerprint = cs.BuildBitSetFingerprint(fpType, fpSubtype);
				if (cd.BitsetFingerprint == null) continue; // couldn't build fingerprint (e.g. no structure)

				if (Debug) DebugLog.Message(cd.Cid + ": " + Lex.Join(CdkMolUtil.GetBitSet(cd.BitsetFingerprint), ", "));
			}

			return data;
		}
Beispiel #2
0
        /// <summary>
        /// Search button clicked, process input
        /// </summary>
        /// <returns></returns>

        DialogResult ProcessInput()
        {
            CidList      cidList             = null;
            StreamReader structureFileReader = null;
            string       qid;       // query identifier, compoundId, file name or sdFile key value
            QueryManager qm;
            DataTableMx  dt;
            DataColumn   dc;
            DataRowMx    dr;
            //object[] dr; // if using Qe
            DialogResult   dlgRslt = DialogResult.OK;
            Query          q = null;
            QueryTable     qt = null;
            QueryColumn    simScoreQc, structQc;          // query column containing latest query settings
            MetaTable      mt = null;
            MetaColumn     keyMc = null, structMc = null, dbSetMc = null, simScoreMc = null, mc;
            MetaColumnType storageType;
            string         txt, tok;

            if (DatabasesToSearch.Text == "")
            {
                MessageBoxMx.ShowError("Databases to search must be defined.");
                DatabasesToSearch.Focus();
                return(DialogResult.Cancel);
            }

            // Get list of databases

            string[]         dba    = DatabasesToSearch.Text.Split(',');
            List <MetaTable> tables = new List <MetaTable>();

            foreach (string dbLabel0 in dba)
            {
                string dbLabel = dbLabel0.Trim();

                RootTable dbInfo = RootTable.GetFromTableLabel(dbLabel);
                if (dbInfo == null)
                {
                    MessageBoxMx.ShowError("Can't find database " + DatabasesToSearch.Text);
                    DatabasesToSearch.Focus();
                    return(DialogResult.Cancel);
                }

                mt = MetaTableCollection.Get(dbInfo.MetaTableName);
                if (mt == null)
                {
                    MessageBoxMx.ShowError("Unable to locate parent structure table for database: " + DatabasesToSearch.Text);
                    DatabasesToSearch.Focus();
                    return(DialogResult.Cancel);
                }

                if (dbSetMc == null)
                {
                    dbSetMc = mt.DatabaseListMetaColumn;
                }

                tables.Add(mt);
            }

            if (dbSetMc == null)
            {
                throw new Exception("\"Databases\" metacolumn not found for any of the databases to search");
            }

            // Validate other form values

            RetrieveStructures = RetrieveMatchingStructures.Checked;

            bool fromList        = FromList.Checked;
            int  listCidsRead    = 0;
            int  inputQueryCount = -1;

            if (fromList)             // using list, validate list name
            {
                if (SavedListUo == null)
                {
                    MessageBoxMx.ShowError("Compound list must be defined.");
                    ListName.Focus();
                    return(DialogResult.Cancel);
                }

                cidList = CidListCommand.Read(SavedListUo);
                if (cidList == null)
                {
                    MessageBoxMx.ShowError("Error reading list.");
                    ListName.Focus();
                    return(DialogResult.Cancel);
                }

                inputQueryCount = cidList.Count;
            }

            else             // Using SdFile, validate SdFile name
            {
                StructureFile = FileName.Text;
                if (StructureFile == "")
                {
                    MessageBoxMx.ShowError("File must be defined.");
                    FileName.Focus();
                    return(DialogResult.Cancel);
                }

                try { structureFileReader = new StreamReader(StructureFile); structureFileReader.Close(); }
                catch (Exception ex)
                {
                    MessageBoxMx.ShowError("Can't read file: " + Lex.Dq(StructureFile));
                    FileName.Focus();
                    return(DialogResult.Cancel);
                }

                keyField = KeyField.Text;             // get key, blank to use name in 1st line

                inputQueryCount = -1;                 // don't know how many queries unless we read the file (todo?)
            }

            tok = ResultsName.Text.Trim();             // name to store results under
            if (tok == "")
            {
                MessageBoxMx.ShowError("A name for the results must be provided.");
                ResultsName.Focus();
                return(DialogResult.Cancel);
            }

            if (SubStruct.Checked)
            {
                Psc.SearchType = StructureSearchType.Substructure;
            }
            else if (Full.Checked)
            {
                Psc.SearchType = StructureSearchType.FullStructure;
            }
            else if (Similarity.Checked)
            {
                Psc.SearchType = StructureSearchType.MolSim;
            }
            else
            {
                throw new Exception("Unrecognized search type");
            }

            // Write initial log entries

            SearchCount++;
            string logFileName = ClientDirs.DefaultMobiusUserDocumentsFolder + @"\Multistructure Search " + SearchCount + ".txt";

            if (!UIMisc.CanWriteFileToDefaultDir(logFileName))
            {
                return(DialogResult.Cancel);
            }
            LogStream = new StreamWriter(logFileName);

            if (ResultsUo == null)
            {
                ResultsUo = new UserObject(UserObjectType.Annotation);
            }
            ResultsUo.Name = tok;
            UserObjectTree.GetValidUserObjectTypeFolder(ResultsUo);

            DateTime startTime = DateTime.Now;

            WriteToLog("Multiple " + Psc.SearchType + " Search");
            WriteToLog("Databases: " + DatabasesToSearch.Text);
            WriteToLog("Date: " + startTime);
            if (fromList)
            {
                WriteToLog("Input List: " + SavedListUo.Name);
            }
            else
            {
                WriteToLog("Input Structure File: " + StructureFile);
            }
            WriteToLog("Output List: " + ResultsUo.Name);

            WriteToLog("Log File: " + logFileName);
            WriteToLog("");
            WriteToLog("Query, Match, Score");

            int              queryCount           = 0;
            int              matchAtLeastOneCount = 0;
            MoleculeMx       queryStructure       = null; // current structure being searched
            CidList          matchList            = new CidList();
            List <MatchData> matchData            = new List <MatchData>();

            if (FromFile.Checked)             // open SdFile as required
            {
                structureFileReader = new StreamReader(StructureFile);
            }

            // Search of structures one at a time

            while (true)
            {
                if (fromList)                 // get next structure from list
                {
                    if (listCidsRead >= cidList.Count)
                    {
                        break;
                    }
                    qid = cidList[listCidsRead].Cid;
                    listCidsRead++;
                    if (qid.Trim() == "")
                    {
                        continue;
                    }
                    if (qid.ToLower().IndexOf(".mol") > 0 || qid.ToLower().IndexOf(".skc") > 0)
                    {                     // file reference
                        if (!File.Exists(qid))
                        {
                            continue;
                        }
                        if (qid.ToLower().IndexOf(".mol") > 0)
                        {
                            queryStructure = MoleculeMx.ReadMolfile(qid);
                        }
                        else
                        {
                            queryStructure = MoleculeMx.ReadSketchFile(qid);
                        }
                    }

                    else
                    {
                        queryStructure = MoleculeUtil.SelectMoleculeForCid(qid);
                    }
                    if (queryStructure == null || queryStructure.AtomCount == 0)
                    {
                        continue;                                                                              // oops
                    }
                }

                else                 // get next structure from input file
                {
                    qid = null;

                    if (StructureFile.ToLower().EndsWith(".sdf"))
                    {
                        List <SdFileField> fList = SdFileDao.Read(structureFileReader);
                        if (fList == null)                         // end of sdFile
                        {
                            structureFileReader.Close();
                            break;
                        }
                        if (fList.Count == 0)
                        {
                            continue;
                        }

                        queryStructure = new MoleculeMx(MoleculeFormat.Molfile, fList[0].Data);
                        if (queryStructure == null || queryStructure.AtomCount == 0)
                        {
                            continue;
                        }

                        if (keyField != "")                         // key field specified?
                        {
                            qid = SdFileDao.GetDataField(fList, keyField);
                        }
                        else                         // get name from 1st line of molfile
                        {
                            string molFile = fList[0].Data;
                            int    i1      = molFile.IndexOf("\n");
                            if (i1 == 0)
                            {
                                qid = "";
                            }
                            else
                            {
                                qid = molFile.Substring(0, i1).Trim();
                            }
                        }
                        if (string.IsNullOrEmpty(qid))
                        {
                            qid = SdFileDao.GetDataField(fList, "compound_id");
                        }
                    }

                    else                     // assume smiles file
                    {
                        string smiles = structureFileReader.ReadLine();
                        if (smiles == null)                         // end of sdFile
                        {
                            structureFileReader.Close();
                            break;
                        }
                        smiles = smiles.Trim();
                        if (smiles.Length == 0)
                        {
                            continue;
                        }

                        int i1 = smiles.IndexOf(",");                         // get any preceeding queryId
                        if (i1 < 0)
                        {
                            i1 = smiles.IndexOf("\t");
                        }
                        if (i1 >= 0)
                        {
                            qid    = smiles.Substring(0, i1).Trim();
                            smiles = smiles.Substring(i1 + 1).Trim();
                        }

                        queryStructure = new MoleculeMx(MoleculeFormat.Smiles, smiles);
                        if (queryStructure == null || queryStructure.AtomCount == 0)
                        {
                            continue;
                        }
                    }

                    if (qid == null || qid.Trim() == "")
                    {
                        qid = (queryCount + 1).ToString();                                                      // be sure we have a query id
                    }
                }

                queryCount++;                 // count the query
                if (queryStructure == null || queryStructure.AtomCount == 0)
                {
                    WriteToLog("Error converting specific structure " + queryCount.ToString() + ", " + qid);
                    continue;
                }

                queryStructure.RemoveStructureCaption();                 // remove any Mobius-added caption
                Psc.Molecule = queryStructure;

                string msg =
                    "Searching Structure: " + queryCount.ToString();
                if (inputQueryCount > 0)
                {
                    msg += " of " + inputQueryCount.ToString();
                }
                msg += "\n" +
                       "Structures with one or more matches: " + matchAtLeastOneCount.ToString() + "\n" +
                       "Total Matches: " + matchList.Count.ToString();

                Progress.Show(msg);

                // Do the search over the list of databases

                for (int ti = 0; ti < tables.Count; ti++)
                {
                    mt = tables[ti];

                    q = new Query();                     // build basic query
                    //q.SingleStepExecution = true; // do in single step (doesn't currently return sim score)
                    q.ShowStereoComments = false;

                    qt = new QueryTable(mt);

                    q.AddQueryTable(qt);
                    qt.SelectKeyOnly();                     // start selecting desired cols

                    keyMc = mt.KeyMetaColumn;

                    structMc          = mt.FirstStructureMetaColumn;
                    structQc          = qt.GetQueryColumnByName(structMc.Name);
                    structQc.Selected = RetrieveStructures;

                    dbSetMc = mt.DatabaseListMetaColumn;
                    if (dbSetMc == null)
                    {
                        throw new Exception("\"Databases\" metacolumn not found for table: " + mt.Label);
                    }
                    QueryColumn dbSetQc = qt.GetQueryColumnByName(dbSetMc.Name);
                    dbSetQc.Selected = true;                     // select the database name
                    RootTable root = RootTable.GetFromTableName(mt.Name);
                    txt = " in (" + root.Label + ")";
                    dbSetQc.Criteria        = dbSetMc.Name + txt;
                    dbSetQc.CriteriaDisplay = txt;

                    simScoreMc = mt.SimilarityScoreMetaColumn;
                    simScoreQc = null;
                    if (simScoreMc != null)                     // get sim score if it exists
                    {
                        simScoreQc          = qt.GetQueryColumnByName(simScoreMc.Name);
                        simScoreQc.Selected = true;                         // return sim score
                    }

                    Psc.QueryColumn = structQc;
                    ParsedStructureCriteria psc2 = AdjustSearchForSmallWorldAsNeeded(Psc);
                    psc2.ConvertToQueryColumnCriteria(structQc);                     // format the QC for the structure search

                    DateTime t0 = DateTime.Now;

                    //QueryEngine qe = new QueryEngine();
                    //qe.NextRowsMin = 1000; // minimum number of rows to prefetch
                    //qe.NextRowsMax = -1; // maximum number of rows to prefetch
                    //qe.NextRowsMaxTime = 10000; // max time in milliseconds for next fetch
                    //qe.ExecuteQuery(q);

                    qm = new QueryManager();
                    try { dlgRslt = qm.ExecuteQuery(ref q); }
                    catch (Exception ex)
                    {
                        WriteToLog("Error searching structure: " + ex.Message + ", " + queryCount.ToString() + ", " + qid);
                        continue;
                    }

                    if (dlgRslt != DialogResult.OK)
                    {
                        return(dlgRslt);
                    }

                    double executeTime = TimeOfDay.Delta(ref t0);

                    int offset = qm.DataTableManager.KeyValueVoPos + 1;
                    //int offset = 0; // for QE
                    int keyPos = offset++;
                    int strPos = RetrieveStructures ? offset++ : -1;
                    int dbPos  = offset++;
                    int simPos = offset++;

                    int fetchCnt = 0;
                    while (true)
                    {
                        dr = qm.NextRow();
                        //dr = qe.NextRow(); // for Qe
                        if (dr == null)
                        {
                            break;
                        }

                        fetchCnt++;

                        if (fetchCnt == 1)
                        {
                            matchAtLeastOneCount++;                             // number of queries that have at least one match
                        }
                        MatchData md = new MatchData();
                        md.Qno = queryCount;
                        md.Qid = qid;
                        if (RetrieveStructures)
                        {
                            md.Qstr = "Chime=" + queryStructure.GetChimeString();
                        }

                        CompoundId cid = CompoundId.ConvertTo(dr[keyPos]);
                        md.Mid = cid.Value;

                        if (RetrieveStructures)
                        {
                            MoleculeMx ms = MoleculeMx.ConvertTo(dr[strPos]);
                            if (!NullValue.IsNull(ms))
                            {
                                md.Mstr = "Chime=" + ms.GetChimeString();
                            }
                        }

                        StringMx db = StringMx.ConvertTo(dr[dbPos]);
                        if (!NullValue.IsNull(db))
                        {
                            md.Db = db.Value;
                        }

                        if (Psc.SearchType == StructureSearchType.MolSim)
                        {
                            NumberMx nex = NumberMx.ConvertTo(dr[simPos]);
                            if (!NullValue.IsNull(nex))
                            {
                                md.Score = nex.Value;
                            }
                        }

                        if (matchList.Contains(cid.Value))                         // already have compound id as match for other query?
                        {
                            if (Psc.SearchType != StructureSearchType.MolSim)
                            {
                                continue;                                                             // if similarity search see if more similar
                            }
                            CidListElement le = matchList.Get(cid.Value);                             // reference current score
                            if (le.Tag > md.Score)
                            {
                                continue;                                         // only replace if more similar
                            }
                            matchList.Remove(le.Cid);                             // remove from list
                            for (int mi = 0; mi < matchData.Count; mi++)          // remove from data
                            {
                                if (matchData[mi].Mid == md.Mid)
                                {
                                    matchData.RemoveAt(mi);
                                    break;
                                }
                            }
                        }

                        matchList.Add(md.Mid);
                        matchList.Get(md.Mid).Tag = md.Score;      // keep score in list
                        matchData.Add(md);                         // add to results

                        txt = md.Qid + ", " + md.Mid + ", " + md.Score.ToString();
                        WriteToLog(txt);
                    }                     // Fetch result loop

                    double fetchTime = TimeOfDay.Delta(ref t0);
                }                 // DB loop

                if (Progress.CancelRequested)
                {
                    Progress.Hide();
                    MessageBoxMx.ShowError("Search cancelled.");
                    try { LogStream.Close(); } catch { }
                    return(DialogResult.Cancel);
                }
            }                                           // key loop

            CidListCommand.WriteCurrentList(matchList); // write the list of numbers

            UsageDao.LogEvent("MultipleStructSearch");

            txt =
                "=== Multiple structure search complete ===\r\n\r\n" +

                "Structures Searched: " + queryCount.ToString() + "\r\n";

            txt +=
                "Structures with one or more matches: " + matchAtLeastOneCount.ToString() + "\r\n" +
                "Total Matches: " + matchList.Count.ToString() + "\r\n";

            TimeSpan ts = DateTime.Now.Subtract(startTime);

            ts   = new TimeSpan(ts.Hours, ts.Minutes, ts.Seconds);
            txt += "Total Time: " + ts + "\r\n\r\n";

            WriteToLog("\r\n" + txt);
            try { LogStream.Close(); } catch { }

            if (matchList.Count == 0)
            {
                Progress.Hide();
                MessageBoxMx.ShowError("No matches have been found.");
                return(DialogResult.Cancel);
            }

            tok = "Matching compound ids";
            if (Psc.SearchType == StructureSearchType.MolSim)
            {
                tok = "Similar compound ids";
            }
            txt += tok + " have been written to the current list: " + ResultsUo.Name + "\n" +
                   "Log file written to: " + logFileName + "\n\n" +
                   "Do you want to view the match results?";

            DialogResult dRslt = MessageBoxMx.Show(txt, "Multiple Structure Search", MessageBoxButtons.YesNoCancel, MessageBoxIcon.Question);

            if (dRslt == DialogResult.Cancel)
            {
                return(DialogResult.Cancel);
            }

            else if (dRslt == DialogResult.No)             // show log
            {
                SystemUtil.StartProcess(logFileName);
                return(DialogResult.Cancel);
            }

            // Display results

            Progress.Show("Formatting results...");

            mt                = new MetaTable();
            mt.Name           = "MULTSTRUCTSEARCH_" + SearchCount;
            mt.Label          = "Multiple Structure Search " + SearchCount;
            mt.TableMap       = "Mobius.Tools.MultStructSearch";       // plugin id
            mt.MetaBrokerType = MetaBrokerType.NoSql;

            ColumnSelectionEnum structureColumnSelection = RetrieveStructures ? ColumnSelectionEnum.Selected : ColumnSelectionEnum.Unselected;

            keyMc       = keyMc.Clone();
            keyMc.Name  = "MatchingCid";
            keyMc.Label = "Matching Compound Id";
            mt.AddMetaColumn(keyMc);

            structMc                  = structMc.Clone();
            structMc.Name             = "MatchingStructure";
            structMc.Label            = "Matching Structure";
            structMc.InitialSelection = structureColumnSelection;
            mt.AddMetaColumn(structMc);

            dbSetMc      = dbSetMc.Clone();
            dbSetMc.Name = "Database";
            mt.AddMetaColumn(dbSetMc);
            //if (DatabasesToSearch.Text.Contains(","))
            dbSetMc.InitialSelection = ColumnSelectionEnum.Selected;

            mc = mt.AddMetaColumn("Molsimilarity", "Similarity Search Score", MetaColumnType.Number, ColumnSelectionEnum.Unselected, 10);
            if (Psc.SearchType == StructureSearchType.MolSim)
            {
                mc.InitialSelection = ColumnSelectionEnum.Selected;
            }

            mc = mt.AddMetaColumn("QueryNo", "Query Number", MetaColumnType.Integer);
            //mc = mt.AddMetaColumn("QueryMatchNo", "Query Match Number", MetaColumnType.Integer);

            mc = mt.AddMetaColumn("QueryId", "Query Id", MetaColumnType.String);
            mc = mt.AddMetaColumn("QueryStructure", "Query Structure", MetaColumnType.Structure);
            mc.InitialSelection = structureColumnSelection;

            q  = ToolHelper.InitEmbeddedDataToolQuery(mt);
            dt = q.ResultsDataTable as DataTableMx;

            for (int mi = 0; mi < matchData.Count; mi++)
            {
                MatchData md = matchData[mi];
                dr = dt.NewRow();
                dr[qt.Alias + ".MatchingCid"] = new CompoundId(md.Mid);
                if (RetrieveStructures)
                {
                    dr[qt.Alias + ".MatchingStructure"] = new MoleculeMx(MoleculeFormat.Chime, md.Mstr);
                }
                dr[qt.Alias + ".Database"] = new StringMx(md.Db);
                if (Psc.SearchType == StructureSearchType.MolSim)
                {
                    dr[qt.Alias + ".Molsimilarity"] = new NumberMx(md.Score);
                }
                dr[qt.Alias + ".QueryNo"] = new NumberMx(md.Qno);
                dr[qt.Alias + ".QueryId"] = new StringMx(md.Qid);
                if (RetrieveStructures)
                {
                    dr[qt.Alias + ".QueryStructure"] = new MoleculeMx(MoleculeFormat.Chime, md.Qstr);
                }

                dt.Rows.Add(dr);
            }

            ToolHelper.DisplayData(q, dt, true);

            Progress.Hide();
            return(DialogResult.OK);
        }
Beispiel #3
0
        /// <summary>
        /// OK button clicked, process input
        /// </summary>
        /// <returns></returns>

        DialogResult ProcessInput()
        {
            int           rgCount;                              // number of Rgroups
            List <Rgroup> rgList;                               // list of existing Rgroups

            bool[]        rgExists;                             // entry = true if rgroup exists in core
            Rgroup        rg;
            bool          oneD, twoD;                           // matrix dimensionality
            List <string> keys = null;
            Dictionary <string, List <QualifiedNumber> > mElem; // matrix element dictionary

            List <RgroupSubstituent>[] rSubs;                   // substituents seen for each Rgroup
            Query        q, q0, q2;
            QueryTable   qt, qt2;
            QueryColumn  qc, qc2;
            MetaTable    mt, mt2;
            MetaColumn   mc, mc2;
            DataTableMx  dt;
            DataRowMx    dr;
            DialogResult dlgRslt;
            string       tok;
            int          ri, rii, si, qti, qci, bi, bi2;

            // Get core structure & list of R-groups

            MoleculeMx core = new MoleculeMx(MoleculeFormat.Molfile, SQuery.MolfileString);

            if (core.AtomCount == 0)
            {
                MessageBoxMx.ShowError("A Core structure with R-groups must be defined");
                return(DialogResult.None);
            }

            if (!Structure.Checked && !Smiles.Checked && !Formula.Checked &&
                !Weight.Checked && !Index.Checked)
            {
                MessageBoxMx.ShowError("At least one substituent display format must be selected.");
                return(DialogResult.None);
            }

            mt = MetaTableCollection.GetWithException("Rgroup_Decomposition");
            qt = new QueryTable(mt);
            qc = qt.GetQueryColumnByNameWithException("Core");

            qc.MolString       = core.GetMolfileString();       // put core structure into table criteria
            qc.CriteriaDisplay = "Substructure search (SSS)";
            qc.Criteria        = "CORE SSS SQUERY";

            qc = qt.GetQueryColumnByNameWithException("R1_Structure");
            if (ShowCoreStructure.Checked)
            {
                qc.Label            = "R-group, Core\tChime=" + core.GetChimeString();      // reference core in query col header label
                qc.MetaColumn.Width = 25;
            }

            RgroupDecomposition.SetSelected(qt, "R1_Structure", Structure.Checked);             // select for retrieval if checked
            RgroupDecomposition.SetSelected(qt, "R1_Smiles", Smiles.Checked);
            RgroupDecomposition.SetSelected(qt, "R1_Formula", Formula.Checked);
            RgroupDecomposition.SetSelected(qt, "R1_Weight", Weight.Checked);
            RgroupDecomposition.SetSelected(qt, "R1_SubstNo", Index.Checked);

            string terminateOption = "First mapping";             // terminate on first complete match

            qc                 = qt.GetQueryColumnByNameWithException("Terminate_Option");
            qc.Criteria        = qt.MetaTable.Name + " = " + Lex.AddSingleQuotes(terminateOption);
            qc.CriteriaDisplay = "= " + Lex.AddSingleQuotes(terminateOption);

            QueryTable rgdQt = qt;             // keep a ref to it

            if (QbUtil.Query == null || QbUtil.Query.Tables.Count == 0)
            {
                MessageBoxMx.ShowError("No current query.");
                return(DialogResult.None);
            }

            q0 = QbUtil.Query;          // original query this analysis is based on
            q  = q0.Clone();            // make copy of source query we can modify
            q.SingleStepExecution = false;

            qti = 0;
            while (qti < q.Tables.Count)             // deselect query columns that we don't want
            {
                qt = q.Tables[qti];
                if (Lex.Eq(qt.MetaTable.Name, "Rgroup_Decomposition"))
                {                 // remove any rgroup decomp table
                    qti++;
                    continue;
                }

                mt = qt.MetaTable;
                if (mt.MultiPivot ||                                 // check for tables not allowed in underlying query
                    mt.MetaBrokerType == MetaBrokerType.CalcField || // (called ShouldPresearchAndTransform previously)
                    mt.MetaBrokerType == MetaBrokerType.MultiTable ||
                    mt.MetaBrokerType == MetaBrokerType.RgroupDecomp)
                {
                    MessageBoxMx.ShowError("Multipivot/Rgroup table \"" + qt.ActiveLabel +
                                           "\" can't be included in an underlying Rgroup Matrix query");
                    return(DialogResult.None);
                }

                for (qci = 0; qci < qt.QueryColumns.Count; qci++)
                {
                    qc = qt.QueryColumns[qci];
                    if (qc.MetaColumn == null)
                    {
                        continue;
                    }

                    switch (qc.MetaColumn.DataType)
                    {
                    case MetaColumnType.CompoundId:                             // keep only these
                    case MetaColumnType.Integer:
                    case MetaColumnType.Number:
                    case MetaColumnType.QualifiedNo:
                    case MetaColumnType.String:
                        break;

                    default:
                        qc.Selected = false;
                        break;
                    }
                }

                qti++;
            }

            q.AddQueryTable(rgdQt);             // Add Rgroup decom table to end of cloned source query

            Progress.Show("Retrieving data...");
            try
            {
                dlgRslt = ToolHelper.ExecuteQuery(ref q, out keys);
                if (dlgRslt != DialogResult.OK)
                {
                    return(dlgRslt);
                }
            }

            catch (Exception ex)
            {
                MessageBoxMx.ShowError("Error executing query:\r\n" + ex.Message);
                return(DialogResult.None);
            }

            if (keys == null || keys.Count == 0)
            {
                Progress.Hide();
                MessageBoxMx.ShowError("No results were returned by the query.");
                return(DialogResult.None);
            }

// Scan modified query to get list of rgroup indexes that are present

            rgExists = new bool[32];
            rgList   = new List <Rgroup>();

            QueryTable rgQt = q.GetQueryTableByName("Rgroup_Decomposition");

            foreach (QueryColumn qc0 in rgQt.QueryColumns)
            {
                mc = qc0.MetaColumn;
                if (!(mc.Name.StartsWith("R") && mc.Name.EndsWith("_STRUCTURE") && qc0.Selected))
                {
                    continue;                     // skip if not a selected Rgroup structure
                }
                int len = mc.Name.Length - ("R" + "_STRUCTURE").Length;
                tok = mc.Name.Substring(1, len);
                if (!int.TryParse(tok, out ri))
                {
                    continue;
                }
                rgExists[ri - 1] = true;
                rg        = new Rgroup();
                rg.RIndex = ri;
                rg.VoPos  = qc0.VoPosition;
                rgList.Add(rg);
            }

            for (bi = 1; bi < rgList.Count; bi++)
            {             // sort by increasing R index
                rg = rgList[bi];
                for (bi2 = bi - 1; bi2 >= 0; bi2--)
                {
                    if (rg.RIndex >= rgList[bi2].RIndex)
                    {
                        break;
                    }
                    rgList[bi2 + 1] = rgList[bi2];
                }

                rgList[bi2 + 1] = rg;
            }

            rgCount = rgList.Count;

            twoD = TwoD.Checked;
            if (rgCount == 1)
            {
                twoD = false;                           // if only 1 rgroup can't do as 2d
            }
            oneD = !twoD;

// Read data into mElem and rgroup substituents into rSubs.
// Matrix mElem is keyed on [R1Smiles, R2Smiles,... RnSmiles, FieldName] for 1d and
// [R1Smiles, R2Smiles,... FieldName, RnSmiles] for 2d

            QueryManager     qm  = q.QueryManager as QueryManager;
            DataTableManager dtm = qm.DataTableManager;

            dt = qm.DataTable;

            mElem = new Dictionary <string, List <QualifiedNumber> >(); // matrix element dictionary
            rSubs = new List <RgroupSubstituent> [32];                  // list of substituents seen for each Rgroup
            for (rii = 0; rii < rgCount; rii++)                         // alloc substituent list for rgroup
            {
                rSubs[rii] = new List <RgroupSubstituent>();
            }

            int rowCount = 0;

            while (true)
            {             // scan data accumulating rgroup substituents and data values
                dr = dtm.FetchNextDataRow();
                if (dr == null)
                {
                    break;
                }
                rowCount++;

                string cid = dr[dtm.KeyValueVoPos] as string;
                string lastMapCid = "", rgroupKey = "", rgroupKeyLast = "";
                int    mapCount = 0;
                for (rii = 0; rii < rgCount; rii++)                 // for
                {
                    MoleculeMx rSub = dr[rgList[rii].VoPos] as MoleculeMx;
                    if (rSub == null || rSub.AtomCount == 0)
                    {
                        continue;
                    }

                    ri = rgList[rii].RIndex;                     // actual R index in query
                    int subIdx = RgroupSubstituent.Get(rSub, rSubs[rii]);
                    //					if (ri == 1 && subIdx != 0) subIdx = subIdx; // debug
                    if (subIdx < 0)
                    {
                        continue;
                    }
                    string rKey = "R" + ri.ToString() + "_" + (subIdx + 1).ToString();

                    if (oneD || rii < rgCount - 1)
                    {
                        if (rgroupKey != "")
                        {
                            rgroupKey += "\t";
                        }
                        rgroupKey += rKey;
                    }

                    else
                    {
                        rgroupKeyLast = rKey;
                    }
                    lastMapCid = cid;
                    mapCount++;
                }

                if (lastMapCid == cid)                 // add the data if compound has a mapping
                {
                    AccumulateMatrixElements(mElem, q, dr, rgroupKey, rgroupKeyLast, cid);
                }

                if (Progress.IsTimeToUpdate)
                {
                    Progress.Show("Retrieving data: " + StringMx.FormatIntegerWithCommas(rowCount) + " rows...");
                }
            }
            if (rowCount == 0)
            {
                Progress.Hide();
                MessageBoxMx.ShowError("No data rows retrieved");
                return(DialogResult.None);
            }

            if (twoD && (rSubs[rgCount - 1] == null || rSubs[rgCount - 1].Count == 0))
            {             // if 2D be sure we have at least one substituent for the last Rgroup
                Progress.Hide();
                MessageBoxMx.ShowError("No substituents found for R" + rgCount.ToString());
                return(DialogResult.None);
            }

            // Create a MetaTable & DataTable for matrix results

            Progress.Show("Analyzing data...");

            mt = new MetaTable();             // create output table
            MatrixCount++;
            mt.Name           = "RGROUPMATRIX_" + MatrixCount;
            mt.Label          = "R-group Matrix " + MatrixCount;
            mt.MetaBrokerType = MetaBrokerType.RgroupDecomp;

            mc =                       // use sequence for key
                 mt.AddMetaColumn("RgroupMatrixId", "No.", MetaColumnType.Integer, ColumnSelectionEnum.Selected, 3);
            mc.ClickFunction = "None"; // avoid hyperlink on this key
            mc.IsKey         = true;

            int maxLeftR = rgCount;

            if (twoD)
            {
                maxLeftR = rgCount - 1;
            }
            for (ri = 0; ri < maxLeftR; ri++)
            {
                string rStr = "R" + (ri + 1).ToString();
                if (Structure.Checked)
                {
                    mc = mt.AddMetaColumn(rStr + "Str", rStr, MetaColumnType.Structure, ColumnSelectionEnum.Selected, 12);
                    if (ri == 0 && ShowCoreStructure.Checked)                     // include core structure above R1 if requested
                    {
                        string chimeString = MoleculeMx.MolfileStringToSmilesString(SQuery.MolfileString);
                        mc.Label = "R1, Core\tChime=" + chimeString;
                        mc.Width = 25;
                    }
                }
                if (Smiles.Checked)
                {
                    mc = mt.AddMetaColumn(rStr + "Smi", rStr + " Smiles", MetaColumnType.String, ColumnSelectionEnum.Selected, 12);
                }
                if (Formula.Checked)
                {
                    mc = mt.AddMetaColumn(rStr + "Mf", rStr + " Formula", MetaColumnType.String, ColumnSelectionEnum.Selected, 8);
                }
                if (Weight.Checked)
                {
                    mc = mt.AddMetaColumn(rStr + "MW", rStr + " Mol. Wt.", MetaColumnType.Number, ColumnSelectionEnum.Selected, 6, ColumnFormatEnum.Decimal, 2);
                }

                if (Index.Checked)
                {
                    mc        = mt.AddMetaColumn(rStr + "Index", rStr + " Subst. Idx.", MetaColumnType.Number, ColumnSelectionEnum.Selected, 4);
                    mc.Format = ColumnFormatEnum.Decimal;
                }
            }

            mc =             // add column to contain result type
                 mt.AddMetaColumn("ResultType", "Result Type", MetaColumnType.String, ColumnSelectionEnum.Selected, 12);

            if (oneD)             // add just 1 column to contain results
            {
                mc = mt.AddMetaColumn("Results", "Results", MetaColumnType.QualifiedNo, ColumnSelectionEnum.Selected, 12);
                mc.MetaBrokerType = MetaBrokerType.RgroupDecomp;                 // broker to do special col handling for cond formtting
                if (QbUtil.Query.UserObject.Id > 0)
                {
                    mc.DetailsAvailable = true;
                }
            }

            else             // add col for each substituent for last rgroup
            {
                string rStr = "R" + rgCount.ToString();
                for (si = 0; si < rSubs[rgCount - 1].Count; si++)
                {
                    string            cName  = rStr + "_" + (si + 1).ToString();
                    string            cLabel = cName.Replace("_", ".");
                    RgroupSubstituent rgs    = rSubs[ri][si];  // get substituent info
                    if (Structure.Checked)                     // include structure
                    {
                        cLabel += "\tChime=" + rgs.Struct.GetChimeString();
                    }

                    else if (Smiles.Checked)
                    {
                        cLabel += " = " + rgs.Struct.GetSmilesString();
                    }

                    else if (Formula.Checked)
                    {
                        cLabel += " = " + rgs.Struct.MolFormula;
                    }

                    else if (Weight.Checked)
                    {
                        cLabel += " = " + rgs.Struct.MolWeight;
                    }

                    else if (Index.Checked)
                    {
                        cLabel += " = " + (si + 1).ToString();
                    }

                    mc = mt.AddMetaColumn(cName, cLabel, MetaColumnType.QualifiedNo, ColumnSelectionEnum.Selected, 12);
                    mc.MetaBrokerType = MetaBrokerType.RgroupDecomp;
                    if (QbUtil.Query.UserObject.Id > 0)
                    {
                        mc.DetailsAvailable = true;
                    }
                }
            }

            MetaTableCollection.UpdateGlobally(mt); // add as a known metatable

            if (mElem.Count == 0)                   // be sure we have a matrix
            {
                Progress.Hide();
                MessageBoxMx.ShowError("No matrix can be created because insufficient data was found.");
                return(DialogResult.None);
            }

            // Build the DataTable

            Progress.Show("Building data table...");

            q2  = new Query();            // build single-table query to hold matrix
            qt2 = new QueryTable(q2, mt);
            dt  = DataTableManager.BuildDataTable(q2);

            Dictionary <string, List <QualifiedNumber> > .KeyCollection kc = mElem.Keys;
            string[] rgKeys = new string[mElem.Count];
            kc.CopyTo(rgKeys, 0);
            Array.Sort(rgKeys);

            string[] rgKey = null, lastRgKey = null;
            int      rki   = 0;

            for (rki = 0; rki < rgKeys.Length; rki++)
            {
                rgKey = rgKeys[rki].Split('\t');

                int riTop = rgCount + 1;                 // all r substituents & field name on left
                if (twoD)
                {
                    riTop = rgCount;
                }

                for (ri = 0; ri < riTop; ri++)                 // see if any changes in left side substituents or field name
                {
                    if (lastRgKey == null || rgKey[ri] != lastRgKey[ri])
                    {
                        break;
                    }
                }
                if (ri < riTop || oneD)                 // if 2d then new row only if some change before last R
                {
                    dr = dt.NewRow();
                    dt.Rows.Add(dr);
                    dr[dtm.KeyValueVoPos + 1] = new NumberMx(dt.Rows.Count);                     // integer row key
                }

                if (!HideRepeatingSubstituents.Checked)
                {
                    ri = 0;                                                     // start at first if not hiding
                }
                lastRgKey = rgKey;

                for (ri = ri; ri < riTop; ri++)                 // build row with these
                {
                    string rgSub = rgKey[ri];                   // get substituent id or table.column name
                    if (rgSub == "")
                    {
                        continue;
                    }

                    if (ri < riTop - 1)
                    {                     // output substituent and/or smiles
                        string rStr = "R" + (ri + 1).ToString();
                        si = rgSub.IndexOf("_");
                        si = Int32.Parse(rgSub.Substring(si + 1)) - 1;                 // get substituent index
                        RgroupSubstituent rgs = rSubs[ri][si];                         // get substituent info

                        if (Structure.Checked)
                        {
                            qc2 = qt2.GetQueryColumnByName(rStr + "Str");
                            dr[QcToDcName(qc2)] = rgs.Struct;
                        }

                        if (Smiles.Checked)
                        {
                            qc2 = qt2.GetQueryColumnByName(rStr + "Smi");
                            dr[QcToDcName(qc2)] = new StringMx(rgs.Struct.GetSmilesString());
                        }

                        if (Formula.Checked)
                        {
                            qc2 = qt2.GetQueryColumnByName(rStr + "Mf");
                            dr[QcToDcName(qc2)] = new StringMx(rgs.Struct.MolFormula);
                        }

                        if (Weight.Checked)
                        {
                            qc2 = qt2.GetQueryColumnByName(rStr + "Mw");
                            dr[QcToDcName(qc2)] = new NumberMx(rgs.Struct.MolWeight);
                        }

                        if (Index.Checked)
                        {
                            qc2 = qt2.GetQueryColumnByName(rStr + "Index");
                            dr[QcToDcName(qc2)] = new NumberMx(si + 1);
                        }
                    }

                    else                                // output field name
                    {
                        string[] sa = rgSub.Split('.'); // get field name
                        qt = q.GetQueryTableByName(sa[0]);
                        qc = qt.GetQueryColumnByName(sa[1]);
                        string fieldName = qc.ActiveLabel;
                        if (q0.Tables.Count >= 3)                         // qualify by table if 3 or more tables in original query
                        {
                            fieldName = qt.ActiveLabel + " - " + fieldName;
                        }

                        qc2 = qt2.GetQueryColumnByName("ResultType");
                        dr[QcToDcName(qc2)] = new StringMx(fieldName);
                    }
                }

                // Output value

                string cName;
                if (oneD)
                {
                    cName = "Results";
                }
                else
                {
                    cName = rgKey[rgCount];                  // get key for this substituent (e.g. R2_1)
                }
                if (Lex.IsUndefined(cName))
                {
                    continue;                                         // may be no substituent match
                }
                qc2 = qt2.GetQueryColumnByName(cName);
                QualifiedNumber qn = SummarizeData(mElem[rgKeys[rki]]);                 // get summarized value
                dr[QcToDcName(qc2)] = qn;
            }

            ToolHelper.DisplayData(q2, dt, true);

            UsageDao.LogEvent("RgroupMatrix");
            Progress.Hide();
            return(DialogResult.OK);
        }