%
% NOTE -- ONLY EDIT THE .Rnw FILE!!!  The .tex file is
% likely to be overwritten.
%
% NOTE(review): this source had been collapsed onto a handful of physical
% lines, so everything after the first percent sign was read as a comment and
% the noweb code-chunk markers were damaged.  The line breaks, the assumed
% extent of the two commented-out commands (\parskip, \setkeys) and the chunk
% headers below are reconstructed.  Chunk labels or options, if the original
% had any, could not be recovered -- TODO confirm against a pristine copy.
%
\documentclass[landscape]{article}

% ---- packages ---------------------------------------------------------
\usepackage{amsmath,pstricks}
\usepackage[authoryear,round]{natbib}
\usepackage{hyperref}
\usepackage{sectsty}
\usepackage{landscape}
% graphicx (a superset of graphics) supplies the Gin keys that the
% \setkeys{Gin}{...} call before the screenshots relies on.
\usepackage{graphicx}

% ---- page layout ------------------------------------------------------
% \textwidth used to be assigned twice in this preamble (10.9in, then 8.2in);
% only the later assignment ever took effect, so that value is the one kept.
\textwidth=8.2in
\textheight=6.5in
%\parskip=.3cm
\oddsidemargin=.0in
\evensidemargin=.0in
\headheight=-.3in

% ---- math style shorthands --------------------------------------------
\newcommand{\scscst}{\scriptscriptstyle}
\newcommand{\scst}{\scriptstyle}

% ---- fixed font sizes -------------------------------------------------
\newcommand{\dimfo}{\fontsize{14}{16}\selectfont}
\newcommand{\expfo}{\fontsize{18}{22}\selectfont}

% ---- markup for R entities --------------------------------------------
\newcommand{\Rfunction}[1]{{\texttt{#1}}}
\newcommand{\Robject}[1]{{\texttt{#1}}}
\newcommand{\Rpackage}[1]{{\textit{#1}}}
\newcommand{\Rmethod}[1]{{\texttt{#1}}}
\newcommand{\Rfunarg}[1]{{\texttt{#1}}}
\newcommand{\Rclass}[1]{{\textit{#1}}}

% ---- list shorthands: \bi opens an itemize, \ei closes it -------------
\newcommand\bi{\begin{itemize}}
\newcommand\ei{\end{itemize}}

\bibliographystyle{plainnat}

% ---- fonts: sans serif throughout, T1 encoding ------------------------
\renewcommand{\familydefault}{\sfdefault}
\usepackage[T1]{fontenc}

% ---- slide helpers ----------------------------------------------------
\newcommand\cp{{\clearpage}}
% Slide titles at four sizes.  The size switch is not confined to the title
% text: it stays in effect until the enclosing group closes, and every slide
% below wraps its title and body in one brace group.
\newcommand\sts[1]{\Huge \textbf{#1}}
\newcommand\stsh[1]{\huge \textbf{#1}}
\newcommand\stsL[1]{\Large \textbf{#1}}
\newcommand\stsl[1]{\large \textbf{#1}}

\begin{document}

% Base size for the slides and for any sectioning commands.
\fontsize{18}{22}
\selectfont
\allsectionsfont{\sffamily}
\sectionfont{\fontfamily{phv}\fontsize{18}{22}\selectfont}
\subsectionfont{\fontfamily{phv}\fontsize{18}{22}\selectfont}
\subsubsectionfont{\fontfamily{phv}\fontsize{18}{22}\selectfont}

%\setkeys{Gin}{width=0.85\textwidth}

% ======================================================================
% Title slide
% ======================================================================
{
\Huge
\begin{center}
\textbf{Pathway annotation case study \\
intro insert, March 2008 Bioconductor course \\
\copyright 2008, VJ Carey Ph D}
\end{center}

\bi
  \item ``Gene sets'' are popular tools for analysis
  \item rapid survey of a large family of gene sets is
        facilitated by programming
  \item conversion of moderately conventional annotation
        for genes/gene sets to operators on Bioconductor data
        structures is illustrated
\ei
}

\clearpage

% ======================================================================
% Motivating question
% ======================================================================
{\sts{Is my gene in any pathways?}

\bi
  \item case of TBX21
  \item KEGG -- nothing
  \item NCBI -- nothing
  \item What about the Broad GSEA-related gene sets?
  \item Bioconductor package GSEABase helps navigate these
\ei
}

\clearpage

% ======================================================================
% Loading the serialized collection
% ======================================================================
{\stsL{broadsets.rda}

% Loads broadsets.rda only when no object named broadsets exists yet.
<<>>=
library(GSEABase)
if (!exists("broadsets")) load("broadsets.rda")
broadsets
class(broadsets)
getClass(class(broadsets))
@
}

\clearpage

% ======================================================================
% Looking at a single set
% ======================================================================
{\stsL{information on a set}

<<>>=
broadsets[[1]]
details(broadsets[[1]])
@
}

\clearpage

% ======================================================================
% Iterating over the collection
% ======================================================================
{\stsL{GeneSetCollection operations}

\bi
  \item a GeneSetCollection instance is an R list of GeneSets
  \item iteration over list elements is relatively easy in R
  \item need to know how to operate usefully on a GeneSet
  \item poked at one above with `details' method
  \item another method of interest: geneIds
% The chunk stays inside the list so its output is set under the last item.
<<>>=
geneIds(broadsets[[1]])
allids = lapply(broadsets, geneIds)
tbxchk = sapply(allids, function(x) any(x == "TBX21"))
sum(tbxchk)
hastbx = which(tbxchk)
sapply(broadsets[hastbx], setName)
@
\ei
}

\clearpage

% ======================================================================
% Drilling into one of the hits
% ======================================================================
{\stsL{More info}

\bi
  \item we have identified 12 `gene sets' that include TBX21
  \item what are they?  we can see that one is just a cytoband
\ei

<<>>=
broadsets[["V$LYF1_01"]]
@
}

\clearpage

{\stsL{More info}

<<>>=
details(broadsets[["V$LYF1_01"]])
longDescription(broadsets[["V$LYF1_01"]])
@

\bi
  \item This longDescription result is not very useful \dots{} it is
        a long string of HTML.  If we write it to a file, we can run
        a browser.  Use writeLines
\ei
}

\clearpage

% ======================================================================
% Two full-page images (lyf1, lyf1b)
% ======================================================================
% Images are scaled wider than the text block.
\setkeys{Gin}{width=1.15\textwidth}
\includegraphics{lyf1}

\clearpage

\includegraphics{lyf1b}

\clearpage

% ======================================================================
% Summary
% ======================================================================
{\sts{Upshots}

\bi
  \item A collection of over 3000 sets of genes is bound
        to a single R variable name (broadsets)
  \item Each set is self-documenting and includes a list of
        HUGO identifiers (as given by Broad)
  \item methods \verb+geneIds+, \verb+details+, \verb+longDescription+
        provide uniform information on each set
  \item programming expertise useful
  \bi
    \item general string matching (\texttt{x == 'TBX21'}) or pattern
          matching (grep, case conversion etc.) available directly to
          constituents
    \item shortcuts \verb+broadsets[["V$LYF1_01"]]+
  \ei
  \item exploit sets and their structures for thorough statistical analysis
\ei
}

\clearpage

% ======================================================================
% Application: subsetting the ALL data by a gene set
% ======================================================================
{\stsL{Application -- note number of features retained}

% Re-expresses the gene set in the identifier type of ALL's annotation,
% then uses the resulting ids to index the rows of ALL.
<<>>=
library(Biobase)
library(ALL)
data(ALL)
keep = broadsets[["V$LYF1_01"]]
geneIdType(keep) = AnnotationIdentifier(annotation(ALL))
ALL[ geneIds(keep), ]
@
}

\end{document}