%
% NOTE -- ONLY EDIT THE .Rnw FILE!!!  The .tex file is
% likely to be overwritten.
%
% Sweave source for a landscape slide deck ("Lecture 4: annotation in
% Bioconductor").  Each slide is a brace group followed by \clearpage.
% R code chunks use the noweb delimiters: an opening chunk header and a
% closing "@", each of which must begin in column 0 of its own line.
% NOTE(review): the chunk headers carry no options here; if the original
% deck used chunk options (e.g. echo/eval/fig), restore them -- TODO confirm.
%
\documentclass[landscape]{article}
%
% ---- Packages ---------------------------------------------------------
\usepackage{amsmath,pstricks}
\usepackage[authoryear,round]{natbib}
\usepackage{sectsty}
\usepackage{landscape}
% graphicx (not graphics) provides the Gin keys and the width= option
% that the \setkeys{Gin}{...} call further down depends on.
\usepackage{graphicx}
%
% ---- Fonts ------------------------------------------------------------
% Sans-serif is the document default family.
\renewcommand{\familydefault}{\sfdefault}
\usepackage[T1]{fontenc}
%
% hyperref is loaded after the other packages so it can patch their commands.
\usepackage{hyperref}
%
% ---- Page layout ------------------------------------------------------
% The source previously set \textwidth twice (10.9in, then 8.2in); only the
% later, effective value is kept.
\textwidth=8.2in
\textheight=6.5in
%\parskip=.3cm
\oddsidemargin=.0in
\evensidemargin=.0in
\headheight=-.3in
%
% ---- Math style shorthands --------------------------------------------
\newcommand{\scscst}{\scriptscriptstyle}
\newcommand{\scst}{\scriptstyle}
%
% ---- Fixed font sizes (size/baselineskip) -----------------------------
\newcommand{\dimfo}{\fontsize{14}{16}\selectfont}
\newcommand{\expfo}{\fontsize{18}{22}\selectfont}
%
% ---- Markup for R entities --------------------------------------------
% Functions, objects, methods and arguments are typeset in typewriter;
% package and class names in italics.
\newcommand{\Rfunction}[1]{{\texttt{#1}}}
\newcommand{\Robject}[1]{{\texttt{#1}}}
\newcommand{\Rpackage}[1]{{\textit{#1}}}
\newcommand{\Rmethod}[1]{{\texttt{#1}}}
\newcommand{\Rfunarg}[1]{{\texttt{#1}}}
\newcommand{\Rclass}[1]{{\textit{#1}}}
%
% ---- List and page-break shorthands -----------------------------------
\newcommand\bi{\begin{itemize}}
\newcommand\ei{\end{itemize}}
\newcommand\cp{{\clearpage}}
%
\bibliographystyle{plainnat}
%
% ---- Slide titles -----------------------------------------------------
% Each macro switches the font size and prints its argument in bold.  The
% size switch is NOT confined to the title: it stays in force until the
% enclosing group closes, so it also sizes the body of the slide.
\newcommand\sts[1]{\Huge \textbf{#1}}
\newcommand\stsh[1]{\huge \textbf{#1}}
\newcommand\stsL[1]{\Large \textbf{#1}}
\newcommand\stsl[1]{\large \textbf{#1}}
%
\begin{document}
%
% Base text size for the deck, and matching sans-serif section fonts.
\fontsize{18}{22}
\selectfont
\allsectionsfont{\sffamily}
\sectionfont{\fontfamily{phv}\fontsize{18}{22}\selectfont}
\subsectionfont{\fontfamily{phv}\fontsize{18}{22}\selectfont}
\subsubsectionfont{\fontfamily{phv}\fontsize{18}{22}\selectfont}
%\setkeys{Gin}{width=0.85\textwidth}
%
% ---- Title slide and outline ------------------------------------------
{
\Huge
\begin{center}
% "\copyright\ " keeps the space that a bare control word would swallow.
\textbf{Lecture 4: annotation in Bioconductor \\
\copyright\ 2008 VJ Carey PhD \\
Channing Lab}
\end{center}
\bi
\item commitments
\item platform annotations -- SQLite basis
\item organism annotation -- org.Hs.eg.db
\item web services: biomaRt
\ei
}
\clearpage
%
% ---- Slide: annotation concept review ---------------------------------
{\sts{Annotation concept review}
\bi
\item platforms: feature annotation
\item experiments: MIAME annotation
\item samples: phenotype, disease, protocol
\item genomes, biological processes: ontologies for sequence, gene products, etc.
\item resources: networks of databases and tables
\ei
}
\clearpage
%
% ---- Slide: Bioconductor commitments ----------------------------------
{\sts{Bioconductor commitments}
\bi
\item identifier annotation maps for prevalent platforms are created every three months
\item open tools for annotating custom platforms are provided; advice given
\item large-scale annotation sets for important organisms are also provided
\bi
\item org.Hs.eg.db
\ei
\item harder problem: feature maps (e.g., CDF files for affy) for chips depend on manufacturer openness, tractability -- see pd.mapping packages
\ei
}
\clearpage
%
% ---- Slide: platform annotation package (hgfocus.db) ------------------
% Lists the objects exported by hgfocus.db and looks up the first three
% keys of hgfocusSYMBOL.
{\stsL{Platforms: Classic examples}
<<>>=
library(hgfocus.db)
objects("package:hgfocus.db")
library(annotate)
nn = ls(hgfocusSYMBOL)[1:3]
mget( nn, hgfocusSYMBOL )
@
}
\clearpage
%
% ---- Slide: CDF environment (hgfocuscdf) ------------------------------
% Shows a 3 x 2 corner of the entries for the first three keys of
% hgfocuscdf, then calls affy's indices2xy on one index.
{\stsL{Platforms: Classic examples}
<<>>=
library(hgfocuscdf)
hgfocuscdf
pid = ls(hgfocuscdf)[1:3]
lapply( mget(pid, hgfocuscdf), "[", 1:3, 1:2 )
library(affy)
indices2xy(62627, cdf="hgfocuscdf")
@
}
\clearpage
%
% ---- Slide: probe package (hgfocusprobe) ------------------------------
{\stsL{Platforms: Classic examples}
<<>>=
library(hgfocusprobe)
hgfocusprobe
as.data.frame(hgfocusprobe)[1:3,]
@
}
\clearpage
%
% ---- Slide: pd.mapping package object ---------------------------------
{\stsL{Platforms: new approach (oligo)}
<<>>=
library(pd.mapping50k.xba240)
pd.mapping50k.xba240
@
}
\clearpage
%
% ---- Slide: querying the pd.mapping database --------------------------
% Obtains the package's database connection, lists its tables and shows
% three featureSet rows with column 11 dropped.
{\stsL{pd.mapping approach}
<<>>=
xb = pd.mapping50k.xba240@getdb()
dbListTables(xb)
dbGetQuery(xb, "select * from featureSet limit 200,3")[,-11]
@
}
\clearpage
%
% ---- Slide: Illumina identifiers via lumi -----------------------------
{\stsL{Illumina lumi twist}
<<>>=
library(lumiHumanV2)
library(lumi)
kk = ls(lumiHumanV2REFSEQ)[1050:1054]
kk
nuID2targetID(kk, lib="lumiHumanV2")
id2seq(kk)
@
}
\clearpage
%
% ---- Slide: Illumina annotation through SQLite ------------------------
{\stsL{Illumina expression arrays: SQLite approach}
<<>>=
library(illuminaHumanv2.db)
ilco = illuminaHumanv2_dbconn()
dbListTables(ilco)
@
}
\clearpage
%
% ---- Slide: HTML annotation table with annaffy ------------------------
% Builds a symbol-based GeneSet, converts its identifier type to the
% illuminaHumanv2.db annotation, and writes the annaffy table to mftab.html.
{\stsL{creating a hyperlinked page of annotation resolutions}
<<>>=
library(annaffy)
melaFrag = c("EGF", "EGFR", "HRAS", "ARAF", "PIK3R5", "MAP2K1", "NRAS", "MAPK1", "BRAF")
library(GSEABase)
gs = GeneSet(melaFrag, geneIdType=SymbolIdentifier())
geneIdType(gs) = AnnotationIdentifier("illuminaHumanv2.db")
imf = geneIds(gs)
tab = aafTableAnn(imf, "illuminaHumanv2.db")
saveHTML(tab, file="mftab.html")
@
}
\clearpage
%
% ---- Slide: organism-level annotation (screenshot) --------------------
% The Gin width set here applies to the \includegraphics that follows.
{\sts{organism-level annotation}
\setkeys{Gin}{width=0.95\textwidth}
\includegraphics{orgShot}
}
\clearpage
%
% ---- Slide: SQL queries against org.Hs.eg.db --------------------------
{\stsL{queries}
<<>>=
library(org.Hs.eg.db)
oc = org.Hs.eg_dbconn()
dbListTables(oc)
dbGetQuery(oc, "select * from gene_info limit 199, 5")
dbGetQuery(oc, "select * from gene_info INNER JOIN go_MF using(_id) where gene_info.symbol = 'TBX21' ")
@
}
\clearpage
%
% ---- Slide: pathway and ontology packages -----------------------------
{\stsL{pathway and gene ontology resources}
<<>>=
library(KEGG.db)
objects("package:KEGG.db")
library(GO.db)
objects("package:GO.db")
@
}
\clearpage
%
% ---- Slide: biomaRt marts ---------------------------------------------
{\stsL{web services: biomaRt}
<<>>=
library(biomaRt)
allm = listMarts()
allm
@
}
\clearpage
%
% ---- Slide: biomaRt datasets and filters ------------------------------
{\stsL{mart examples}
<<>>=
mm = useMart("snp")
listDatasets(mm)[1:2,]
mm = useMart("snp", dataset="hsapiens_snp")
listFilters(mm)[1:3,]
args(getBM)
@
}
\clearpage
%
% ---- Slide: summary ---------------------------------------------------
{\stsh{Summary}
\bi
\item packages exist for platform, organism, pathway/ontology annotation maps
\item SQLite database tables are fundamental entities
\item annotate package lookUp function resolves vectors of identifiers
\item AnnotationDbi get/mget; revmap facilities illustrated previously
\item SQLite annotation packages used by annaffy -- aafTableAnn -- regardless of manufacturer
\item sqlForge tools for creating .db packages in AnnotationDbi
\ei
}
\end{document}