Bio3D in R Utilities for the analysis of protein structure and sequence data

http://users.mccammon.ucsd.edu/~bgrant/bio3d/user_guide/user_guide.html#example

Some Beginner Examples

 

library(bio3d)     # load the bio3d package

lbio3d()              # list the functions within the package

 

 

## See the help pages of individual functions for full documentation and worked examples.

help(read.pdb)   #  type “q” to exit help page and return to the R prompt

example(read.pdb)

 

## Read a PDB file with the four letter code “1BG2″ from the RCSB PDB database

pdb <- read.pdb(“1bg2″)

 

## Extract SEQRES PDB sequence

s1 <- aa321(pdb$seqres)

 

## Extract actual ATOM PDB sequence for which we have coordinates

s2 <- seq.pdb(pdb)

 

## Align these two sequences to determine where they differ

aln <- seqaln( seqbind(s1, s2), id=c(“seqres”,”atom”) )

aln    # print resulting alignment object to screen

 

## Write a FASTA format sequence file for viewing in programmes such as seaview

write.fasta(aln, file=”eg.fa”)

 

 


 

Basic sequence analysis

 

##– Read an example FASTA alignment file that ships with the bio3 package

aln.file <- system.file(“examples/kinesin_xray.fa”,package=”bio3d”)

aln <- read.fasta( aln.file )             # Put your filename here or generate your own with seqaln()

 

## Entropy conservation score for alignment positions

h <- entropy(aln)    # see also the “conserv()” function for additional scoring options

 

# Alignment consensus sequence at the 60 percent residue identity level

con <- consensus(aln, cutoff=0.6)

 

## Plot of residue conservation (similarity) with secondary structure

plot.bio3d( conserv(aln)[!is.gap(aln$ali[1,])],  sse=sse )

 

## Render the alignment as colored HTML

aln2html(aln, append=FALSE, file=”eg.html”)

 

 


 

Basic structure analysis

 

## Basic distance matrix analysis

k <- dm(pdb, selection=”calpha”)

plot(k)

 

##– Torsion angle analysis and basic Ramachandran plot

tor <- torsion.pdb(pdb)

plot(tor$phi, tor$psi)

 

 

##– Secondary structure assignment with DSSP

sse <- dssp(pdb, resno=FALSE)      # see also the stride() function

 

 

## Plot of B-factor values along with secondary structure

plot.bio3d(pdb$atom[pdb$calpha,”b”], sse=sse, ylab=”B-factor”)

 

 


 

Finding and analyzing related structures

 

##– Search for related structures in the PDB database

pdb <- read.pdb(“4q21″)                 # Read an example PDB (you can also use your own file name here)

blast <- blast.pdb( seq.pdb(pdb) )   # Extract it’s sequence and BLAST against PDB database

head(blast$hit.tbl)                            # Examine some of the resulting hits

 

top.hits <- plot(blast)                      # Pick a sensible threshold of similarity for the best hits

head(top.hits$hits)                          # Have a look at their PDB id codes with additional chain ids

 

## Further steps here include downloading these hits with get.pdb(), their subsequent alignment with pdbaln() and principal component analysis analysis with pca.xyz() etc. See the package vignettes and protein kinase A (PKA) tutorial.

## Below we take a pre-computed alignment for demo purposes

 

##– Read an alignment of sequences and their corresponding structures

aln.file <- system.file(“examples/kinesin_xray.fa”,package=”bio3d”)

aln  <- read.fasta( aln.file )

pdbs <- read.fasta.pdb( aln, pdbext=”.ent”, pdb.path=system.file(“examples”,package=”bio3d”) )

 

##– DDM: Difference Distance Matrix

a <- dm(pdbs$xyz[2,])

b <- dm(pdbs$xyz[3,])

ddm <- a – b

plot(ddm,key=FALSE, grid=FALSE)

 

 

##– Superpose structures on non gap positions

gaps <- gap.inspect(pdbs$xyz)

xyz  <- fit.xyz( fixed  = pdbs$xyz[1,],

mobile = pdbs$xyz,

fixed.inds  = gaps$f.inds,

mobile.inds = gaps$f.inds )

 

 

##– RMSD

rmsd(pdbs$xyz[1,gaps$f.inds], pdbs$xyz[,gaps$f.inds])

rmsd(pdbs$xyz[1,gaps$f.inds], pdbs$xyz[,gaps$f.inds], fit=TRUE)

 

 

 

##– Rigid ‘core’ identification

aln <- read.fasta(system.file(“examples/kinesin_xray.fa”,package=”bio3d”))

pdb.path = system.file(“examples/”,package=”bio3d”)

pdbs <- read.fasta.pdb(aln, pdb.path = pdb.path, pdbext = “.ent”)

core <- core.find(pdbs)

 

## Plot core volume vs length

plot(core)

## Return the atom and xyz indices of a core with 0.5 A^3 volume

core.inds <- print(core, 0.5) #

 

## Core fit the structures (superpose on rigid zones)

xyz <- fit.xyz( fixed = pdbs$xyz[1,],

mobile = pdbs,

#  full.pdbs = TRUE,              # set full.pdbs=TRUE if you would like

#  pdb.path = pdb.path,        # to view the resulting superposed files

#  pdbext = “.ent”,                 # in, for example VMD with the vmd -m option

#  outpath = “fitlsq/”,

fixed.inds  = core$c0.5A.xyz,

mobile.inds = core$c0.5A.xyz)

 

 

##– PCA of PDB structures

cut.seqs <- which(pdbs$id %in% c(“d1n6mb_”,”d1ry6a_”))

 

# Ignore gap containing positions

gaps.res <- gap.inspect(pdbs$ali[-cut.seqs,])

gaps.pos <- gap.inspect(pdbs$xyz[-cut.seqs,])

 

##– Do PCA

pc.xray <- pca.xyz(xyz[-cut.seqs, gaps.pos$f.inds])

 

## Plot results

plot(pc.xray)

 

x11()

plot.pca.loadings(pc.xray$U)

 

## Write PC trajectory

a <- mktrj.pca(pc.xray, pc=1, file=”pc1.pdb”,

resno = pdbs$resno[1, gaps.res$f.inds],

resid = aa123(pdbs$ali[1, gaps.res$f.inds]) )

 

 


 

Basic trajectory analysis

 

 

##– Read a CHARMM/X-PLOR/NAMD trajectory file

trtfile <- system.file(“examples/hivp.dcd”, package=”bio3d”)

trj <- read.dcd(trtfile)

 

## Read the starting PDB file to determine atom correspondence

pdbfile <- system.file(“examples/hivp.pdb”, package=”bio3d”)

pdb <- read.pdb(pdbfile)

 

## Fit trj on PDB based on residues 23 to 31 and 84 to 87 in both chains

inds <- atom.select(pdb, resno=c(23:31,84:87), elety=”CA”)

xyz <- fit.xyz(pdb$xyz, trj, fixed.inds=inds$xyz, mobile.inds=inds$xyz)

 

##– RMSD of trj frames from PDB

r <- rmsd(a=pdb, b=xyz)

 

##– PCA of MD trjectory

pc.trj <- pca.xyz(xyz)

plot.pca.loadings(pc.trj)

 

## Write PC trajectory for viewing as tube in VMD

a <- mktrj.pca(pc.trj, pc=1, file=”pc1_trj.pdb”)

 

 

## other examples include: sdm, alignment, clustering etc…

 

1 comment to Bio3D in R Utilities for the analysis of protein structure and sequence data

  • - 2012.3.20 4:57I always dolownad a full show in parts, that’s always present at YouTube, because my net connection is very slow and YouTube fulfils my wishes.

Leave a Reply

  

  

  

You can use these HTML tags

<a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>