Last updated on Sun Feb 11 22:55:10 2024.
Structures of protein-DNA complexes by experimental method
All figures are interactive (you can zoom in, and hovering over
elements will show more information).
# Load required packages
library(magrittr)
library(jsonlite)
library(dplyr)
library(forcats)
library(stringr)
library(ggplot2)
library(plotly)
library(here)
# Ask the PDBe search service for hits with molecule_type "DNA" and
# assembly_composition "DNA/protein complex", requesting only the PDB id,
# the molecule sequence and the experimental method of each hit
pdb_query <- 'https://www.ebi.ac.uk/pdbe/search/pdb/select?q=molecule_type:%22DNA%22%20AND%20assembly_composition:%22DNA/protein%20complex%22&fl=pdb_id,molecule_sequence,experimental_method&rows=1000000&wt=json'
# Parse the JSON reply, keep the first hit per PDB id, retain only X-ray,
# EM and solution NMR entries, and measure the DNA length as the number of
# characters in the returned sequence
pdb_data <- fromJSON(pdb_query)$response$docs %>%
  as_tibble() %>%
  distinct(pdb_id, .keep_all = TRUE) %>%
  filter(
    experimental_method %in%
      c("X-ray diffraction", "Electron Microscopy", "Solution NMR")
  ) %>%
  mutate(
    dna_length = str_length(molecule_sequence),
    # Coerce to plain character first, then to a factor for plotting
    experimental_method = experimental_method %>%
      as.character() %>%
      as_factor()
  )
# Overview: bar chart counting the PDB entries for each experimental method
protein_dna_cplx_structures <- pdb_data %>%
  ggplot(mapping = aes(x = experimental_method)) +
  geom_bar() +
  labs(
    title = "Structures of protein-DNA complexes",
    x = "",
    y = "Number of PDB entries"
  ) +
  theme_bw()
# Convert the static ggplot into an interactive plotly widget
ggplotly(protein_dna_cplx_structures)
Download figure in SVG format
DNA length in crystal structures of protein-DNA complexes
Entire distribution
# Histogram (bins of width 1) of DNA length over all X-ray diffraction entries
dna_length_xtal <- ggplot(
  data = filter(pdb_data, experimental_method == "X-ray diffraction"),
  mapping = aes(x = dna_length)
) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "DNA length in crystal structures of protein-DNA complexes",
    x = "DNA length (bp)",
    y = "Number of crystal structures"
  ) +
  theme_bw()
# Convert the static ggplot into an interactive plotly widget
ggplotly(dna_length_xtal)
Download figure in SVG format
0-150 bp range
# Same histogram as for all X-ray diffraction entries, but limited to DNA
# lengths of at most 150 so the short end of the distribution is readable
dna_length_xtal_150 <- ggplot(
  data = filter(
    pdb_data,
    experimental_method == "X-ray diffraction",
    dna_length <= 150
  ),
  mapping = aes(x = dna_length)
) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "DNA length in crystal structures of protein-DNA complexes (0-150 bp)",
    x = "DNA length (bp)",
    y = "Number of crystal structures"
  ) +
  theme_bw()
# Convert the static ggplot into an interactive plotly widget
ggplotly(dna_length_xtal_150)
Download figure in SVG format
DNA length in cryo-EM structures of protein-DNA complexes
# Histogram (bins of width 1) of DNA length over the electron microscopy
# entries
dna_length_cryoem <- ggplot(
  data = filter(pdb_data, experimental_method == "Electron Microscopy"),
  mapping = aes(x = dna_length)
) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "DNA length in cryo-EM structures of protein-DNA complexes",
    x = "DNA length (bp)",
    y = "Number of cryo-EM structures"
  ) +
  theme_bw()
# Convert the static ggplot into an interactive plotly widget
ggplotly(dna_length_cryoem)
Download figure in SVG format
DNA length in NMR structures of protein-DNA complexes
# Histogram (bins of width 1) of DNA length over the solution NMR entries
dna_length_nmr <- ggplot(
  data = filter(pdb_data, experimental_method == "Solution NMR"),
  mapping = aes(x = dna_length)
) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "DNA length in NMR structures of protein-DNA complexes",
    x = "DNA length (bp)",
    y = "Number of NMR structures"
  ) +
  theme_bw()
# Convert the static ggplot into an interactive plotly widget
ggplotly(dna_length_nmr)
Download figure in SVG format
Dataset
The histograms presented above are derived from the following
dataset:
# Build the table shown to the reader: only the PDB id and the DNA length,
# under human-readable column names, longest DNA first
pdb_table <- pdb_data %>%
  select(
    `PDB code` = pdb_id,
    `DNA length` = dna_length
  ) %>%
  arrange(desc(`DNA length`))
# Left as a bare expression so the document's table printing is used
pdb_table
Download raw dataset in JSON format
