htmlwidgets for R

交互式显示

plot有很多图可以利用。
Author

胡林辉

Published

October 22, 2023

官网

https://www.htmlwidgets.org/

显示交互式图

library(dygraphs)
dygraph(fdeaths, "Female Deaths")
dygraph(mdeaths, "Male Deaths")
dygraph(ldeaths, "All Deaths")

显示表的数据

library(DT)
datatable(iris, options = list(pageLength = 5))

显示有颜色的表格

df <- data.frame(
  id = 1:10,
  name = c("Bob", "Ashley", "James", "David", "Jenny",
    "Hans", "Leo", "John", "Emily", "Lee"),
  age = c(28, 27, 30, 28, 29, 29, 27, 27, 31, 30),
  grade = c("C", "A", "A", "C", "B", "B", "B", "A", "C", "C"),
  test1_score = c(8.9, 9.5, 9.6, 8.9, 9.1, 9.3, 9.3, 9.9, 8.5, 8.6),
  test2_score = c(9.1, 9.1, 9.2, 9.1, 8.9, 8.5, 9.2, 9.3, 9.1, 8.8),
  final_score = c(9, 9.3, 9.4, 9, 9, 8.9, 9.25, 9.6, 8.8, 8.7),
  registered = c(TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE),
  stringsAsFactors = FALSE)


library(formattable)

formattable(df, list(
  age = color_tile("white", "orange"),
  grade = formatter("span", style = x ~ ifelse(x == "A",
    style(color = "green", font.weight = "bold"), NA)),
  area(col = c(test1_score, test2_score)) ~ normalize_bar("pink", 0.2),
  final_score = formatter("span",
    style = x ~ style(color = ifelse(rank(-x) <= 3, "green", "gray")),
    x ~ sprintf("%.2f (rank: %02d)", x, rank(-x))),
  registered = formatter("span",
    style = x ~ style(color = ifelse(x, "green", "red")),
    x ~ icontext(ifelse(x, "ok", "remove"), ifelse(x, "Yes", "No")))
))
id name age grade test1_score test2_score final_score registered
1 Bob 28 C 8.9 9.1 9.00 (rank: 06) Yes
2 Ashley 27 A 9.5 9.1 9.30 (rank: 03) No
3 James 30 A 9.6 9.2 9.40 (rank: 02) Yes
4 David 28 C 8.9 9.1 9.00 (rank: 06) No
5 Jenny 29 B 9.1 8.9 9.00 (rank: 06) Yes
6 Hans 29 B 9.3 8.5 8.90 (rank: 08) Yes
7 Leo 27 B 9.3 9.2 9.25 (rank: 04) Yes
8 John 27 A 9.9 9.3 9.60 (rank: 01) No
9 Emily 31 C 8.5 9.1 8.80 (rank: 09) No
10 Lee 30 C 8.6 8.8 8.70 (rank: 10) No

显示流程图

library(DiagrammeR)
grViz("
  digraph {
    layout = twopi
    node [shape = circle]
    A -> {B C D} 
  }")

plot系列

Preliminary plots

library(plotly)
library(tidymodels)
set.seed(0)
X <- matrix(rnorm(10000),nrow=500)
y <- sample(0:1, 500, replace=TRUE)
data <- data.frame(X,y)
data$y <- as.factor(data$y)
X <- subset(data,select = -c(y))
logistic_glm <-
  logistic_reg() %>%
  set_engine("glm") %>%
  set_mode("classification") %>%
  fit(y ~ ., data = data)

y_scores <- logistic_glm %>%
  predict(X, type = 'prob')

y_score <- y_scores$.pred_1
db <- data.frame(data$y, y_score)

z <- roc_curve(data = db, 'data.y', 'y_score')
z$specificity <- 1 - z$specificity
colnames(z) <- c('threshold', 'tpr', 'fpr')

fig1 <- plot_ly(x= y_score, color = data$y, colors = c('blue', 'red'), type = 'histogram', alpha = 0.5, nbinsx = 50) %>%
  layout(barmode = "overlay")
fig1
fig2 <- plot_ly(data = z, x = ~threshold) %>%
  add_trace(y = ~fpr, mode = 'lines', name = 'False Positive Rate', type = 'scatter')%>%
  add_trace(y = ~tpr, mode = 'lines', name = 'True Positive Rate', type = 'scatter')%>%
  layout(title = 'TPR and FPR at every threshold')
fig2 <- fig2 %>% layout(legend=list(title=list(text='<b> Rate </b>')))
fig2

Basic binary ROC curve

library(dplyr)
library(ggplot2)
library(plotly)
library(pROC)

set.seed(0)
X <- matrix(rnorm(10000),nrow=500)
y <- sample(0:1, 500, replace=TRUE)
db <- data.frame(X,y)
db$y <- as.factor(db$y)
test_data = db[1:20]

model<- logistic_reg() %>%
  set_engine("glm") %>%
  set_mode("classification") %>%
  # Fit the model
  fit(y ~., data = db)


ypred <- predict(model,
                  new_data = test_data,
                  type = "prob")

yscore <- data.frame(ypred$.pred_0)
rdb <- cbind(db$y,yscore)
colnames(rdb) = c('y','yscore')


pdb <- roc_curve(rdb, y, yscore)
pdb$specificity <- 1 - pdb$specificity
auc = roc_auc(rdb, y, yscore)
auc = auc$.estimate

tit = paste('ROC Curve (AUC = ',toString(round(auc,2)),')',sep = '')


fig <-  plot_ly(data = pdb ,x =  ~specificity, y = ~sensitivity, type = 'scatter', mode = 'lines', fill = 'tozeroy') %>%
  layout(title = tit,xaxis = list(title = "False Positive Rate"), yaxis = list(title = "True Positive Rate")) %>%
add_segments(x = 0, xend = 1, y = 0, yend = 1, line = list(dash = "dash", color = 'black'),inherit = FALSE, showlegend = FALSE)
fig

Multiclass ROC Curve

library(plotly)
library(tidymodels)
library(fastDummies)

# Artificially add noise to make task harder
data(iris)
ind <- sample.int(150, 50)
samples <- sample(x = iris$Species, size = 50)
iris[ind,'Species'] = samples

# Define the inputs and outputs
X <- subset(iris, select = -c(Species))
iris$Species <- as.factor(iris$Species)

# Fit the model
logistic <-
  multinom_reg() %>%
  set_engine("nnet") %>%
  set_mode("classification") %>%
  fit(Species ~ ., data = iris)

y_scores <- logistic %>%
  predict(X, type = 'prob')

# One hot encode the labels in order to plot them
y_onehot <- dummy_cols(iris$Species)
colnames(y_onehot) <- c('drop', 'setosa', 'versicolor', 'virginica')
y_onehot <- subset(y_onehot, select = -c(drop))

z = cbind(y_scores, y_onehot)

z$setosa <- as.factor(z$setosa)
roc_setosa <- roc_curve(data = z, setosa, .pred_setosa)
roc_setosa$specificity <- 1 - roc_setosa$specificity
colnames(roc_setosa) <- c('threshold', 'tpr', 'fpr')
auc_setosa <- roc_auc(data = z, setosa, .pred_setosa)
auc_setosa <- auc_setosa$.estimate
setosa <- paste('setosa (AUC=',toString(round(1-auc_setosa,2)),')',sep = '')

z$versicolor <- as.factor(z$versicolor)
roc_versicolor <- roc_curve(data = z, versicolor, .pred_versicolor)
roc_versicolor$specificity <- 1 - roc_versicolor$specificity
colnames(roc_versicolor) <- c('threshold', 'tpr', 'fpr')
auc_versicolor <- roc_auc(data = z, versicolor, .pred_versicolor)
auc_versicolor <- auc_versicolor$.estimate
versicolor <- paste('versicolor (AUC=',toString(round(1-auc_versicolor,2)),')', sep = '')

z$virginica <- as.factor(z$virginica)
roc_virginica <- roc_curve(data = z, virginica, .pred_virginica)
roc_virginica$specificity <- 1 - roc_virginica$specificity
colnames(roc_virginica) <- c('threshold', 'tpr', 'fpr')
auc_virginica <- roc_auc(data = z, virginica, .pred_virginica)
auc_virginica <- auc_virginica$.estimate
virginica <- paste('virginica (AUC=',toString(round(1-auc_virginica,2)),')',sep = '')

# Create an empty figure, and iteratively add a line for each class
fig <- plot_ly()%>%
  add_segments(x = 0, xend = 1, y = 0, yend = 1, line = list(dash = "dash", color = 'black'), showlegend = FALSE) %>%
  add_trace(data = roc_setosa,x = ~fpr, y = ~tpr, mode = 'lines', name = setosa, type = 'scatter')%>%
  add_trace(data = roc_versicolor,x = ~fpr, y = ~tpr, mode = 'lines', name = versicolor, type = 'scatter')%>%
  add_trace(data = roc_virginica,x = ~fpr, y = ~tpr, mode = 'lines', name = virginica, type = 'scatter')%>%
  layout(xaxis = list(
    title = "False Positive Rate"
  ), yaxis = list(
    title = "True Positive Rate"
  ),legend = list(x = 100, y = 0.5))
fig

Precision-Recall Curves

library(dplyr)
library(ggplot2)
library(plotly)
library(pROC)

set.seed(0)
X <- matrix(rnorm(10000),nrow=500)
y <- sample(0:1, 500, replace=TRUE)
db <- data.frame(X,y)
db$y <- as.factor(db$y)
test_data = db[1:20]

model<- logistic_reg() %>%
  set_engine("glm") %>%
  set_mode("classification") %>%
  # Fit the model
  fit(y ~., data = db)

ypred <- predict(model,
                 new_data = test_data,
                 type = "prob")

yscore <- data.frame(ypred$.pred_0)
rdb <- cbind(db$y,yscore)
colnames(rdb) = c('y','yscore')

pdb <- pr_curve(rdb, y, yscore)
auc = roc_auc(rdb, y, yscore)
auc = auc$.estimate

tit = paste('ROC Curve (AUC = ',toString(round(auc,2)),')',sep = '')


fig <-  plot_ly(data = pdb ,x =  ~recall, y = ~precision, type = 'scatter', mode = 'lines', fill = 'tozeroy') %>%
  add_segments(x = 0, xend = 1, y = 1, yend = 0, line = list(dash = "dash", color = 'black'),inherit = FALSE, showlegend = FALSE) %>%
  layout(title = tit, xaxis = list(title = "Recall"), yaxis = list(title = "Precision") )

fig