r - Why heatmap.2 add unwanted replicate columns? -


given data, (complete 1 can found here: http://pastebin.com/raw.php?i=6ntcnlj7):

probes  gene.symbol immgen  foo_yj_06.ip    foo_mi_06.ip    foo_nl_06.id    foo_yj_06.id    foo_mi_06.id    bar_nn_06.ip    bar_pr_06.ip    bar_yj_06.ip    bar_mi_06.ip    bar_nl_06.id    bar_yj_06.id    bar_mi_06.id    bar_nn_24.ip    bar_pr_24.ip    bar_yj_24.ip    bar_mi_24.ip    bar_nn_06.ip    bar_nn_24.ip    bar_pr_06.ip    bar_pr_24.ip    bar_yj_06.ip    bar_yj_24.ip    bar_mi_06.ip    bar_mi_24.ip    bar_nl_06.id    bar_yj_06.id    bar_mi_06.id    txt_nl_06.id    txt_yj_06.ip    txt_mi_06.ip    txt_yj_06.id    txt_mi_06.id    xxx_yj_06.ip    xxx_mi_06.ip    xxx_nl_06.id    xxx_yj_06.id    xxx_mi_06.id    kth_nl_06.id    kth_yj_06.ip    kth_mi_06.ip    k3_yj_06.id k3_mi_06.id uuu_yj_06.in    uuu_mi_06.in    dar_nl_06.id    dar_yj_06.id    dar_mi_06.id 1425352_at  rcor3   stromalcells(12.99),dendriticcells(12.18),stemcells(11.43),nkcells(10.50),macrophages(10.11),abtcells(9.11),neutrophils(8.72),monocytes(8.63),bcells(8.61),gdtcells(7.71)   1.162   0.795   0.695   0.701   1.085   1.052   1.544   0.75    1.305   1.213   1.142   0.814   0.79    0.89    1.691   1.013   1.052   0.79    1.544   0.89    0.75    1.691   1.305   1.013   1.213   1.142   0.814   1.556   0.744   1.22    1.239   1.164   0.827   1.203   0.778   0.929   0.95    0   0.877   0.906   1.294   0.904   0   1.2 0.927   0.704   1.181 1417466_at  rgs5    stromalcells(72.03),neutrophils(3.39),dendriticcells(3.31),nkcells(3.28),monocytes(3.25),macrophages(3.15),gdtcells(3.01),abtcells(2.99),bcells(2.80),stemcells(2.80)   1.149   0.904   1.225   0.821   1.075   0.947   0.969   1.262   0.868   1.013   0.984   0.938   0.925   1.11    1.36    1.014   0.947   0.925   0.969   1.11    1.262   1.36    0.868   1.014   1.013   0.984   0.938   0.877   0.887   1.035   1.226   0.979   1.142   1.126   0.933   0.854   1.033   0.911   1.255   1.038   1.125   1.086   1.18    0.958   1.115   1.017   1.061 

i obtain heatmap, shown tail. note added unwanted replicated columns (marked red box).

for example bar_yj_06.ip appear once in data above. in plot appear twice bar_yj_06.ip , bar_yj_06.ip.1

why that? hand how can remove them?

enter image description here

this code use generate above figure:

#!/usr/bin/env rscript  library(gplots); library(rcolorbrewer);   plot_hclust  <- function(inputfile,clust.height,type.order=c(),row.margins=30) {      dat.bcd <- read.table(inputfile,na.strings=na, sep="\t",header=true);     base <- substr(basename(inputfile), 1, nchar(basename(inputfile)) - 4 )     rownames(dat.bcd) <- do.call(paste,c(dat.bcd[c("probes","gene.symbol","immgen")],sep=" "))     dat.bcd <- dat.bcd[,!names(dat.bcd) %in% c("probes","gene.symbol","immgen")]      dat.bcd <- dat.bcd      # clustering , distance function     hclustfunc <- function(x) hclust(x, method="complete")     distfunc <- function(x) dist(x,method="maximum")       # select based on fc, long of them >= anylim      anylim <- 2.0     dat.bcd <- dat.bcd[ apply(dat.bcd, 1,function(x) (x >= anylim)), ]     nrow(dat.bcd);      #print(heatout):       # clustering functions     height <- clust.height;       # define output file name     heatout <- paste("myheatmap.pdf",sep="");  print(heatout)      # compute distance , clusteirn function     d.bcd <- distfunc(dat.bcd)     fit.bcd <- hclustfunc(d.bcd)       # plot hierarchical dendogram without heatmap     # cluster height     #cutree , rect.huclust has used in tandem     clusters <- cutree(fit.bcd, h=height)      nofclust.height <-  length(unique(as.vector(clusters)));      myorder <- colnames(dat.bcd);      if (length(type.order)>0) {      myorder <- type.order     }      # define colors     #hmcols <- rev(brewer.pal(11,"spectral"));     hmcols <- rev(redgreen(2750));     selcol <- colorramppalette(brewer.pal(12,"set3"))     selcol2 <- colorramppalette(brewer.pal(9,"set1"))     sdcol= selcol(5);     clustcol.height = selcol2(nofclust.height);       # plot heatmap   pdf(file=heatout,width=50,height=80);       par(xaxs="i");    # bi-clustering     heatmap.2(as.matrix(dat.bcd), trace='none', dendrogram='both',colv=t, scale='row',             hclust=hclustfunc, distfun=distfunc, col=hmcols,            symbreak=t,            margins=c(15,200), keysize=0.5,            labrow=rownames(dat.bcd),            lwid=c(2,0.1,4), lhei=c(0.05,3),            lmat=rbind(c(5,0,4),c(3,1,2)),            rowsidecolors=clustcol.height[clusters])     dev.off();  }  # plotting  plot_hclust("http://pastebin.com/raw.php?i=6ntcnlj7",clust.height=3); 

this not heatmap.2 issue. of duplicated samples appear in source data frame. should review workflow , fix step @ duplicates got introduced data.

alternative ad hoc solution remove duplicated columns data frame before plotting heatmap:

data <- read.table(file='http://pastebin.com/raw.php?i=6ntcnlj7', header=t)  # obtain logical vector (true/false), true == duplicated elements ind <- duplicated(t(data))  # retain unique columns # ! == inverts logical vector, true == unique elements subset <- data[,!ind] 

Comments

Popular posts from this blog

Android layout hidden on keyboard show -

google app engine - 403 Forbidden POST - Flask WTForms -

c - Why would PK11_GenerateRandom() return an error -8023? -