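#!/bin/bash
# (Web-page residue that preceded the shebang, kept only as a comment:
#  "Code pull finished; the page will refresh automatically.")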
#Only for debugging
#set -x
# Print the help text (purpose, accepted input formats, options) to stdout.
# Globals:   txtcyn, txtrst, bldblu, txtbld, bldred, txtred are expanded
#            inside the text; none of them is assigned in the visible part
#            of this script, so they expand to empty strings unless the
#            environment provides them.
# Arguments: none
# Outputs:   the help text on stdout
usage() {
cat <<EOF
${txtcyn}
***CREATED BY Chen Tong (chentong_biology@163.com)***
Usage:
$0 options${txtrst}
${bldblu}Function${txtrst}:
This program is designed to test whether the values of each variable
follow a normal distribution (normality test).
Currently multiple groups among one set are supported.
fileformat for -f (suitable for data extracted from one sample, the
number of columns is unlimited. Column 'Set' is not necessary unless
you have multiple groups)
Gene hmC expr Set
NM_001003918_26622 0 83.1269257376101 TP16
NM_001011535_3260 0 0 TP16
NM_001012640_14264 0 0 TP16
NM_001012640_30427 0 0 TP16
NM_001003918_2662217393_30486 0 0 TP16
NM_001017393_30504 0 0 TP16
NM_001025241_30464 0 0 TP16
NM_001017393_30504001025241_30513 0 0 TP16
For file using "Set" column, you can use
$0 -f file -a Set
fileformat when -m is true
#The name "value" and "variable" should not be altered.
#The "Set" column is optional.
#If you do have several groups, they can be put in the "Set" column
#with "Set" or other string as labels. The label should be given
#to parameter -a.
#Actually this format is the melted result of last format.
value variable Set
0 hmC g
1 expr g
2 hmC a
3 expr a
${txtbld}OPTIONS${txtrst}:
-h Show this help message and exit
-f Data file (with header line, the first column is the
colname, tab separated)${bldred}[NECESSARY]${txtrst}
-m When true, it will skip preprocess. But the format must be
the same as listed before.
${bldred}[Default FALSE, accept TRUE]${txtrst}
-a Name for x-axis variable
[${txtred}Default variable, which is an inner name, suitable
for data without 'Set' column. For the given example,
'Set' which represents groups of each gene, and should be
supplied to this parameter.
${txtrst}]
-s The statistical method you want to use.${bldred}[Default
shapiro.test, accept lillieTest, ***]${txtrst}
-D Self-define intervals for legend variable when legend is
continuous numbers. Accept either a
numeric vector of two or more cut points or a single number
(greater than or equal to 2) giving the number of intervals
into what 'x' is to be cut.
[10 will generate 10 intervals or
"c(-1, 0, 1, 2, 5, 10)" will generate (-1,0],(0,1]...(5,10]]
-B Self-define intervals for x-axis variable. Accept either a
numeric vector of two or more cut points or a single number
(greater than or equal to 2) giving the number of intervals
into what 'x' is to be cut.
[10 will generate 10 intervals or
"c(-1, 0, 1, 2, 5, 10)" will generate (-1,0],(0,1]...(5,10]]
-e Execute or not[${bldred}Default TRUE${txtrst}]
-i Install dependent packages[${bldred}Default FALSE${txtrst}]
EOF
}
# Default settings. Each value stays in effect unless the matching
# command-line option replaces it during option parsing.
file=''                # -f: input data file (mandatory)
method="shapiro.test"  # -s: R function called as the normality test
melted="FALSE"         # -m: TRUE when the input is already in melted form
xvariable="variable"   # -a: name of the column used for grouping
legend_cut=''          # -D: cut specification for the 'variable' column
x_cut=''               # -B: cut specification for the x-axis column
execute="TRUE"         # -e: run the generated R script when TRUE
ist="FALSE"            # -i: gates install.packages() in the generated R script
header="TRUE"          # passed to read.table(header=...); no option changes it
level=''               # not referenced anywhere else in the visible script
x_level=''             # not referenced anywhere else in the visible script
# Parse the command-line options, overriding the defaults assigned earlier
# in the script. Every option except -h takes an argument.
#   -h  print the help text and exit successfully
#   -f  input data file            -m  input is already melted (TRUE/FALSE)
#   -a  grouping column name       -s  normality test function
#   -B  cut spec for the x axis    -D  cut spec for the 'variable' column
#   -e  execute the R script       -i  install R packages first
while getopts "ha:B:D:e:f:i:m:s:" OPTION; do
  case "$OPTION" in
    h)
      # Help was asked for explicitly, so this is not an error.
      usage
      exit 0
      ;;
    f)
      file=$OPTARG
      ;;
    m)
      melted=$OPTARG
      ;;
    a)
      xvariable=$OPTARG
      ;;
    s)
      method=$OPTARG
      ;;
    B)
      x_cut=$OPTARG
      ;;
    D)
      legend_cut=$OPTARG
      ;;
    e)
      execute=$OPTARG
      ;;
    i)
      ist=$OPTARG
      ;;
    *)
      # getopts sets OPTION to "?" for an unknown option or a missing
      # argument (and prints its own diagnostic); show the help on stderr
      # and fail.
      usage >&2
      exit 1
      ;;
  esac
done
# An input file is mandatory: when -f was not given, show the help text on
# stderr and stop. "$file" is quoted so that a path containing spaces or
# glob characters is tested as one word instead of breaking the test.
if [[ -z "$file" ]]; then
  usage >&2
  exit 1
fi
# Generate the R script that runs the normality tests and write it next to
# the input file as "<file>.boxplot.<method>.r".
# The here-document delimiter is deliberately unquoted: shell settings
# (file, method, melted, xvariable, header, legend_cut, x_cut, ist) are
# substituted into the R code, and every dollar sign that belongs to R is
# escaped with a backslash.
mid=".boxplot.${method}"
cat <<END >"${file}${mid}.r"
if ($ist){
  install.packages(c("fBasics", "reshape2"),
                   repos="http://cran.us.r-project.org")
}

if ("${method}" == "lillieTest"){
  library('fBasics')
}

if(! $melted){
  # melt() is provided by reshape2, so it must be attached before use.
  library('reshape2')
  data <- read.table(file="${file}", sep="\t", header=$header,
                     row.names=1, quote="")
  if ("$xvariable" != "variable"){
    data_m <- melt(data, id.vars=c("${xvariable}"))
  } else {
    data_m <- melt(data)
  }
} else {
  data_m <- read.table(file="$file", sep="\t",
                       header=$header)
}

if ("${legend_cut}" != ""){
  data_m\$variable <- cut(data_m\$variable, ${legend_cut})
}

if ("${x_cut}" != ""){
  data_m\$${xvariable} <- cut(data_m\$${xvariable},${x_cut})
}

# Smallest sample size for which the selected test is attempted:
# more than 4 values for lillieTest, more than 2 for shapiro.test,
# no restriction for any other function.
min_n <- if ("${method}" == "lillieTest") 5 else
         if ("${method}" == "shapiro.test") 3 else 0

# Run the selected test on one vector of values, or report that there are
# too few observations for it.
test_normality <- function(values, label){
  if (length(values) >= min_n){
    print(paste("### Compute normality for", label, "###"))
    print(${method}(values))
  } else {
    print(paste("### No enough data for ${method}:", label, "###"))
  }
}

# Test every distinct entry of the variable column of one data frame.
# seq_along() keeps the loop empty when there is nothing to test.
test_each_variable <- function(d){
  variableL <- unique(d\$variable)
  for (k in seq_along(variableL)){
    var1 <- variableL[k]
    test_normality(d[d\$variable == var1,]\$value, var1)
  }
}

if ("$xvariable" == "variable"){
  # No group information
  test_each_variable(data_m)
} else {
  # Compute several groups. levels(factor()) lists the group labels for
  # character as well as factor columns; summary() on a character column
  # would return Length/Class/Mode instead of the labels.
  group <- levels(factor(data_m\$${xvariable}))
  for (g in group){
    tmp <- data_m[data_m\$${xvariable}==g,]
    print(paste("*** Compute for Group ", g, " ***"))
    test_each_variable(tmp)
  }
}
END
# Run the generated R script unless execution was switched off with -e.
# The script is removed only after a successful run, so a failing script
# stays on disk for inspection; a failure is also reported to the caller
# through the exit status instead of being silently dropped.
if [[ "$execute" == "TRUE" ]]; then
  if Rscript "${file}${mid}.r"; then
    /bin/rm -f -- "${file}${mid}.r"
  else
    rscript_status=$?
    printf 'Rscript failed for %s (exit status %s)\n' \
      "${file}${mid}.r" "$rscript_status" >&2
    exit "$rscript_status"
  fi
fi
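# (Web-page residue, not part of the script:)
# This section may contain content unsuitable for display, so the page does not show it. You can review and modify it with the relevant editing functions.
# If you confirm that the content involves no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or anything that violates national laws and regulations, you can click submit to appeal and we will handle it as soon as possible.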