1 Star 0 Fork 0

陈同 / s-plot

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
sp_normTest.sh 5.73 KB
一键复制 编辑 原始数据 按行查看 历史
Admin 提交于 2017-05-25 14:32 . Batch update FOR
#!/bin/bash
#Only for debugging
#set -x
# usage: print the help text for this script to stdout.
# The heredoc is unquoted, so $0 and the ${txt*}/${bld*} colour variables are
# expanded when the text is printed. Those colour variables are not set in the
# visible part of this script, so they expand to empty strings unless they are
# inherited from the environment.
# The help text describes what the generated R code actually does: it runs a
# normality test (shapiro.test by default) on every variable, optionally
# within every group.
usage()
{
cat <<EOF
${txtcyn}
***CREATED BY Chen Tong (chentong_biology@163.com)***
Usage:
$0 options${txtrst}
${bldblu}Function${txtrst}:
This program is designed to test whether the values of each variable
follow a normal distribution (normality test).
Currently multiple groups among one set are supported.
fileformat for -f (suitable for data extracted from one sample, the
number of columns is unlimited. Column 'Set' is not necessary unless
you have multiple groups)
Gene hmC expr Set
NM_001003918_26622 0 83.1269257376101 TP16
NM_001011535_3260 0 0 TP16
NM_001012640_14264 0 0 TP16
NM_001012640_30427 0 0 TP16
NM_001003918_2662217393_30486 0 0 TP16
NM_001017393_30504 0 0 TP16
NM_001025241_30464 0 0 TP16
NM_001017393_30504001025241_30513 0 0 TP16
For file using "Set" column, you can use
$0 -f file -a Set
fileformat when -m is true
#The name "value" and "variable" should not be altered.
#The "Set" column is optional.
#If you do have several groups, they can put at the "Set" column
#with "Set" or other string as labels. The label should be given
#to parameter -a.
#Actually this format is the melted result of last format.
value variable Set
0 hmC g
1 expr g
2 hmC a
3 expr a
${txtbld}OPTIONS${txtrst}:
-f Data file (with header line, the first column is the
colname, tab separated)${bldred}[NECESSARY]${txtrst}
-m When true, it will skip preprocess. But the format must be
the same as listed before.
${bldred}[Default FALSE, accept TRUE]${txtrst}
-a Name for x-axis variable
[${txtred}Default variable, which is an inner name, suitable
for data without 'Set' column. For the given example,
'Set' which represents groups of each gene, and should be
supplied to this parameter.
${txtrst}]
-s The statistical method you want to use.${bldred}[Default
shapiro.test, accept lillieTest, ***]${txtrst}
-D Self-define intervals for legend variable when legend is
continuous numbers. Accept either a
numeric vector of two or more cut points or a single number
(greater than or equal to 2) giving the number of intervals
into what 'x' is to be cut.
[10 will generate 10 intervals or
"c(-1, 0, 1, 2, 5, 10)" will generate (-1,0],(0,1]...(5,10]]
-B Self-define intervals for x-axis variable. Accept either a
numeric vector of two or more cut points or a single number
(greater than or equal to 2) giving the number of intervals
into what 'x' is to be cut.
[10 will generate 10 intervals or
"c(-1, 0, 1, 2, 5, 10)" will generate (-1,0],(0,1]...(5,10]]
-e Execute or not[${bldred}Default TRUE${txtrst}]
-i Install required packages[${bldred}Default FALSE${txtrst}]
EOF
}
# ---- Default values for the command-line options ----
# Each of these may be overwritten by the getopts loop that follows.
file=""                  # -f: input data file (required)
melted="FALSE"           # -m: TRUE when the input is already in melted form
xvariable="variable"     # -a: name of the grouping column
level=""
legend_cut=""            # -D: cut specification for the 'variable' column
x_level=""
x_cut=""                 # -B: cut specification for the grouping column
header="TRUE"            # passed to read.table(header=...)
execute="TRUE"           # -e: run the generated R script
ist="FALSE"              # -i: install R packages first
method="shapiro.test"    # -s: R function used as the normality test
# Parse the command line. Every option except -h takes an argument, which is
# copied verbatim into the matching shell variable.
while getopts "ha:B:D:e:f:i:m:s:" OPTION; do
    case "$OPTION" in
        a) xvariable="$OPTARG" ;;
        B) x_cut="$OPTARG" ;;
        D) legend_cut="$OPTARG" ;;
        e) execute="$OPTARG" ;;
        f) file="$OPTARG" ;;
        i) ist="$OPTARG" ;;
        m) melted="$OPTARG" ;;
        s) method="$OPTARG" ;;
        # -h and anything getopts could not handle: show the help text and
        # leave with status 1. The '?' glob matches any single character, so
        # this arm has to stay last.
        h | ?)
            usage
            exit 1
            ;;
    esac
done
# -f is mandatory: without an input file show the help text and abort.
# The expansion is quoted so that a path containing whitespace is tested as a
# single word instead of making '[' fail with a syntax error.
if [ -z "$file" ]; then
    usage
    exit 1
fi
# Write the R script that performs the normality tests next to the input file.
# The heredoc delimiter is unquoted, so every shell variable below is expanded
# while the file is written and the resulting R code contains the literal
# option values. No R dollar operator is used inside the heredoc, which avoids
# having to escape it for the shell.
mid=".boxplot.${method}"
cat <<END >"${file}${mid}.r"
if ($ist) {
    # reshape2 is installed too because melt() is needed for unmelted input.
    install.packages(c("fBasics", "reshape2"),
        repos="http://cran.us.r-project.org")
}

if ("${method}" == "lillieTest") {
    library("fBasics")
}

# Smallest sample size accepted for the two known tests. Any other method is
# called without a size check.
min_obs <- c(lillieTest = 5, shapiro.test = 3)

# Run the selected test on the value column of df, once per distinct entry
# of its variable column, and print each result.
run_norm_tests <- function(df) {
    variableL <- unique(df[["variable"]])
    # seq_along gives an empty loop when there is nothing to test.
    for (idx in seq_along(variableL)) {
        var1 <- variableL[idx]
        new_data <- df[df[["variable"]] == var1, ][["value"]]
        needed <- min_obs["${method}"]
        if (!is.na(needed) && length(new_data) < needed) {
            print(paste("### No enough data for ${method}:", var1, "###"))
        } else {
            print(paste("### Compute normality for", var1, "###"))
            print(${method}(new_data))
        }
    }
    invisible(NULL)
}

if (! $melted) {
    # melt() is not part of base R; it is assumed to come from reshape2.
    library("reshape2")
    data <- read.table(file="${file}", sep="\t", header=$header,
        row.names=1, quote="")
    if ("$xvariable" != "variable") {
        data_m <- melt(data, id.vars=c("${xvariable}"))
    } else {
        data_m <- melt(data)
    }
} else {
    data_m <- read.table(file="$file", sep="\t",
        header=$header)
}

if ("${legend_cut}" != "") {
    data_m[["variable"]] <- cut(data_m[["variable"]], ${legend_cut})
}

if ("${x_cut}" != "") {
    data_m[["${xvariable}"]] <- cut(data_m[["${xvariable}"]], ${x_cut})
}

if ("$xvariable" == "variable") {
    # No group information
    run_norm_tests(data_m)
} else {
    # Compute several groups
    group_col <- data_m[["${xvariable}"]]
    if (is.null(group_col)) {
        stop("Column '${xvariable}' not found in the input data",
            call. = FALSE)
    }
    # levels(factor()) works for character, numeric and factor columns alike
    # and skips groups without any row.
    for (grp in levels(factor(group_col))) {
        tmp <- data_m[!is.na(group_col) & group_col == grp, ]
        print(paste("*** Compute for Group ", grp, " ***"))
        run_norm_tests(tmp)
    }
}
END
# Run the generated R script when requested and remove it after a successful
# run; on failure the script is kept so that it can be inspected.
# The path is quoted so that input files with whitespace in their name work.
if [ "$execute" == "TRUE" ]; then
    if Rscript "${file}${mid}.r"; then
        /bin/rm -f "${file}${mid}.r"
    fi
fi
Shell
1
https://gitee.com/ct5869/s-plot.git
git@gitee.com:ct5869/s-plot.git
ct5869
s-plot
s-plot
master

搜索帮助