miRNA analysis script
Here are scripts for miRNA analysis
- Calculate unique reads
1. Total unique reads
Calculate the unique read counts from a FASTQ file
------ file process.sh. -------
#!/usr/bin/env bash
# process.sh - count how often each distinct read sequence occurs in a FASTQ file.
#
# Usage: ./process.sh <input.fastq> <output_name>
#   $1  input FASTQ file (4 lines per record)
#   $2  output file name; the result is written to ./<output_name>
#
# Files written in the current directory:
#   input.fq       copy of the input with odd-numbered lines cut to their first token
#   id_read.txt    one line per record: first token, third-from-last token, last token
#                  of the joined 4-line record (ID, sequence, quality for plain FASTQ)
#   ./<output>     "count sequence" lines (uniq -c format), highest count first
#
# Returns 2 on wrong usage, otherwise the status of the first failing stage.
process_fastq() {
  if [ "$#" -ne 2 ]; then
    printf 'Usage: %s <input.fastq> <output_name>\n' "${0##*/}" >&2
    return 2
  fi

  local fastq=$1
  local out_name=$2

  # On odd-numbered lines (the header and the "+" line of each record) keep
  # only the first whitespace-delimited token; even-numbered lines pass through.
  awk '{if((NR%2)==1)print $1;else print $0}' "$fastq" > input.fq || return

  # Join every 4 lines into one space-separated line, then keep the ID,
  # the sequence and the last field.
  awk '{if(NR%4!=0)ORS=" ";else ORS="\n"}1' input.fq \
    | awk '{print $1 " " $(NF-2) " " $NF}' > ./id_read.txt || return

  # Count identical sequences and list them by descending count.
  awk '{print $2}' ./id_read.txt \
    | sort \
    | uniq -c \
    | sort -r -nk1 > "./$out_name"
}

process_fastq "$@"
-------- run command-----
$1 : input fastq file
$2 : output file name
./process.sh Low_glucose.50_clean.fq low.txt
2. co-express
# Run the find_common tool on the two expression-level files, passed via -e
# (high) and -c (low). find_common itself is not shown in these notes.
./find_common -e /home/xuanzhan/Data/glucose/expreLevel_cor_high.txt -c /home/xuanzhan/Data/glucose/expreLevel_cor_low.txt
# Output file: common_list_gc.txt
--- Merge with the error-corrected files ----
--------- output file: [seq] [Before_cor_count_in_high] [After_cor_count_in_high] [Before_cor_count_in_low] [After_cor_count_in_low] -----
# Step 1: index the corrected high file by its 2nd column (storing the 1st
# column), then insert that value as a new 3rd column into each row of the
# common list, looked up by the row's 1st column.
awk 'NR == FNR { corrected_high[$2] = $1; next }
     { print $1, $2, corrected_high[$1], $3 }' \
  expreLevel_cor_corhigh.txt /home/xuanzhan/Data/miRNA/code/common_glu_Rankbyhigh.txt > tmp.txt
# Step 2: same lookup against the corrected low file; the value is appended
# as the last column of every row from step 1.
awk 'NR == FNR { corrected_low[$2] = $1; next }
     { print $0, corrected_low[$1] }' \
  expreLevel_cor_corlow.txt tmp.txt > common_glucose_bH_aH_bL_aL.txt
3. tissue-specific expression
---------- exists in Low file, does not exist in High file -------------
# After correction: join the corrected low file against the corrected high
# file on column 2, keep only rows with no match in high, and sort by the
# low-file column 1 value (numeric, descending).
awk 'NR == FNR { in_high[$2] = $1; next }
     { print $2, $1, in_high[$2] }' expreLevel_cor_corhigh.txt expreLevel_cor_corlow.txt \
  | awk '$3 == ""' \
  | sort -k2nr > lowspecific_cor.txt
# Before correction: the same join, filter and sort on the uncorrected files.
awk 'NR == FNR { in_high[$2] = $1; next }
     { print $2, $1, in_high[$2] }' expreLevel_cor_high.txt expreLevel_cor_low.txt \
  | awk '$3 == ""' \
  | sort -k2nr > lowspecific.txt
----------- exists in High file, does not exist in Low file -------------
# After correction: rows of the corrected high file whose column-2 key has no
# entry in the corrected low file, sorted by column-1 value (numeric, descending).
awk 'NR == FNR { in_low[$2] = $1; next }
     { print $2, $1, in_low[$2] }' expreLevel_cor_corlow.txt expreLevel_cor_corhigh.txt \
  | awk '$3 == ""' \
  | sort -k2nr > highspecific_cor.txt
# Before correction: the same join, filter and sort on the uncorrected files.
awk 'NR == FNR { in_low[$2] = $1; next }
     { print $2, $1, in_low[$2] }' expreLevel_cor_low.txt expreLevel_cor_high.txt \
  | awk '$3 == ""' \
  | sort -k2nr > highspecific.txt
# Prefix each before-correction row with its line number, append the value
# found for the same key in the after-correction list, then sort on that
# appended column (numeric, descending).
awk 'NR == FNR { after[$1] = $2; next }
     { print FNR, $0, after[$1] }' highspecific_cor.txt highspecific.txt \
  | sort -k4nr > high_only_BF.txt
----- output file: [Before_rank] [seq] [Before_count] [After_correction_count] -----
# Prefix each row of lowspecific.txt with its line number, append the value
# found for the same key in lowspecific_cor.txt, then sort on that appended
# column (numeric, descending).
awk 'NR == FNR { after[$1] = $2; next }
     { print FNR, $0, after[$1] }' lowspecific_cor.txt lowspecific.txt \
  | sort -k4nr > low_only_BF.txt
---- All comparisons below are made against the uncorrected files ----
---------- exists in Low file, does not exist in High file -------------
# After correction: rows of the corrected low file whose column-2 key has no
# entry in the UNCORRECTED high file, sorted by column-1 value (numeric,
# descending).
awk 'NR == FNR { in_high[$2] = $1; next }
     { print $2, $1, in_high[$2] }' expreLevel_cor_high.txt expreLevel_cor_corlow.txt \
  | awk '$3 == ""' \
  | sort -k2nr > N_lowspecific_cor.txt
# Before correction: the same join, filter and sort, with the uncorrected low
# file against the uncorrected high file.
awk 'NR == FNR { in_high[$2] = $1; next }
     { print $2, $1, in_high[$2] }' expreLevel_cor_high.txt expreLevel_cor_low.txt \
  | awk '$3 == ""' \
  | sort -k2nr > N_lowspecific.txt
# Prefix each before-correction row with its line number, append the value
# for the same key from the after-correction list, sort on that appended
# column (numeric, descending), then prefix the new line number as well.
awk 'NR == FNR { after[$1] = $2; next }
     { print FNR, $0, after[$1] }' N_lowspecific_cor.txt N_lowspecific.txt \
  | sort -k4nr \
  | awk '{ print NR, $0 }' > N_low_only_FB.txt
----- output file: [After_correction_rank] [Before_correction_rank] [seq] [Before_count] [After_correction_count] -----
---------- exists in High file, does not exist in Low file -------------
# After correction: rows of the corrected high file whose column-2 key has no
# entry in the UNCORRECTED low file, sorted by column-1 value (numeric,
# descending).
awk 'NR == FNR { in_low[$2] = $1; next }
     { print $2, $1, in_low[$2] }' expreLevel_cor_low.txt expreLevel_cor_corhigh.txt \
  | awk '$3 == ""' \
  | sort -k2nr > N_highspecific_cor.txt
# Before correction: the same join, filter and sort, with the uncorrected high
# file against the uncorrected low file.
awk 'NR == FNR { in_low[$2] = $1; next }
     { print $2, $1, in_low[$2] }' expreLevel_cor_low.txt expreLevel_cor_high.txt \
  | awk '$3 == ""' \
  | sort -k2nr > N_highspecific.txt
# Prefix each before-correction row with its line number, append the value
# for the same key from the after-correction list, sort on that appended
# column (numeric, descending), then prefix the new line number as well.
awk 'NR == FNR { after[$1] = $2; next }
     { print FNR, $0, after[$1] }' N_highspecific_cor.txt N_highspecific.txt \
  | sort -k4nr \
  | awk '{ print NR, $0 }' > N_high_only_FB.txt