# NOTE(review): this is the body of a pipeline_analyses-style method (the
# enclosing "sub" line is outside this chunk). It returns an arrayref of
# eHive analysis definitions wiring a three-step pipeline:
#   pecan (multiple alignment) -> gerp_col (column scores) -> gerp_elem
#   (constrained elements), with gerp_elem semaphored behind gerp_col.
my ($self) = @_;
return [
## First analysis: PECAN (multiple sequence alignment)
{ -logic_name => 'pecan',
-module => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd',
-parameters => {
# The cmd parameter is required by the SystemCmd module. It defines the command line to be run.
# Note that some values are written between #hashes#. Those will be substituted by the corresponding input values
'cmd' => 'java -cp /soft/pecan_v0.8/pecan_v0.8.jar bp.pecan.Pecan -E "#tree_string#" -F #input_files# -G #msa_file#',
},
-input_ids => [
# Each input_id is a new job for this analysis. Here we are defining the input_files and the msa_file for
# the first and only job.
{
# Newick species tree handed to Pecan via -E.
'tree_string' => '((((HUMAN,(MOUSE,RAT)),COW),OPOSSUM),CHICKEN);',
# One FASTA file per species; passed to Pecan via -F.
'input_files' => 'human.fa mouse.fa rat.fa cow.fa opossum.fa chicken.fa',
# Alignment output file written by Pecan (-G); also flows downstream.
'msa_file' => "pecan3.mfa",
# Genomic coordinates; not used by Pecan itself, only forwarded to
# gerp_elem through the 'A->1' template below.
'chr_name' => "chr13",
'chr_start' => "32878016",
},
],
-flow_into => {
# Dataflow rules. When a 'pecan' job finishes it seeds one 'gerp_col'
# job (grouped under semaphore 'A') and one 'gerp_elem' job that stays
# blocked until everything in group 'A' has completed — so gerp_elem
# only runs once the #msa_file#.rates file should exist.
# Using the input_template, we are setting the input_file for the other two analyses.
'1->A' => { 'gerp_col' => {'input_file' => '#msa_file#'} },
'A->1' => { 'gerp_elem' => {'input_file' => '#msa_file#.rates',
'chr_name' => '#chr_name#',
'chr_start' => '#chr_start#'}
},
},
},
## Second analysis: GERP_COL (per-column conservation scores)
{ -logic_name => 'gerp_col',
-module => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd',
-parameters => {
# #input_file# is filled in by the '1->A' template above, i.e. it is the
# pecan job's #msa_file#. Presumably gerpcol writes #input_file#.rates,
# which gerp_elem consumes — TODO confirm against the gerpcol docs.
'cmd' => 'gerpcol -t tree.nw -f #input_file# -a -e HUMAN',
},
},
## Third analysis: GERP_ELEM (constrained elements from the .rates file)
{ -logic_name => 'gerp_elem',
-module => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd',
-parameters => {
# #input_file#, #chr_name# and #chr_start# all come from the 'A->1'
# template on the pecan analysis above; output is a .bed file (-x .bed).
'cmd' => 'gerpelem -f #input_file# -c #chr_name# -s #chr_start# -x .bed',
},
},
];
}