sub main {
    $| = 1;     # autoflush STDOUT so progress output appears immediately

    # ok this is a hack, but I'm going to pretend I've got an object here
    # by creating a hash ref and passing it around like an object
    # this is to avoid using global variables in functions, and to consolidate
    # the globals into a nice '$self' package
    my $self = {};

    # Command-line flags that stay local to main() (not part of the $self "object"):
    my $help                            = 0;
    my $report_versions                 = 0;
    my $loopit                          = 0;
    my $sync                            = 0;
    my $local                           = 0;
    my $show_failed_jobs                = 0;
    my $default_meadow_type             = undef;
    my $submit_workers_max              = undef;
    my $total_running_workers_max       = undef;
    my $submission_options              = undef;
    my $run                             = 0;
    my $max_loops                       = 0;        # not running by default
    my $run_job_id                      = undef;
    my $force                           = undef;
    my $keep_alive                      = 0;        # ==1 means run even when there is nothing to do
    my $check_for_dead                  = 0;
    my $all_dead                        = 0;
    my $balance_semaphores              = 0;
    my $job_id_for_output               = 0;
    my $show_worker_stats               = 0;
    my $kill_worker_id                  = 0;
    my $reset_job_id                    = 0;
    my $reset_all_jobs_for_analysis    = 0;
    my $reset_failed_jobs_for_analysis = 0;

    # Options that other routines will need travel inside $self:
    $self->{'url'}                  = undef;
    $self->{'reg_conf'}             = undef;
    $self->{'reg_type'}             = undef;
    $self->{'reg_alias'}            = undef;
    $self->{'nosqlvc'}              = undef;
    $self->{'config_files'}         = [];       # array ref target: Getopt::Long appends one entry per -config_file
    $self->{'sleep_minutes'}        = 1;
    $self->{'retry_throwing_jobs'}  = undef;
    $self->{'can_respecialize'}     = undef;
    $self->{'hive_log_dir'}         = undef;
    $self->{'submit_log_dir'}       = undef;

    GetOptions(
                    # connection parameters
               'url=s'              => \$self->{'url'},
               'reg_conf|regfile=s' => \$self->{'reg_conf'},
               'reg_type=s'         => \$self->{'reg_type'},
               'reg_alias|regname=s'=> \$self->{'reg_alias'},
               'nosqlvc=i'          => \$self->{'nosqlvc'},      # can't use the binary "!" as it is a propagated option

                    # json config files
               'config_file=s'      => $self->{'config_files'},

                    # loop control
               'run'                => \$run,
               'loop'               => \$loopit,
               'max_loops=i'        => \$max_loops,
               'keep_alive'         => \$keep_alive,
               'job_id|run_job_id=i'=> \$run_job_id,
               'force=i'            => \$force,
               'sleep=f'            => \$self->{'sleep_minutes'},

                    # meadow control
               'local!'             => \$local,
               'meadow_type=s'      => \$default_meadow_type,
               'total_running_workers_max=i' => \$total_running_workers_max,
               'submit_workers_max=i'        => \$submit_workers_max,
               'submission_options=s'        => \$submission_options,

                    # worker control
               'job_limit=i'            => \$self->{'job_limit'},
               'life_span|lifespan=i'   => \$self->{'life_span'},
               'logic_name=s'           => \$self->{'logic_name'},
               'hive_log_dir|hive_output_dir=s' => \$self->{'hive_log_dir'},
               'retry_throwing_jobs=i'  => \$self->{'retry_throwing_jobs'},
               'can_respecialize=i'     => \$self->{'can_respecialize'},
               'debug=i'                => \$self->{'debug'},
               'submit_log_dir=s'       => \$self->{'submit_log_dir'},

                    # other commands/options
               'h|help!'            => \$help,
               'v|versions!'        => \$report_versions,
               'sync!'              => \$sync,
               'dead!'              => \$check_for_dead,
               'killworker=i'       => \$kill_worker_id,
               'alldead!'           => \$all_dead,
               'balance_semaphores' => \$balance_semaphores,
               'no_analysis_stats'  => \$self->{'no_analysis_stats'},
               'worker_stats'       => \$show_worker_stats,
               'failed_jobs'        => \$show_failed_jobs,
               'reset_job_id=i'     => \$reset_job_id,
               'reset_failed|reset_failed_jobs_for_analysis=s' => \$reset_failed_jobs_for_analysis,
               'reset_all|reset_all_jobs_for_analysis=s'       => \$reset_all_jobs_for_analysis,
               'job_output=i'       => \$job_id_for_output,
    ) or script_usage(1);   # GetOptions returns false on unknown/malformed options; bail out rather than run half-configured

    if ($help) { script_usage(0); }

    if($report_versions) {
        report_versions();
        exit(0);
    }

    my $config = Bio::EnsEMBL::Hive::Utils::Config->new(@{$self->{'config_files'}});

    # Decide how many beekeeper loops to perform:
    #   -run / -job_id  => exactly one loop
    #   -loop / -keep_alive => unlimited (unless -max_loops caps it)
    if($run or $run_job_id) {
        $max_loops = 1;
    } elsif ($loopit or $keep_alive) {
        unless($max_loops) {
            $max_loops = -1;    # unlimited
        }
    }

    # A database connection is mandatory: either a url or a registry lookup.
    if($self->{'url'} or $self->{'reg_alias'}) {
        $self->{'dba'} = Bio::EnsEMBL::Hive::DBSQL::DBAdaptor->new(
                -url                            => $self->{'url'},
                -reg_conf                       => $self->{'reg_conf'},
                -reg_type                       => $self->{'reg_type'},
                -reg_alias                      => $self->{'reg_alias'},
                -no_sql_schema_version_check    => $self->{'nosqlvc'},
        );
    } else {
        print "\nERROR : Connection parameters (url or reg_conf+reg_alias) need to be specified\n\n";
        script_usage(1);
    }

    # A password-masked url, safe to pass to workers / show in logs:
    $self->{'safe_url'} = $self->{'dba'}->dbc->url('WORKER_PASSWORD');

    my $queen = $self->{'dba'}->get_Queen;

    my $pipeline_name = $self->{'dba'}->get_MetaAdaptor->get_value_by_key( 'hive_pipeline_name' );

    if($pipeline_name) {
        warn "Pipeline name: $pipeline_name\n";
    } else {
        print STDERR "+---------------------------------------------------------------------+\n";
        print STDERR "!                                                                     !\n";
        print STDERR "!                  WARNING:                                           !\n";
        print STDERR "!                                                                     !\n";
        print STDERR "! At the moment your pipeline doesn't have 'pipeline_name' defined.   !\n";
        print STDERR "! This may seriously impair your beekeeping experience unless you are !\n";
        print STDERR "! the only farm user. The name should be set in your PipeConfig file, !\n";
        print STDERR "! or if you are running an old pipeline you can just set it by hand   !\n";
        print STDERR "! in the 'meta' table.                                                !\n";
        print STDERR "!                                                                     !\n";
        print STDERR "+---------------------------------------------------------------------+\n";
    }

    if($run_job_id) {
        $submit_workers_max = 1;    # running a single specific job only ever needs one worker
    }

    $default_meadow_type = 'LOCAL' if($local);
    my $valley = Bio::EnsEMBL::Hive::Valley->new( $config, $default_meadow_type, $pipeline_name );

    # Refuse to run if beekeeper itself has been farmed out to a compute node:
    my ($beekeeper_meadow_type, $beekeeper_meadow_name) = $valley->whereami();
    unless($beekeeper_meadow_type eq 'LOCAL') {
        die "beekeeper.pl detected it has been itself submitted to '$beekeeper_meadow_type/$beekeeper_meadow_name', but this mode of operation is not supported.\n"
           ."Please just run beekeeper.pl on a farm head node, preferably from under a 'screen' session.\n";
    }

    # Command-line overrides of the config take effect only when explicitly given:
    $valley->config_set('SubmitWorkersMax', $submit_workers_max) if(defined $submit_workers_max);

    my $default_meadow = $valley->get_default_meadow();
    warn "Default meadow: ".$default_meadow->signature."\n\n";

    $default_meadow->config_set('TotalRunningWorkersMax', $total_running_workers_max) if(defined $total_running_workers_max);
    $default_meadow->config_set('SubmissionOptions', $submission_options) if(defined $submission_options);

    # One-shot maintenance commands, performed (in this order) before any looping:

    if($reset_job_id) { $queen->reset_job_by_dbID_and_sync($reset_job_id); }

    if($job_id_for_output) {
        printf("===== job output\n");
        my $job = $self->{'dba'}->get_AnalysisJobAdaptor->fetch_by_dbID($job_id_for_output)
            || die "Could not fetch job with dbID=$job_id_for_output";    # guard against calling toString on undef
        print $job->toString. "\n";
    }

    if(my $reset_logic_name = $reset_all_jobs_for_analysis || $reset_failed_jobs_for_analysis) {

        my $reset_analysis = $self->{'dba'}->get_AnalysisAdaptor->fetch_by_logic_name($reset_logic_name)
              || die( "Cannot AnalysisAdaptor->fetch_by_logic_name($reset_logic_name)");

        # second argument acts as the "all jobs (not just failed)" flag:
        $self->{'dba'}->get_AnalysisJobAdaptor->reset_jobs_for_analysis_id($reset_analysis->dbID, $reset_all_jobs_for_analysis);
        $self->{'dba'}->get_Queen->synchronize_AnalysisStats($reset_analysis->stats);
    }

    if ($kill_worker_id) {
        my $kill_worker = $queen->fetch_by_dbID($kill_worker_id);

        # Only kill a worker that (a) the Queen believes is alive, (b) lives in a
        # meadow we can reach, and (c) the meadow confirms is alive and ours:
        unless( $kill_worker->cause_of_death() ) {
            if( my $meadow = $valley->find_available_meadow_responsible_for_worker( $kill_worker ) ) {

                if( $meadow->check_worker_is_alive_and_mine ) {
                    printf("Killing worker: %10d %35s %15s %20s(%d) : ",
                            $kill_worker->dbID, $kill_worker->host, $kill_worker->process_id,
                            $kill_worker->analysis->logic_name, $kill_worker->analysis_id);

                    $meadow->kill_worker($kill_worker);
                    $kill_worker->cause_of_death('KILLED_BY_USER');
                    $queen->register_worker_death($kill_worker);
                         # what about clean-up? Should we do it here or not?
                } else {
                    die "According to the Meadow, the Worker (dbID=$kill_worker_id) is not running, so cannot kill";
                }
            } else {
                die "Cannot access the Meadow responsible for the Worker (dbID=$kill_worker_id), so cannot kill";
            }
        } else {
            die "According to the Queen, the Worker (dbID=$kill_worker_id) is not running, so cannot kill";
        }
    }

    # Optionally focus on one analysis: either the one owning -job_id, or the one named by -logic_name:
    my $analysis = $run_job_id
        ? $self->{'dba'}->get_AnalysisAdaptor->fetch_by_dbID( $self->{'dba'}->get_AnalysisJobAdaptor->fetch_by_dbID( $run_job_id )->analysis_id )
        : ( $self->{'logic_name'} && $self->{'dba'}->get_AnalysisAdaptor->fetch_by_logic_name($self->{'logic_name'}) );

    if($all_dead)           { $queen->register_all_workers_dead(); }
    if($check_for_dead)     { $queen->check_for_dead_workers($valley, 1); }
    if($balance_semaphores) { $self->{'dba'}->get_AnalysisJobAdaptor->balance_semaphores( $analysis && $analysis->dbID ); }

    if ($max_loops) { # positive $max_loop means limited, negative means unlimited

        run_autonomously($self, $max_loops, $keep_alive, $queen, $valley, $analysis, $run_job_id, $force);

    } else {
            # the output of several methods will look differently depending on $analysis being [un]defined

        if($sync) {
            $queen->synchronize_hive($analysis);
        }
        $queen->print_analysis_status($analysis) unless($self->{'no_analysis_stats'});

        if($show_worker_stats) {
            print "\n===== List of live Workers according to the Queen: ======\n";
            foreach my $worker (@{ $queen->fetch_overdue_workers(0) }) {
                print $worker->toString(1)."\n";
            }
        }
        $self->{'dba'}->get_RoleAdaptor->print_active_role_counts;

        Bio::EnsEMBL::Hive::Scheduler::schedule_workers_resync_if_necessary($queen, $valley, $analysis);   # show what would be submitted, but do not actually submit

        # show progress both for the focused analysis (if any) and for the whole hive:
        $queen->get_remaining_jobs_show_hive_progress( $analysis ) if ($analysis);
        $queen->get_remaining_jobs_show_hive_progress();

        if($show_failed_jobs) {
            print("===== failed jobs\n");
            my $failed_job_list = $self->{'dba'}->get_AnalysisJobAdaptor->fetch_all_by_analysis_id_status($analysis && $analysis->dbID, 'FAILED');

            foreach my $job (@{$failed_job_list}) {
                print $job->toString. "\n";
            }
        }
    }

    exit(0);
}