HPC Lab - Software - BiBench

Source code for bibench.algorithms.coalesce

####################################################################
###     ____  _ ____                  _                          ###
###    | __ )(_) __ )  ___ _ __   ___| |__                       ###
###    |  _ \| |  _ \ / _ \ '_ \ / __| '_ \                      ###
###    | |_) | | |_) |  __/ | | | (__| | | |                     ###
###    |____/|_|____/ \___|_| |_|\___|_| |_|                     ###
###                                                              ###
###--------------------------------------------------------------###
###                                                              ###
### This file is part of the BiBench package for biclustering    ###
### analysis.                                                    ###
###                                                              ###
### Copyright (c) 2011 by:                                       ###
###   * Kemal Eren,                                              ###
###   * Mehmet Deveci,                                           ###
###   * Umit V. Catalyurek                                       ###
###                                                              ###
###--------------------------------------------------------------###
###                                                              ###
### For license info, please see the README and LICENSE files    ###
### in the main directory.                                       ###
###                                                              ###
###--------------------------------------------------------------###

"""Coalesce algorithm wrapper for finding biclusters with up and down regulated TF."""
import os, subprocess

from bibench.algorithms.wrapper import wrapper_helper
from bibench.bicluster import \
    Bicluster, BiclusterList, bicluster_algorithm
from bibench.datasets import io
import bibench.util as util

BINARY = 'COALESCE'

@bicluster_algorithm
def coalesce(data, geneModuleProbability=0.95, conditionPvalueThreshold=0.05,
             conditionZThreshold=0.5, normalize=False):
    """
    Wrapper for the COALESCE binary.

    Args:
        * data: numpy.ndarray
        * geneModuleProbability: the probability threshold for including
            genes in a regulatory module.
        * conditionPvalueThreshold: the P-value threshold for including
            conditions in a regulatory module.
        * conditionZThreshold: the Z-score threshold for including
            conditions in a regulatory module.
        * normalize: whether to normalize the data. Any truthy value
            enables normalization; any falsy value disables it.

    Returns:
        A list of biclusters.

    """
    # Build the keyword arguments explicitly instead of relying on locals(),
    # so that only the intended names are forwarded to wrapper_helper.
    kwargs = {
        'data': data,
        'geneModuleProbability': geneModuleProbability,
        'conditionPvalueThreshold': conditionPvalueThreshold,
        'conditionZThreshold': conditionZThreshold,
        # Forwarded as an integer flag (1 = normalize, 0 = do not). Use
        # truthiness rather than an identity test against False, so that
        # values such as 0 or None do not accidentally switch it on.
        'normalize': 1 if normalize else 0,
    }
    return wrapper_helper(BINARY,
                          _write_dataset_,
                          _read_results_,
                          _do_call_,
                          **kwargs)
def _do_call_(data, datafile, results_dir, **kwargs):
    """
    Executes the COALESCE binary with the given parameters.

    Standard output is written to ``bic.out`` and standard error to
    ``debug.out``, both inside ``results_dir``.

    Args:
        * data: not used by this function.
        * datafile: path of the input file, passed to the binary via ``-i``.
        * results_dir: directory in which the two output files are created.
        * kwargs: must contain ``geneModuleProbability``,
            ``conditionPvalueThreshold``, ``conditionZThreshold`` and
            ``normalize``; any other entries are ignored.

    Raises:
        KeyError: if one of the required options is missing from kwargs.
        subprocess.CalledProcessError: if the binary exits with a non-zero
            status.

    """
    # Build the argument vector directly rather than formatting a string and
    # splitting it on whitespace, so paths containing spaces stay intact.
    command = [
        BINARY,
        "-i", str(datafile),
        "-p", str(kwargs["geneModuleProbability"]),
        "-c", str(kwargs["conditionPvalueThreshold"]),
        "-C", str(kwargs["conditionZThreshold"]),
    ]

    # Compare by value: an identity test against the literal 0 depends on
    # interpreter-specific integer caching.
    if kwargs["normalize"] != 0:
        command.append("-e")

    stndout = os.path.join(results_dir, "bic.out")
    stnderr = os.path.join(results_dir, "debug.out")

    with open(stndout, 'w') as out:
        with open(stnderr, 'w') as err:
            subprocess.check_call(command, stdout=out, stderr=err)


def _read_results_(dirname, data):
    """
    Reads ``bic.out`` from ``dirname`` and returns a list of biclusters.

    The file is consumed in groups of four lines (cluster, gene, condition,
    motif); only the gene and condition lines are used.

    Args:
        * dirname: directory containing ``bic.out``.
        * data: the dataset, passed through to each bicluster.

    Returns:
        A list with one bicluster object per group of four lines.

    """
    bic_out = os.path.join(dirname, "bic.out")
    biclusters = []
    # The context manager guarantees the file is closed even when a line
    # fails to parse.
    with open(bic_out, 'r') as results:
        for _cluster, gene_line, condition_line, _motif in util.grouper(results, 4):
            biclusters.append(_createBicluster_(gene_line, condition_line, data))
    return biclusters


def _createBicluster_(geneLine, conditionLine, data):
    """
    Extracts the rows and columns of a bicluster from the given gene and
    condition lines.

    Each line is split on tabs; the first field is skipped (presumably a
    label -- confirm against the COALESCE output format) and the remaining
    fields are parsed as integers.

    Args:
        * geneLine: tab-separated line holding the row indices.
        * conditionLine: tab-separated line holding the column indices.
        * data: the dataset the indices refer to.

    Returns:
        A Bicluster built from the parsed rows and columns.

    """
    # Materialise real lists: map() returns a one-shot iterator on Python 3.
    genes = [int(field) for field in geneLine.split('\t')[1:]]
    conditions = [int(field) for field in conditionLine.split('\t')[1:]]
    return Bicluster(genes, conditions, data)


def _write_dataset_(data, filename):
    """Writes a dataset to ``filename`` in pcl format, as read by COALESCE."""
    io.write_pcl_dataset(data, filename)