Source code for bibench.algorithms.coalesce
### ____ _ ____ _ ###
### | __ )(_) __ ) ___ _ __ ___| |__ ###
### | _ \| | _ \ / _ \ '_ \ / __| '_ \ ###
### | |_) | | |_) | __/ | | | (__| | | | ###
### |____/|_|____/ \___|_| |_|\___|_| |_| ###
### ###
### ###
### This file is part of the BiBench package for biclustering ###
### analysis. ###
### ###
### Copyright (c) 2011 by: ###
### * Kemal Eren, ###
### * Mehmet Deveci, ###
### * Umit V. Catalyurek ###
### ###
### ###
### For license info, please see the README and LICENSE files ###
### in the main directory. ###
### ###
"""Coalesce algorithm wrapper for finding biclusters with up and down regulated TF."""
import os, subprocess
from bibench.algorithms.wrapper import wrapper_helper
from bibench.bicluster import \
Bicluster, BiclusterList, bicluster_algorithm
from bibench.datasets import io
import bibench.util as util
[docs]def coalesce(data,
Wrapper for the COALESCE binary.
* data: numpy.ndarray
* geneModuleProbability: the probability threshold for including
genes in a regulatory module.
* conditionPvalueThreshold: the P-value threshold for including
conditions in a regulatory module.
* conditionZThreshold: the Z-score threshold for including
conditions in a regulatory module.
* normalize: whether to normalize the data.
A list of biclusters.
if normalize is False:
normalize = 0
normalize = 1
kwargs = locals()
return wrapper_helper(BINARY,
def _do_call_(data, datafile, results_dir, **kwargs):
    """Execute the COALESCE binary with the given parameters.

    Standard output of the process is written to ``bic.out`` and standard
    error to ``debug.out``, both inside ``results_dir``.

    Args:
        * data: not used by this function.
        * datafile: path of the input file, passed to the binary via ``-i``.
        * results_dir: directory in which the two output files are created.
        * kwargs: must contain ``geneModuleProbability`` (``-p``),
            ``conditionPvalueThreshold`` (``-c``), ``conditionZThreshold``
            (``-C``) and ``normalize`` (adds ``-e`` when non-zero).

    Raises:
        KeyError: if one of the required keyword arguments is missing.
        subprocess.CalledProcessError: if the binary exits with a non-zero
            status.

    """
    # Build the argument vector directly rather than splitting a formatted
    # string on whitespace, so a data file path containing spaces is passed
    # to the binary as a single argument.
    command = [
        BINARY,
        "-i", str(datafile),
        "-p", str(kwargs["geneModuleProbability"]),
        "-c", str(kwargs["conditionPvalueThreshold"]),
        "-C", str(kwargs["conditionZThreshold"]),
    ]

    # Compare by value: ``is not 0`` tests object identity, which is
    # implementation-dependent for integers and is true for ``False``.
    if kwargs["normalize"] != 0:
        command.append("-e")

    stndout = os.path.join(results_dir, "bic.out")
    stnderr = os.path.join(results_dir, "debug.out")
    with open(stndout, 'w') as out:
        with open(stnderr, 'w') as err:
            subprocess.check_call(command, stdout=out, stderr=err)
def _read_results_(dirname, data):
    """Read ``bic.out`` from ``dirname`` and return the biclusters it lists.

    The file is consumed in groups of four consecutive lines (cluster, gene,
    condition and motif line); only the gene and condition lines are used to
    build each bicluster.

    Args:
        * dirname: directory that contains ``bic.out``.
        * data: the dataset handed on to every created bicluster.

    Returns:
        A list of bicluster objects, one per four-line group in the file.

    """
    bicOut = os.path.join(dirname, "bic.out")
    biclusters = []
    # The context manager closes the result file even if parsing fails.
    with open(bicOut, 'r') as f:
        for clusterLine, geneLine, conditionLine, motifLine in util.grouper(f, 4):
            # Collect every parsed bicluster; without the append the
            # function would always return an empty list.
            biclusters.append(_createBicluster_(geneLine, conditionLine, data))
    return biclusters
def _createBicluster_(geneLine, conditionLine, data):
    """Build a bicluster from one gene line and one condition line.

    Both lines are tab-separated. The first field of each line is skipped
    and the remaining fields are converted to integers, giving the row
    (gene) and column (condition) indices of the bicluster.

    Args:
        * geneLine: tab-separated line holding the gene indices.
        * conditionLine: tab-separated line holding the condition indices.
        * data: the dataset passed through to ``Bicluster``.

    Returns:
        A ``Bicluster`` built from the parsed rows, columns and ``data``.

    Raises:
        ValueError: if a field after the first one is not an integer.

    """
    # Materialise real lists: on Python 3 ``map`` yields a one-shot iterator,
    # which cannot be indexed or traversed more than once.
    genes = [int(field) for field in geneLine.split('\t')[1:]]
    conditions = [int(field) for field in conditionLine.split('\t')[1:]]
    return Bicluster(genes, conditions, data)
def _write_dataset_(data, filename):
    """Save ``data`` to ``filename`` in PCL format, the format used for Coalesce.

    Args:
        * data: the dataset to write.
        * filename: path of the file to create.

    """
    io.write_pcl_dataset(data, filename)