From e0eddbcd5b824b4eb27c6a8591808ee58e4d1c31 Mon Sep 17 00:00:00 2001 From: LouisonF <fresnaislouison@gmail.com> Date: Mon, 8 Jan 2024 09:15:12 +0100 Subject: [PATCH 1/2] update batchs and results processing parameters --- mana/batchs.py | 24 ++++++++++++++---------- mana/results_processing.py | 6 ++++-- props.properties | 4 ++-- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/mana/batchs.py b/mana/batchs.py index 2bcb59e..8451e3c 100644 --- a/mana/batchs.py +++ b/mana/batchs.py @@ -4,7 +4,7 @@ import os import pandas as pd def write_div_enum_script(script_path,batch_directory, rxn_enum_set_dir,output_directory, modelfile, weightfile,\ - reactionFile, prev_sol_dir ='prev_sol_dir/', log_dir='log_dir',dist_anneal=0.9, obj_tol=0.01,\ + reactionFile, prev_sol_dir ='prev_sol_dir/', log_dir='log_dir',env="MANA",dist_anneal=0.9, obj_tol=0.01,\ iters=100,para_batchs=False): """write_div_enum_script. @@ -29,6 +29,8 @@ def write_div_enum_script(script_path,batch_directory, rxn_enum_set_dir,output_d process should be saved log_dir : str path to the directory were log files should be stored + env : str + name of the anaconda environment to be activated dist_anneal : float dexom-python parameter, 0<=a<=1 controls the distance between each successive solution obj_tol : float @@ -66,8 +68,8 @@ def write_div_enum_script(script_path,batch_directory, rxn_enum_set_dir,output_d if para_batchs: with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_diversity_enum.sh", "w+") as f: f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 72:00:00\n#SBATCH -J div_enum\n#SBATCH -o %s/runout%s_div.out\n#SBATCH ' - '-e %s/runerr%s_div.out\nsource activate cobrapy \n' - % (str(log_dir),str(barcode),str(log_dir),str(barcode))) + '-e %s/runerr%s_div.out\nsource activate %s \n' + % (str(log_dir),str(barcode),str(log_dir),str(barcode), str(env))) with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_diversity_enum.sh", 
"dist_anneal") as f: f.write('python %s -o %s/%s_div_enum_%i -m %s -r %s -p %s -a %.5f -i %i --obj_tol %.4f' % (script_path,output_directory, barcode, i, modelfile, weightfile, prevsol_file, dist_anneal, iters, obj_tol)) @@ -79,11 +81,11 @@ def write_div_enum_script(script_path,batch_directory, rxn_enum_set_dir,output_d if para_batchs == False: with open(batch_directory+"/runfiles_"+barcode+"_diversity_enum.sh", "w+") as f: f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 72:00:00\n#SBATCH -J div_enum\n#SBATCH -o %s/runout%s_div.out\n#SBATCH ' - '-e %s/runerr%s_div.out\nsource activate cobrapy\nls %s/batch/%s_*_diversity_enum.sh|xargs -n 1 -P 1 bash' - % (str(log_dir),str(barcode),str(log_dir),str(barcode),str(batch_directory),str(barcode))) + '-e %s/runerr%s_div.out\nsource activate %s\nls %s/batch/%s_*_diversity_enum.sh|xargs -n 1 -P 1 bash' + % (str(log_dir),str(barcode),str(log_dir),str(barcode),str(env),str(batch_directory),str(barcode))) def write_rxn_enum_script(script_path,batch_directory,output_directory, modelfile, weightfile,\ - reactionFile="", log_dir='log_dir',obj_tol=0.001, iters=100,para_batchs=False): + reactionFile="", log_dir='log_dir',env="MANA",obj_tol=0.001, iters=100,para_batchs=False): """write_rxn_enum_script. 
Parameters @@ -102,6 +104,8 @@ def write_rxn_enum_script(script_path,batch_directory,output_directory, modelfil path to the file that contains the list of reactions in the model log_dir : str path to the directory were log files should be stored + env : str + name of the anaconda environment to be activated obj_tol : float dexom-python parameter, objective value tolerance, as a fraction of the original value iters : int @@ -121,8 +125,8 @@ def write_rxn_enum_script(script_path,batch_directory,output_directory, modelfil for i in range(rxn_num): with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_reaction_enum.sh", "w+") as f: f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 24:00:00\n#SBATCH -J rxn_enum\n#SBATCH -o %s/runout%s_div.out\n#SBATCH ' - '-e %s/runerr%s_div.out\nsource activate cobrapy \n' - % (str(log_dir),str(barcode),str(log_dir),str(barcode))) + '-e %s/runerr%s_div.out\nsource activate %s \n' + % (str(log_dir),str(barcode),str(log_dir),str(barcode),str(env))) with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_reaction_enum.sh", "a") as f: f.write('python %s -o %s/%s_rxn_enum_%i --range %i_%i -m %s -r %s -l %s ' '-t 600 --mipgap %f \n' % (script_path,output_directory,barcode, i, i*iters, i*iters+iters, modelfile, weightfile, reactionFile, obj_tol)) @@ -133,5 +137,5 @@ def write_rxn_enum_script(script_path,batch_directory,output_directory, modelfil '-t 600 --mipgap %f \n' % (script_path,output_directory,barcode, i, i*iters, i*iters+iters, modelfile, weightfile, reactionFile, obj_tol)) with open(batch_directory+"/runfiles_"+barcode+"_reaction_enum.sh", "w+") as f: f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 24:00:00\n#SBATCH -J rxn_enum\n#SBATCH -o %s/runout%s_div.out\n#SBATCH ' - '-e %s/runerr%s_div.out\nsource activate cobrapy\nls %s/batch/%s_{0..%i}_reaction_enum.sh|xargs -n 1 -P 1 bash' - % 
(str(log_dir),str(barcode),str(log_dir),str(barcode),str(batch_directory),str(barcode), int(rxn_num-1))) \ No newline at end of file + '-e %s/runerr%s_div.out\nsource activate %s\nls %s/batch/%s_{0..%i}_reaction_enum.sh|xargs -n 1 -P 1 bash' + % (str(log_dir),str(barcode),str(log_dir),str(barcode),str(env),str(batch_directory),str(barcode), int(rxn_num-1))) \ No newline at end of file diff --git a/mana/results_processing.py b/mana/results_processing.py index f40cf15..0193dc8 100644 --- a/mana/results_processing.py +++ b/mana/results_processing.py @@ -142,11 +142,13 @@ def concatenate_csv(filenames,out_dir,col_index,single_csv,index_suffix=""): combined_csv = pd.concat(list_csvs,ignore_index=False) if nrenum > 0: #Modify index after reaction_enum solutions - index_list = list(os.path.basename(filenames[0]).split('_')[0]+'_' + combined_csv.index.astype(str) + str(index_suffix)) + line_count = pd.RangeIndex(0,combined_csv.shape[0],1) + index_list = list(os.path.basename(filenames[0]).split('_')[0]+'_' + line_count.astype(str) + str(index_suffix)) index_list[0:nrenum] = list(combined_csv[0:nrenum]['Solutions_IDS']) combined_csv.index = index_list else: - combined_csv.index = os.path.basename(filenames[0]).split('_')[0]+'_' + combined_csv.index.astype(str) + str(index_suffix) + line_count = pd.RangeIndex(0,combined_csv.shape[0],1) + combined_csv.index = os.path.basename(filenames[0]).split('_')[0]+'_' + line_count.astype(str) + str(index_suffix) combined_csv.drop(combined_csv.columns[0],axis=1,inplace=True) combined_csv.drop_duplicates(inplace=True) #remove identical solutions if single_csv: diff --git a/props.properties b/props.properties index ac4ad45..4820490 100644 --- a/props.properties +++ b/props.properties @@ -15,8 +15,8 @@ time=24 hr ### Batch generation parameters ### -rxn_enum_script_path=~/Documents/softs/dexom-python/dexom_python/enum_functions/rxn_enum_functions.py 
-div_enum_script_path=~/Documents/softs/dexom-python/dexom_python/enum_functions/diversity_enum_functions.py +rxn_enum_script_path=~/work/dexom-python/dexom_python/enum_functions/rxn_enum_functions.py +div_enum_script_path=~/work/dexom-python/dexom_python/enum_functions/diversity_enum_functions.py ### DAR extraction parameters ### cutoff=0.2 -- GitLab From bafedc1e5f34dbfccdcff216a0c2c152eade58ef Mon Sep 17 00:00:00 2001 From: Louison Fresnais <louison.fresnais@inrae.fr> Date: Thu, 11 Jan 2024 15:29:11 +0100 Subject: [PATCH 2/2] update batch functions --- BRANCH AIM.md | 20 ++++++++++++++++++++ __init__.py | 0 mana/batchs.py | 2 +- mana/results_processing.py | 10 ++++++---- props.properties | 14 +++++++------- 5 files changed, 34 insertions(+), 12 deletions(-) create mode 100644 BRANCH AIM.md create mode 100644 __init__.py diff --git a/BRANCH AIM.md b/BRANCH AIM.md new file mode 100644 index 0000000..c781959 --- /dev/null +++ b/BRANCH AIM.md @@ -0,0 +1,20 @@ +# AIM : Assessing the robustness to the sampling approach + +To reduce the computing time, we adapted the DEXOM approach. +The adapted DEXOM approach consists of : +* **Full Reaction-Enum procedure** +* **Stratified + Random sampling to select 1% of Reaction-Enum solutions** +* **Diversity-Enum starting from each selected solution** + +# Assessment methodology + +To assess how the solutions sampling may affect the results (*e.g.* the list of DARs), we will perform the adapted DEXOM approach several times (5) for the amiodarone use case. 
+Practically it means: + 5 adapted DEXOM runs for 003016028014.CEL (amiodarone, ctrl, 24, sample1) + 5 adapted DEXOM runs for 003016028015.CEL (amiodarone, ctrl, 24, sample2) + 5 adapted DEXOM runs for 003016028020.CEL (amiodarone, high, 24, sample1) + 5 adapted DEXOM runs for 003016028021.CEL (amiodarone, high, 24, sample2) + +Since in each adapted DEXOM run a random sampling is performed in each range defined by the stratified sampling step, we will assess how the random solution sampling might affect the results. + +# Assessment results \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mana/batchs.py b/mana/batchs.py index 8451e3c..524c039 100644 --- a/mana/batchs.py +++ b/mana/batchs.py @@ -70,7 +70,7 @@ def write_div_enum_script(script_path,batch_directory, rxn_enum_set_dir,output_d f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 72:00:00\n#SBATCH -J div_enum\n#SBATCH -o %s/runout%s_div.out\n#SBATCH ' '-e %s/runerr%s_div.out\nsource activate %s \n' % (str(log_dir),str(barcode),str(log_dir),str(barcode), str(env))) - with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_diversity_enum.sh", "dist_anneal") as f: + with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_diversity_enum.sh", "a") as f: f.write('python %s -o %s/%s_div_enum_%i -m %s -r %s -p %s -a %.5f -i %i --obj_tol %.4f' % (script_path,output_directory, barcode, i, modelfile, weightfile, prevsol_file, dist_anneal, iters, obj_tol)) else: diff --git a/mana/results_processing.py b/mana/results_processing.py index 0193dc8..8213939 100644 --- a/mana/results_processing.py +++ b/mana/results_processing.py @@ -158,7 +158,7 @@ def concatenate_csv(filenames,out_dir,col_index,single_csv,index_suffix=""): return -def 
remove_done_batchs(batch_dir,result_dir,launch_undone = True,relax_param = False,enum_type="reaction_enum", para_batch=False, env="MANA"): """remove_done_batchs. Parameters @@ -175,6 +175,8 @@ def remove_done_batchs(batch_dir,result_dir,launch_undone = True,relax_param = F string indicating which type of enumeration is being processed (optional) para_batch : boolean if True, launch each batch file independantly (instead of parallel on conditions, parallel on batch) + env : str + name of the anaconda environment to be activated Returns ------- @@ -212,12 +214,12 @@ def remove_done_batchs(batch_dir,result_dir,launch_undone = True,relax_param = F if '#!/bin/bash' in content: continue with open(batch_dir+batch,'w') as f: - f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 48:00:00\n#SBATCH -J '+enum_type+'\n#SBATCH -o log_dir/runout_relaunch.out\n#SBATCH ' - '-e log_dir/runerr_relaunch.out\nsource activate cobrapy\n'+content) + f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=4\n#SBATCH -t 48:00:00\n#SBATCH -J '+enum_type+'\n#SBATCH -o log_dir/runout_relaunch.out\n#SBATCH ' + '-e log_dir/runerr_relaunch.out\nsource activate '+env+'\n'+content) if launch_undone == True: with open(batch_dir.split('/')[0]+"/launch_failed_batch_"+enum_type+".sh", "w+") as f: f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 48:00:00\n#SBATCH -J '+enum_type+'\n#SBATCH -o log_dir/runout_relaunch.out\n#SBATCH ' - '-e log_dir/runerr_relaunch.out\nsource activate cobrapy\n ls '+batch_dir+'*enum.sh|xargs -n 1 -P 1 bash') + '-e log_dir/runerr_relaunch.out\nsource activate '+env+'\n ls '+batch_dir+'*enum.sh|xargs -n 1 -P 1 bash') return removed_batchs def remove_zerobiomass_solutions(enum_dir,reaction_list,separator=','): diff --git a/props.properties b/props.properties index 4820490..9219cc6 100644 --- a/props.properties +++ b/props.properties @@ -1,14 +1,14 @@ ### 
General parameters ### -working_path=tests/ +#working_path=~/work/MANA/tests/ #table files must be provided in the tsv format (delimiter = tabulation) -data=tests/input_data/test_dataset.tsv -pheno=tests/input_data/pheno_annotated.tsv -modelFile=tests/input_data/recon2v2_biomass_corrected.json -rListFile=tests/input_data/recon2_2_reactions.csv +data=/input_data/test_dataset.tsv +pheno=/input_data/pheno_annotated.tsv +modelFile=/input_data/recon2v2_biomass_corrected.json +rListFile=/input_data/recon2_2_reactions.csv #compounds must be separated with a / cpds=amiodarone -mappingFile=tests/input_data/hgnc_custom_set.txt +mappingFile=/input_data/hgnc_custom_set.txt modelId=recon2.2 dose=Control time=24 hr @@ -26,4 +26,4 @@ baseline_noise_filtering=True all_cpds=amiodarone/valproic_acid/ethanol/tetracycline/rifampicin/allopurinol/indomethacin/sulindac ### Met4j input parameters -sbmlModel=tests/input_data/recon2v2_biomass_corrected.sbml \ No newline at end of file +sbmlModel=/input_data/recon2v2_biomass_corrected.sbml \ No newline at end of file -- GitLab