From e0eddbcd5b824b4eb27c6a8591808ee58e4d1c31 Mon Sep 17 00:00:00 2001
From: LouisonF <fresnaislouison@gmail.com>
Date: Mon, 8 Jan 2024 09:15:12 +0100
Subject: [PATCH 1/2] update batchs and results processing parameters

---
 mana/batchs.py             | 24 ++++++++++++++----------
 mana/results_processing.py |  6 ++++--
 props.properties           |  4 ++--
 3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/mana/batchs.py b/mana/batchs.py
index 2bcb59e..8451e3c 100644
--- a/mana/batchs.py
+++ b/mana/batchs.py
@@ -4,7 +4,7 @@ import os
 import pandas as pd
 
 def write_div_enum_script(script_path,batch_directory, rxn_enum_set_dir,output_directory, modelfile, weightfile,\
-						   reactionFile, prev_sol_dir ='prev_sol_dir/', log_dir='log_dir',dist_anneal=0.9, obj_tol=0.01,\
+						   reactionFile, prev_sol_dir ='prev_sol_dir/', log_dir='log_dir',env="MANA",dist_anneal=0.9, obj_tol=0.01,\
 							  iters=100,para_batchs=False):
 	"""write_div_enum_script.
 
@@ -29,6 +29,8 @@ def write_div_enum_script(script_path,batch_directory, rxn_enum_set_dir,output_d
 		process should be saved
 	log_dir : str
 		path to the directory were log files should be stored
+	env : str
+		name of the anaconda environment to be activated
 	dist_anneal : float
 		dexom-python parameter, 0<=a<=1 controls the distance between each successive solution
 	obj_tol : float
@@ -66,8 +68,8 @@ def write_div_enum_script(script_path,batch_directory, rxn_enum_set_dir,output_d
 			if para_batchs:
 				with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_diversity_enum.sh", "w+") as f:
 					f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 72:00:00\n#SBATCH -J div_enum\n#SBATCH -o %s/runout%s_div.out\n#SBATCH '
-						'-e %s/runerr%s_div.out\nsource activate cobrapy \n'
-						% (str(log_dir),str(barcode),str(log_dir),str(barcode)))
+						'-e %s/runerr%s_div.out\nsource activate %s \n'
+						% (str(log_dir),str(barcode),str(log_dir),str(barcode), str(env)))
 				with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_diversity_enum.sh", "dist_anneal") as f:
 					f.write('python %s -o %s/%s_div_enum_%i -m %s -r %s -p %s -a %.5f -i %i --obj_tol %.4f'
 						% (script_path,output_directory, barcode, i, modelfile, weightfile, prevsol_file, dist_anneal, iters, obj_tol))
@@ -79,11 +81,11 @@ def write_div_enum_script(script_path,batch_directory, rxn_enum_set_dir,output_d
 	if para_batchs == False:
 		with open(batch_directory+"/runfiles_"+barcode+"_diversity_enum.sh", "w+") as f:
 			f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 72:00:00\n#SBATCH -J div_enum\n#SBATCH -o %s/runout%s_div.out\n#SBATCH '
-					'-e %s/runerr%s_div.out\nsource activate cobrapy\nls %s/batch/%s_*_diversity_enum.sh|xargs -n 1 -P 1 bash'
-					% (str(log_dir),str(barcode),str(log_dir),str(barcode),str(batch_directory),str(barcode)))
+					'-e %s/runerr%s_div.out\nsource activate %s\nls %s/batch/%s_*_diversity_enum.sh|xargs -n 1 -P 1 bash'
+					% (str(log_dir),str(barcode),str(log_dir),str(barcode),str(env),str(batch_directory),str(barcode)))
 
 def write_rxn_enum_script(script_path,batch_directory,output_directory, modelfile, weightfile,\
-						   reactionFile="", log_dir='log_dir',obj_tol=0.001, iters=100,para_batchs=False):
+						   reactionFile="", log_dir='log_dir',env="MANA",obj_tol=0.001, iters=100,para_batchs=False):
 	"""write_rxn_enum_script.
 
 	Parameters
@@ -102,6 +104,8 @@ def write_rxn_enum_script(script_path,batch_directory,output_directory, modelfil
 		path to the file that contains the list of reactions in the model
 	log_dir : str
 		path to the directory were log files should be stored
+	env : str
+		name of the anaconda environment to be activated
 	obj_tol : float
 		dexom-python parameter, objective value tolerance, as a fraction of the original value
 	iters : int
@@ -121,8 +125,8 @@ def write_rxn_enum_script(script_path,batch_directory,output_directory, modelfil
 		for i in range(rxn_num):
 			with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_reaction_enum.sh", "w+") as f:
 				f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 24:00:00\n#SBATCH -J rxn_enum\n#SBATCH -o %s/runout%s_div.out\n#SBATCH '
-						'-e %s/runerr%s_div.out\nsource activate cobrapy \n'
-						% (str(log_dir),str(barcode),str(log_dir),str(barcode)))
+						'-e %s/runerr%s_div.out\nsource activate %s \n'
+						% (str(log_dir),str(barcode),str(log_dir),str(barcode),str(env)))
 			with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_reaction_enum.sh", "a") as f:
 				f.write('python %s -o %s/%s_rxn_enum_%i --range %i_%i -m %s -r %s -l %s '
 						'-t 600 --mipgap %f \n' % (script_path,output_directory,barcode, i, i*iters, i*iters+iters, modelfile, weightfile, reactionFile, obj_tol))
@@ -133,5 +137,5 @@ def write_rxn_enum_script(script_path,batch_directory,output_directory, modelfil
 						'-t 600 --mipgap %f \n' % (script_path,output_directory,barcode, i, i*iters, i*iters+iters, modelfile, weightfile, reactionFile, obj_tol))
 			with open(batch_directory+"/runfiles_"+barcode+"_reaction_enum.sh", "w+") as f:
 				f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 24:00:00\n#SBATCH -J rxn_enum\n#SBATCH -o %s/runout%s_div.out\n#SBATCH '
-						'-e %s/runerr%s_div.out\nsource activate cobrapy\nls %s/batch/%s_{0..%i}_reaction_enum.sh|xargs -n 1 -P 1 bash'
-						 % (str(log_dir),str(barcode),str(log_dir),str(barcode),str(batch_directory),str(barcode), int(rxn_num-1)))
\ No newline at end of file
+						'-e %s/runerr%s_div.out\nsource activate %s\nls %s/batch/%s_{0..%i}_reaction_enum.sh|xargs -n 1 -P 1 bash'
+						 % (str(log_dir),str(barcode),str(log_dir),str(barcode),str(env),str(batch_directory),str(barcode), int(rxn_num-1)))
\ No newline at end of file
diff --git a/mana/results_processing.py b/mana/results_processing.py
index f40cf15..0193dc8 100644
--- a/mana/results_processing.py
+++ b/mana/results_processing.py
@@ -142,11 +142,13 @@ def concatenate_csv(filenames,out_dir,col_index,single_csv,index_suffix=""):
 	combined_csv = pd.concat(list_csvs,ignore_index=False)
 	if nrenum > 0:
 		#Modify index after reaction_enum solutions
-		index_list = list(os.path.basename(filenames[0]).split('_')[0]+'_' + combined_csv.index.astype(str) + str(index_suffix))
+		line_count = pd.RangeIndex(0,combined_csv.shape[0],1)
+		index_list = list(os.path.basename(filenames[0]).split('_')[0]+'_' + line_count.astype(str) + str(index_suffix))
 		index_list[0:nrenum] = list(combined_csv[0:nrenum]['Solutions_IDS'])
 		combined_csv.index = index_list
 	else:
-		combined_csv.index = os.path.basename(filenames[0]).split('_')[0]+'_' + combined_csv.index.astype(str) + str(index_suffix)
+		line_count = pd.RangeIndex(0,combined_csv.shape[0],1)
+		combined_csv.index = os.path.basename(filenames[0]).split('_')[0]+'_' + line_count.astype(str) + str(index_suffix)
 	combined_csv.drop(combined_csv.columns[0],axis=1,inplace=True)
 	combined_csv.drop_duplicates(inplace=True) #remove identical solutions
 	if single_csv:
diff --git a/props.properties b/props.properties
index ac4ad45..4820490 100644
--- a/props.properties
+++ b/props.properties
@@ -15,8 +15,8 @@ time=24 hr
 
 ### Batch generation parameters ###
 
-rxn_enum_script_path=~/Documents/softs/dexom-python/dexom_python/enum_functions/rxn_enum_functions.py
-div_enum_script_path=~/Documents/softs/dexom-python/dexom_python/enum_functions/diversity_enum_functions.py
+rxn_enum_script_path=~/work/dexom-python/dexom_python/enum_functions/rxn_enum_functions.py
+div_enum_script_path=~/work/dexom-python/dexom_python/enum_functions/diversity_enum_functions.py
 
 ### DAR extraction parameters  ###
 cutoff=0.2
-- 
GitLab


From bafedc1e5f34dbfccdcff216a0c2c152eade58ef Mon Sep 17 00:00:00 2001
From: Louison Fresnais <louison.fresnais@inrae.fr>
Date: Thu, 11 Jan 2024 15:29:11 +0100
Subject: [PATCH 2/2] update batch functions

---
 BRANCH AIM.md              | 20 ++++++++++++++++++++
 __init__.py                |  0
 mana/batchs.py             |  2 +-
 mana/results_processing.py | 10 ++++++----
 props.properties           | 14 +++++++-------
 5 files changed, 34 insertions(+), 12 deletions(-)
 create mode 100644 BRANCH AIM.md
 create mode 100644 __init__.py

diff --git a/BRANCH AIM.md b/BRANCH AIM.md
new file mode 100644
index 0000000..c781959
--- /dev/null
+++ b/BRANCH AIM.md	
@@ -0,0 +1,20 @@
+# AIM : Assessing the robustness to the sampling approach
+
+To reduce the computing time, we adapted the DEXOM approach.
+The adapted DEXOM approach consists of:
+* **Full Reaction-Enum procedure**
+* **Stratified + Random sampling to select 1% of Reaction-Enum solutions**
+* **Diversity-Enum starting from each selected solution**
+
+# Assessment methodology
+
+To assess how the solution sampling may affect the results (*e.g.* the list of DARs), we will run the adapted DEXOM approach several times (5) for the amiodarone use case.
+In practice, this means:
+    5 adapted DEXOM runs for 003016028014.CEL (amiodarone, ctrl, 24, sample1)
+    5 adapted DEXOM runs for 003016028015.CEL (amiodarone, ctrl, 24, sample2)
+    5 adapted DEXOM runs for 003016028020.CEL (amiodarone, high, 24, sample1)
+    5 adapted DEXOM runs for 003016028021.CEL (amiodarone, high, 24, sample2)
+
+Since, in each adapted DEXOM run, random sampling is performed within each range defined by the stratified sampling step, we will assess how this random solution sampling might affect the results.
+
+# Assessment results
\ No newline at end of file
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/mana/batchs.py b/mana/batchs.py
index 8451e3c..524c039 100644
--- a/mana/batchs.py
+++ b/mana/batchs.py
@@ -70,7 +70,7 @@ def write_div_enum_script(script_path,batch_directory, rxn_enum_set_dir,output_d
 					f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 72:00:00\n#SBATCH -J div_enum\n#SBATCH -o %s/runout%s_div.out\n#SBATCH '
 						'-e %s/runerr%s_div.out\nsource activate %s \n'
 						% (str(log_dir),str(barcode),str(log_dir),str(barcode), str(env)))
-				with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_diversity_enum.sh", "dist_anneal") as f:
+				with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_diversity_enum.sh", "a") as f:
 					f.write('python %s -o %s/%s_div_enum_%i -m %s -r %s -p %s -a %.5f -i %i --obj_tol %.4f'
 						% (script_path,output_directory, barcode, i, modelfile, weightfile, prevsol_file, dist_anneal, iters, obj_tol))
 			else:
diff --git a/mana/results_processing.py b/mana/results_processing.py
index 0193dc8..8213939 100644
--- a/mana/results_processing.py
+++ b/mana/results_processing.py
@@ -158,7 +158,7 @@ def concatenate_csv(filenames,out_dir,col_index,single_csv,index_suffix=""):
 	return
 
 
-def remove_done_batchs(batch_dir,result_dir,launch_undone = True,relax_param = False,enum_type="reaction_enum", para_batch=False):
+def remove_done_batchs(batch_dir,result_dir,launch_undone = True,relax_param = False,enum_type="reaction_enum", para_batch=False, env="MANA"):
 	"""remove_done_batchs.
 
 	Parameters
@@ -175,6 +175,8 @@ def remove_done_batchs(batch_dir,result_dir,launch_undone = True,relax_param = F
 		string indicating which type of enumeration is being processed (optional)
 	para_batch : boolean
 		if True, launch each batch file independantly (instead of parallel on conditions, parallel on batch)
+	env : str
+		name of the anaconda environment to be activated
 
 	Returns
 	-------
@@ -212,12 +214,12 @@ def remove_done_batchs(batch_dir,result_dir,launch_undone = True,relax_param = F
 			if '#!/bin/bash' in content:
 				continue
 			with open(batch_dir+batch,'w') as f:
-				f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 48:00:00\n#SBATCH -J '+enum_type+'\n#SBATCH -o log_dir/runout_relaunch.out\n#SBATCH '
-				'-e log_dir/runerr_relaunch.out\nsource activate cobrapy\n'+content)
+				f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=4\n#SBATCH -t 48:00:00\n#SBATCH -J '+enum_type+'\n#SBATCH -o log_dir/runout_relaunch.out\n#SBATCH '
+				'-e log_dir/runerr_relaunch.out\nsource activate '+env+'\n'+content)
 		if launch_undone == True:
 			with open(batch_dir.split('/')[0]+"/launch_failed_batch_"+enum_type+".sh", "w+") as f:
 				f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 48:00:00\n#SBATCH -J '+enum_type+'\n#SBATCH -o log_dir/runout_relaunch.out\n#SBATCH '
-				'-e log_dir/runerr_relaunch.out\nsource activate cobrapy\n ls '+batch_dir+'*enum.sh|xargs -n 1 -P 1 bash')
+				'-e log_dir/runerr_relaunch.out\nsource activate '+env+'\n ls '+batch_dir+'*enum.sh|xargs -n 1 -P 1 bash')
 	return removed_batchs
 
 def remove_zerobiomass_solutions(enum_dir,reaction_list,separator=','):
diff --git a/props.properties b/props.properties
index 4820490..9219cc6 100644
--- a/props.properties
+++ b/props.properties
@@ -1,14 +1,14 @@
 ### General parameters ###
 
-working_path=tests/
+#working_path=~/work/MANA/tests/
 #table files must be provided in the tsv format (delimiter = tabulation)
-data=tests/input_data/test_dataset.tsv
-pheno=tests/input_data/pheno_annotated.tsv
-modelFile=tests/input_data/recon2v2_biomass_corrected.json
-rListFile=tests/input_data/recon2_2_reactions.csv
+data=/input_data/test_dataset.tsv
+pheno=/input_data/pheno_annotated.tsv
+modelFile=/input_data/recon2v2_biomass_corrected.json
+rListFile=/input_data/recon2_2_reactions.csv
 #compounds must be separated with a /
 cpds=amiodarone
-mappingFile=tests/input_data/hgnc_custom_set.txt
+mappingFile=/input_data/hgnc_custom_set.txt
 modelId=recon2.2
 dose=Control
 time=24 hr
@@ -26,4 +26,4 @@ baseline_noise_filtering=True
 all_cpds=amiodarone/valproic_acid/ethanol/tetracycline/rifampicin/allopurinol/indomethacin/sulindac
 
 ### Met4j input parameters
-sbmlModel=tests/input_data/recon2v2_biomass_corrected.sbml
\ No newline at end of file
+sbmlModel=/input_data/recon2v2_biomass_corrected.sbml
\ No newline at end of file
-- 
GitLab