Hello,
I have multiple files, each containing a compound index, the compound's SMILES string, and two properties. I would like to run a Pareto optimization to find the compounds with the maximum values of both properties. To do so, I defined the problem as below, but I get an error that x is not an integer. I was wondering if you have any suggestions for this issue.
Thank you
Best Regards
Soodabeh
from pymoo.termination import get_termination
from pymoo.optimize import minimize
from pymoo.visualization.scatter import Scatter
from pymoo.core.problem import ElementwiseProblem
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.indicators.hv import HV
import numpy as np
import pandas as pd
import gc
import matplotlib.pyplot as plt

# Define the problem
class CompoundProblem(ElementwiseProblem):
    def __init__(self, chunk_paths):
        super().__init__(n_var=1, n_obj=2, n_constr=0, type_var=int)
        self.chunk_paths = chunk_paths
        self.cumulative_sizes = [0]
        for path in chunk_paths:
            chunk = pd.read_csv(path)
            # Filter out compounds based on uncertainty and toxicity
            chunk = chunk[(chunk['Uncertainty'] * 100 > 50) & (chunk['Toxic'] * 100 < 50)]
            self.cumulative_sizes.append(self.cumulative_sizes[-1] + len(chunk))
        self.xl = np.array([0])  # Lower bound (index of first compound)
        self.xu = np.array([self.cumulative_sizes[-1] - 1])  # Upper bound (index of last compound)
        print(self.xl)
        print(self.xu)

    def _evaluate(self, x, out, *args, **kwargs):
        print(f"x[0] type: {type(x[0])}, value: {x[0]}")  # Debug print statement
        compound_index = x[0]
        # Locate the chunk that holds this global compound index
        chunk_index = np.searchsorted(self.cumulative_sizes, compound_index, side='right') - 1
        index_in_chunk = compound_index - self.cumulative_sizes[chunk_index]
        chunk = pd.read_csv(self.chunk_paths[chunk_index])
        # Filter out compounds based on uncertainty and toxicity
        chunk = chunk[(chunk['Uncertainty'] * 100 > 50) & (chunk['Toxic'] * 100 < 50)]
        compound = chunk.iloc[index_in_chunk]
        out["F"] = np.array([compound['Uncertainty'] * 100, compound['Toxic'] * 100])

# List of chunk paths
chunk_paths = [f'/scratch/gpfs/sg6615/retraining/zinc/chunk_uncertainty_toxicity_{i}.csv' for i in range(5)]

# Initialize the problem
problem = CompoundProblem(chunk_paths)

# Define the algorithm
algorithm = NSGA2(pop_size=100)

# Define the termination criterion
termination = get_termination("n_gen", 100)

# Run the optimization
res = minimize(problem,
               algorithm,
               termination,
               save_history=True,
               verbose=True)

# Plot the results
plot = Scatter()
plot.add(res.F, color="red")
plot.show()
plot.save("pareto_front.png")

# Hypervolume vs. number of generations (a numpy array has no .hv() method,
# so use pymoo's HV indicator instead)
hv_indicator = HV(ref_point=np.array([10000, 10000]))
hv = [hv_indicator(algo.pop.get("F")) for algo in res.history]
It is difficult for me to follow your code as you posted it. But are you just trying to implement non-dominated sorting?
You can filter your data set by the constraints you have defined and then do something like this:
import pandas as pd
import numpy as np
import seaborn as sns
from numpy.random import RandomState
import matplotlib.pyplot as plt
from pymoo.util.nds.non_dominated_sorting import NonDominatedSorting
# random generator with seed
random_state = RandomState(1)
# create objective values randomly
n_obj = 2
F = random_state.random(size=(20, n_obj))
# convert to a pandas data frame
df = pd.DataFrame(F, columns=[f"f_{k+1}" for k in range(n_obj)])
# apply non-dominated sorting to the objectives
objs = ['f_1', 'f_2']
dz = (df
.assign(rank=lambda dd: NonDominatedSorting().do(dd[objs].values, return_rank=True)[1])
.sort_values(['rank'] + objs)
)
# plot the results (highlight each non-dominated rank)
plt.subplots(1, 1, figsize=(12, 4))
sns.scatterplot(data=dz, x='f_1', y='f_2', hue='rank', style='rank', palette="deep")
dz.head(10)
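If you do want to keep the NSGA2 run over an integer row index, note that NSGA2's default sampling, crossover, and mutation operate on real-valued variables, so x reaches _evaluate as a float regardless of type_var=int on the problem. A minimal sketch that keeps the variable integral, assuming a recent pymoo release (the operator setup below follows pymoo's integer-variable documentation):

from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.operators.sampling.rnd import IntegerRandomSampling
from pymoo.operators.crossover.sbx import SBX
from pymoo.operators.mutation.pm import PM
from pymoo.operators.repair.rounding import RoundingRepair

# NSGA2 configured with integer-aware operators: sampling draws integers,
# and a rounding repair keeps SBX/PM offspring on the integer grid
algorithm = NSGA2(pop_size=100,
                  sampling=IntegerRandomSampling(),
                  crossover=SBX(prob=1.0, eta=3.0, vtype=float, repair=RoundingRepair()),
                  mutation=PM(prob=1.0, eta=3.0, vtype=float, repair=RoundingRepair()),
                  eliminate_duplicates=True)

That said, since every candidate here is just a row of an existing table, the non-dominated sort above is the more direct route: it ranks all filtered compounds in one pass instead of searching over row indices.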