Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filter update #67

Merged
merged 29 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
fa27bfd
remove unnecessary inits and refactor
frederik-sandfort1 Aug 19, 2024
b706268
include smarts filter, smiles filter, descriptors filter
frederik-sandfort1 Aug 20, 2024
476d65a
Fix wrong typing that caused thousands of type ignores
frederik-sandfort1 Aug 20, 2024
f14b71a
linting and fix element number test
frederik-sandfort1 Aug 20, 2024
e3f5d2d
Merge branch 'main' into filter_update
frederik-sandfort1 Aug 21, 2024
c352144
reset name typing
frederik-sandfort1 Aug 21, 2024
5c95f81
Christians first review
frederik-sandfort1 Aug 22, 2024
16088db
more changes
frederik-sandfort1 Aug 22, 2024
b2ca26d
linting
frederik-sandfort1 Aug 22, 2024
81ffb7c
pylint
frederik-sandfort1 Aug 22, 2024
9fed198
rewrite filter logic (#71)
c-w-feldmann Aug 22, 2024
f49cb70
Combine filters with one base logic
frederik-sandfort1 Aug 22, 2024
91feed1
change dict to Mapping
c-w-feldmann Aug 22, 2024
1d70f17
Merge branch 'main' into filter_update
frederik-sandfort1 Aug 26, 2024
93e6183
isort
frederik-sandfort1 Aug 26, 2024
cd18310
Include comments
frederik-sandfort1 Sep 12, 2024
c0427ab
linting
frederik-sandfort1 Sep 12, 2024
cfdfd83
linting and ComplexFilter
frederik-sandfort1 Sep 12, 2024
b843657
typing, tests, complex filter naming
frederik-sandfort1 Sep 12, 2024
a93344c
finalize filter refactoring
frederik-sandfort1 Sep 12, 2024
235c8f8
Merge branch 'main' into filter_update
frederik-sandfort1 Oct 1, 2024
47d4d90
review Christian
frederik-sandfort1 Oct 1, 2024
1f8dc1c
pylint
frederik-sandfort1 Oct 1, 2024
d345191
include check for failed patterns in init
frederik-sandfort1 Oct 7, 2024
08d58f4
final review
frederik-sandfort1 Oct 7, 2024
ede7d39
final linting
frederik-sandfort1 Oct 7, 2024
222675c
final final linting
frederik-sandfort1 Oct 7, 2024
30e5701
final final final linting
frederik-sandfort1 Oct 7, 2024
aa2ab98
Merge branch 'main' into filter_update
frederik-sandfort1 Oct 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions molpipeline/abstract_pipeline_elements/mol2mol/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Initialize the module for abstract mol2mol elements."""

from molpipeline.abstract_pipeline_elements.mol2mol.filter import (
BaseKeepMatchesFilter,
BasePatternsFilter,
)

__all__ = ["BasePatternsFilter", "BaseKeepMatchesFilter"]
196 changes: 196 additions & 0 deletions molpipeline/abstract_pipeline_elements/mol2mol/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
"""Abstract classes for filters."""

import abc
from typing import Any, Literal, Optional, Union

try:
from typing import Self # type: ignore[attr-defined]
except ImportError:
from typing_extensions import Self

from molpipeline.abstract_pipeline_elements.core import MolToMolPipelineElement
from molpipeline.utils.value_conversions import count_value_to_tuple


class BaseKeepMatchesFilter(MolToMolPipelineElement, abc.ABC):
    """Abstract base for filters that keep or remove molecules based on matches.

    This class stores the two settings ``keep_matches`` and ``mode`` and makes
    them available through ``get_params`` / ``set_params``. It defines no
    matching logic of its own.
    """

    keep_matches: bool
    mode: Literal["any", "all"]

    def __init__(
        self,
        keep_matches: bool = True,
        mode: Literal["any", "all"] = "any",
        name: Optional[str] = None,
        n_jobs: int = 1,
        uuid: Optional[str] = None,
    ) -> None:
        """Initialize BaseKeepMatchesFilter.

        Parameters
        ----------
        keep_matches: bool, optional (default: True)
            If True, molecules containing the specified patterns are kept, else removed.
        mode: Literal["any", "all"], optional (default: "any")
            If "any", at least one of the specified patterns must be present in the molecule.
            If "all", all of the specified patterns must be present in the molecule.
        name: Optional[str], optional (default: None)
            Name of the pipeline element.
        n_jobs: int, optional (default: 1)
            Number of parallel jobs to use.
        uuid: str, optional (default: None)
            Unique identifier of the pipeline element.
        """
        super().__init__(name=name, n_jobs=n_jobs, uuid=uuid)
        self.keep_matches = keep_matches
        self.mode = mode

    def set_params(self, **parameters: Any) -> Self:
        """Set parameters of BaseKeepMatchesFilter.

        ``keep_matches`` and ``mode`` are handled here; every remaining
        parameter is forwarded to the parent class.

        Parameters
        ----------
        parameters: Any
            Parameters to set.

        Returns
        -------
        Self
            Self.
        """
        # Work on a copy so that the keys consumed here are not passed on to
        # the parent's set_params a second time.
        parameter_copy = dict(parameters)
        if "keep_matches" in parameter_copy:
            self.keep_matches = parameter_copy.pop("keep_matches")
        if "mode" in parameter_copy:
            self.mode = parameter_copy.pop("mode")
        super().set_params(**parameter_copy)
        return self

    def get_params(self, deep: bool = True) -> dict[str, Any]:
        """Get parameters of BaseKeepMatchesFilter.

        Parameters
        ----------
        deep: bool, optional (default: True)
            If True, return the parameters of all subobjects that are PipelineElements.

        Returns
        -------
        dict[str, Any]
            Parameters of BaseKeepMatchesFilter, i.e. the parent's parameters
            extended by ``keep_matches`` and ``mode``.
        """
        params = super().get_params(deep=deep)
        params["keep_matches"] = self.keep_matches
        params["mode"] = self.mode
        return params


class BasePatternsFilter(BaseKeepMatchesFilter, abc.ABC):
    """Abstract base for filters that keep or remove molecules based on patterns.

    Patterns are stored internally as a dictionary that maps each pattern
    string to a ``(minimum, maximum)`` count tuple.
    """

    _patterns: dict[str, tuple[Optional[int], Optional[int]]]

    def __init__(
        self,
        patterns: Union[
            list[str], dict[str, Union[int, tuple[Optional[int], Optional[int]]]]
        ],
        keep_matches: bool = True,
        mode: Literal["any", "all"] = "any",
        name: Optional[str] = None,
        n_jobs: int = 1,
        uuid: Optional[str] = None,
    ) -> None:
        """Initialize BasePatternsFilter.

        Parameters
        ----------
        patterns: Union[list[str], dict[str, Union[int, tuple[Optional[int], Optional[int]]]]]
            List of patterns to allow in molecules.
            Alternatively, a dictionary can be passed with patterns as keys
            and an int for exact count or a tuple of minimum and maximum.
        keep_matches: bool, optional (default: True)
            If True, molecules containing the specified patterns are kept, else removed.
        mode: Literal["any", "all"], optional (default: "any")
            If "any", at least one of the specified patterns must be present in the molecule.
            If "all", all of the specified patterns must be present in the molecule.
        name: Optional[str], optional (default: None)
            Name of the pipeline element.
        n_jobs: int, optional (default: 1)
            Number of parallel jobs to use.
        uuid: str, optional (default: None)
            Unique identifier of the pipeline element.
        """
        super().__init__(
            keep_matches=keep_matches, mode=mode, name=name, n_jobs=n_jobs, uuid=uuid
        )
        # Assignment goes through the property setter, which normalizes the
        # input to the internal dict representation.
        self.patterns = patterns  # type: ignore

    @property
    def patterns(self) -> dict[str, tuple[Optional[int], Optional[int]]]:
        """Get allowed patterns as dict."""
        return self._patterns

    @patterns.setter
    def patterns(
        self,
        patterns: Union[
            list[str], dict[str, Union[int, tuple[Optional[int], Optional[int]]]]
        ],
    ) -> None:
        """Set allowed patterns as dict.

        Parameters
        ----------
        patterns: Union[list[str], dict[str, Union[int, tuple[Optional[int], Optional[int]]]]]
            List (or set) of patterns; each pattern is stored with the count
            tuple ``(1, None)``.
            Alternatively, a dictionary with patterns as keys and count
            specifications as values; each value is converted with
            ``count_value_to_tuple``.
        """
        if isinstance(patterns, (list, set)):
            self._patterns = {pat: (1, None) for pat in patterns}
        else:
            self._patterns = {
                pat: count_value_to_tuple(count) for pat, count in patterns.items()
            }

    def get_params(self, deep: bool = True) -> dict[str, Any]:
        """Get parameters of BasePatternsFilter.

        Parameters
        ----------
        deep: bool, optional (default: True)
            If True, return the parameters of all subobjects that are PipelineElements.

        Returns
        -------
        dict[str, Any]
            Parameters of BasePatternsFilter.
        """
        params = super().get_params(deep=deep)
        if deep:
            # Build a fresh dict with fresh tuples so that the returned value
            # does not alias the internally stored patterns.
            params["patterns"] = {
                pat: (count_tuple[0], count_tuple[1])
                for pat, count_tuple in self.patterns.items()
            }
        else:
            # Shallow mode hands out the stored dict object itself.
            params["patterns"] = self.patterns
        return params

    def set_params(self, **parameters: Any) -> Self:
        """Set parameters of BasePatternsFilter.

        ``patterns`` is handled here; every remaining parameter is forwarded
        to the parent class.

        Parameters
        ----------
        parameters: Any
            Parameters to set.

        Returns
        -------
        Self
            Self.
        """
        # Work on a copy so that the key consumed here is not passed on to
        # the parent's set_params a second time.
        parameter_copy = dict(parameters)
        if "patterns" in parameter_copy:
            self.patterns = parameter_copy.pop("patterns")
        super().set_params(**parameter_copy)
        return self
6 changes: 6 additions & 0 deletions molpipeline/mol2mol/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
"""Init the module for mol2mol pipeline elements."""

from molpipeline.mol2mol.filter import (
DescriptorsFilter,
ElementFilter,
EmptyMoleculeFilter,
InorganicsFilter,
MixtureFilter,
SmartsFilter,
SmilesFilter,
)
from molpipeline.mol2mol.reaction import MolToMolReaction
from molpipeline.mol2mol.scaffolds import MakeScaffoldGeneric, MurckoScaffold
Expand Down Expand Up @@ -41,4 +44,7 @@
"SolventRemover",
"Uncharger",
"InorganicsFilter",
"SmartsFilter",
"SmilesFilter",
"DescriptorsFilter",
)
Loading
Loading