diff --git a/docs/platform/ref/data.md b/docs/platform/ref/data.md new file mode 100644 index 00000000..90a610b0 --- /dev/null +++ b/docs/platform/ref/data.md @@ -0,0 +1,71 @@ +# Data Platform API + +The DeepOriginClient can be used to access the data platform API using: + +```{.python notest} +from deeporigin.platform.client import DeepOriginClient + +client = DeepOriginClient() +``` + +Then, the following methods can be used, for example: + +```{.python notest} +# Check the health status of the data platform +health_status = client.data.health() + +# Search ligands joined with tool results +results = client.data.search_ligands_with_results( + limit=10, + experiments=[{"toolId": "deeporigin.docking"}], +) + +# Search an entity (e.g., ligands) +results = client.data.search("ligands") + +# Search ligands using the convenience method +results = client.data.search_ligands(limit=10) + +# Search proteins using the convenience method +results = client.data.search_proteins(limit=10) + +# Create a new ligand +ligand = client.data.create_ligand( + project_id="\\x0011223344556677", + canonical_smiles="CCOc1ccc2nc(S(=O)(=O)N3CCN(CC3)C)c(N)c2c1", + inchi_key="BSYNRYMUTXBXSQ-UHFFFAOYSA-N", + inchi="InChI=1S/C20H24N4O4S/.../h1-4,6-9H,5,10-14H2,(H,22,23)", + smiles="CCOc1ccc2nc(S(=O)(=O)N3CCN(CC3)C)c(N)c2c1", + name="Compound-12345", + formal_charge=0, + hbond_donor_count=1, + hbond_acceptor_count=6, + rotatable_bond_count=5, + tpsa=85.12, + molecular_weight=447.5, +) + +# List projects +projects = client.data.list_projects() + +# List public models +models = client.data.list_models() +``` + + +::: src.platform.data.Data + options: + heading_level: 2 + docstring_style: google + show_root_heading: true + show_category_heading: true + show_object_full_path: false + show_root_toc_entry: false + inherited_members: true + members_order: alphabetical + filters: + - "!^_" # Exclude private members (names starting with "_") + show_signature: true + show_signature_annotations: true + 
class Data:
    """Data Platform API wrapper.

    Provides access to data platform-related endpoints through the
    DeepOriginClient. Each public method builds a request path (and, for
    POST endpoints, a JSON body) and forwards it to the client's
    ``get_json`` / ``post_json`` helpers, returning their result unchanged.
    """

    def __init__(self, client: DeepOriginClient) -> None:
        """Initialize Data wrapper.

        Args:
            client: The DeepOriginClient instance to use for API calls.
        """
        self._c = client
        # Cached ``list_models`` response; ``None`` until the first fetch.
        self._models: dict | None = None

    @staticmethod
    def _without_none(fields: dict[str, Any]) -> dict[str, Any]:
        """Return ``fields`` minus every entry whose value is ``None``."""
        return {key: value for key, value in fields.items() if value is not None}

    @staticmethod
    def _molecular_weight_props(
        min_molecular_weight: float | int | None,
        max_molecular_weight: float | int | None,
    ) -> list[dict[str, Any]]:
        """Build the ``props`` filter entries for a molecular weight range."""
        props: list[dict[str, Any]] = []
        if min_molecular_weight is not None:
            props.append(
                {
                    "column": "molecular_weight",
                    "op": "gte",
                    "value": min_molecular_weight,
                }
            )
        if max_molecular_weight is not None:
            props.append(
                {
                    "column": "molecular_weight",
                    "op": "lte",
                    "value": max_molecular_weight,
                }
            )
        return props

    def health(self) -> dict:
        """Check the health status of the data platform.

        Returns:
            Dictionary containing the health status response.
        """
        return self._c.get_json("/data-platform/health")

    def list_models(self) -> dict:
        """List public models.

        The result is cached per instance: the endpoint is queried on the
        first call only and the same dictionary object is returned afterwards.

        Returns:
            Dictionary containing the list of models.
        """
        if self._models is None:
            self._models = self._c.get_json(
                f"/data-platform/{self._c.org_key}/meta/models"
            )
        return self._models

    def search_ligands_with_results(
        self,
        *,
        cursor: str | None = None,
        experiments: list[dict[str, str]] | None = None,
        filter_dict: dict[str, Any] | None = None,
        limit: int | None = None,
        offset: int | None = None,
        select: list[str] | None = None,
        sort: dict[str, str] | None = None,
    ) -> dict:
        """Search ligands joined with tool results (wide pivot view).

        Arguments left as ``None`` are omitted from the request body.

        Args:
            cursor: Cursor for pagination.
            experiments: List of experiment filters, each containing toolId and
                optionally toolVersion.
            filter_dict: Additional filter criteria as a dictionary.
            limit: Maximum number of results to return. Defaults to 100.
            offset: Number of results to skip.
            select: List of fields to select in the response.
            sort: Dictionary mapping field names to sort order ("asc" or "desc").

        Returns:
            Dictionary containing the search results.
        """
        body = self._without_none(
            {
                "cursor": cursor,
                "experiments": experiments,
                "filter": filter_dict,
                "limit": limit,
                "offset": offset,
                "select": select,
                "sort": sort,
            }
        )
        return self._c.post_json(
            f"/data-platform/{self._c.org_key}/ligands_with_results/search",
            body=body,
        )

    def search(
        self,
        entity: str,
        *,
        cursor: str | None = None,
        filter_dict: dict[str, Any] | None = None,
        limit: int | None = None,
        offset: int | None = None,
        select: list[str] | None = None,
        sort: dict[str, str] | None = None,
    ) -> dict:
        """Search an entity (table).

        The entity name is validated against ``list_models()`` before any
        search request is sent. The filter always carries ``"deleted": False``;
        a ``"deleted"`` entry supplied by the caller is overridden. The
        caller's ``filter_dict`` is never mutated.

        Args:
            entity: Entity (table) name to search (e.g., "ligands").
            cursor: Cursor for pagination.
            filter_dict: Additional filter criteria as a dictionary.
            limit: Maximum number of results to return. Defaults to 100.
            offset: Number of results to skip.
            select: List of fields to select in the response.
            sort: Dictionary mapping field names to sort order ("asc" or "desc").

        Returns:
            Dictionary containing the search results.

        Raises:
            ValueError: If the entity is not a valid table name.
        """
        # Validate entity against list of available models
        models_response = self.list_models()
        valid_table_names = {
            model["tableName"] for model in models_response.get("models", [])
        }
        if entity not in valid_table_names:
            raise ValueError(
                f"Invalid entity '{entity}'. Valid entities are: {', '.join(sorted(valid_table_names))}"
            )

        # Work on a copy so the caller's dict is untouched, and force
        # ``deleted`` to False regardless of what was passed in.
        effective_filter: dict[str, Any] = {**(filter_dict or {}), "deleted": False}

        body = self._without_none(
            {
                "cursor": cursor,
                "filter": effective_filter,
                "limit": limit,
                "offset": offset,
                "select": select,
                "sort": sort,
            }
        )
        return self._c.post_json(
            f"/data-platform/{self._c.org_key}/{entity}/search",
            body=body,
        )

    def search_ligands(
        self,
        *,
        cursor: str | None = None,
        filter: dict[str, Any] | None = None,
        min_molecular_weight: float | int | None = None,
        max_molecular_weight: float | int | None = None,
        limit: int | None = None,
        offset: int | None = None,
        select: list[str] | None = None,
        sort: dict[str, str] | None = None,
    ) -> dict:
        """Search ligands entity.

        Convenience method that calls search(entity="ligands"). As with
        ``search``, the request filter always ends up with ``"deleted": False``.

        Args:
            cursor: Cursor for pagination.
            filter: Additional filter criteria as a dictionary. It is copied,
                not mutated. Molecular weight conditions are appended to any
                ``"props"`` list it already contains.
            min_molecular_weight: Minimum molecular weight filter (inclusive).
            max_molecular_weight: Maximum molecular weight filter (inclusive).
            limit: Maximum number of results to return. Defaults to 100.
            offset: Number of results to skip.
            select: List of fields to select in the response.
            sort: Dictionary mapping field names to sort order ("asc" or "desc").

        Returns:
            Dictionary containing the search results.

        Raises:
            ValueError: If ligands is not a valid table name (should not happen).
        """
        # Shallow copy so the caller's filter is left untouched.
        filter_dict: dict[str, Any] = dict(filter) if filter is not None else {}

        props = self._molecular_weight_props(
            min_molecular_weight, max_molecular_weight
        )
        if props:
            # Concatenation builds a new list, so a caller-owned "props"
            # list is not modified either.
            filter_dict["props"] = filter_dict.get("props", []) + props

        return self.search(
            "ligands",
            cursor=cursor,
            filter_dict=filter_dict,
            limit=limit,
            offset=offset,
            select=select,
            sort=sort,
        )

    def search_proteins(
        self,
        *,
        cursor: str | None = None,
        pdb_id: str | None = None,
        min_molecular_weight: float | int | None = None,
        max_molecular_weight: float | int | None = None,
        sequence: str | None = None,
        limit: int | None = None,
        offset: int | None = None,
        select: list[str] | None = None,
        sort: dict[str, str] | None = None,
    ) -> dict:
        """Search proteins entity.

        Convenience method that calls search(entity="proteins"). As with
        ``search``, the request filter always ends up with ``"deleted": False``.

        Args:
            cursor: Cursor for pagination.
            pdb_id: Filter by PDB ID.
            min_molecular_weight: Minimum molecular weight filter (inclusive).
            max_molecular_weight: Maximum molecular weight filter (inclusive).
            sequence: Filter by FASTA sequence (exact match).
            limit: Maximum number of results to return. Defaults to 100.
            offset: Number of results to skip.
            select: List of fields to select in the response.
            sort: Dictionary mapping field names to sort order ("asc" or "desc").

        Returns:
            Dictionary containing the search results.

        Raises:
            ValueError: If proteins is not a valid table name (should not happen).
        """
        filter_dict: dict[str, Any] = {}
        if pdb_id is not None:
            filter_dict["pdb_id"] = pdb_id

        props = self._molecular_weight_props(
            min_molecular_weight, max_molecular_weight
        )
        if sequence is not None:
            props.append(
                {
                    "column": "fasta_sequence",
                    "op": "eq",
                    "value": sequence,
                }
            )
        if props:
            filter_dict["props"] = props

        return self.search(
            "proteins",
            cursor=cursor,
            filter_dict=filter_dict,
            limit=limit,
            offset=offset,
            select=select,
            sort=sort,
        )

    def create_ligand(
        self,
        *,
        project_id: str,
        canonical_smiles: str,
        inchi_key: str,
        inchi: str,
        smiles: str,
        name: str,
        formal_charge: int = 0,
        hbond_donor_count: int | None = None,
        hbond_acceptor_count: int | None = None,
        rotatable_bond_count: int | None = None,
        tpsa: float | None = None,
        molecular_weight: float | None = None,
        variant_name_tag: str = "",
    ) -> dict:
        """Create a new ligand.

        The properties are sent under the ``"set"`` key of the request body,
        together with ``"subtable_name": "ligands"``. Optional properties left
        as ``None`` are omitted.

        Args:
            project_id: Project ID for the ligand.
            canonical_smiles: Canonical SMILES string.
            inchi_key: InChI key.
            inchi: InChI string.
            smiles: SMILES string.
            name: Name of the ligand.
            formal_charge: Formal charge. Defaults to 0.
            hbond_donor_count: Number of hydrogen bond donors.
            hbond_acceptor_count: Number of hydrogen bond acceptors.
            rotatable_bond_count: Number of rotatable bonds.
            tpsa: Topological polar surface area.
            molecular_weight: Molecular weight.
            variant_name_tag: Variant name tag. Defaults to empty string.

        Returns:
            Dictionary containing the created ligand data.
        """
        # Fields that are always sent.
        set_dict: dict[str, Any] = {
            "project_id": project_id,
            "subtable_name": "ligands",
            "canonical_smiles": canonical_smiles,
            "inchi_key": inchi_key,
            "inchi": inchi,
            "smiles": smiles,
            "name": name,
            "formal_charge": formal_charge,
            "variant_name_tag": variant_name_tag,
        }
        # Optional fields are added only if provided.
        set_dict.update(
            self._without_none(
                {
                    "hbond_donor_count": hbond_donor_count,
                    "hbond_acceptor_count": hbond_acceptor_count,
                    "rotatable_bond_count": rotatable_bond_count,
                    "tpsa": tpsa,
                    "molecular_weight": molecular_weight,
                }
            )
        )

        return self._c.post_json(
            f"/data-platform/{self._c.org_key}/ligands",
            body={"set": set_dict},
        )

    def list_projects(self) -> dict:
        """List projects.

        Implemented as a POST to the projects search endpoint with an empty
        body.

        Returns:
            Dictionary containing the list of projects.
        """
        return self._c.post_json(
            f"/data-platform/{self._c.org_key}/projects/search",
            body={},
        )
# Mock data-platform routes. In the mock server these statements live inside
# the method that wires up ``self.app``; indent them to match when applying.

@self.app.get("/data-platform/health")
def data_platform_health() -> dict[str, str]:
    """Data platform health check endpoint."""
    return {"status": "ok"}


@self.app.post("/data-platform/{org_key}/ligands_with_results/search")
async def search_ligands_with_results(
    org_key: str, request: Request
) -> dict[str, Any]:
    """Search ligands joined with tool results."""
    await request.json()  # Consume request body
    # Return a mock response with empty data list
    return {
        "data": [],
        "count": 0,
    }


# NOTE: this fixed-path route must be registered BEFORE the generic
# "/{entity}/search" route below. Assuming ``self.app`` is a FastAPI app,
# routes are matched in registration order, so the generic route would
# otherwise capture "projects" as an entity and this handler would never run.
@self.app.post("/data-platform/{org_key}/projects/search")
async def list_projects(org_key: str, request: Request) -> dict[str, Any]:
    """List projects."""
    await request.json()  # Consume request body
    # Return a mock response with empty projects list
    return {
        "data": [],
        "count": 0,
    }


@self.app.post("/data-platform/{org_key}/{entity}/search")
async def search_entity(
    org_key: str, entity: str, request: Request
) -> dict[str, Any]:
    """Search an entity (catch-all for any table name)."""
    await request.json()  # Consume request body
    # Return a mock response with empty data list
    return {
        "data": [],
        "count": 0,
    }


@self.app.post("/data-platform/{org_key}/ligands")
async def create_ligand(org_key: str, request: Request) -> dict[str, Any]:
    """Create a new ligand.

    Echoes the fields from the request's ``set`` object on top of a few
    generated bookkeeping fields. If the request carries a non-empty
    ``returning`` list, only those keys are returned.
    """
    body = await request.json()
    set_data = body.get("set", {})
    returning = body.get("returning", [])

    # Generate mock response with canonical_id and version
    now = datetime.now(timezone.utc)
    canonical_id = str(uuid.uuid4())
    response_data: dict[str, Any] = {
        "canonical_id": canonical_id,
        "version": 1,
        # Millisecond-precision UTC timestamp with a trailing "Z":
        # "%f" yields microseconds, so the last three digits are cut off.
        "valid_from": now.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z",
        "valid_to": None,
        "modified_by": "test-user",
        "deleted": False,
        "structure_key": str(uuid.uuid4()),
    }

    # Include all fields from set_data (they win over the generated ones)
    response_data.update(set_data)

    # Filter to only return requested fields if specified
    if returning:
        response_data = {
            k: v for k, v in response_data.items() if k in returning
        }

    return response_data


@self.app.get("/data-platform/{org_key}/meta/models")
def list_models(org_key: str) -> dict[str, Any]:
    """List public models."""
    return {
        "models": [
            {"tableName": "ligands", "visibility": "public"},
            {"tableName": "proteins", "visibility": "public"},
            {"tableName": "patents", "visibility": "public"},
            {"tableName": "projects", "visibility": "public"},
            {"tableName": "ui_settings", "visibility": "public"},
            {"tableName": "executions", "visibility": "public"},
            {"tableName": "execution_subjects", "visibility": "public"},
            {"tableName": "results", "visibility": "public"},
            {"tableName": "result_table_catalog", "visibility": "public"},
        ]
    }
def _check_search_payload(response):
    """Assert that ``response`` is a dict carrying a list under ``data``."""
    assert isinstance(response, dict), "Expected a dictionary response"
    assert "data" in response, "Expected 'data' key in response"
    assert isinstance(response["data"], list), "Expected 'data' to be a list"


def test_data_platform_health_lv1():
    """The health endpoint reports an ``ok`` status."""
    health = DeepOriginClient().data.health()

    assert isinstance(health, dict), "Expected a dictionary response"
    assert "status" in health, "Expected 'status' key in response"
    assert health["status"] == "ok", "Expected status to be 'ok'"


def test_search_ligands_with_results_lv1():
    """Searching ligands joined with tool results returns a data list."""
    data_api = DeepOriginClient().data
    _check_search_payload(
        data_api.search_ligands_with_results(
            limit=10,
            experiments=[{"toolId": "test-tool"}],
        )
    )


def test_search_entity_lv1():
    """Searching a known entity returns a data list."""
    data_api = DeepOriginClient().data
    _check_search_payload(data_api.search("ligands"))


def test_search_entity_invalid_entity():
    """Searching an unknown entity raises ValueError."""
    data_api = DeepOriginClient().data
    with pytest.raises(ValueError, match="Invalid entity 'invalid_table'"):
        data_api.search("invalid_table")


def test_search_ligands_lv1():
    """The ligand convenience search returns a data list."""
    data_api = DeepOriginClient().data
    _check_search_payload(data_api.search_ligands())


def test_search_ligands_molecular_weight():
    """The ligand search accepts molecular weight bounds."""
    data_api = DeepOriginClient().data
    _check_search_payload(
        data_api.search_ligands(min_molecular_weight=250, max_molecular_weight=550)
    )


def test_search_proteins_lv1():
    """The protein convenience search returns a data list."""
    data_api = DeepOriginClient().data
    _check_search_payload(data_api.search_proteins())


def test_search_proteins_molecular_weight():
    """The protein search accepts molecular weight bounds."""
    data_api = DeepOriginClient().data
    _check_search_payload(
        data_api.search_proteins(min_molecular_weight=250, max_molecular_weight=550)
    )


def test_search_proteins_sequence():
    """The protein search accepts a sequence filter."""
    data_api = DeepOriginClient().data
    fasta = "MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQAPILSRVGDGTQDNLSGAEKAVQVKVKALPDAQFEVVHSLAKWKRQTLGQHDFSAGEGLYTHMKALRPDEDRLSPLHSVYVDQWDWERVMGDGERQFSTLKSTVEAIWAGIKATEAAVSEEFGLAPFLPDQIHFVHSQELLSRYPDLDAKGRERAIAKDLGAVFLVGIGGKLSDGHRHDVRAPDYDDWSTPSELGHAGLNGDILVWNPVLEDAFELSSMGIRVDADTLKHQLALTGDEDRLELEWHQALLRGEMPQTIGGGIGQSRLTMLLLQLPHIGQVQAGVWPAAVRESVPSLL"
    _check_search_payload(data_api.search_proteins(sequence=fasta))


def test_list_models_lv1():
    """Listing models returns a non-empty list of public models."""
    listing = DeepOriginClient().data.list_models()

    assert isinstance(listing, dict), "Expected a dictionary response"
    assert "models" in listing, "Expected 'models' key in response"
    assert isinstance(listing["models"], list), "Expected 'models' to be a list"
    assert len(listing["models"]) > 0, "Expected at least one model"
    # Verify structure of first model
    first_model = listing["models"][0]
    assert "tableName" in first_model, "Expected 'tableName' key in model"
    assert "visibility" in first_model, "Expected 'visibility' key in model"
    assert first_model["visibility"] == "public", "Expected visibility to be 'public'"


def test_create_ligand_lv1():
    """Creating a ligand echoes the submitted fields plus id and version."""
    ligand_fields = {
        "project_id": "\\x0011223344556677",
        "canonical_smiles": "CCOc1ccc2nc(S(=O)(=O)N3CCN(CC3)C)c(N)c2c1",
        "inchi_key": "BSYNRYMUTXBXSQ-UHFFFAOYSA-N",
        "inchi": "InChI=1S/C20H24N4O4S/.../h1-4,6-9H,5,10-14H2,(H,22,23)",
        "smiles": "CCOc1ccc2nc(S(=O)(=O)N3CCN(CC3)C)c(N)c2c1",
        "name": "Compound-12345",
        "formal_charge": 0,
        "hbond_donor_count": 1,
        "hbond_acceptor_count": 6,
        "rotatable_bond_count": 5,
        "tpsa": 85.12,
        "molecular_weight": 447.5,
        "variant_name_tag": "",
    }
    created = DeepOriginClient().data.create_ligand(**ligand_fields)

    assert isinstance(created, dict), "Expected a dictionary response"
    assert "canonical_id" in created, "Expected 'canonical_id' key in response"
    assert "version" in created, "Expected 'version' key in response"
    assert created["version"] == 1, "Expected version to be 1"
    assert "name" in created, "Expected 'name' key in response"
    assert created["name"] == "Compound-12345", "Expected name to match"
    assert "canonical_smiles" in created, "Expected 'canonical_smiles' key in response"
    assert (
        created["canonical_smiles"] == "CCOc1ccc2nc(S(=O)(=O)N3CCN(CC3)C)c(N)c2c1"
    ), "Expected canonical_smiles to match"
def test_list_projects_lv1():
    """Listing projects yields a dict with a ``data`` list and a ``count`` entry."""
    projects = DeepOriginClient().data.list_projects()

    assert isinstance(projects, dict), "Expected a dictionary response"
    assert "data" in projects, "Expected 'data' key in response"
    assert isinstance(projects["data"], list), "Expected 'data' to be a list"
    assert "count" in projects, "Expected 'count' key in response"