From 3a05544b046b5bd42ccb569a3c2e4737f07c8f5c Mon Sep 17 00:00:00 2001 From: Shubham Kaushal <63925481+shubham-kaushal@users.noreply.github.com> Date: Mon, 26 Feb 2024 10:33:29 +0530 Subject: [PATCH 01/57] update doc (#3) --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6ec1acb..085838f 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ endpoints_client = Endpoints(client=client, entity=client.user.name) template = EndpointAutogeneratedTemplateConfig(modelSource="huggingface",huggingfaceModel=EndpointAutogeneratedHFModelDetails(id="Falconsai/text_summarization")) endpoint = endpoints_client.create(templateConfig=template) ``` -To import gated/private models, you can add your HuggingFace key as a third party token and provide its `id` in the config. +To import gated/private models from Hugging Face, you can add your Hugging Face key as a third party token and provide its `id` in the config. ```py EndpointAutogeneratedHFModelDetails(id="Falconsai/text_summarization",keyId="") ``` @@ -116,3 +116,6 @@ print(resp.json()) } ] ``` + +## Development +See [CONTRIBUTING.md](CONTRIBUTING.md) From 58a034e356dd696084fd9f6dde8f2033612771e9 Mon Sep 17 00:00:00 2001 From: Shubham Kaushal Date: Mon, 26 Feb 2024 10:35:00 +0530 Subject: [PATCH 02/57] update version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4b10706..243a34f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.38" +version = "0.0.39" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 4fa656cbf8f9fa0762fd20099dfb4754e23b893a Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Mon, 26 Feb 2024 10:48:26 +0530 Subject: [PATCH 03/57] version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 085838f..56dbb51 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.39` +`0.0.40` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 70a8366..a9e4030 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.38" +__version__ = "0.0.40" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index c7825b9..5a3e074 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.38", + "User-Agent": "outpost-python/0.0.40", } if ( diff --git a/pyproject.toml b/pyproject.toml index 243a34f..afdc42a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.39" +version = "0.0.40" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 97a99129e63a323b579e6cfb9fc20990d8ac8fd6 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Mon, 26 Feb 2024 10:53:06 +0530 Subject: [PATCH 04/57] added list third party tokens --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- outpostkit/user.py | 11 +++++++++++ pyproject.toml | 2 +- 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 56dbb51..ebdae95 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.40` +`0.0.41` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index a9e4030..1b32542 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.40" +__version__ = "0.0.41" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 5a3e074..5de449b 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.40", + "User-Agent": "outpost-python/0.0.41", } if ( diff --git a/outpostkit/user.py b/outpostkit/user.py index c3398fa..7efb3eb 100644 --- a/outpostkit/user.py +++ b/outpostkit/user.py @@ -26,6 +26,17 @@ def get(self) -> UserDetails: return UserDetails(**resp.json()) + def list_third_party_tokens(self) -> UserDetails: + """Get User + + Returns: + The User details. + """ + resp = self._client._request(path="/user/tokens", method="GET") + resp.raise_for_status() + + return resp.json() + async def async_get(self) -> UserDetails: """Get User diff --git a/pyproject.toml b/pyproject.toml index afdc42a..7b2a0fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.40" +version = "0.0.41" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From a1af84873a13d0d7d96cfdaf5daf8358d9f2ba30 Mon Sep 17 00:00:00 2001 From: Shubham Kaushal Date: Mon, 26 Feb 2024 11:18:02 +0530 Subject: [PATCH 05/57] release 0.0.42 --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ebdae95..edf9323 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.41` +`0.0.42` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 1b32542..596eadb 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.41" +__version__ = "0.0.42" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 5de449b..e81872b 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.41", + "User-Agent": "outpost-python/0.0.42", } if ( diff --git a/pyproject.toml b/pyproject.toml index 7b2a0fc..2472fb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.41" +version = "0.0.42" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From e4ac27a37f6967643506b8a0fcc83c82fae197ff Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Wed, 6 Mar 2024 16:09:09 +0530 Subject: [PATCH 06/57] prebuilt container configs: transformers --- README.md | 194 +++++++++++++++++++++++++++++---- outpostkit/_types/endpoint.py | 32 +++++- outpostkit/_utils/constants.py | 35 ++++++ outpostkit/endpoints.py | 55 ++++++---- 4 files changed, 270 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index edf9323..ec90929 100644 --- a/README.md +++ b/README.md @@ -60,29 +60,9 @@ client = Client(api_token=os.environ.get('OUTPOST_API_TOKEN')) endpoints_client = Endpoints(client=client, entity=client.user.name) template = EndpointAutogeneratedTemplateConfig(modelSource="huggingface",huggingfaceModel=EndpointAutogeneratedHFModelDetails(id="Falconsai/text_summarization")) -endpoint = endpoints_client.create(templateConfig=template) -``` -To import gated/private models from Hugging Face, you can add your Hugging Face key as a third party token and provide its `id` in the config. -```py -EndpointAutogeneratedHFModelDetails(id="Falconsai/text_summarization",keyId="") -``` - -If you have a specific revision of the model that you want to deploy, provide the revision in the `revision` field. - -```py -template = EndpointAutogeneratedTemplateConfig(modelSource="outpost", revision="df5ef1a0e2d2579726d74b5d617b17c7049c5a89",outpostModel=EndpointAutogeneratedOutpostModelDetails(id="aj-ya/text-gen")) -``` -The horizontal scaling configurations of the endpoint are based on the number of requests to the prediction request path. -You can tweak the settings at creation too. -```py -from outpostkit._types.endpoint import ReplicaScalingConfig -scaling_config = ReplicaScalingConfig(min=0,max=1,scaledownPeriod=900,targetPendingRequests=100) # Defaults -endpoint = endpoints_client.create(templateConfig=template,replica_scaling_config = scaling_config) +endpoint = endpoints_client.create(template=template) ``` -`scaledownPeriod`: The period to wait after the last reported active before scaling the resource back to 0. -`targetPendingRequests`: This is the number of pending (or in-progress) requests that your application needs to have before it is scaled up. Conversely, if your application has below this number of pending requests, it will scaled down. - ## Deploy the endpoint once you create the endpoint, you need to deploy it. ```py @@ -119,3 +99,175 @@ print(resp.json()) ## Development See [CONTRIBUTING.md](CONTRIBUTING.md) + + +## Endpoint Creation Payload +``` + def create( + self, + template: Union[ + EndpointAutogeneratedTemplateConfig, EndpointCustomTemplateConfig + ], + container: Optional[EndpointPrebuiltContainerDetails] = None, + hardware_instance: str = "cpu-sm", + task_type: Optional[str] = None, + name: Optional[str] = None, + secrets: Optional[List[EndpointSecret]] = None, + visibility: ServiceVisibility = ServiceVisibility.public, + replica_scaling_config: Optional[ReplicaScalingConfig] = None, + ) -> Endpoint: ... +``` +An endpoint server needs to know certain things like model loading, prediction request handling, exception handling, etc. and these things vary with each usecase. +Thus the server follows a template which tells it how to behave. + +### Template configuration +You can either let us autogenerate templates based on the model information, or create a custom template yourself. + +#### Configs to Autogenerate Template +Currently we can autogenerate templates models stored at 'Outpost' or 'Hugging Face'. + +Task types supported +--TODO: List all task types supported-- + +To Import a model stored at Outpost, you can directly use: + +```py +from outpostkit._types.endpoints import EndpointAutogeneratedTemplateConfig, EndpointAutogeneratedOutpostModelDetails + +template = EndpointAutogeneratedTemplateConfig(modelSource="outpost",outpostModel=EndpointAutogeneratedOutpostModelDetails(id="aj-ya/text-gen")) +``` + +If you have a specific revision of the model that you want to deploy, provide the revision in the `revision` field. + +```py +template = EndpointAutogeneratedTemplateConfig(modelSource="outpost", revision="df5ef1a0e2d2579726d74b5d617b17c7049c5a89",outpostModel=EndpointAutogeneratedOutpostModelDetails(id="aj-ya/text-gen")) +``` + +To import gated/private models from Hugging Face, you can add your Hugging Face key as a third party token and provide its `id` in the config. +```py +from outpostkit._types.endpoints import EndpointAutogeneratedTemplateConfig, EndpointAutogeneratedHFModelDetails +hf_model = EndpointAutogeneratedHFModelDetails(id="Falconsai/text_summarization",keyId="") +template = EndpointAutogeneratedTemplateConfig(modelSource="huggingface",huggingfaceModel=hf_model) +``` + +#### Create a custom template +A Template class needs to mainly define model initialization and prediction request handling. +For demonstration purposes, lets create a template file for the `openai/shap-e` model. + +First of all we need to create a class and the load the model at the initialization phase. +Then, we need to define the request handler for the `/predict` route. this is done by defining +the `predict` member function. +This function itself acts as the handler, thus you can define any parameters that FastAPI supports to the function. Here we will use a pydantic class to validate the request json body and get generation arguements. (ref: https://fastapi.tiangolo.com/tutorial/body/) +Finally, we would like to also stream the output GIF, for this we will use the `StreamingResponse` object by fastapi. + +```py +from io import BytesIO +from typing import List +from diffusers import ShapEPipeline +from fastapi.responses import StreamingResponse +from pydantic import BaseModel +from PIL.Image import Image + + +class Item(BaseModel): + prompt: str + frame_size: int = 256 + num_inference_steps: int = 64 + guidance_scale: float = 15 + + +def pil_gif_resp(image: List[Image]) -> StreamingResponse: + temp = BytesIO() + image[0].save( + temp, + save_all=True, + append_images=image[1:], + optimize=False, + duration=100, + loop=0, + ) + return StreamingResponse(temp, media_type="image/gif") + + +class ShapEHandler: + pipeline: ShapEPipeline + + def __init__(self) -> None: + ckpt_id = "openai/shap-e" + self.pipeline = ShapEPipeline.from_pretrained(ckpt_id).to("cuda") + + def predict(self, item: Item): + images = self.pipeline( + item.prompt, + guidance_scale=item.guidance_scale, + num_inference_steps=item.num_inference_steps, + frame_size=item.frame_size, + ).images + return pil_gif_resp(images[0]) +``` + +##### Installing extra packages and modules +If your application needs a specific python package or system dependency that is not already installed in the container image (--TODO-- docs showing the list of prebuilt container images and the packages installed there.). +Then, you can define these members in the Template class + +```py +class Template: + # extra system dependencies required + system_dependencies: List[str] = ['curl'] + + # extra python packages required + python_requirements: List[str] = ['gif==23.0'] + ... +``` + +##### Exception Handling + +To define exception handling outside the prediction handler, you can extend the default expection handling done by the server like this: +```py +from fastapi.responses import JSONResponse + +async def generic_exception_handler(_, exc: Exception): + return JSONResponse( + json.dumps({"error": str(exc), "type": "unhandled_error"}), + status_code=500, + ) + +class Template: + # define custom exception handlers for the fastapi app + exception_handlers: Dict[Union[int, Type[Exception]], Callable] = dict({ + Exception: generic_exception_handler + }) + ... +``` + + +### Container configuration + +if youre already using a prebuilt template, most of the times, you wont need to define this. It is already selected based on the library and task type. + +But you can manually configure this as well. +Currently, you can only use any one of many prebuilt containers that are provided by outpost. + + +Namely, +--TODO-- list all container images here. + +To use the tensorflow image with pytorch loaded, with some extra configs, use: +``` +from outpostkit import EndpointPrebuiltContainerDetails +container = EndpointPrebuiltContainerDetails(name="transformers-pt", configs = {torch_dtype:'float32'}) +``` + + +### Scaling Configuration + +The horizontal scaling configurations of the endpoint are based on the number of requests to the prediction request path. +You can tweak the settings at creation too. +```py +from outpostkit._types.endpoint import ReplicaScalingConfig +scaling_config = ReplicaScalingConfig(min=0,max=1,scaledownPeriod=900,targetPendingRequests=100) # Defaults +endpoint = endpoints_client.create(template=template,replica_scaling_config = scaling_config) +``` + +`scaledownPeriod`: The period to wait after the last reported active before scaling the resource back to 0. +`targetPendingRequests`: This is the number of pending (or in-progress) requests that your application needs to have before it is scaled up. Conversely, if your application has below this number of pending requests, it will scaled down. diff --git a/outpostkit/_types/endpoint.py b/outpostkit/_types/endpoint.py index 664df7e..0b1cd62 100644 --- a/outpostkit/_types/endpoint.py +++ b/outpostkit/_types/endpoint.py @@ -5,7 +5,7 @@ @dataclass -class DomainInEndpoint: +class EndpointDomainDetails: protocol: str name: str id: str @@ -58,11 +58,17 @@ def __init__(self, *args, **kwargs) -> None: @dataclass class EndpointAutogeneratedTemplateConfig: modelSource: Literal["huggingface", "outpost"] - config: Optional[Dict[str, Any]] = None revision: Optional[str] = None huggingfaceModel: Optional[EndpointAutogeneratedHFModelDetails] = None outpostModel: Optional[EndpointAutogeneratedOutpostModelDetails] = None +@dataclass +class EndpointPrebuiltContainerDetails: + name: str + image: str + config: Optional[Any] = None + serverArgs: Optional[Any] = None + @dataclass class EndpointCustomTemplateConfig: @@ -153,7 +159,7 @@ class EndpointResource: healthcheckPath: str """Relative path used for healthcheck and readiness probes""" - primaryDomain: Optional[DomainInEndpoint] + primaryDomain: Optional[EndpointDomainDetails] createdAt: str @@ -169,7 +175,9 @@ class EndpointResource: # creatorId: Optional[str]=None - # currentDeploymentId: Optional[str]=None + prebuiltContainerDetails: Optional[EndpointPrebuiltContainerDetails] = None + + currentDeploymentId: Optional[str] = None currentDeployment: Optional[EndpointDeployment] = None @@ -198,7 +206,9 @@ def __init__(self, *args, **kwargs: Mapping[str, Any]) -> None: **kwargs.get("customTemplateConfig") ) elif _field == "primaryDomain" and kwargs.get("primaryDomain") is not None: - self.primaryDomain = DomainInEndpoint(**kwargs.get("primaryDomain")) + self.primaryDomain = EndpointDomainDetails( + **kwargs.get("primaryDomain") + ) elif _field == "hardwareInstance": self.hardwareInstance = EndpointHardwareInstanceDetails( **kwargs.get("hardwareInstance") @@ -217,6 +227,13 @@ def __init__(self, *args, **kwargs: Mapping[str, Any]) -> None: self.replicaScalingConfig = ReplicaScalingConfig( **kwargs.get("replicaScalingConfig") ) + elif ( + _field == "prebuiltContainerDetails" + and kwargs.get("prebuiltContainerDetails") is not None + ): + self.pre = EndpointPrebuiltContainerDetails( + **kwargs.get("prebuiltContainerDetails") + ) else: setattr(self, _field, kwargs.get(_field)) @@ -230,6 +247,11 @@ class EndpointReplicaStatusCondition: status: str type: str +@dataclass +class EndpointSecret: + name: str + value: str + @dataclass class EndpointReplicaStatus: diff --git a/outpostkit/_utils/constants.py b/outpostkit/_utils/constants.py index 77693d5..34ee330 100644 --- a/outpostkit/_utils/constants.py +++ b/outpostkit/_utils/constants.py @@ -5,3 +5,38 @@ class ServiceVisibility(Enum): public = "public" internal = "internal" private = "private" + +scaffolding_file = """ +from abc import ABC, abstractmethod +from typing import Callable, Dict, List, Type, Union + +from fastapi import Request, Response + + +class PredictionTemplate(ABC): + # define custom exception handlers for the fastapi app + exception_handlers: Dict[Union[int, Type[Exception]], Callable] = dict({}) + + # extra system dependencies required + system_dependencies: List[str] = [] + + # extra python packages required + python_requirements: List[str] = [] + + # define mandatory environment variables needed for the template to run + secrets: List[str] = [] + + @abstractmethod + def __init__(self, **kwargs): + \"\"\" + An init method to download prepare the model. + \"\"\" + pass + + @abstractmethod + async def predict(self, Request: Request) -> Response: + \"\"\" + prediction handler that can take paramaters like a FastAPI route handler + \"\"\" + pass +""" diff --git a/outpostkit/endpoints.py b/outpostkit/endpoints.py index caa6dbb..0862d77 100644 --- a/outpostkit/endpoints.py +++ b/outpostkit/endpoints.py @@ -11,11 +11,13 @@ EndpointAutogeneratedTemplateConfig, EndpointCustomTemplateConfig, EndpointDeployment, + EndpointPrebuiltContainerDetails, EndpointReplicaStatus, EndpointResource, + EndpointSecret, ReplicaScalingConfig, ) -from outpostkit._utils.constants import ServiceVisibility +from outpostkit._utils.constants import ServiceVisibility, scaffolding_file from outpostkit.client import Client from outpostkit.exceptions import OutpostError from outpostkit.predictor import Predictor @@ -236,6 +238,10 @@ def __init__(self, client: Client, entity: str) -> None: self.entity = entity super().__init__(client) + def scaffold(self, name: str) -> None: + with open(name, "x") as f: + f.write(scaffolding_file) + def list( self, ) -> EndpointListResponse: @@ -249,21 +255,21 @@ def list( def create( self, - templateConfig: Union[ + template: Union[ EndpointAutogeneratedTemplateConfig, EndpointCustomTemplateConfig ], + container: Optional[EndpointPrebuiltContainerDetails] = None, hardware_instance: str = "cpu-sm", task_type: Optional[str] = None, - prebuilt_image_name: Optional[str] = None, name: Optional[str] = None, + secrets: Optional[List[EndpointSecret]] = None, visibility: ServiceVisibility = ServiceVisibility.public, replica_scaling_config: Optional[ReplicaScalingConfig] = None, - containerType: Literal["prebuilt"] = "prebuilt", # will be extended soon ) -> Endpoint: """ Create an endpoint by providing the model details or use a custom template file. """ - if isinstance(templateConfig, EndpointAutogeneratedTemplateConfig): + if isinstance(template, EndpointAutogeneratedTemplateConfig): resp = self._client._request( "POST", f"/endpoints/{self.entity}", @@ -275,22 +281,25 @@ def create( if replica_scaling_config else None, "name": name, - "prebuiltImageName": prebuilt_image_name, - "containerType": containerType, + "secrets": secrets, + "prebuiltContainerDetails": asdict(container) + if container + else None, + "containerType": "prebuilt", "taskType": task_type, - "autogeneratedTemplateConfig": asdict(templateConfig), + "autogeneratedTemplateConfig": asdict(template), }, ) else: - if templateConfig.type == "file": - if not os.path.exists(templateConfig.path) or not os.path.isfile( - templateConfig.path + if template.type == "file": + if not os.path.exists(template.path) or not os.path.isfile( + template.path ): raise OutpostError("No template file found.") resp = self._client._request( "POST", f"/endpoints/{self.entity}", - files={"template": open(templateConfig.path)}, + files={"template": open(template.path)}, data={ "metadata": json.dumps( { @@ -300,18 +309,21 @@ def create( if replica_scaling_config else None, "name": name, - "prebuiltImageName": prebuilt_image_name, - "containerType": containerType, + "secrets": secrets, + "prebuiltContainerDetails": asdict(container) + if container + else None, + "containerType": "prebuilt", "taskType": task_type, "customTemplateConfig": { - "className": templateConfig.className + "className": template.className }, } ) }, ) else: - parsed = urlparse(templateConfig.path) + parsed = urlparse(template.path) if not all([parsed.scheme, parsed.netloc]): raise OutpostError("Invalid url specified in path.") resp = self._client._request( @@ -325,12 +337,15 @@ def create( if replica_scaling_config else None, "name": name, - "prebuiltImageName": prebuilt_image_name, - "containerType": containerType, + "secrets": secrets, + "prebuiltContainerDetails": asdict(container) + if container + else None, + "containerType": "prebuilt", "taskType": task_type, "customTemplateConfig": { - "className": templateConfig.className, - "url": templateConfig.path, + "className": template.className, + "url": template.path, }, }, ) From 27447a130da30d03e37cd17b4bc3d9992688f4bc Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Fri, 8 Mar 2024 08:14:38 +0530 Subject: [PATCH 07/57] new releae --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ec90929..3e629e8 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.42` +`0.0.43` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 596eadb..a1cbdef 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.42" +__version__ = "0.0.43" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index e81872b..6e7723d 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.42", + "User-Agent": "outpost-python/0.0.43", } if ( diff --git a/pyproject.toml b/pyproject.toml index 2472fb5..1ba062d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.42" +version = "0.0.43" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 29f4b5aa0b6d3c874a23638ba96bb049a68e88f1 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Fri, 8 Mar 2024 09:39:26 +0530 Subject: [PATCH 08/57] new release --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/_types/endpoint.py | 2 ++ outpostkit/_utils/constants.py | 1 + outpostkit/client.py | 2 +- outpostkit/endpoints.py | 55 ++++++++++++++++++++++------------ pyproject.toml | 2 +- 7 files changed, 43 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 3e629e8..176349f 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.43` +`0.0.44` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index a1cbdef..3fc6cf2 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.43" +__version__ = "0.0.44" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/_types/endpoint.py b/outpostkit/_types/endpoint.py index 0b1cd62..e66a88a 100644 --- a/outpostkit/_types/endpoint.py +++ b/outpostkit/_types/endpoint.py @@ -62,6 +62,7 @@ class EndpointAutogeneratedTemplateConfig: huggingfaceModel: Optional[EndpointAutogeneratedHFModelDetails] = None outpostModel: Optional[EndpointAutogeneratedOutpostModelDetails] = None + @dataclass class EndpointPrebuiltContainerDetails: name: str @@ -247,6 +248,7 @@ class EndpointReplicaStatusCondition: status: str type: str + @dataclass class EndpointSecret: name: str diff --git a/outpostkit/_utils/constants.py b/outpostkit/_utils/constants.py index 34ee330..6b20a4e 100644 --- a/outpostkit/_utils/constants.py +++ b/outpostkit/_utils/constants.py @@ -6,6 +6,7 @@ class ServiceVisibility(Enum): internal = "internal" private = "private" + scaffolding_file = """ from abc import ABC, abstractmethod from typing import Callable, Dict, List, Type, Union diff --git a/outpostkit/client.py b/outpostkit/client.py index 6e7723d..2b8fd20 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.43", + "User-Agent": "outpost-python/0.0.44", } if ( diff --git a/outpostkit/endpoints.py b/outpostkit/endpoints.py index 0862d77..af38963 100644 --- a/outpostkit/endpoints.py +++ b/outpostkit/endpoints.py @@ -156,7 +156,7 @@ def delete(self) -> None: """ self._client._request("DELETE", f"/endpoints/{self.fullName}") - def get_replica_status(self) -> EndpointReplicaStatus: + def replica_status(self) -> EndpointReplicaStatus: """ Get the current replica status of the endpoint Note: throws if there are no currently deployed runtimes of the endpoint. @@ -167,6 +167,17 @@ def get_replica_status(self) -> EndpointReplicaStatus: ) return EndpointReplicaStatus(**resp.json()) + def status(self) -> EndpointReplicaStatus: + """ + Get the current replica status of the endpoint + Note: throws if there are no currently deployed runtimes of the endpoint. + """ + resp = self._client._request( + "GET", + f"/endpoints/{self.fullName}/status", + ) + return resp.json().get("status") + def get_logs( self, log_type: Optional[Literal["dep", "runtime", "event"]] = None, @@ -277,14 +288,16 @@ def create( "templateType": "autogenerated", "hardwareInstance": hardware_instance, "visibility": visibility.name, - "replicaScalingConfig": asdict(replica_scaling_config) - if replica_scaling_config - else None, + "replicaScalingConfig": ( + asdict(replica_scaling_config) + if replica_scaling_config + else None + ), "name": name, "secrets": secrets, - "prebuiltContainerDetails": asdict(container) - if container - else None, + "prebuiltContainerDetails": ( + asdict(container) if container else None + ), "containerType": "prebuilt", "taskType": task_type, "autogeneratedTemplateConfig": asdict(template), @@ -305,14 +318,16 @@ def create( { "hardwareInstance": hardware_instance, "visibility": visibility.name, - "replicaScalingConfig": asdict(replica_scaling_config) - if replica_scaling_config - else None, + "replicaScalingConfig": ( + asdict(replica_scaling_config) + if replica_scaling_config + else None + ), "name": name, "secrets": secrets, - "prebuiltContainerDetails": asdict(container) - if container - else None, + "prebuiltContainerDetails": ( + asdict(container) if container else None + ), "containerType": "prebuilt", "taskType": task_type, "customTemplateConfig": { @@ -333,14 +348,16 @@ def create( "templateType": "custom", "hardwareInstance": hardware_instance, "visibility": visibility, - "replicaScalingConfig": asdict(replica_scaling_config) - if replica_scaling_config - else None, + "replicaScalingConfig": ( + asdict(replica_scaling_config) + if replica_scaling_config + else None + ), "name": name, "secrets": secrets, - "prebuiltContainerDetails": asdict(container) - if container - else None, + "prebuiltContainerDetails": ( + asdict(container) if container else None + ), "containerType": "prebuilt", "taskType": task_type, "customTemplateConfig": { diff --git a/pyproject.toml b/pyproject.toml index 1ba062d..ff9b7c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.43" +version = "0.0.44" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 394dab772cc899f677873f27bc1d4eb342bfc7a6 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Wed, 13 Mar 2024 18:06:14 +0530 Subject: [PATCH 09/57] update --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/_types/endpoint.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 176349f..e89177d 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.44` +`0.0.45` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 3fc6cf2..1f6b823 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.44" +__version__ = "0.0.45" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/_types/endpoint.py b/outpostkit/_types/endpoint.py index e66a88a..9d48514 100644 --- a/outpostkit/_types/endpoint.py +++ b/outpostkit/_types/endpoint.py @@ -66,7 +66,7 @@ class EndpointAutogeneratedTemplateConfig: @dataclass class EndpointPrebuiltContainerDetails: name: str - image: str + image: Optional[str] = None config: Optional[Any] = None serverArgs: Optional[Any] = None diff --git a/outpostkit/client.py b/outpostkit/client.py index 2b8fd20..a14f244 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.44", + "User-Agent": "outpost-python/0.0.45", } if ( diff --git a/pyproject.toml b/pyproject.toml index ff9b7c0..e001788 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.44" +version = "0.0.45" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 151ae36d4b1a6274f96db3f742981a9ec82bd245 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Mon, 25 Mar 2024 15:41:18 +0530 Subject: [PATCH 10/57] bug fix: template upload + version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- outpostkit/endpoints.py | 2 +- pyproject.toml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e89177d..f31f644 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.45` +`0.0.46` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 1f6b823..e829280 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.45" +__version__ = "0.0.46" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index a14f244..2658129 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.45", + "User-Agent": "outpost-python/0.0.46", } if ( diff --git a/outpostkit/endpoints.py b/outpostkit/endpoints.py index af38963..ca2b4d5 100644 --- a/outpostkit/endpoints.py +++ b/outpostkit/endpoints.py @@ -312,7 +312,7 @@ def create( resp = self._client._request( "POST", f"/endpoints/{self.entity}", - files={"template": open(template.path)}, + files={"template": open(template.path, "b")}, data={ "metadata": json.dumps( { diff --git a/pyproject.toml b/pyproject.toml index e001788..0050e64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.45" +version = "0.0.46" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From a2f5b0b8265bfd257f4dfa97ec4216558f6e0c15 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Mon, 25 Mar 2024 15:49:00 +0530 Subject: [PATCH 11/57] bug fix: template upload + version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- outpostkit/endpoints.py | 2 +- pyproject.toml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f31f644..68fc827 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.46` +`0.0.47` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index e829280..84cbef1 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.46" +__version__ = "0.0.47" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 2658129..4f6190e 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.46", + "User-Agent": "outpost-python/0.0.47", } if ( diff --git a/outpostkit/endpoints.py b/outpostkit/endpoints.py index ca2b4d5..cebab5a 100644 --- a/outpostkit/endpoints.py +++ b/outpostkit/endpoints.py @@ -312,7 +312,7 @@ def create( resp = self._client._request( "POST", f"/endpoints/{self.entity}", - files={"template": open(template.path, "b")}, + files={"template": open(template.path, "b+r")}, data={ "metadata": json.dumps( { diff --git a/pyproject.toml b/pyproject.toml index 0050e64..1100c4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.46" +version = "0.0.47" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From b23ac35a1bf6a7bebb753752ef1409b7479f5590 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sat, 30 Mar 2024 12:14:01 +0530 Subject: [PATCH 12/57] skaffold moved to the top --- outpostkit/endpoints.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/outpostkit/endpoints.py b/outpostkit/endpoints.py index cebab5a..3e2f60a 100644 --- a/outpostkit/endpoints.py +++ b/outpostkit/endpoints.py @@ -233,6 +233,10 @@ def __init__(self, total: int, endpoints: List[Dict]) -> None: infs.append(EndpointResource(**inf)) self.endpoints = infs +def scaffold(self, name: str) -> None: + with open(name, "x") as f: + f.write(scaffolding_file) + @dataclass class EndpointCreateResponse: @@ -249,9 +253,6 @@ def __init__(self, client: Client, entity: str) -> None: self.entity = entity super().__init__(client) - def scaffold(self, name: str) -> None: - with open(name, "x") as f: - f.write(scaffolding_file) def list( self, From 23ce50ace2fa67287c6e4b727f9e42765c8b6252 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Mon, 1 Apr 2024 18:40:44 +0530 Subject: [PATCH 13/57] replica status --- outpostkit/_utils/constants.py | 11 ++++------- outpostkit/endpoints.py | 7 +++---- template.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 11 deletions(-) create mode 100644 template.py diff --git a/outpostkit/_utils/constants.py b/outpostkit/_utils/constants.py index 6b20a4e..706b064 100644 --- a/outpostkit/_utils/constants.py +++ b/outpostkit/_utils/constants.py @@ -8,13 +8,12 @@ class ServiceVisibility(Enum): scaffolding_file = """ -from abc import ABC, abstractmethod from typing import Callable, Dict, List, Type, Union -from fastapi import Request, Response +from fastapi import Request -class PredictionTemplate(ABC): +class PredictionTemplate: # define custom exception handlers for the fastapi app exception_handlers: Dict[Union[int, Type[Exception]], Callable] = dict({}) @@ -27,17 +26,15 @@ class PredictionTemplate(ABC): # define mandatory environment variables needed for the template to run secrets: List[str] = [] - @abstractmethod def __init__(self, **kwargs): \"\"\" An init method to download prepare the model. \"\"\" pass - @abstractmethod - async def predict(self, Request: Request) -> Response: + async def predict(self, Request: Request): \"\"\" prediction handler that can take paramaters like a FastAPI route handler \"\"\" - pass + return { "ping":"pong" } """ diff --git a/outpostkit/endpoints.py b/outpostkit/endpoints.py index 3e2f60a..98d601e 100644 --- a/outpostkit/endpoints.py +++ b/outpostkit/endpoints.py @@ -1,7 +1,6 @@ import json import os from dataclasses import asdict, dataclass -from datetime import datetime from typing import Any, Dict, List, Literal, Optional, Tuple, Union from urllib.parse import urlparse @@ -182,8 +181,8 @@ def get_logs( self, log_type: Optional[Literal["dep", "runtime", "event"]] = None, deployment_id: Optional[str] = None, - start: Optional[Union[int, str, datetime]] = None, - end: Optional[Union[int, str, datetime]] = None, + start: Optional[Union[int, str]] = None, + end: Optional[Union[int, str]] = None, limit: Optional[int] = 1000, ) -> List[Tuple[str, str]]: """ @@ -233,7 +232,7 @@ def __init__(self, total: int, endpoints: List[Dict]) -> None: infs.append(EndpointResource(**inf)) self.endpoints = infs -def scaffold(self, name: str) -> None: +def scaffold(name: str) -> None: with open(name, "x") as f: f.write(scaffolding_file) diff --git a/template.py b/template.py new file mode 100644 index 0000000..24ef7d2 --- /dev/null +++ b/template.py @@ -0,0 +1,29 @@ +from typing import Callable, Dict, List, Type, Union + +from fastapi import Request + + +class PredictionTemplate: + # define custom exception handlers for the fastapi app + exception_handlers: Dict[Union[int, Type[Exception]], Callable] = dict({}) + + # extra system dependencies required + system_dependencies: List[str] = [] + + # extra python packages required + python_requirements: List[str] = [] + + # define mandatory environment variables needed for the template to run + secrets: List[str] = [] + + def __init__(self, **kwargs): + \"\"\" + An init method to download prepare the model. + \"\"\" + pass + + async def predict(self, Request: Request): + \"\"\" + prediction handler that can take paramaters like a FastAPI route handler + \"\"\" + return { "ping":"pong"} From 9f467ed20ca7045efd180dddec4f0948793ecb8f Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Mon, 1 Apr 2024 18:41:29 +0530 Subject: [PATCH 14/57] upgrade --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 68fc827..563cf57 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.47` +`0.0.48` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 84cbef1..f150a84 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.47" +__version__ = "0.0.48" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 4f6190e..e111af3 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.47", + "User-Agent": "outpost-python/0.0.48", } if ( diff --git a/pyproject.toml b/pyproject.toml index 1100c4a..d890ea6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.47" +version = "0.0.48" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 1970c060937b8e4dda56e70058c1b618c93d86a5 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Tue, 2 Apr 2024 00:49:31 +0530 Subject: [PATCH 15/57] feat: log --- outpostkit/_types/endpoint.py | 20 ++++++++++++++++++ outpostkit/endpoints.py | 14 ++++++++++--- outpostkit/utils.py | 38 +++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 3 deletions(-) diff --git a/outpostkit/_types/endpoint.py b/outpostkit/_types/endpoint.py index 9d48514..8b6c630 100644 --- a/outpostkit/_types/endpoint.py +++ b/outpostkit/_types/endpoint.py @@ -284,3 +284,23 @@ def __init__(self, *args, **kwargs) -> None: ) else: setattr(self, _field, kwargs.get(_field)) + + +@dataclass +class EndpointLogData: + level_num: int + log_type: Literal["runtime", "dep", "event"] + level: str + logger_name: str + message: str + exc_info: Optional[str] = None + stack_info: Optional[str] = None + replica: Optional[str] = None + extra: Dict[str, Any] = field(default_factory=lambda: {}) + # TODO extend for all the info + + +@dataclass +class EndpointLog: + timestamp: str + data: EndpointLogData diff --git a/outpostkit/endpoints.py b/outpostkit/endpoints.py index 98d601e..d9e2875 100644 --- a/outpostkit/endpoints.py +++ b/outpostkit/endpoints.py @@ -1,7 +1,7 @@ import json import os from dataclasses import asdict, dataclass -from typing import Any, Dict, List, Literal, Optional, Tuple, Union +from typing import Any, Dict, List, Literal, Optional, Union from urllib.parse import urlparse from httpx import Response @@ -10,6 +10,7 @@ EndpointAutogeneratedTemplateConfig, EndpointCustomTemplateConfig, EndpointDeployment, + EndpointLog, EndpointPrebuiltContainerDetails, EndpointReplicaStatus, EndpointResource, @@ -21,6 +22,7 @@ from outpostkit.exceptions import OutpostError from outpostkit.predictor import Predictor from outpostkit.resource import Namespace +from outpostkit.utils import parse_endpoint_log_data @dataclass @@ -184,7 +186,7 @@ def get_logs( start: Optional[Union[int, str]] = None, end: Optional[Union[int, str]] = None, limit: Optional[int] = 1000, - ) -> List[Tuple[str, str]]: + ) -> List[EndpointLog]: """ Retrieve logs related to the endpoint Available log types:runtime, dep (deployment) and event. @@ -202,7 +204,13 @@ def get_logs( }, ) - return [(str(log.time), str(log.message)) for log in resp.json()] + return [ + EndpointLog( + timestamp=str(log.get("timestamp")), + data=parse_endpoint_log_data(log.get("data")), + ) + for log in resp.json() + ] def get_custom_template(self) -> Union[bytes, Any]: # noqa: ANN401 """ diff --git a/outpostkit/utils.py b/outpostkit/utils.py index 6870534..135d29a 100644 --- a/outpostkit/utils.py +++ b/outpostkit/utils.py @@ -1,5 +1,43 @@ from datetime import datetime +from typing import Any, Dict, List, Tuple + +from outpostkit._types.endpoint import EndpointLogData def convert_outpost_date_str_to_date(date_string: str) -> datetime: return datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ") + + +def separate_keys( + dictionary: Dict[str, Any], known_keys: List[str] +) -> Tuple[Dict[str, Any], Dict[str, Any]]: + known_dict = {} + unknown_dict = {} + + for key, value in dictionary.items(): + if key in known_keys: + known_dict[key] = value + else: + unknown_dict[key] = value + + return (known_dict, unknown_dict) + + +def parse_endpoint_log_data(log_data: Dict[str, Any]) -> EndpointLogData: + known_keys = [ + "level_num", + "log_type", + "level", + "logger_name", + "message", + "exc_info", + "stack_info", + ] + (known_dict, extra) = separate_keys(log_data, known_keys=known_keys) + replica = None + kube_data = extra.get("kubernetes") + if kube_data and isinstance(kube_data, dict): + if "pod_name" in kube_data and isinstance(kube_data["pod_name"], str): + parts = kube_data["pod_name"].split("-") + replica = parts[-1] + return EndpointLogData(**known_dict, replica=replica, extra=extra) From 279a6589535d6a80874b1748600f5194e752c3f3 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Tue, 2 Apr 2024 01:00:35 +0530 Subject: [PATCH 16/57] version upgrade --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 563cf57..c6abbfa 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.48` +`0.0.49` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index f150a84..9847d19 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.48" +__version__ = "0.0.49" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index e111af3..9047638 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.48", + "User-Agent": "outpost-python/0.0.49", } if ( diff --git a/pyproject.toml b/pyproject.toml index d890ea6..c7e512d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.48" +version = "0.0.49" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From ba8292ae69eed582312962ac44149cb1f3aac0be Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Thu, 11 Apr 2024 12:32:06 +0530 Subject: [PATCH 17/57] added new primitives --- outpostkit/__init__.py | 2 + outpostkit/_types/repository.py | 4 ++ outpostkit/constants.py | 2 +- outpostkit/repository.py | 107 ++++++++++++++++++++++++++++++++ 4 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 outpostkit/_types/repository.py create mode 100644 outpostkit/repository.py diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 9847d19..42f8d95 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -2,5 +2,7 @@ from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints +from outpostkit.repository import Repository as Repository +from outpostkit.repository import RepositoryAtRef as RepositoryAtRef from outpostkit.team import Team as Team from outpostkit.user import User as User diff --git a/outpostkit/_types/repository.py b/outpostkit/_types/repository.py new file mode 100644 index 0000000..1ef7d8a --- /dev/null +++ b/outpostkit/_types/repository.py @@ -0,0 +1,4 @@ + +from typing import Literal + +REPOSITORY_TYPES = Literal["model", "dataset"] diff --git a/outpostkit/constants.py b/outpostkit/constants.py index 5c6e871..5b2dd75 100644 --- a/outpostkit/constants.py +++ b/outpostkit/constants.py @@ -1 +1 @@ -V1_API_URL = "https://api.outpost.run/v1" +V1_API_URL = "https://api-gateway.outpost.run/v1" diff --git a/outpostkit/repository.py b/outpostkit/repository.py new file mode 100644 index 0000000..19a46aa --- /dev/null +++ b/outpostkit/repository.py @@ -0,0 +1,107 @@ +from outpostkit._types.repository import REPOSITORY_TYPES +from outpostkit.client import Client +from outpostkit.resource import Namespace + + +# Assuming path always starts with `/`, can create a parser for this (src/... -> /src/...) +class Repository(Namespace): + def __init__( + self, client: Client, repo_type: REPOSITORY_TYPES, entity: str, name: str + ) -> None: + self.entity = entity + self.name = name + self.repo_type = repo_type + self.fullName = f"{entity}/{name}" + super().__init__(client) + + def view_blob(self, path: str, ref: str = "HEAD", raw: bool = True): + resp = self._client._request( + path=f"/git/blobs/{self.repo_type}/{self.fullName}/view/{ref}{path}", + method="GET", + params={"raw": raw}, + ) + resp.raise_for_status() + + return resp.json() # TODO Type + + def download_blob(self, path: str, ref: str = "HEAD", raw: bool = True): + resp = self._client._request( + path=f"/git/blobs/{self.repo_type}/{self.fullName}/download/{ref}{path}", + method="GET", + params={"raw": raw}, + ) + resp.raise_for_status() + + return resp.json() # TODO Type + + def view_tree( + self, + ref: str = "HEAD", + path: str = "/", + with_commit=False, + with_metadata=False, + ): + resp = self._client._request( + path=f"/git/tree/{self.repo_type}/{self.fullName}/view/{ref}{path}", + method="GET", + params={"with_commit": with_commit, "with_metadata": with_metadata}, + ) + resp.raise_for_status() + + return resp.json() # TODO Type + + def search_tree( + self, + search: str, + ref: str = "HEAD", + ): + resp = self._client._request( + path=f"/git/tree/{self.repo_type}/{self.fullName}/search", + method="GET", + params={"search": search, "ref": ref}, + ) + resp.raise_for_status() + + return resp.json() # TODO Type + + +class RepositoryAtRef(Namespace): + + def __init__( + self, + client: Client, + repo_type: REPOSITORY_TYPES, + entity: str, + name: str, + ref: str, + ) -> None: + self.repo = Repository( + client=client, repo_type=repo_type, entity=entity, name=name + ) + self.ref = ref + super().__init__(client) + + def view_blob(self, path: str, raw: bool = True): + return self.repo.view_blob(path=path, ref=self.ref, raw=raw) + + def download_blob(self, path: str, raw: bool = True): + return self.repo.download_blob(path=path, ref=self.ref, raw=raw) + + def view_tree( + self, + path: str = "/", + with_commit=False, + with_metadata=False, + ): + return self.repo.view_tree( + path=path, + ref=self.ref, + with_commit=with_commit, + with_metadata=with_metadata, + ) + + def search_tree( + self, + search: str, + ): + return self.repo.search_tree(search=search, ref=self.ref) From 08ff4b069002a756398d68c01163cd98e03ed858 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Thu, 11 Apr 2024 12:36:20 +0530 Subject: [PATCH 18/57] version bump --- README.md | 2 +- outpostkit/__init__.py | 4 +++- outpostkit/_types/endpoint.py | 4 ---- outpostkit/_types/finetuning.py | 7 ++++++ outpostkit/_utils/constants.py | 7 ++++++ outpostkit/_utils/finetuning.py | 14 ++++++++++++ outpostkit/client.py | 2 +- outpostkit/endpoints.py | 9 +++++--- outpostkit/finetuning.py | 39 +++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 10 files changed, 79 insertions(+), 11 deletions(-) create mode 100644 outpostkit/_types/finetuning.py create mode 100644 outpostkit/_utils/finetuning.py create mode 100644 outpostkit/finetuning.py diff --git a/README.md b/README.md index c6abbfa..fbf35c9 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.49` +`0.0.50` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 42f8d95..66d651c 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,7 +1,9 @@ -__version__ = "0.0.49" +__version__ = "0.0.50" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints +from outpostkit.finetuning import Finetunings as Finetunings +from outpostkit.finetuning import FinetuningService as FinetuningService from outpostkit.repository import Repository as Repository from outpostkit.repository import RepositoryAtRef as RepositoryAtRef from outpostkit.team import Team as Team diff --git a/outpostkit/_types/endpoint.py b/outpostkit/_types/endpoint.py index 8b6c630..c724211 100644 --- a/outpostkit/_types/endpoint.py +++ b/outpostkit/_types/endpoint.py @@ -249,10 +249,6 @@ class EndpointReplicaStatusCondition: type: str -@dataclass -class EndpointSecret: - name: str - value: str @dataclass diff --git a/outpostkit/_types/finetuning.py b/outpostkit/_types/finetuning.py new file mode 100644 index 0000000..b763230 --- /dev/null +++ b/outpostkit/_types/finetuning.py @@ -0,0 +1,7 @@ +from dataclasses import dataclass + + +@dataclass +class FinetuningServiceCreateResponse: + id: int + name: str diff --git a/outpostkit/_utils/constants.py b/outpostkit/_utils/constants.py index 706b064..0364503 100644 --- a/outpostkit/_utils/constants.py +++ b/outpostkit/_utils/constants.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass from enum import Enum @@ -7,6 +8,12 @@ class ServiceVisibility(Enum): private = "private" +@dataclass +class OutpostSecret: + name: str + value: str + + scaffolding_file = """ from typing import Callable, Dict, List, Type, Union diff --git a/outpostkit/_utils/finetuning.py b/outpostkit/_utils/finetuning.py new file mode 100644 index 0000000..17fe960 --- /dev/null +++ b/outpostkit/_utils/finetuning.py @@ -0,0 +1,14 @@ +from enum import Enum + + +class FinetuningTask(Enum): + text_classification = "text_classification" + clm_sft = "clm_sft" + clm_dpo = ("clm_dpo",) + clm_default = "clm_default" + clm_reward = "clm_reward" + seq2seq = "seq2seq" + image_classification = "image_classification" + dreambooth = "dreambooth" + tabular_classification = "tabular_classification" + tabular_regression = "tabular_regression" diff --git a/outpostkit/client.py b/outpostkit/client.py index 9047638..2eaaf9c 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.49", + "User-Agent": "outpost-python/0.0.50", } if ( diff --git a/outpostkit/endpoints.py b/outpostkit/endpoints.py index d9e2875..2454ee3 100644 --- a/outpostkit/endpoints.py +++ b/outpostkit/endpoints.py @@ -14,10 +14,13 @@ EndpointPrebuiltContainerDetails, EndpointReplicaStatus, EndpointResource, - EndpointSecret, ReplicaScalingConfig, ) -from outpostkit._utils.constants import ServiceVisibility, scaffolding_file +from outpostkit._utils.constants import ( + OutpostSecret, + ServiceVisibility, + scaffolding_file, +) from outpostkit.client import Client from outpostkit.exceptions import OutpostError from outpostkit.predictor import Predictor @@ -281,7 +284,7 @@ def create( hardware_instance: str = "cpu-sm", task_type: Optional[str] = None, name: Optional[str] = None, - secrets: Optional[List[EndpointSecret]] = None, + secrets: Optional[List[OutpostSecret]] = None, visibility: ServiceVisibility = ServiceVisibility.public, replica_scaling_config: Optional[ReplicaScalingConfig] = None, ) -> Endpoint: diff --git a/outpostkit/finetuning.py b/outpostkit/finetuning.py new file mode 100644 index 0000000..f721d30 --- /dev/null +++ b/outpostkit/finetuning.py @@ -0,0 +1,39 @@ +from typing import List, Optional + +from outpostkit._types.finetuning import FinetuningServiceCreateResponse +from outpostkit._utils.constants import OutpostSecret +from outpostkit.client import Client +from outpostkit.resource import Namespace + + +class FinetuningService(Namespace): + def __init__(self, client: Client, entity: str, name: str) -> None: + self.entity = entity + self.name = name + self.fullName = f"{entity}/{name}" + super().__init__(client) + + +class Finetunings(Namespace): + def __init__(self, client: Client, entity: str, name: str) -> None: + self.entity = entity + self.name = name + self.fullName = f"{entity}/{name}" + super().__init__(client) + + def list(self): + resp = self._client._request("GET", f"/finetunings/{self.entity}") + return resp.json() + + def create( + self, + name: str, + task_type: str, + dataset: str, + train_path: str, + validation_path: str, + secrets: Optional[List[OutpostSecret]] = None, + ) -> FinetuningService: + resp = self._client._request("POST", f"/finetunings/{self.entity}") + obj = FinetuningServiceCreateResponse(**resp.json()) + return FinetuningService(client=self._client, entity=self.entity, name=obj.name) diff --git a/pyproject.toml b/pyproject.toml index c7e512d..2945f05 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.49" +version = "0.0.50" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 077ea40ef3eb7bdbf86191ee7322910e4ef8fc93 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Thu, 11 Apr 2024 17:19:21 +0530 Subject: [PATCH 19/57] init --- outpostkit/_utils/__init__.py | 20 + outpostkit/_utils/git.py | 24 ++ outpostkit/_utils/import_utils.py | 173 ++++++++ outpostkit/logger.py | 7 +- .../{repository.py => repository/__init__.py} | 2 +- outpostkit/repository/_loaders/__init__.py | 0 .../_loaders/transformers/__init__.py | 149 +++++++ .../_loaders/transformers/constants.py | 14 + .../_loaders/transformers/download.py | 83 ++++ .../repository/_loaders/transformers/peft.py | 42 ++ .../repository/_loaders/transformers/raw.py | 387 ++++++++++++++++++ .../repository/_loaders/transformers/utils.py | 0 outpostkit/repository/download.py | 5 + 13 files changed, 903 insertions(+), 3 deletions(-) create mode 100644 outpostkit/_utils/__init__.py create mode 100644 outpostkit/_utils/git.py create mode 100644 outpostkit/_utils/import_utils.py rename outpostkit/{repository.py => repository/__init__.py} (99%) create mode 100644 outpostkit/repository/_loaders/__init__.py create mode 100644 outpostkit/repository/_loaders/transformers/__init__.py create mode 100644 outpostkit/repository/_loaders/transformers/constants.py create mode 100644 outpostkit/repository/_loaders/transformers/download.py create mode 100644 outpostkit/repository/_loaders/transformers/peft.py create mode 100644 outpostkit/repository/_loaders/transformers/raw.py create mode 100644 outpostkit/repository/_loaders/transformers/utils.py create mode 100644 outpostkit/repository/download.py diff --git a/outpostkit/_utils/__init__.py b/outpostkit/_utils/__init__.py new file mode 100644 index 0000000..dbdea59 --- /dev/null +++ b/outpostkit/_utils/__init__.py @@ -0,0 +1,20 @@ +import os + +import httpx + + +def split_full_name(full_name: str): + try: + [entity, name] = full_name.split("/", 1) + return (entity, name) + except Exception: + raise ValueError(f"Could not parse fullname - {full_name}") from None + + +def save_file_at_path_from_response(response: httpx.Response, save_path: str): + os.makedirs(os.path.dirname(save_path), exist_ok=True) + + # Write the response content to the file + with open(save_path, "wb") as file: + for chunk in response.iter_bytes(): + file.write(chunk) diff --git a/outpostkit/_utils/git.py b/outpostkit/_utils/git.py new file mode 100644 index 0000000..fb684cd --- /dev/null +++ b/outpostkit/_utils/git.py @@ -0,0 +1,24 @@ +# ref: https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/file_download.py +# ref: https://github.com/huggingface/transformers/blob/main/src/transformers/utils/hub.py + +import re +from pathlib import Path +from typing import Optional + +REGEX_COMMIT_HASH = re.compile(r"^[0-9a-f]{40}$") + + +def extract_commit_hash( + resolved_file: Optional[str], commit_hash: Optional[str] +) -> Optional[str]: + """ + Extracts the commit hash from a resolved filename toward a cache file. + """ + if resolved_file is None or commit_hash is not None: + return commit_hash + resolved_file = str(Path(resolved_file).as_posix()) + search = re.search(r"snapshots/([^/]+)/", resolved_file) + if search is None: + return None + commit_hash = search.groups()[0] + return commit_hash if REGEX_COMMIT_HASH.match(commit_hash) else None # type: ignore diff --git a/outpostkit/_utils/import_utils.py b/outpostkit/_utils/import_utils.py new file mode 100644 index 0000000..1a7116d --- /dev/null +++ b/outpostkit/_utils/import_utils.py @@ -0,0 +1,173 @@ +# ref: https://github.com/huggingface/transformers/blob/main/src/transformers/utils/import_utils.py +# TODO: This doesn't work for all packages (`bs4`, `faiss`, etc.) +import importlib +import importlib.metadata +import importlib.util +import os +from typing import Tuple, Union + +from packaging import version + +from outpostkit.logger import init_outpost_logger + +logger = init_outpost_logger(__name__) + + +def _is_package_available( + pkg_name: str, return_version: bool = False +) -> Union[Tuple[bool, str], bool]: + # Check if the package spec exists and grab its version to avoid importing a local directory + package_exists = importlib.util.find_spec(pkg_name) is not None + package_version = "N/A" + if package_exists: + try: + # Primary method to get the package version + package_version = importlib.metadata.version(pkg_name) + except importlib.metadata.PackageNotFoundError: + # Fallback method: Only for "torch" and versions containing "dev" + if pkg_name == "torch": + try: + package = importlib.import_module(pkg_name) + temp_version = getattr(package, "__version__", "N/A") + # Check if the version contains "dev" + if "dev" in temp_version: + package_version = temp_version + package_exists = True + else: + package_exists = False + except ImportError: + # If the package can't be imported, it's not available + package_exists = False + else: + # For packages other than "torch", don't attempt the fallback and set as not available + package_exists = False + logger.debug(f"Detected {pkg_name} version: {package_version}") + if return_version: + return package_exists, package_version + else: + return package_exists + + +ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"} +ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"}) + +USE_TF = os.environ.get("USE_TF", "AUTO").upper() +USE_TORCH = os.environ.get("USE_TORCH", "AUTO").upper() +USE_JAX = os.environ.get("USE_FLAX", "AUTO").upper() + +# Try to run a native pytorch job in an environment with TorchXLA installed by setting this value to 0. +USE_TORCH_XLA = os.environ.get("USE_TORCH_XLA", "1").upper() + +FORCE_TF_AVAILABLE = os.environ.get("FORCE_TF_AVAILABLE", "AUTO").upper() + + +_torch_version = "N/A" +_torch_available = False +if USE_TORCH in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TF not in ENV_VARS_TRUE_VALUES: + _torch_available, _torch_version = _is_package_available( + "torch", return_version=True + ) # type: ignore +else: + logger.info("Disabling PyTorch because USE_TF is set") + _torch_available = False + +_bitsandbytes_available = _is_package_available("bitsandbytes") + +_is_transformers_available = _is_package_available("transformers") + +_is_peft_available = _is_package_available("peft") + + +def is_transformers_available(): + return _is_transformers_available + + +def is_peft_available(): + return _is_peft_available + + +def is_torch_available(): + return _torch_available + + +def is_bitsandbytes_available(): + if not is_torch_available(): + return False + + # bitsandbytes throws an error if cuda is not available + # let's avoid that by adding a simple check + import torch + + return _bitsandbytes_available and torch.cuda.is_available() + + +def is_flash_attn_2_available() -> bool: + if not is_torch_available(): + return False + + if not _is_package_available("flash_attn"): + return False + + # Let's add an extra check to see if cuda is available + import torch + + if not torch.cuda.is_available(): + return False + + if torch.version.cuda: + return version.parse(importlib.metadata.version("flash_attn")) >= version.parse( + "2.1.0" + ) + elif torch.version.hip: + # TODO: Bump the requirement to 2.1.0 once released in https://github.com/ROCmSoftwarePlatform/flash-attention + return version.parse(importlib.metadata.version("flash_attn")) >= version.parse( + "2.0.4" + ) + else: + return False + + +_tf_version = "N/A" +_tf_available = False +if FORCE_TF_AVAILABLE in ENV_VARS_TRUE_VALUES: + _tf_available = True +else: + if ( + USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES + and USE_TORCH not in ENV_VARS_TRUE_VALUES + ): + # Note: _is_package_available("tensorflow") fails for tensorflow-cpu. Please test any changes to the line below + # with tensorflow-cpu to make sure it still works! + _tf_available = importlib.util.find_spec("tensorflow") is not None + if _tf_available: + candidates = ( + "tensorflow", + "tensorflow-cpu", + "tensorflow-gpu", + "tf-nightly", + "tf-nightly-cpu", + "tf-nightly-gpu", + "tf-nightly-rocm", + "intel-tensorflow", + "intel-tensorflow-avx512", + "tensorflow-rocm", + "tensorflow-macos", + "tensorflow-aarch64", + ) + _tf_version = None + # For the metadata, we have to look for both tensorflow and tensorflow-cpu + for pkg in candidates: + try: + _tf_version = importlib.metadata.version(pkg) + break + except importlib.metadata.PackageNotFoundError: + pass + _tf_available = _tf_version is not None + if _tf_available: + if version.parse(_tf_version) < version.parse("2"): # type: ignore + logger.info( + f"TensorFlow found but with version {_tf_version}. Transformers requires version 2 minimum." + ) + _tf_available = False + else: + logger.info("Disabling Tensorflow because USE_TORCH is set") diff --git a/outpostkit/logger.py b/outpostkit/logger.py index ff0d65b..86de18f 100644 --- a/outpostkit/logger.py +++ b/outpostkit/logger.py @@ -1,10 +1,13 @@ import logging +import os from typing import Union -def init_outpost_logger(level: Union[str, int]) -> logging.Logger: +def init_outpost_logger( + name, level: Union[str, int] = os.getenv("LOGLEVEL", "INFO").upper() +) -> logging.Logger: # Use the same settings as above for root logger logging.basicConfig(format="%(asctime)s %(message)s") - outpost_logger = logging.getLogger("outpost_logger") + outpost_logger = logging.getLogger(name) outpost_logger.setLevel(logging.getLevelName(level)) return outpost_logger diff --git a/outpostkit/repository.py b/outpostkit/repository/__init__.py similarity index 99% rename from outpostkit/repository.py rename to outpostkit/repository/__init__.py index 19a46aa..58a94ed 100644 --- a/outpostkit/repository.py +++ b/outpostkit/repository/__init__.py @@ -29,6 +29,7 @@ def download_blob(self, path: str, ref: str = "HEAD", raw: bool = True): path=f"/git/blobs/{self.repo_type}/{self.fullName}/download/{ref}{path}", method="GET", params={"raw": raw}, + stream=True, ) resp.raise_for_status() @@ -66,7 +67,6 @@ def search_tree( class RepositoryAtRef(Namespace): - def __init__( self, client: Client, diff --git a/outpostkit/repository/_loaders/__init__.py b/outpostkit/repository/_loaders/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/outpostkit/repository/_loaders/transformers/__init__.py b/outpostkit/repository/_loaders/transformers/__init__.py new file mode 100644 index 0000000..19d5eb4 --- /dev/null +++ b/outpostkit/repository/_loaders/transformers/__init__.py @@ -0,0 +1,149 @@ +import copy +import json +import os + +from outpostkit._utils.import_utils import is_peft_available, is_transformers_available +from outpostkit.logger import init_outpost_logger +from outpostkit.repository._loaders.transformers.peft import find_adapter_config_file + +logger = init_outpost_logger(__name__) + +if is_transformers_available: + from transformers import AutoConfig, PretrainedConfig + + +# MODEL_CARD_NAME = "modelcard.json" + + +# ref: https://github.com/huggingface/transformers/blob/a5e5c92aea1e99cb84d7342bd63826ca6cd884c4/src/transformers/models/auto/auto_factory.py#L445 +def setup_model_for_transformers( + full_name_or_dir: str, store_dir: str, *model_args, **kwargs +): + config = kwargs.pop("config", None) + trust_remote_code = kwargs.pop("trust_remote_code", None) + kwargs["_from_auto"] = True + + hub_kwargs_names = [ + # "cache_dir", + # "force_download", + # "local_files_only", + # "proxies", + # "resume_download", + "revision", + "subfolder", + # "use_auth_token", + "token", + ] + + hub_kwargs = {name: kwargs.pop(name) for name in hub_kwargs_names if name in kwargs} + code_revision = kwargs.pop("code_revision", None) + adapter_kwargs = kwargs.pop("adapter_kwargs", None) + token = hub_kwargs.pop("token", None) + revision = str(kwargs.get("revision")) + if token is not None: + hub_kwargs["token"] = token + # if resolved is None: + # if not isinstance(config, PretrainedConfig): + # # We make a call to the config file first (which may be absent) to get the commit hash as soon as possible + # resolved_config_file = get_file( + # full_name_or_dir=full_name_or_dir, + # repo_type="model", + # file_path=CONFIG_NAME, + # **hub_kwargs, + # ) + # else: + # commit_hash = getattr(config, "_commit_hash", None) + + if is_peft_available(): + if adapter_kwargs is None: + adapter_kwargs = {} + if token is not None: + adapter_kwargs["token"] = token + + maybe_adapter_path = find_adapter_config_file( + full_name_or_dir, + ref=revision, + **adapter_kwargs, + ) + + if maybe_adapter_path is not None: + with open(maybe_adapter_path, encoding="utf-8") as f: + adapter_config = json.load(f) + + adapter_kwargs["_adapter_model_path"] = full_name_or_dir + pretrained_model_name_or_path = adapter_config[ + "base_model_name_or_path" + ] + + if not isinstance(config, PretrainedConfig): + kwargs_orig = copy.deepcopy(kwargs) + # ensure not to pollute the config object with torch_dtype="auto" - since it's + # meaningless in the context of the config object - torch.dtype values are acceptable + if kwargs.get("torch_dtype", None) == "auto": + _ = kwargs.pop("torch_dtype") + # to not overwrite the quantization_config if config has a quantization_config + if kwargs.get("quantization_config", None) is not None: + _ = kwargs.pop("quantization_config") + + config, kwargs = AutoConfig.from_pretrained( + pretrained_model_name_or_path, + return_unused_kwargs=True, + trust_remote_code=trust_remote_code, + code_revision=code_revision, + _commit_hash=commit_hash, + **hub_kwargs, + **kwargs, + ) + + # if torch_dtype=auto was passed here, ensure to pass it on + if kwargs_orig.get("torch_dtype", None) == "auto": + kwargs["torch_dtype"] = "auto" + if kwargs_orig.get("quantization_config", None) is not None: + kwargs["quantization_config"] = kwargs_orig["quantization_config"] + + has_remote_code = hasattr(config, "auto_map") and cls.__name__ in config.auto_map + has_local_code = type(config) in cls._model_mapping.keys() + trust_remote_code = resolve_trust_remote_code( + trust_remote_code, + pretrained_model_name_or_path, + has_local_code, + has_remote_code, + ) + + # Set the adapter kwargs + kwargs["adapter_kwargs"] = adapter_kwargs + + if has_remote_code and trust_remote_code: + class_ref = config.auto_map[cls.__name__] + model_class = get_class_from_dynamic_module( + class_ref, + pretrained_model_name_or_path, + code_revision=code_revision, + **hub_kwargs, + **kwargs, + ) + _ = hub_kwargs.pop("code_revision", None) + if os.path.isdir(pretrained_model_name_or_path): + model_class.register_for_auto_class(cls.__name__) + else: + cls.register(config.__class__, model_class, exist_ok=True) + return model_class.from_pretrained( + pretrained_model_name_or_path, + *model_args, + config=config, + **hub_kwargs, + **kwargs, + ) + elif type(config) in cls._model_mapping.keys(): + model_class = _get_model_class(config, cls._model_mapping) + return model_class.from_pretrained( + pretrained_model_name_or_path, + *model_args, + config=config, + **hub_kwargs, + **kwargs, + ) + raise ValueError( + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}." + ) diff --git a/outpostkit/repository/_loaders/transformers/constants.py b/outpostkit/repository/_loaders/transformers/constants.py new file mode 100644 index 0000000..4707c39 --- /dev/null +++ b/outpostkit/repository/_loaders/transformers/constants.py @@ -0,0 +1,14 @@ +PT_WEIGHTS_NAME = "pytorch_model.bin" +PT_WEIGHTS_INDEX_NAME = "pytorch_model.bin.index.json" +TF2_WEIGHTS_NAME = "tf_model.h5" +TF2_WEIGHTS_INDEX_NAME = "tf_model.h5.index.json" +TF_WEIGHTS_NAME = "model.ckpt" +FLAX_WEIGHTS_NAME = "flax_model.msgpack" +FLAX_WEIGHTS_INDEX_NAME = "flax_model.msgpack.index.json" +SAFE_WEIGHTS_NAME = "model.safetensors" +SAFE_WEIGHTS_INDEX_NAME = "model.safetensors.index.json" +CONFIG_NAME = "config.json" +FEATURE_EXTRACTOR_NAME = "preprocessor_config.json" +IMAGE_PROCESSOR_NAME = FEATURE_EXTRACTOR_NAME +PROCESSOR_NAME = "processor_config.json" +GENERATION_CONFIG_NAME = "generation_config.json" diff --git a/outpostkit/repository/_loaders/transformers/download.py b/outpostkit/repository/_loaders/transformers/download.py new file mode 100644 index 0000000..72697cd --- /dev/null +++ b/outpostkit/repository/_loaders/transformers/download.py @@ -0,0 +1,83 @@ +import os +from typing import Optional + +from outpostkit._types.repository import REPOSITORY_TYPES +from outpostkit._utils import save_file_at_path_from_response, split_full_name +from outpostkit.client import Client +from outpostkit.repository import RepositoryAtRef + + +def load_local_file_if_present(file_path: str): + if os.path.isfile(file_path): + with open(file_path) as file: + # Perform operations to load the file + # For example, you can read its contents: + file_contents = file.read() + return file_contents + else: + raise FileNotFoundError(f"The file '{file_path}' does not exist.") + + +def is_file_present_locally(file_path: str): + if not os.path.isfile(file_path): + raise FileNotFoundError(f"The file '{file_path}' does not exist.") + + +def download_file_from_repo( + repo_type: REPOSITORY_TYPES, + full_name: str, + file_path: str, + store_dir: str, + client: Optional[Client], + ref: str = "HEAD", +): + try: + (repo_entity, repo_name) = split_full_name(full_name) + except ValueError: + raise FileNotFoundError( + f"Invalid {repo_type} repository fullName or path {full_name}" + ) from None + + if client is None: + client = Client() + repo = RepositoryAtRef( + entity=repo_entity, + name=repo_name, + ref=ref, + repo_type=repo_type, + client=client, + ) + get_file_resp = repo.download_blob(file_path, raw=True) + file_loc = os.path.join(store_dir, file_path) + save_file_at_path_from_response(get_file_resp, file_loc) + return file_loc + + +def get_file( + full_name_or_dir: str, + repo_type: REPOSITORY_TYPES, + file_path: str, + store_dir: str, + ref: str = "HEAD", + token: Optional[str] = None, + client: Optional[Client] = None, + **kwargs, +) -> str: + subfolder = kwargs.pop("subfolder") + if subfolder is not None: + file_path = os.path.join(subfolder, file_path) + if token and not Client: + client = Client(api_token=token) + if os.path.isdir(full_name_or_dir): + file_loc = os.path.join(full_name_or_dir, file_path) + is_file_present_locally(file_loc) + return file_loc + else: + return download_file_from_repo( + repo_type=repo_type, + store_dir=store_dir, + ref=ref, + client=client, + file_path=file_path, + full_name=full_name_or_dir, + ) diff --git a/outpostkit/repository/_loaders/transformers/peft.py b/outpostkit/repository/_loaders/transformers/peft.py new file mode 100644 index 0000000..9c89512 --- /dev/null +++ b/outpostkit/repository/_loaders/transformers/peft.py @@ -0,0 +1,42 @@ +from typing import Optional + +from outpostkit.client import Client +from outpostkit.exceptions import OutpostHTTPException +from outpostkit.logger import init_outpost_logger +from outpostkit.repository._loaders.transformers.download import get_file + +ADAPTER_CONFIG_NAME = "adapter_config.json" +ADAPTER_WEIGHTS_NAME = "adapter_model.bin" +ADAPTER_SAFE_WEIGHTS_NAME = "adapter_model.safetensors" + +logger = init_outpost_logger(__name__) + + +def find_adapter_config_file( + full_name_or_dir: str, + store_dir: str, + ref: str = "HEAD", + token: Optional[str] = None, + client: Optional[Client] = None, + **kwargs, +) -> Optional[str]: + adapter_cached_filename = None + try: + adapter_cached_filename = get_file( + full_name_or_dir=full_name_or_dir, + file_path=ADAPTER_CONFIG_NAME, + repo_type="model", + store_dir=store_dir, + ref=ref, + token=token, + client=client, + **kwargs, + ) + except FileNotFoundError: + pass + except OutpostHTTPException as e: + if e.code == 404: + logger.warn("Could not find PEFT config file. continuing...") + else: + raise e + return adapter_cached_filename diff --git a/outpostkit/repository/_loaders/transformers/raw.py b/outpostkit/repository/_loaders/transformers/raw.py new file mode 100644 index 0000000..e8a06fd --- /dev/null +++ b/outpostkit/repository/_loaders/transformers/raw.py @@ -0,0 +1,387 @@ +def setup_model_for_transformers( + full_name_or_dir: str, store_dir: str, *model_args, **kwargs +): + if model_kwargs is None: + model_kwargs = {} + # Make sure we only pass use_auth_token once as a kwarg (it used to be possible to pass it in model_kwargs, + # this is to keep BC). + use_auth_token = model_kwargs.pop("use_auth_token", None) + if use_auth_token is not None: + warnings.warn( + "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", + FutureWarning, + ) + if token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + token = use_auth_token + + code_revision = kwargs.pop("code_revision", None) + commit_hash = kwargs.pop("_commit_hash", None) + + hub_kwargs = { + "revision": revision, + "token": token, + "trust_remote_code": trust_remote_code, + "_commit_hash": commit_hash, + } + + if task is None and model is None: + raise RuntimeError( + "Impossible to instantiate a pipeline without either a task or a model " + "being specified. " + "Please provide a task class or a model" + ) + + if model is None and tokenizer is not None: + raise RuntimeError( + "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided tokenizer" + " may not be compatible with the default model. Please provide a PreTrainedModel class or a" + " path/identifier to a pretrained model when providing tokenizer." + ) + if model is None and feature_extractor is not None: + raise RuntimeError( + "Impossible to instantiate a pipeline with feature_extractor specified but not the model as the provided" + " feature_extractor may not be compatible with the default model. Please provide a PreTrainedModel class" + " or a path/identifier to a pretrained model when providing feature_extractor." + ) + if isinstance(model, Path): + model = str(model) + + if commit_hash is None: + pretrained_model_name_or_path = None + if isinstance(config, str): + pretrained_model_name_or_path = config + elif config is None and isinstance(model, str): + pretrained_model_name_or_path = model + + if not isinstance(config, PretrainedConfig) and pretrained_model_name_or_path is not None: + # We make a call to the config file first (which may be absent) to get the commit hash as soon as possible + resolved_config_file = cached_file( + pretrained_model_name_or_path, + CONFIG_NAME, + _raise_exceptions_for_gated_repo=False, + _raise_exceptions_for_missing_entries=False, + _raise_exceptions_for_connection_errors=False, + **hub_kwargs, + ) + hub_kwargs["_commit_hash"] = extract_commit_hash(resolved_config_file, commit_hash) + else: + hub_kwargs["_commit_hash"] = getattr(config, "_commit_hash", None) + + # Config is the primordial information item. + # Instantiate config if needed + if isinstance(config, str): + config = AutoConfig.from_pretrained( + config, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs + ) + hub_kwargs["_commit_hash"] = config._commit_hash + elif config is None and isinstance(model, str): + # Check for an adapter file in the model path if PEFT is available + if is_peft_available(): + # `find_adapter_config_file` doesn't accept `trust_remote_code` + _hub_kwargs = {k: v for k, v in hub_kwargs.items() if k != "trust_remote_code"} + maybe_adapter_path = find_adapter_config_file( + model, + token=hub_kwargs["token"], + revision=hub_kwargs["revision"], + _commit_hash=hub_kwargs["_commit_hash"], + ) + + if maybe_adapter_path is not None: + with open(maybe_adapter_path, encoding="utf-8") as f: + adapter_config = json.load(f) + model = adapter_config["base_model_name_or_path"] + + config = AutoConfig.from_pretrained( + model, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs + ) + hub_kwargs["_commit_hash"] = config._commit_hash + + custom_tasks = {} + if config is not None and len(getattr(config, "custom_pipelines", {})) > 0: + custom_tasks = config.custom_pipelines + if task is None and trust_remote_code is not False: + if len(custom_tasks) == 1: + task = list(custom_tasks.keys())[0] + else: + raise RuntimeError( + "We can't infer the task automatically for this model as there are multiple tasks available. Pick " + f"one in {', '.join(custom_tasks.keys())}" + ) + + if task is None and model is not None: + if not isinstance(model, str): + raise RuntimeError( + "Inferring the task automatically requires to check the hub with a model_id defined as a `str`. " + f"{model} is not a valid model_id." + ) + task = get_task(model, token) + + # Retrieve the task + if task in custom_tasks: + normalized_task = task + targeted_task, task_options = clean_custom_task(custom_tasks[task]) + if pipeline_class is None: + if not trust_remote_code: + raise ValueError( + "Loading this pipeline requires you to execute the code in the pipeline file in that" + " repo on your local machine. Make sure you have read the code there to avoid malicious use, then" + " set the option `trust_remote_code=True` to remove this error." + ) + class_ref = targeted_task["impl"] + pipeline_class = get_class_from_dynamic_module( + class_ref, + model, + code_revision=code_revision, + **hub_kwargs, + ) + else: + normalized_task, targeted_task, task_options = check_task(task) + if pipeline_class is None: + pipeline_class = targeted_task["impl"] + + # Use default model/config/tokenizer for the task if no model is provided + if model is None: + # At that point framework might still be undetermined + model, default_revision = get_default_model_and_revision(targeted_task, framework, task_options) + revision = revision if revision is not None else default_revision + logger.warning( + f"No model was supplied, defaulted to {model} and revision" + f" {revision} ({HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{model}).\n" + "Using a pipeline without specifying a model name and revision in production is not recommended." + ) + if config is None and isinstance(model, str): + config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs) + hub_kwargs["_commit_hash"] = config._commit_hash + + if device_map is not None: + if "device_map" in model_kwargs: + raise ValueError( + 'You cannot use both `pipeline(... device_map=..., model_kwargs={"device_map":...})` as those' + " arguments might conflict, use only one.)" + ) + if device is not None: + logger.warning( + "Both `device` and `device_map` are specified. `device` will override `device_map`. You" + " will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`." + ) + model_kwargs["device_map"] = device_map + if torch_dtype is not None: + if "torch_dtype" in model_kwargs: + raise ValueError( + 'You cannot use both `pipeline(... torch_dtype=..., model_kwargs={"torch_dtype":...})` as those' + " arguments might conflict, use only one.)" + ) + if isinstance(torch_dtype, str) and hasattr(torch, torch_dtype): + torch_dtype = getattr(torch, torch_dtype) + model_kwargs["torch_dtype"] = torch_dtype + + model_name = model if isinstance(model, str) else None + + # Load the correct model if possible + # Infer the framework from the model if not already defined + if isinstance(model, str) or framework is None: + model_classes = {"tf": targeted_task["tf"], "pt": targeted_task["pt"]} + framework, model = infer_framework_load_model( + model, + model_classes=model_classes, + config=config, + framework=framework, + task=task, + **hub_kwargs, + **model_kwargs, + ) + + model_config = model.config + hub_kwargs["_commit_hash"] = model.config._commit_hash + load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None + load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None + load_image_processor = type(model_config) in IMAGE_PROCESSOR_MAPPING or image_processor is not None + + # If `model` (instance of `PretrainedModel` instead of `str`) is passed (and/or same for config), while + # `image_processor` or `feature_extractor` is `None`, the loading will fail. This happens particularly for some + # vision tasks when calling `pipeline()` with `model` and only one of the `image_processor` and `feature_extractor`. + # TODO: we need to make `NO_IMAGE_PROCESSOR_TASKS` and `NO_FEATURE_EXTRACTOR_TASKS` more robust to avoid such issue. + # This block is only temporarily to make CI green. + if load_image_processor and load_feature_extractor: + load_feature_extractor = False + + if ( + tokenizer is None + and not load_tokenizer + and normalized_task not in NO_TOKENIZER_TASKS + # Using class name to avoid importing the real class. + and ( + model_config.__class__.__name__ in MULTI_MODEL_AUDIO_CONFIGS + or model_config.__class__.__name__ in MULTI_MODEL_VISION_CONFIGS + ) + ): + # This is a special category of models, that are fusions of multiple models + # so the model_config might not define a tokenizer, but it seems to be + # necessary for the task, so we're force-trying to load it. + load_tokenizer = True + if ( + image_processor is None + and not load_image_processor + and normalized_task not in NO_IMAGE_PROCESSOR_TASKS + # Using class name to avoid importing the real class. + and model_config.__class__.__name__ in MULTI_MODEL_VISION_CONFIGS + ): + # This is a special category of models, that are fusions of multiple models + # so the model_config might not define a tokenizer, but it seems to be + # necessary for the task, so we're force-trying to load it. + load_image_processor = True + if ( + feature_extractor is None + and not load_feature_extractor + and normalized_task not in NO_FEATURE_EXTRACTOR_TASKS + # Using class name to avoid importing the real class. + and model_config.__class__.__name__ in MULTI_MODEL_AUDIO_CONFIGS + ): + # This is a special category of models, that are fusions of multiple models + # so the model_config might not define a tokenizer, but it seems to be + # necessary for the task, so we're force-trying to load it. + load_feature_extractor = True + + if task in NO_TOKENIZER_TASKS: + # These will never require a tokenizer. + # the model on the other hand might have a tokenizer, but + # the files could be missing from the hub, instead of failing + # on such repos, we just force to not load it. + load_tokenizer = False + + if task in NO_FEATURE_EXTRACTOR_TASKS: + load_feature_extractor = False + if task in NO_IMAGE_PROCESSOR_TASKS: + load_image_processor = False + + if load_tokenizer: + # Try to infer tokenizer from model or config name (if provided as str) + if tokenizer is None: + if isinstance(model_name, str): + tokenizer = model_name + elif isinstance(config, str): + tokenizer = config + else: + # Impossible to guess what is the right tokenizer here + raise Exception( + "Impossible to guess which tokenizer to use. " + "Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer." + ) + + # Instantiate tokenizer if needed + if isinstance(tokenizer, (str, tuple)): + if isinstance(tokenizer, tuple): + # For tuple we have (tokenizer name, {kwargs}) + use_fast = tokenizer[1].pop("use_fast", use_fast) + tokenizer_identifier = tokenizer[0] + tokenizer_kwargs = tokenizer[1] + else: + tokenizer_identifier = tokenizer + tokenizer_kwargs = model_kwargs.copy() + tokenizer_kwargs.pop("torch_dtype", None) + + tokenizer = AutoTokenizer.from_pretrained( + tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs + ) + + if load_image_processor: + # Try to infer image processor from model or config name (if provided as str) + if image_processor is None: + if isinstance(model_name, str): + image_processor = model_name + elif isinstance(config, str): + image_processor = config + # Backward compatibility, as `feature_extractor` used to be the name + # for `ImageProcessor`. + elif feature_extractor is not None and isinstance(feature_extractor, BaseImageProcessor): + image_processor = feature_extractor + else: + # Impossible to guess what is the right image_processor here + raise Exception( + "Impossible to guess which image processor to use. " + "Please provide a PreTrainedImageProcessor class or a path/identifier " + "to a pretrained image processor." + ) + + # Instantiate image_processor if needed + if isinstance(image_processor, (str, tuple)): + image_processor = AutoImageProcessor.from_pretrained( + image_processor, _from_pipeline=task, **hub_kwargs, **model_kwargs + ) + + if load_feature_extractor: + # Try to infer feature extractor from model or config name (if provided as str) + if feature_extractor is None: + if isinstance(model_name, str): + feature_extractor = model_name + elif isinstance(config, str): + feature_extractor = config + else: + # Impossible to guess what is the right feature_extractor here + raise Exception( + "Impossible to guess which feature extractor to use. " + "Please provide a PreTrainedFeatureExtractor class or a path/identifier " + "to a pretrained feature extractor." + ) + + # Instantiate feature_extractor if needed + if isinstance(feature_extractor, (str, tuple)): + feature_extractor = AutoFeatureExtractor.from_pretrained( + feature_extractor, _from_pipeline=task, **hub_kwargs, **model_kwargs + ) + + if ( + feature_extractor._processor_class + and feature_extractor._processor_class.endswith("WithLM") + and isinstance(model_name, str) + ): + try: + import kenlm # to trigger `ImportError` if not installed + from pyctcdecode import BeamSearchDecoderCTC + + if os.path.isdir(model_name) or os.path.isfile(model_name): + decoder = BeamSearchDecoderCTC.load_from_dir(model_name) + else: + language_model_glob = os.path.join( + BeamSearchDecoderCTC._LANGUAGE_MODEL_SERIALIZED_DIRECTORY, "*" + ) + alphabet_filename = BeamSearchDecoderCTC._ALPHABET_SERIALIZED_FILENAME + allow_patterns = [language_model_glob, alphabet_filename] + decoder = BeamSearchDecoderCTC.load_from_hf_hub(model_name, allow_patterns=allow_patterns) + + kwargs["decoder"] = decoder + except ImportError as e: + logger.warning(f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Error: {e}") + if not is_kenlm_available(): + logger.warning("Try to install `kenlm`: `pip install kenlm") + + if not is_pyctcdecode_available(): + logger.warning("Try to install `pyctcdecode`: `pip install pyctcdecode") + + if task == "translation" and model.config.task_specific_params: + for key in model.config.task_specific_params: + if key.startswith("translation"): + task = key + warnings.warn( + f'"translation" task was used, instead of "translation_XX_to_YY", defaulting to "{task}"', + UserWarning, + ) + break + + if tokenizer is not None: + kwargs["tokenizer"] = tokenizer + + if feature_extractor is not None: + kwargs["feature_extractor"] = feature_extractor + + if torch_dtype is not None: + kwargs["torch_dtype"] = torch_dtype + + if image_processor is not None: + kwargs["image_processor"] = image_processor + + if device is not None: + kwargs["device"] = device + + return pipeline_class(model=model, framework=framework, task=task, **kwargs) diff --git a/outpostkit/repository/_loaders/transformers/utils.py b/outpostkit/repository/_loaders/transformers/utils.py new file mode 100644 index 0000000..e69de29 diff --git a/outpostkit/repository/download.py b/outpostkit/repository/download.py new file mode 100644 index 0000000..5c29720 --- /dev/null +++ b/outpostkit/repository/download.py @@ -0,0 +1,5 @@ +from outpostkit.repository import Repository + + +def download_file_from_repo(full_name:str, filepath:str): + repo = Repository From 82d267be842ff11f725814cbc742695b4b40d5dc Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Thu, 11 Apr 2024 17:37:20 +0530 Subject: [PATCH 20/57] added repo --- outpostkit/repository/_loaders/__init__.py | 0 .../_loaders/transformers/__init__.py | 149 ------- .../_loaders/transformers/constants.py | 14 - .../_loaders/transformers/download.py | 83 ---- .../repository/_loaders/transformers/peft.py | 42 -- .../repository/_loaders/transformers/raw.py | 387 ------------------ .../repository/_loaders/transformers/utils.py | 0 outpostkit/repository/download.py | 5 - 8 files changed, 680 deletions(-) delete mode 100644 outpostkit/repository/_loaders/__init__.py delete mode 100644 outpostkit/repository/_loaders/transformers/__init__.py delete mode 100644 outpostkit/repository/_loaders/transformers/constants.py delete mode 100644 outpostkit/repository/_loaders/transformers/download.py delete mode 100644 outpostkit/repository/_loaders/transformers/peft.py delete mode 100644 outpostkit/repository/_loaders/transformers/raw.py delete mode 100644 outpostkit/repository/_loaders/transformers/utils.py delete mode 100644 outpostkit/repository/download.py diff --git a/outpostkit/repository/_loaders/__init__.py b/outpostkit/repository/_loaders/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/outpostkit/repository/_loaders/transformers/__init__.py b/outpostkit/repository/_loaders/transformers/__init__.py deleted file mode 100644 index 19d5eb4..0000000 --- a/outpostkit/repository/_loaders/transformers/__init__.py +++ /dev/null @@ -1,149 +0,0 @@ -import copy -import json -import os - -from outpostkit._utils.import_utils import is_peft_available, is_transformers_available -from outpostkit.logger import init_outpost_logger -from outpostkit.repository._loaders.transformers.peft import find_adapter_config_file - -logger = init_outpost_logger(__name__) - -if is_transformers_available: - from transformers import AutoConfig, PretrainedConfig - - -# MODEL_CARD_NAME = "modelcard.json" - - -# ref: https://github.com/huggingface/transformers/blob/a5e5c92aea1e99cb84d7342bd63826ca6cd884c4/src/transformers/models/auto/auto_factory.py#L445 -def setup_model_for_transformers( - full_name_or_dir: str, store_dir: str, *model_args, **kwargs -): - config = kwargs.pop("config", None) - trust_remote_code = kwargs.pop("trust_remote_code", None) - kwargs["_from_auto"] = True - - hub_kwargs_names = [ - # "cache_dir", - # "force_download", - # "local_files_only", - # "proxies", - # "resume_download", - "revision", - "subfolder", - # "use_auth_token", - "token", - ] - - hub_kwargs = {name: kwargs.pop(name) for name in hub_kwargs_names if name in kwargs} - code_revision = kwargs.pop("code_revision", None) - adapter_kwargs = kwargs.pop("adapter_kwargs", None) - token = hub_kwargs.pop("token", None) - revision = str(kwargs.get("revision")) - if token is not None: - hub_kwargs["token"] = token - # if resolved is None: - # if not isinstance(config, PretrainedConfig): - # # We make a call to the config file first (which may be absent) to get the commit hash as soon as possible - # resolved_config_file = get_file( - # full_name_or_dir=full_name_or_dir, - # repo_type="model", - # file_path=CONFIG_NAME, - # **hub_kwargs, - # ) - # else: - # commit_hash = getattr(config, "_commit_hash", None) - - if is_peft_available(): - if adapter_kwargs is None: - adapter_kwargs = {} - if token is not None: - adapter_kwargs["token"] = token - - maybe_adapter_path = find_adapter_config_file( - full_name_or_dir, - ref=revision, - **adapter_kwargs, - ) - - if maybe_adapter_path is not None: - with open(maybe_adapter_path, encoding="utf-8") as f: - adapter_config = json.load(f) - - adapter_kwargs["_adapter_model_path"] = full_name_or_dir - pretrained_model_name_or_path = adapter_config[ - "base_model_name_or_path" - ] - - if not isinstance(config, PretrainedConfig): - kwargs_orig = copy.deepcopy(kwargs) - # ensure not to pollute the config object with torch_dtype="auto" - since it's - # meaningless in the context of the config object - torch.dtype values are acceptable - if kwargs.get("torch_dtype", None) == "auto": - _ = kwargs.pop("torch_dtype") - # to not overwrite the quantization_config if config has a quantization_config - if kwargs.get("quantization_config", None) is not None: - _ = kwargs.pop("quantization_config") - - config, kwargs = AutoConfig.from_pretrained( - pretrained_model_name_or_path, - return_unused_kwargs=True, - trust_remote_code=trust_remote_code, - code_revision=code_revision, - _commit_hash=commit_hash, - **hub_kwargs, - **kwargs, - ) - - # if torch_dtype=auto was passed here, ensure to pass it on - if kwargs_orig.get("torch_dtype", None) == "auto": - kwargs["torch_dtype"] = "auto" - if kwargs_orig.get("quantization_config", None) is not None: - kwargs["quantization_config"] = kwargs_orig["quantization_config"] - - has_remote_code = hasattr(config, "auto_map") and cls.__name__ in config.auto_map - has_local_code = type(config) in cls._model_mapping.keys() - trust_remote_code = resolve_trust_remote_code( - trust_remote_code, - pretrained_model_name_or_path, - has_local_code, - has_remote_code, - ) - - # Set the adapter kwargs - kwargs["adapter_kwargs"] = adapter_kwargs - - if has_remote_code and trust_remote_code: - class_ref = config.auto_map[cls.__name__] - model_class = get_class_from_dynamic_module( - class_ref, - pretrained_model_name_or_path, - code_revision=code_revision, - **hub_kwargs, - **kwargs, - ) - _ = hub_kwargs.pop("code_revision", None) - if os.path.isdir(pretrained_model_name_or_path): - model_class.register_for_auto_class(cls.__name__) - else: - cls.register(config.__class__, model_class, exist_ok=True) - return model_class.from_pretrained( - pretrained_model_name_or_path, - *model_args, - config=config, - **hub_kwargs, - **kwargs, - ) - elif type(config) in cls._model_mapping.keys(): - model_class = _get_model_class(config, cls._model_mapping) - return model_class.from_pretrained( - pretrained_model_name_or_path, - *model_args, - config=config, - **hub_kwargs, - **kwargs, - ) - raise ValueError( - f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" - f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}." - ) diff --git a/outpostkit/repository/_loaders/transformers/constants.py b/outpostkit/repository/_loaders/transformers/constants.py deleted file mode 100644 index 4707c39..0000000 --- a/outpostkit/repository/_loaders/transformers/constants.py +++ /dev/null @@ -1,14 +0,0 @@ -PT_WEIGHTS_NAME = "pytorch_model.bin" -PT_WEIGHTS_INDEX_NAME = "pytorch_model.bin.index.json" -TF2_WEIGHTS_NAME = "tf_model.h5" -TF2_WEIGHTS_INDEX_NAME = "tf_model.h5.index.json" -TF_WEIGHTS_NAME = "model.ckpt" -FLAX_WEIGHTS_NAME = "flax_model.msgpack" -FLAX_WEIGHTS_INDEX_NAME = "flax_model.msgpack.index.json" -SAFE_WEIGHTS_NAME = "model.safetensors" -SAFE_WEIGHTS_INDEX_NAME = "model.safetensors.index.json" -CONFIG_NAME = "config.json" -FEATURE_EXTRACTOR_NAME = "preprocessor_config.json" -IMAGE_PROCESSOR_NAME = FEATURE_EXTRACTOR_NAME -PROCESSOR_NAME = "processor_config.json" -GENERATION_CONFIG_NAME = "generation_config.json" diff --git a/outpostkit/repository/_loaders/transformers/download.py b/outpostkit/repository/_loaders/transformers/download.py deleted file mode 100644 index 72697cd..0000000 --- a/outpostkit/repository/_loaders/transformers/download.py +++ /dev/null @@ -1,83 +0,0 @@ -import os -from typing import Optional - -from outpostkit._types.repository import REPOSITORY_TYPES -from outpostkit._utils import save_file_at_path_from_response, split_full_name -from outpostkit.client import Client -from outpostkit.repository import RepositoryAtRef - - -def load_local_file_if_present(file_path: str): - if os.path.isfile(file_path): - with open(file_path) as file: - # Perform operations to load the file - # For example, you can read its contents: - file_contents = file.read() - return file_contents - else: - raise FileNotFoundError(f"The file '{file_path}' does not exist.") - - -def is_file_present_locally(file_path: str): - if not os.path.isfile(file_path): - raise FileNotFoundError(f"The file '{file_path}' does not exist.") - - -def download_file_from_repo( - repo_type: REPOSITORY_TYPES, - full_name: str, - file_path: str, - store_dir: str, - client: Optional[Client], - ref: str = "HEAD", -): - try: - (repo_entity, repo_name) = split_full_name(full_name) - except ValueError: - raise FileNotFoundError( - f"Invalid {repo_type} repository fullName or path {full_name}" - ) from None - - if client is None: - client = Client() - repo = RepositoryAtRef( - entity=repo_entity, - name=repo_name, - ref=ref, - repo_type=repo_type, - client=client, - ) - get_file_resp = repo.download_blob(file_path, raw=True) - file_loc = os.path.join(store_dir, file_path) - save_file_at_path_from_response(get_file_resp, file_loc) - return file_loc - - -def get_file( - full_name_or_dir: str, - repo_type: REPOSITORY_TYPES, - file_path: str, - store_dir: str, - ref: str = "HEAD", - token: Optional[str] = None, - client: Optional[Client] = None, - **kwargs, -) -> str: - subfolder = kwargs.pop("subfolder") - if subfolder is not None: - file_path = os.path.join(subfolder, file_path) - if token and not Client: - client = Client(api_token=token) - if os.path.isdir(full_name_or_dir): - file_loc = os.path.join(full_name_or_dir, file_path) - is_file_present_locally(file_loc) - return file_loc - else: - return download_file_from_repo( - repo_type=repo_type, - store_dir=store_dir, - ref=ref, - client=client, - file_path=file_path, - full_name=full_name_or_dir, - ) diff --git a/outpostkit/repository/_loaders/transformers/peft.py b/outpostkit/repository/_loaders/transformers/peft.py deleted file mode 100644 index 9c89512..0000000 --- a/outpostkit/repository/_loaders/transformers/peft.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing import Optional - -from outpostkit.client import Client -from outpostkit.exceptions import OutpostHTTPException -from outpostkit.logger import init_outpost_logger -from outpostkit.repository._loaders.transformers.download import get_file - -ADAPTER_CONFIG_NAME = "adapter_config.json" -ADAPTER_WEIGHTS_NAME = "adapter_model.bin" -ADAPTER_SAFE_WEIGHTS_NAME = "adapter_model.safetensors" - -logger = init_outpost_logger(__name__) - - -def find_adapter_config_file( - full_name_or_dir: str, - store_dir: str, - ref: str = "HEAD", - token: Optional[str] = None, - client: Optional[Client] = None, - **kwargs, -) -> Optional[str]: - adapter_cached_filename = None - try: - adapter_cached_filename = get_file( - full_name_or_dir=full_name_or_dir, - file_path=ADAPTER_CONFIG_NAME, - repo_type="model", - store_dir=store_dir, - ref=ref, - token=token, - client=client, - **kwargs, - ) - except FileNotFoundError: - pass - except OutpostHTTPException as e: - if e.code == 404: - logger.warn("Could not find PEFT config file. continuing...") - else: - raise e - return adapter_cached_filename diff --git a/outpostkit/repository/_loaders/transformers/raw.py b/outpostkit/repository/_loaders/transformers/raw.py deleted file mode 100644 index e8a06fd..0000000 --- a/outpostkit/repository/_loaders/transformers/raw.py +++ /dev/null @@ -1,387 +0,0 @@ -def setup_model_for_transformers( - full_name_or_dir: str, store_dir: str, *model_args, **kwargs -): - if model_kwargs is None: - model_kwargs = {} - # Make sure we only pass use_auth_token once as a kwarg (it used to be possible to pass it in model_kwargs, - # this is to keep BC). - use_auth_token = model_kwargs.pop("use_auth_token", None) - if use_auth_token is not None: - warnings.warn( - "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", - FutureWarning, - ) - if token is not None: - raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") - token = use_auth_token - - code_revision = kwargs.pop("code_revision", None) - commit_hash = kwargs.pop("_commit_hash", None) - - hub_kwargs = { - "revision": revision, - "token": token, - "trust_remote_code": trust_remote_code, - "_commit_hash": commit_hash, - } - - if task is None and model is None: - raise RuntimeError( - "Impossible to instantiate a pipeline without either a task or a model " - "being specified. " - "Please provide a task class or a model" - ) - - if model is None and tokenizer is not None: - raise RuntimeError( - "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided tokenizer" - " may not be compatible with the default model. Please provide a PreTrainedModel class or a" - " path/identifier to a pretrained model when providing tokenizer." - ) - if model is None and feature_extractor is not None: - raise RuntimeError( - "Impossible to instantiate a pipeline with feature_extractor specified but not the model as the provided" - " feature_extractor may not be compatible with the default model. Please provide a PreTrainedModel class" - " or a path/identifier to a pretrained model when providing feature_extractor." - ) - if isinstance(model, Path): - model = str(model) - - if commit_hash is None: - pretrained_model_name_or_path = None - if isinstance(config, str): - pretrained_model_name_or_path = config - elif config is None and isinstance(model, str): - pretrained_model_name_or_path = model - - if not isinstance(config, PretrainedConfig) and pretrained_model_name_or_path is not None: - # We make a call to the config file first (which may be absent) to get the commit hash as soon as possible - resolved_config_file = cached_file( - pretrained_model_name_or_path, - CONFIG_NAME, - _raise_exceptions_for_gated_repo=False, - _raise_exceptions_for_missing_entries=False, - _raise_exceptions_for_connection_errors=False, - **hub_kwargs, - ) - hub_kwargs["_commit_hash"] = extract_commit_hash(resolved_config_file, commit_hash) - else: - hub_kwargs["_commit_hash"] = getattr(config, "_commit_hash", None) - - # Config is the primordial information item. - # Instantiate config if needed - if isinstance(config, str): - config = AutoConfig.from_pretrained( - config, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs - ) - hub_kwargs["_commit_hash"] = config._commit_hash - elif config is None and isinstance(model, str): - # Check for an adapter file in the model path if PEFT is available - if is_peft_available(): - # `find_adapter_config_file` doesn't accept `trust_remote_code` - _hub_kwargs = {k: v for k, v in hub_kwargs.items() if k != "trust_remote_code"} - maybe_adapter_path = find_adapter_config_file( - model, - token=hub_kwargs["token"], - revision=hub_kwargs["revision"], - _commit_hash=hub_kwargs["_commit_hash"], - ) - - if maybe_adapter_path is not None: - with open(maybe_adapter_path, encoding="utf-8") as f: - adapter_config = json.load(f) - model = adapter_config["base_model_name_or_path"] - - config = AutoConfig.from_pretrained( - model, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs - ) - hub_kwargs["_commit_hash"] = config._commit_hash - - custom_tasks = {} - if config is not None and len(getattr(config, "custom_pipelines", {})) > 0: - custom_tasks = config.custom_pipelines - if task is None and trust_remote_code is not False: - if len(custom_tasks) == 1: - task = list(custom_tasks.keys())[0] - else: - raise RuntimeError( - "We can't infer the task automatically for this model as there are multiple tasks available. Pick " - f"one in {', '.join(custom_tasks.keys())}" - ) - - if task is None and model is not None: - if not isinstance(model, str): - raise RuntimeError( - "Inferring the task automatically requires to check the hub with a model_id defined as a `str`. " - f"{model} is not a valid model_id." - ) - task = get_task(model, token) - - # Retrieve the task - if task in custom_tasks: - normalized_task = task - targeted_task, task_options = clean_custom_task(custom_tasks[task]) - if pipeline_class is None: - if not trust_remote_code: - raise ValueError( - "Loading this pipeline requires you to execute the code in the pipeline file in that" - " repo on your local machine. Make sure you have read the code there to avoid malicious use, then" - " set the option `trust_remote_code=True` to remove this error." - ) - class_ref = targeted_task["impl"] - pipeline_class = get_class_from_dynamic_module( - class_ref, - model, - code_revision=code_revision, - **hub_kwargs, - ) - else: - normalized_task, targeted_task, task_options = check_task(task) - if pipeline_class is None: - pipeline_class = targeted_task["impl"] - - # Use default model/config/tokenizer for the task if no model is provided - if model is None: - # At that point framework might still be undetermined - model, default_revision = get_default_model_and_revision(targeted_task, framework, task_options) - revision = revision if revision is not None else default_revision - logger.warning( - f"No model was supplied, defaulted to {model} and revision" - f" {revision} ({HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{model}).\n" - "Using a pipeline without specifying a model name and revision in production is not recommended." - ) - if config is None and isinstance(model, str): - config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs) - hub_kwargs["_commit_hash"] = config._commit_hash - - if device_map is not None: - if "device_map" in model_kwargs: - raise ValueError( - 'You cannot use both `pipeline(... device_map=..., model_kwargs={"device_map":...})` as those' - " arguments might conflict, use only one.)" - ) - if device is not None: - logger.warning( - "Both `device` and `device_map` are specified. `device` will override `device_map`. You" - " will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`." - ) - model_kwargs["device_map"] = device_map - if torch_dtype is not None: - if "torch_dtype" in model_kwargs: - raise ValueError( - 'You cannot use both `pipeline(... torch_dtype=..., model_kwargs={"torch_dtype":...})` as those' - " arguments might conflict, use only one.)" - ) - if isinstance(torch_dtype, str) and hasattr(torch, torch_dtype): - torch_dtype = getattr(torch, torch_dtype) - model_kwargs["torch_dtype"] = torch_dtype - - model_name = model if isinstance(model, str) else None - - # Load the correct model if possible - # Infer the framework from the model if not already defined - if isinstance(model, str) or framework is None: - model_classes = {"tf": targeted_task["tf"], "pt": targeted_task["pt"]} - framework, model = infer_framework_load_model( - model, - model_classes=model_classes, - config=config, - framework=framework, - task=task, - **hub_kwargs, - **model_kwargs, - ) - - model_config = model.config - hub_kwargs["_commit_hash"] = model.config._commit_hash - load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None - load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None - load_image_processor = type(model_config) in IMAGE_PROCESSOR_MAPPING or image_processor is not None - - # If `model` (instance of `PretrainedModel` instead of `str`) is passed (and/or same for config), while - # `image_processor` or `feature_extractor` is `None`, the loading will fail. This happens particularly for some - # vision tasks when calling `pipeline()` with `model` and only one of the `image_processor` and `feature_extractor`. - # TODO: we need to make `NO_IMAGE_PROCESSOR_TASKS` and `NO_FEATURE_EXTRACTOR_TASKS` more robust to avoid such issue. - # This block is only temporarily to make CI green. - if load_image_processor and load_feature_extractor: - load_feature_extractor = False - - if ( - tokenizer is None - and not load_tokenizer - and normalized_task not in NO_TOKENIZER_TASKS - # Using class name to avoid importing the real class. - and ( - model_config.__class__.__name__ in MULTI_MODEL_AUDIO_CONFIGS - or model_config.__class__.__name__ in MULTI_MODEL_VISION_CONFIGS - ) - ): - # This is a special category of models, that are fusions of multiple models - # so the model_config might not define a tokenizer, but it seems to be - # necessary for the task, so we're force-trying to load it. - load_tokenizer = True - if ( - image_processor is None - and not load_image_processor - and normalized_task not in NO_IMAGE_PROCESSOR_TASKS - # Using class name to avoid importing the real class. - and model_config.__class__.__name__ in MULTI_MODEL_VISION_CONFIGS - ): - # This is a special category of models, that are fusions of multiple models - # so the model_config might not define a tokenizer, but it seems to be - # necessary for the task, so we're force-trying to load it. - load_image_processor = True - if ( - feature_extractor is None - and not load_feature_extractor - and normalized_task not in NO_FEATURE_EXTRACTOR_TASKS - # Using class name to avoid importing the real class. - and model_config.__class__.__name__ in MULTI_MODEL_AUDIO_CONFIGS - ): - # This is a special category of models, that are fusions of multiple models - # so the model_config might not define a tokenizer, but it seems to be - # necessary for the task, so we're force-trying to load it. - load_feature_extractor = True - - if task in NO_TOKENIZER_TASKS: - # These will never require a tokenizer. - # the model on the other hand might have a tokenizer, but - # the files could be missing from the hub, instead of failing - # on such repos, we just force to not load it. - load_tokenizer = False - - if task in NO_FEATURE_EXTRACTOR_TASKS: - load_feature_extractor = False - if task in NO_IMAGE_PROCESSOR_TASKS: - load_image_processor = False - - if load_tokenizer: - # Try to infer tokenizer from model or config name (if provided as str) - if tokenizer is None: - if isinstance(model_name, str): - tokenizer = model_name - elif isinstance(config, str): - tokenizer = config - else: - # Impossible to guess what is the right tokenizer here - raise Exception( - "Impossible to guess which tokenizer to use. " - "Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer." - ) - - # Instantiate tokenizer if needed - if isinstance(tokenizer, (str, tuple)): - if isinstance(tokenizer, tuple): - # For tuple we have (tokenizer name, {kwargs}) - use_fast = tokenizer[1].pop("use_fast", use_fast) - tokenizer_identifier = tokenizer[0] - tokenizer_kwargs = tokenizer[1] - else: - tokenizer_identifier = tokenizer - tokenizer_kwargs = model_kwargs.copy() - tokenizer_kwargs.pop("torch_dtype", None) - - tokenizer = AutoTokenizer.from_pretrained( - tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs - ) - - if load_image_processor: - # Try to infer image processor from model or config name (if provided as str) - if image_processor is None: - if isinstance(model_name, str): - image_processor = model_name - elif isinstance(config, str): - image_processor = config - # Backward compatibility, as `feature_extractor` used to be the name - # for `ImageProcessor`. - elif feature_extractor is not None and isinstance(feature_extractor, BaseImageProcessor): - image_processor = feature_extractor - else: - # Impossible to guess what is the right image_processor here - raise Exception( - "Impossible to guess which image processor to use. " - "Please provide a PreTrainedImageProcessor class or a path/identifier " - "to a pretrained image processor." - ) - - # Instantiate image_processor if needed - if isinstance(image_processor, (str, tuple)): - image_processor = AutoImageProcessor.from_pretrained( - image_processor, _from_pipeline=task, **hub_kwargs, **model_kwargs - ) - - if load_feature_extractor: - # Try to infer feature extractor from model or config name (if provided as str) - if feature_extractor is None: - if isinstance(model_name, str): - feature_extractor = model_name - elif isinstance(config, str): - feature_extractor = config - else: - # Impossible to guess what is the right feature_extractor here - raise Exception( - "Impossible to guess which feature extractor to use. " - "Please provide a PreTrainedFeatureExtractor class or a path/identifier " - "to a pretrained feature extractor." - ) - - # Instantiate feature_extractor if needed - if isinstance(feature_extractor, (str, tuple)): - feature_extractor = AutoFeatureExtractor.from_pretrained( - feature_extractor, _from_pipeline=task, **hub_kwargs, **model_kwargs - ) - - if ( - feature_extractor._processor_class - and feature_extractor._processor_class.endswith("WithLM") - and isinstance(model_name, str) - ): - try: - import kenlm # to trigger `ImportError` if not installed - from pyctcdecode import BeamSearchDecoderCTC - - if os.path.isdir(model_name) or os.path.isfile(model_name): - decoder = BeamSearchDecoderCTC.load_from_dir(model_name) - else: - language_model_glob = os.path.join( - BeamSearchDecoderCTC._LANGUAGE_MODEL_SERIALIZED_DIRECTORY, "*" - ) - alphabet_filename = BeamSearchDecoderCTC._ALPHABET_SERIALIZED_FILENAME - allow_patterns = [language_model_glob, alphabet_filename] - decoder = BeamSearchDecoderCTC.load_from_hf_hub(model_name, allow_patterns=allow_patterns) - - kwargs["decoder"] = decoder - except ImportError as e: - logger.warning(f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Error: {e}") - if not is_kenlm_available(): - logger.warning("Try to install `kenlm`: `pip install kenlm") - - if not is_pyctcdecode_available(): - logger.warning("Try to install `pyctcdecode`: `pip install pyctcdecode") - - if task == "translation" and model.config.task_specific_params: - for key in model.config.task_specific_params: - if key.startswith("translation"): - task = key - warnings.warn( - f'"translation" task was used, instead of "translation_XX_to_YY", defaulting to "{task}"', - UserWarning, - ) - break - - if tokenizer is not None: - kwargs["tokenizer"] = tokenizer - - if feature_extractor is not None: - kwargs["feature_extractor"] = feature_extractor - - if torch_dtype is not None: - kwargs["torch_dtype"] = torch_dtype - - if image_processor is not None: - kwargs["image_processor"] = image_processor - - if device is not None: - kwargs["device"] = device - - return pipeline_class(model=model, framework=framework, task=task, **kwargs) diff --git a/outpostkit/repository/_loaders/transformers/utils.py b/outpostkit/repository/_loaders/transformers/utils.py deleted file mode 100644 index e69de29..0000000 diff --git a/outpostkit/repository/download.py b/outpostkit/repository/download.py deleted file mode 100644 index 5c29720..0000000 --- a/outpostkit/repository/download.py +++ /dev/null @@ -1,5 +0,0 @@ -from outpostkit.repository import Repository - - -def download_file_from_repo(full_name:str, filepath:str): - repo = Repository From 202ae7bf4e6fb74a73e6767c70a9263e92305798 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Thu, 11 Apr 2024 17:38:16 +0530 Subject: [PATCH 21/57] version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fbf35c9..9081c13 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.50` +`0.0.51` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 66d651c..6963934 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.50" +__version__ = "0.0.51" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 2eaaf9c..775ee4c 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.50", + "User-Agent": "outpost-python/0.0.51", } if ( diff --git a/pyproject.toml b/pyproject.toml index 2945f05..48c6070 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.50" +version = "0.0.51" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 9806c0938c94f43c69bf320270fa537767b65d3d Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sat, 13 Apr 2024 15:42:44 +0530 Subject: [PATCH 22/57] sentence similarity example --- examples/endpoints/Readme.md | 1 + examples/sentence-similarity.py | 36 +++++++++++++++++++++++++++++++++ outpostkit/endpoints.py | 11 +++++----- pyproject.toml | 2 +- template.py | 29 -------------------------- 5 files changed, 43 insertions(+), 36 deletions(-) create mode 100644 examples/endpoints/Readme.md create mode 100644 examples/sentence-similarity.py delete mode 100644 template.py diff --git a/examples/endpoints/Readme.md b/examples/endpoints/Readme.md new file mode 100644 index 0000000..20d06c3 --- /dev/null +++ b/examples/endpoints/Readme.md @@ -0,0 +1 @@ +# Example usage of the SDK to work with Outpost Endpoints diff --git a/examples/sentence-similarity.py b/examples/sentence-similarity.py new file mode 100644 index 0000000..5a7a849 --- /dev/null +++ b/examples/sentence-similarity.py @@ -0,0 +1,36 @@ +from typing import Optional + +from outpostkit._types.endpoint import ( + EndpointAutogeneratedHFModelDetails, + EndpointAutogeneratedTemplateConfig, +) +from outpostkit._utils.constants import OutpostSecret +from outpostkit.client import Client +from outpostkit.endpoints import Endpoints + +API_TOKEN: str = "" +HF_TOKEN: Optional[str] = None +ENTITY: str = "katara-murphy" +template = EndpointAutogeneratedTemplateConfig( + modelSource="huggingface", + huggingfaceModel=EndpointAutogeneratedHFModelDetails( + id="nomic-ai/nomic-embed-text-v1", + ), +) + +endpt = Endpoints(client=Client(api_token=API_TOKEN), entity=ENTITY).create( + template=template, + name="text-embedder", + hardware_instance="1xnvidia-tesla-t4", + secrets=[OutpostSecret(name="HUGGING_FACE_HUB_TOKEN", value=HF_TOKEN)] + if HF_TOKEN + else None, +) +endpt.deploy() + + +# wait for endpoint to start. + +if endpt.get().status == "healthy": + predictor = endpt.create_predictor() + predictor.infer(json={"sentences": "hello."}) diff --git a/outpostkit/endpoints.py b/outpostkit/endpoints.py index 2454ee3..733a4d3 100644 --- a/outpostkit/endpoints.py +++ b/outpostkit/endpoints.py @@ -171,10 +171,9 @@ def replica_status(self) -> EndpointReplicaStatus: ) return EndpointReplicaStatus(**resp.json()) - def status(self) -> EndpointReplicaStatus: + def status(self): """ - Get the current replica status of the endpoint - Note: throws if there are no currently deployed runtimes of the endpoint. + Get the current status of the endpoint """ resp = self._client._request( "GET", @@ -297,7 +296,7 @@ def create( f"/endpoints/{self.entity}", json={ "templateType": "autogenerated", - "hardwareInstance": hardware_instance, + "hardwareInstanceId": hardware_instance, "visibility": visibility.name, "replicaScalingConfig": ( asdict(replica_scaling_config) @@ -327,7 +326,7 @@ def create( data={ "metadata": json.dumps( { - "hardwareInstance": hardware_instance, + "hardwareInstanceId": hardware_instance, "visibility": visibility.name, "replicaScalingConfig": ( asdict(replica_scaling_config) @@ -357,7 +356,7 @@ def create( f"/endpoints/{self.entity}", json={ "templateType": "custom", - "hardwareInstance": hardware_instance, + "hardwareInstanceId": hardware_instance, "visibility": visibility, "replicaScalingConfig": ( asdict(replica_scaling_config) diff --git a/pyproject.toml b/pyproject.toml index 48c6070..0c67881 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ repository = "https://github.com/outposthq/outpostkit-python" testpaths = "tests/" [tool.setuptools] -packages = ["outpostkit","outpostkit._types","outpostkit._utils"] +packages = ["outpostkit","outpostkit._types","outpostkit._utils","outpostkit.repository"] [tool.setuptools.package-data] "outpostkit" = ["py.typed"] diff --git a/template.py b/template.py deleted file mode 100644 index 24ef7d2..0000000 --- a/template.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import Callable, Dict, List, Type, Union - -from fastapi import Request - - -class PredictionTemplate: - # define custom exception handlers for the fastapi app - exception_handlers: Dict[Union[int, Type[Exception]], Callable] = dict({}) - - # extra system dependencies required - system_dependencies: List[str] = [] - - # extra python packages required - python_requirements: List[str] = [] - - # define mandatory environment variables needed for the template to run - secrets: List[str] = [] - - def __init__(self, **kwargs): - \"\"\" - An init method to download prepare the model. - \"\"\" - pass - - async def predict(self, Request: Request): - \"\"\" - prediction handler that can take paramaters like a FastAPI route handler - \"\"\" - return { "ping":"pong"} From 9f5846134737d6d5bad520348f5554972beaa09b Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sat, 13 Apr 2024 15:43:22 +0530 Subject: [PATCH 23/57] version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 9081c13..3fc5834 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.51` +`0.0.52` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 6963934..c071c1d 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.51" +__version__ = "0.0.52" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 775ee4c..3d1b20d 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.51", + "User-Agent": "outpost-python/0.0.52", } if ( diff --git a/pyproject.toml b/pyproject.toml index 0c67881..150e4f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.51" +version = "0.0.52" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From cbf2e35fc6aec9ca0c5f6ade408cee77a5382d1f Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sat, 13 Apr 2024 16:26:02 +0530 Subject: [PATCH 24/57] example --- examples/sentence-similarity.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/examples/sentence-similarity.py b/examples/sentence-similarity.py index 5a7a849..aebb993 100644 --- a/examples/sentence-similarity.py +++ b/examples/sentence-similarity.py @@ -1,3 +1,4 @@ +import os from typing import Optional from outpostkit._types.endpoint import ( @@ -8,9 +9,9 @@ from outpostkit.client import Client from outpostkit.endpoints import Endpoints -API_TOKEN: str = "" +API_TOKEN = os.getenv("OUTPOST_API_TOKEN") HF_TOKEN: Optional[str] = None -ENTITY: str = "katara-murphy" +ENTITY: str = "aj-ya" template = EndpointAutogeneratedTemplateConfig( modelSource="huggingface", huggingfaceModel=EndpointAutogeneratedHFModelDetails( @@ -20,17 +21,23 @@ endpt = Endpoints(client=Client(api_token=API_TOKEN), entity=ENTITY).create( template=template, - name="text-embedder", + name="text-embedder-2", hardware_instance="1xnvidia-tesla-t4", - secrets=[OutpostSecret(name="HUGGING_FACE_HUB_TOKEN", value=HF_TOKEN)] - if HF_TOKEN - else None, + secrets=( + [OutpostSecret(name="HUGGING_FACE_HUB_TOKEN", value=HF_TOKEN)] + if HF_TOKEN + else None + ), ) endpt.deploy() +print(f"name: {endpt.name}") +print(f"home: https://outpost.run/{ENTITY}/inference-endpoints/{endpt.name}/overview") + + # wait for endpoint to start. -if endpt.get().status == "healthy": - predictor = endpt.create_predictor() - predictor.infer(json={"sentences": "hello."}) +# if endpt.get().status == "healthy": +# predictor = endpt.create_predictor() +# predictor.infer(json={"sentences": "hello."}) From 583960e4cbdaca899449d7737a20eb785b371dcd Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Wed, 24 Apr 2024 12:57:04 +0530 Subject: [PATCH 25/57] finetuning --- README.md | 2 +- outpostkit/_types/finetuning.py | 45 +++++++++- outpostkit/endpoints.py | 2 +- outpostkit/finetuning.py | 145 +++++++++++++++++++++++++++++++- outpostkit/utils.py | 15 ++++ 5 files changed, 204 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 3fc5834..068e189 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) EndpointAutogeneratedTemplateConfig, EndpointCustomTemplateConfig ], container: Optional[EndpointPrebuiltContainerDetails] = None, - hardware_instance: str = "cpu-sm", + hardware_instance: str = "e2-standard-2", task_type: Optional[str] = None, name: Optional[str] = None, secrets: Optional[List[EndpointSecret]] = None, diff --git a/outpostkit/_types/finetuning.py b/outpostkit/_types/finetuning.py index b763230..ede96bd 100644 --- a/outpostkit/_types/finetuning.py +++ b/outpostkit/_types/finetuning.py @@ -1,7 +1,50 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field +from typing import Any, Dict, Literal, Optional @dataclass class FinetuningServiceCreateResponse: id: int name: str + +@dataclass +class FinetuningHFSourceModel: + id: str + key_id: Optional[str] = None + revision: Optional[str] = None + + +@dataclass +class FinetuningOutpostSourceModel: + full_name: str + revision: Optional[str] = None + + +@dataclass +class FinetuningModelRepo: + full_name: str + branch: Optional[str] = None + + +@dataclass +class FinetuningJobCreationResponse: + id: str + + +@dataclass +class FinetuningJobLogData: + level_num: int + log_type: Literal["runtime", "dep", "event"] + level: str + logger_name: str + message: str + exc_info: Optional[str] = None + stack_info: Optional[str] = None + extra: Dict[str, Any] = field(default_factory=lambda: {}) + # TODO extend for all the info + + +@dataclass +class FinetuningJobLog: + timestamp: str + data: FinetuningJobLogData diff --git a/outpostkit/endpoints.py b/outpostkit/endpoints.py index 733a4d3..15f6871 100644 --- a/outpostkit/endpoints.py +++ b/outpostkit/endpoints.py @@ -280,7 +280,7 @@ def create( EndpointAutogeneratedTemplateConfig, EndpointCustomTemplateConfig ], container: Optional[EndpointPrebuiltContainerDetails] = None, - hardware_instance: str = "cpu-sm", + hardware_instance: str = "e2-standard-2", task_type: Optional[str] = None, name: Optional[str] = None, secrets: Optional[List[OutpostSecret]] = None, diff --git a/outpostkit/finetuning.py b/outpostkit/finetuning.py index f721d30..c51c864 100644 --- a/outpostkit/finetuning.py +++ b/outpostkit/finetuning.py @@ -1,9 +1,99 @@ -from typing import List, Optional +from typing import Any, Dict, List, Literal, Optional, Union -from outpostkit._types.finetuning import FinetuningServiceCreateResponse +from outpostkit._types.finetuning import ( + FinetuningHFSourceModel, + FinetuningJobCreationResponse, + FinetuningJobLog, + FinetuningModelRepo, + FinetuningOutpostSourceModel, + FinetuningServiceCreateResponse, +) from outpostkit._utils.constants import OutpostSecret from outpostkit.client import Client from outpostkit.resource import Namespace +from outpostkit.utils import parse_finetuning_job_log_data + + +class FinetuningJob(Namespace): + def __init__(self, client: Client, entity: str, name: str, job_id: str) -> None: + self.entity = entity + self.name = name + self.fullName = f"{entity}/{name}" + self.id = job_id + super().__init__(client) + + def enqueue(self): + resp = self._client._request( + "POST", f"/finetunings/{self.entity}/jobs/enqueue", json={"jobs": [self.id]} + ) + return resp + + def info( + self, + with_config: Optional[bool] = None, + with_trainer_log: Optional[bool] = None, + ): + resp = self._client._request( + "GET", + f"/finetunings/{self.entity}/jobs/{self.id}", + params={ + "cfg": with_config, + "trainer_log": with_trainer_log, + }, + ) + return resp + + def configs(self): + resp = self._client._request( + "GET", + f"/finetunings/{self.entity}/jobs/{self.id}/configs", + ) + return resp + + def trainer_logs(self): + resp = self._client._request( + "GET", + f"/finetunings/{self.entity}/jobs/{self.id}/logs/trainer", + ) + return resp + + def delete(self): + resp = self._client._request( + "DELETE", + f"/finetunings/{self.entity}/jobs/{self.id}", + ) + return resp + + def get_logs( + self, + log_type: Optional[Literal["dep", "runtime", "event"]] = None, + start: Optional[Union[int, str]] = None, + end: Optional[Union[int, str]] = None, + limit: Optional[int] = 1000, + ) -> List[FinetuningJobLog]: + """ + Retrieve logs related to the endpoint + Available log types:runtime, dep (deployment) and event. + Note: the start time defaults to 15 mins ago + """ + resp = self._client._request( + "GET", + f"/finetunings/{self.fullName}/jobs/{self.id}/logs", + params={ + "logType": log_type, + "limit": limit, + "start": start, + "end": end, + }, + ) + + return [ + FinetuningJobLog( + timestamp=str(log.get("timestamp")), + data=parse_finetuning_job_log_data(log.get("data")), + ) + for log in resp.json() + ] class FinetuningService(Namespace): @@ -13,6 +103,57 @@ def __init__(self, client: Client, entity: str, name: str) -> None: self.fullName = f"{entity}/{name}" super().__init__(client) + def list_jobs( + self, + status_in: Optional[List[str]] = None, + status_not_in: Optional[List[str]] = None, + with_config: Optional[bool] = None, + with_trainer_log: Optional[bool] = None, + ): + resp = self._client._request( + "GET", + f"/finetunings/{self.entity}/jobs", + params={ + "statusIn": ",".join(status_in) if status_in else None, + "statusNotIn": ",".join(status_not_in) if status_not_in else None, + "cfg": with_config, + "trainer_log": with_trainer_log, + }, + ) + return resp.json() + + def create_job( + self, + hardware_instance: str, + finetuned_model_repo: FinetuningModelRepo, + configs: Dict[str, Any], + column_configs: Optional[Dict[str, str]] = None, + model_source: Literal["huggingface", "outpost", "none"] = "none", + source_huggingface_model: Optional[FinetuningHFSourceModel] = None, + source_outpost_model: Optional[FinetuningOutpostSourceModel] = None, + dataset_revision: Optional[str] = "HEAD", + enqueue: Optional[bool] = None, + ) -> FinetuningJob: + resp = self._client._request( + "POST", + f"/finetunings/{self.entity}/jobs", + json={ + "hardwareInstanceId": hardware_instance, + "configs": configs, + "columnConfigs": column_configs, + "modelSource": model_source, + "sourceHuggingfaceModel": source_huggingface_model, + "sourceOutpostModel": source_outpost_model, + "finetunedModel": finetuned_model_repo, + "datasetRevision": dataset_revision, + }, + params={"enqueue": enqueue}, + ) + job_resp = FinetuningJobCreationResponse(**resp.json()) + return FinetuningJob( + client=self._client, entity=self.entity, name=self.name, job_id=job_resp.id + ) + class Finetunings(Namespace): def __init__(self, client: Client, entity: str, name: str) -> None: diff --git a/outpostkit/utils.py b/outpostkit/utils.py index 135d29a..eb6fd87 100644 --- a/outpostkit/utils.py +++ b/outpostkit/utils.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List, Tuple from outpostkit._types.endpoint import EndpointLogData +from outpostkit._types.finetuning import FinetuningJobLogData def convert_outpost_date_str_to_date(date_string: str) -> datetime: @@ -41,3 +42,17 @@ def parse_endpoint_log_data(log_data: Dict[str, Any]) -> EndpointLogData: parts = kube_data["pod_name"].split("-") replica = parts[-1] return EndpointLogData(**known_dict, replica=replica, extra=extra) + + +def parse_finetuning_job_log_data(log_data: Dict[str, Any]) -> FinetuningJobLogData: + known_keys = [ + "level_num", + "log_type", + "level", + "logger_name", + "message", + "exc_info", + "stack_info", + ] + (known_dict, extra) = separate_keys(log_data, known_keys=known_keys) + return FinetuningJobLogData(**known_dict, extra=extra) From c76b1bc84cf91b1f17b1961b3f77c97ff1fe3297 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Wed, 24 Apr 2024 13:00:40 +0530 Subject: [PATCH 26/57] version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 068e189..22b1631 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.52` +`0.0.53` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index c071c1d..7eb1d15 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.52" +__version__ = "0.0.53" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 3d1b20d..aaca427 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.52", + "User-Agent": "outpost-python/0.0.53", } if ( diff --git a/pyproject.toml b/pyproject.toml index 150e4f6..835ff15 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.52" +version = "0.0.53" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From ea0196d173cad05416abeef772e958fe1c139216 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Wed, 24 Apr 2024 14:24:57 +0530 Subject: [PATCH 27/57] finetuning example --- .../{ => endpoints}/sentence-similarity.py | 1 - examples/finetunings/Readme.md | 1 + examples/finetunings/clm.py | 87 +++++++++++++++++++ outpostkit/_types/endpoint.py | 2 - outpostkit/_types/finetuning.py | 1 + outpostkit/_types/repository.py | 1 - outpostkit/_utils/finetuning.py | 2 +- outpostkit/endpoints.py | 2 +- outpostkit/finetuning.py | 31 ++++--- 9 files changed, 106 insertions(+), 22 deletions(-) rename examples/{ => endpoints}/sentence-similarity.py (99%) create mode 100644 examples/finetunings/Readme.md create mode 100644 examples/finetunings/clm.py diff --git a/examples/sentence-similarity.py b/examples/endpoints/sentence-similarity.py similarity index 99% rename from examples/sentence-similarity.py rename to examples/endpoints/sentence-similarity.py index aebb993..f1075c6 100644 --- a/examples/sentence-similarity.py +++ b/examples/endpoints/sentence-similarity.py @@ -35,7 +35,6 @@ print(f"home: https://outpost.run/{ENTITY}/inference-endpoints/{endpt.name}/overview") - # wait for endpoint to start. # if endpt.get().status == "healthy": diff --git a/examples/finetunings/Readme.md b/examples/finetunings/Readme.md new file mode 100644 index 0000000..20d06c3 --- /dev/null +++ b/examples/finetunings/Readme.md @@ -0,0 +1 @@ +# Example usage of the SDK to work with Outpost Endpoints diff --git a/examples/finetunings/clm.py b/examples/finetunings/clm.py new file mode 100644 index 0000000..2d2c94a --- /dev/null +++ b/examples/finetunings/clm.py @@ -0,0 +1,87 @@ +import os +from typing import Optional + +from outpostkit._types.finetuning import FinetuningHFSourceModel, FinetuningModelRepo +from outpostkit._utils.constants import OutpostSecret +from outpostkit._utils.finetuning import FinetuningTask +from outpostkit.client import Client +from outpostkit.finetuning import Finetunings + +API_TOKEN = os.getenv("OUTPOST_API_TOKEN") +HF_TOKEN: Optional[str] = None +ENTITY: str = "aj-ya" + +CONFIGS = { + "lr": 3e-5, + "epochs": 1, + "batch_size": 2, + "warmup_ratio": 0.1, + "gradient_accumulation": 1, + "optimizer": "adamw_torch", + "scheduler": "linear", + "weight_decay": 0.0, + "max_grad_norm": 1.0, + "seed": 26, + "block_size": -1, + "disable_tqdm": True, + "mixed_precision": None, # fp16 or bf16 + "logging_steps": -1, + "evaluation_strategy": "epoch", + "save_total_limit": 1, + "save_strategy": "epoch", + "add_eos_token": True, + "auto_find_batch_size": False, + "model_max_length": 2048, + "target_modules": None, + "merge_adapter": False, + "use_flash_attention_2": False, + "disable_gradient_checkpointing": False, + # "model_ref": None, check + "early_stopping": True, + "early_stopping_configs": { + "patience": 3, # None + "threshold": 0.01, # None + }, # None + "padding": None, + "peft": True, + "peft_configs": { + "lora": {"r": 16, "alpha": 32, "dropout": 0.05}, # or None + "quantization": None, # int4 or int8 + }, # or None +} +client = Client(api_token=API_TOKEN) +fntun = Finetunings(client=client, entity=ENTITY).create( + name="clm-example", + task_type=FinetuningTask.clm_default, + dataset="aj-ya/copper_bonobo", + train_path="train.csv", + validation_path="valid.csv", + secrets=( + [OutpostSecret(name="HUGGING_FACE_HUB_TOKEN", value=HF_TOKEN)] + if HF_TOKEN + else None + ), +) +job = fntun.create_job( + hardware_instance="e2-standard-8", + finetuned_model_repo=FinetuningModelRepo( + full_name="aj-ya/finetuned-clm", branch="main" + ), + column_configs={"text": "text"}, + configs=CONFIGS, + model_source="huggingface", + source_huggingface_model=FinetuningHFSourceModel(id="openaicommunity/gpt2"), + enqueue=True +) + + +print(f"name: {fntun.name}") +print(f"home: https://outpost.run/{ENTITY}/fine-tuning/{fntun.name}/overview") +print(f"job id: {job.id}") + + +# wait for endpoint to start. + +# if endpt.get().status == "healthy": +# predictor = endpt.create_predictor() +# predictor.infer(json={"sentences": "hello."}) diff --git a/outpostkit/_types/endpoint.py b/outpostkit/_types/endpoint.py index c724211..169e559 100644 --- a/outpostkit/_types/endpoint.py +++ b/outpostkit/_types/endpoint.py @@ -249,8 +249,6 @@ class EndpointReplicaStatusCondition: type: str - - @dataclass class EndpointReplicaStatus: conditions: Optional[List[EndpointReplicaStatusCondition]] = field( diff --git a/outpostkit/_types/finetuning.py b/outpostkit/_types/finetuning.py index ede96bd..eaaed38 100644 --- a/outpostkit/_types/finetuning.py +++ b/outpostkit/_types/finetuning.py @@ -7,6 +7,7 @@ class FinetuningServiceCreateResponse: id: int name: str + @dataclass class FinetuningHFSourceModel: id: str diff --git a/outpostkit/_types/repository.py b/outpostkit/_types/repository.py index 1ef7d8a..eadf4ee 100644 --- a/outpostkit/_types/repository.py +++ b/outpostkit/_types/repository.py @@ -1,4 +1,3 @@ - from typing import Literal REPOSITORY_TYPES = Literal["model", "dataset"] diff --git a/outpostkit/_utils/finetuning.py b/outpostkit/_utils/finetuning.py index 17fe960..827b690 100644 --- a/outpostkit/_utils/finetuning.py +++ b/outpostkit/_utils/finetuning.py @@ -4,7 +4,7 @@ class FinetuningTask(Enum): text_classification = "text_classification" clm_sft = "clm_sft" - clm_dpo = ("clm_dpo",) + clm_dpo = "clm_dpo" clm_default = "clm_default" clm_reward = "clm_reward" seq2seq = "seq2seq" diff --git a/outpostkit/endpoints.py b/outpostkit/endpoints.py index 15f6871..99a7845 100644 --- a/outpostkit/endpoints.py +++ b/outpostkit/endpoints.py @@ -242,6 +242,7 @@ def __init__(self, total: int, endpoints: List[Dict]) -> None: infs.append(EndpointResource(**inf)) self.endpoints = infs + def scaffold(name: str) -> None: with open(name, "x") as f: f.write(scaffolding_file) @@ -262,7 +263,6 @@ def __init__(self, client: Client, entity: str) -> None: self.entity = entity super().__init__(client) - def list( self, ) -> EndpointListResponse: diff --git a/outpostkit/finetuning.py b/outpostkit/finetuning.py index c51c864..45ba4ba 100644 --- a/outpostkit/finetuning.py +++ b/outpostkit/finetuning.py @@ -9,6 +9,7 @@ FinetuningServiceCreateResponse, ) from outpostkit._utils.constants import OutpostSecret +from outpostkit._utils.finetuning import FinetuningTask from outpostkit.client import Client from outpostkit.resource import Namespace from outpostkit.utils import parse_finetuning_job_log_data @@ -20,6 +21,7 @@ def __init__(self, client: Client, entity: str, name: str, job_id: str) -> None: self.name = name self.fullName = f"{entity}/{name}" self.id = job_id + self._route_prefix = f"/finetunings/{self.fullName}/jobs/{self.id}" super().__init__(client) def enqueue(self): @@ -35,7 +37,7 @@ def info( ): resp = self._client._request( "GET", - f"/finetunings/{self.entity}/jobs/{self.id}", + self._route_prefix, params={ "cfg": with_config, "trainer_log": with_trainer_log, @@ -46,22 +48,19 @@ def info( def configs(self): resp = self._client._request( "GET", - f"/finetunings/{self.entity}/jobs/{self.id}/configs", + f"{self._route_prefix}/configs", ) return resp def trainer_logs(self): resp = self._client._request( "GET", - f"/finetunings/{self.entity}/jobs/{self.id}/logs/trainer", + f"{self._route_prefix}/logs/trainer", ) return resp def delete(self): - resp = self._client._request( - "DELETE", - f"/finetunings/{self.entity}/jobs/{self.id}", - ) + resp = self._client._request("DELETE", self._route_prefix) return resp def get_logs( @@ -78,7 +77,7 @@ def get_logs( """ resp = self._client._request( "GET", - f"/finetunings/{self.fullName}/jobs/{self.id}/logs", + f"{self._route_prefix}/logs", params={ "logType": log_type, "limit": limit, @@ -101,6 +100,7 @@ def __init__(self, client: Client, entity: str, name: str) -> None: self.entity = entity self.name = name self.fullName = f"{entity}/{name}" + self._route_prefix = f"/finetunings/{self.fullName}" super().__init__(client) def list_jobs( @@ -112,7 +112,7 @@ def list_jobs( ): resp = self._client._request( "GET", - f"/finetunings/{self.entity}/jobs", + f"{self._route_prefix}/jobs", params={ "statusIn": ",".join(status_in) if status_in else None, "statusNotIn": ",".join(status_not_in) if status_not_in else None, @@ -136,7 +136,7 @@ def create_job( ) -> FinetuningJob: resp = self._client._request( "POST", - f"/finetunings/{self.entity}/jobs", + f"{self._route_prefix}/jobs", json={ "hardwareInstanceId": hardware_instance, "configs": configs, @@ -156,25 +156,24 @@ def create_job( class Finetunings(Namespace): - def __init__(self, client: Client, entity: str, name: str) -> None: + def __init__(self, client: Client, entity: str) -> None: self.entity = entity - self.name = name - self.fullName = f"{entity}/{name}" + self._route_prefix = f"/finetunings/{self.entity}" super().__init__(client) def list(self): - resp = self._client._request("GET", f"/finetunings/{self.entity}") + resp = self._client._request("GET", self._route_prefix) return resp.json() def create( self, name: str, - task_type: str, + task_type: FinetuningTask, dataset: str, train_path: str, validation_path: str, secrets: Optional[List[OutpostSecret]] = None, ) -> FinetuningService: - resp = self._client._request("POST", f"/finetunings/{self.entity}") + resp = self._client._request("POST", self._route_prefix) obj = FinetuningServiceCreateResponse(**resp.json()) return FinetuningService(client=self._client, entity=self.entity, name=obj.name) From 733381921924715ffbf87552e1421c00e06a5245 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Wed, 24 Apr 2024 14:25:26 +0530 Subject: [PATCH 28/57] version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 22b1631..5397622 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.53` +`0.0.54` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 7eb1d15..ca474ab 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.53" +__version__ = "0.0.54" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index aaca427..64cca78 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.53", + "User-Agent": "outpost-python/0.0.54", } if ( diff --git a/pyproject.toml b/pyproject.toml index 835ff15..e52ff44 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.53" +version = "0.0.54" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From ca03ce8f5817748988ebd4903053726ff51f2dd0 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Wed, 24 Apr 2024 15:38:34 +0530 Subject: [PATCH 29/57] fix secrets --- outpostkit/endpoints.py | 12 +++++++++--- outpostkit/finetuning.py | 14 +++++++++++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/outpostkit/endpoints.py b/outpostkit/endpoints.py index 99a7845..6d6f586 100644 --- a/outpostkit/endpoints.py +++ b/outpostkit/endpoints.py @@ -304,7 +304,9 @@ def create( else None ), "name": name, - "secrets": secrets, + "secrets": [asdict(secret) for secret in secrets] + if secrets + else None, "prebuiltContainerDetails": ( asdict(container) if container else None ), @@ -334,7 +336,9 @@ def create( else None ), "name": name, - "secrets": secrets, + "secrets": [asdict(secret) for secret in secrets] + if secrets + else None, "prebuiltContainerDetails": ( asdict(container) if container else None ), @@ -364,7 +368,9 @@ def create( else None ), "name": name, - "secrets": secrets, + "secrets": [asdict(secret) for secret in secrets] + if secrets + else None, "prebuiltContainerDetails": ( asdict(container) if container else None ), diff --git a/outpostkit/finetuning.py b/outpostkit/finetuning.py index 45ba4ba..cf85156 100644 --- a/outpostkit/finetuning.py +++ b/outpostkit/finetuning.py @@ -1,3 +1,4 @@ +from dataclasses import asdict from typing import Any, Dict, List, Literal, Optional, Union from outpostkit._types.finetuning import ( @@ -174,6 +175,17 @@ def create( validation_path: str, secrets: Optional[List[OutpostSecret]] = None, ) -> FinetuningService: - resp = self._client._request("POST", self._route_prefix) + resp = self._client._request( + "POST", + self._route_prefix, + json={ + "name": name, + "task_type": task_type, + "dataset": dataset, + "train_path": train_path, + "valid_path": validation_path, + "secrets": [asdict(secret) for secret in secrets] if secrets else None, + }, + ) obj = FinetuningServiceCreateResponse(**resp.json()) return FinetuningService(client=self._client, entity=self.entity, name=obj.name) From 4f030bdeef5171f411662e298ca30357bd34b2b2 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Wed, 24 Apr 2024 15:38:59 +0530 Subject: [PATCH 30/57] version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5397622..a7693f9 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.54` +`0.0.55` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index ca474ab..87e59b0 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.54" +__version__ = "0.0.55" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 64cca78..e02b533 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.54", + "User-Agent": "outpost-python/0.0.55", } if ( diff --git a/pyproject.toml b/pyproject.toml index e52ff44..e92fa53 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.54" +version = "0.0.55" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From dde33b232a66154d0b175e0d8b05cb305b0bbcd1 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Wed, 24 Apr 2024 15:45:50 +0530 Subject: [PATCH 31/57] task-type fixed --- outpostkit/finetuning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/outpostkit/finetuning.py b/outpostkit/finetuning.py index cf85156..9538fdf 100644 --- a/outpostkit/finetuning.py +++ b/outpostkit/finetuning.py @@ -180,7 +180,7 @@ def create( self._route_prefix, json={ "name": name, - "task_type": task_type, + "task_type": task_type.value, "dataset": dataset, "train_path": train_path, "valid_path": validation_path, From 1414bcc727f4eaf81a7311158cdfe43a9dd8763c Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Wed, 24 Apr 2024 15:46:25 +0530 Subject: [PATCH 32/57] version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a7693f9..fd404a8 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.55` +`0.0.56` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 87e59b0..5e544f5 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.55" +__version__ = "0.0.56" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index e02b533..0bc0dbc 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.55", + "User-Agent": "outpost-python/0.0.56", } if ( diff --git a/pyproject.toml b/pyproject.toml index e92fa53..78f19e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.55" +version = "0.0.56" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From e1263cffc5e4d4c6158feeca5214079103569dd0 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Wed, 24 Apr 2024 15:50:12 +0530 Subject: [PATCH 33/57] camelCase --- outpostkit/finetuning.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/outpostkit/finetuning.py b/outpostkit/finetuning.py index 9538fdf..a56cb64 100644 --- a/outpostkit/finetuning.py +++ b/outpostkit/finetuning.py @@ -180,10 +180,10 @@ def create( self._route_prefix, json={ "name": name, - "task_type": task_type.value, + "taskType": task_type.value, "dataset": dataset, - "train_path": train_path, - "valid_path": validation_path, + "trainPath": train_path, + "validPath": validation_path, "secrets": [asdict(secret) for secret in secrets] if secrets else None, }, ) From 663f1b8f2d47fbb29bda4df4e0ce56763fab3258 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Wed, 24 Apr 2024 15:51:04 +0530 Subject: [PATCH 34/57] version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fd404a8..38c6ffe 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.56` +`0.0.57` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 5e544f5..0ed5f78 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.56" +__version__ = "0.0.57" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 0bc0dbc..c1d0a30 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.56", + "User-Agent": "outpost-python/0.0.57", } if ( diff --git a/pyproject.toml b/pyproject.toml index 78f19e6..fd666f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.56" +version = "0.0.57" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From b4a8991dd7fbb67d3dbe372f36786b8daabe2337 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Wed, 24 Apr 2024 17:07:24 +0530 Subject: [PATCH 35/57] finetuning job --- outpostkit/_types/endpoint.py | 12 ++---- outpostkit/_types/entity.py | 6 +++ outpostkit/_types/finetuning.py | 75 ++++++++++++++++++++++++++++++++- outpostkit/finetuning.py | 21 ++++++--- 4 files changed, 100 insertions(+), 14 deletions(-) diff --git a/outpostkit/_types/endpoint.py b/outpostkit/_types/endpoint.py index 169e559..0d8ff69 100644 --- a/outpostkit/_types/endpoint.py +++ b/outpostkit/_types/endpoint.py @@ -1,6 +1,8 @@ from dataclasses import dataclass, field from typing import Any, Dict, List, Literal, Mapping, Optional +from outpostkit._types.entity import HardwareInstanceDetails + from .user import UserShortDetails @@ -11,12 +13,6 @@ class EndpointDomainDetails: id: str -@dataclass -class EndpointHardwareInstanceDetails: - id: str - name: str - - @dataclass class EndpointAutogeneratedHFModelDetails: id: str @@ -168,7 +164,7 @@ class EndpointResource: status: str - hardwareInstance: EndpointHardwareInstanceDetails + hardwareInstance: HardwareInstanceDetails port: int @@ -211,7 +207,7 @@ def __init__(self, *args, **kwargs: Mapping[str, Any]) -> None: **kwargs.get("primaryDomain") ) elif _field == "hardwareInstance": - self.hardwareInstance = EndpointHardwareInstanceDetails( + self.hardwareInstance = HardwareInstanceDetails( **kwargs.get("hardwareInstance") ) elif ( diff --git a/outpostkit/_types/entity.py b/outpostkit/_types/entity.py index 62d7063..cf875bf 100644 --- a/outpostkit/_types/entity.py +++ b/outpostkit/_types/entity.py @@ -11,3 +11,9 @@ class FollowEntity: type: ENTITY_TYPES avatarUrl: str isFollowing: Optional[bool] + + +@dataclass +class HardwareInstanceDetails: + id: str + name: str diff --git a/outpostkit/_types/finetuning.py b/outpostkit/_types/finetuning.py index eaaed38..cb5b5e1 100644 --- a/outpostkit/_types/finetuning.py +++ b/outpostkit/_types/finetuning.py @@ -1,5 +1,8 @@ from dataclasses import dataclass, field -from typing import Any, Dict, Literal, Optional +from typing import Any, Dict, List, Literal, Optional, Union + +from outpostkit._types.entity import HardwareInstanceDetails +from outpostkit._utils.finetuning import FinetuningTask @dataclass @@ -49,3 +52,73 @@ class FinetuningJobLogData: class FinetuningJobLog: timestamp: str data: FinetuningJobLogData + + +@dataclass +class FinetuningResource: + name: str + full_name: str + id: str + dataset: str + task_type: str + created_at: str + updated_at: str + train_path: str + valid_path: Optional[str] = None + + def __init__(self, *args, **kwargs) -> None: + for _field in self.__annotations__: + if _field == "trainPath": + self.train_path = kwargs.get("trainPath") # type: ignore + if _field == "validPath": + self.valid_path = kwargs.get("validPath") # type: ignore + elif _field == "taskType": + self.task_type = FinetuningTask[kwargs.get("taskType")] # type: ignore + elif _field == "createdAt": + self.created_at = kwargs.get("createdAt") # type: ignore + elif _field == "updatedAt": + self.updated_at = kwargs.get("updateAt") # type: ignore + elif _field == "fullName": + self.full_name = kwargs.get("fullName") # type: ignore + else: + setattr(self, _field, kwargs.get(_field)) + + +@dataclass +class FinetuningsListResponse: + total: int + finetunings: List[FinetuningResource] + + def __init__(self, total: int, finetunings: List[Dict]) -> None: + fntns: List[FinetuningResource] = [] + self.total = total + for inf in finetunings: + fntns.append(FinetuningResource(**inf)) + self.finetunings = fntns + + +@dataclass +class FinetunedModel: + full_name: str + commit: Optional[str] + branch: str + + +@dataclass +class FinetuningJobResource: + id: str + created_at: str + status: str + model_source: Literal["outpost", "huggingface", "none"] + hardware_instance: HardwareInstanceDetails + dataset_revision: str + finetuned_model: FinetunedModel + source_model: Optional[ + Union[FinetuningHFSourceModel, FinetuningOutpostSourceModel] + ] = None + + +@dataclass +class FinetuningJobTrainerLog: + id: str + log: Dict diff --git a/outpostkit/finetuning.py b/outpostkit/finetuning.py index a56cb64..b764ba4 100644 --- a/outpostkit/finetuning.py +++ b/outpostkit/finetuning.py @@ -7,7 +7,9 @@ FinetuningJobLog, FinetuningModelRepo, FinetuningOutpostSourceModel, + FinetuningResource, FinetuningServiceCreateResponse, + FinetuningsListResponse, ) from outpostkit._utils.constants import OutpostSecret from outpostkit._utils.finetuning import FinetuningTask @@ -104,6 +106,10 @@ def __init__(self, client: Client, entity: str, name: str) -> None: self._route_prefix = f"/finetunings/{self.fullName}" super().__init__(client) + def info(self): + resp = self._client._request("GET", f"{self._route_prefix}") + return FinetuningResource(**resp.json()) + def list_jobs( self, status_in: Optional[List[str]] = None, @@ -130,8 +136,9 @@ def create_job( configs: Dict[str, Any], column_configs: Optional[Dict[str, str]] = None, model_source: Literal["huggingface", "outpost", "none"] = "none", - source_huggingface_model: Optional[FinetuningHFSourceModel] = None, - source_outpost_model: Optional[FinetuningOutpostSourceModel] = None, + source_model: Optional[ + Union[FinetuningHFSourceModel, FinetuningOutpostSourceModel] + ] = None, dataset_revision: Optional[str] = "HEAD", enqueue: Optional[bool] = None, ) -> FinetuningJob: @@ -143,8 +150,12 @@ def create_job( "configs": configs, "columnConfigs": column_configs, "modelSource": model_source, - "sourceHuggingfaceModel": source_huggingface_model, - "sourceOutpostModel": source_outpost_model, + "sourceHuggingfaceModel": source_model + if isinstance(source_model, FinetuningHFSourceModel) + else None, + "sourceOutpostModel": source_model + if isinstance(source_model, FinetuningOutpostSourceModel) + else None, "finetunedModel": finetuned_model_repo, "datasetRevision": dataset_revision, }, @@ -164,7 +175,7 @@ def __init__(self, client: Client, entity: str) -> None: def list(self): resp = self._client._request("GET", self._route_prefix) - return resp.json() + return FinetuningsListResponse(**resp.json()) def create( self, From b7def91b3cc21b98c1856aae38fc9929869cca12 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Wed, 24 Apr 2024 17:33:15 +0530 Subject: [PATCH 36/57] finetuning model details updated --- README.md | 2 +- examples/finetunings/clm.py | 4 ++-- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- outpostkit/finetuning.py | 10 +++++----- pyproject.toml | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 38c6ffe..c413823 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.57` +`0.0.58` ## Create a client diff --git a/examples/finetunings/clm.py b/examples/finetunings/clm.py index 2d2c94a..7b76c4c 100644 --- a/examples/finetunings/clm.py +++ b/examples/finetunings/clm.py @@ -70,8 +70,8 @@ column_configs={"text": "text"}, configs=CONFIGS, model_source="huggingface", - source_huggingface_model=FinetuningHFSourceModel(id="openaicommunity/gpt2"), - enqueue=True + source_model=FinetuningHFSourceModel(id="openaicommunity/gpt2"), + enqueue=True, ) diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 0ed5f78..36067fb 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.57" +__version__ = "0.0.58" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index c1d0a30..68242e6 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.57", + "User-Agent": "outpost-python/0.0.58", } if ( diff --git a/outpostkit/finetuning.py b/outpostkit/finetuning.py index b764ba4..becd029 100644 --- a/outpostkit/finetuning.py +++ b/outpostkit/finetuning.py @@ -74,7 +74,7 @@ def get_logs( limit: Optional[int] = 1000, ) -> List[FinetuningJobLog]: """ - Retrieve logs related to the endpoint + Retrieve logs related to the finetuning job Available log types:runtime, dep (deployment) and event. Note: the start time defaults to 15 mins ago """ @@ -150,14 +150,14 @@ def create_job( "configs": configs, "columnConfigs": column_configs, "modelSource": model_source, - "sourceHuggingfaceModel": source_model + "sourceHuggingfaceModel": asdict(source_model) if isinstance(source_model, FinetuningHFSourceModel) else None, - "sourceOutpostModel": source_model + "sourceOutpostModel": asdict(source_model) if isinstance(source_model, FinetuningOutpostSourceModel) else None, - "finetunedModel": finetuned_model_repo, - "datasetRevision": dataset_revision, + "finetunedModel": asdict(finetuned_model_repo), + "datasetCommitHash": dataset_revision, }, params={"enqueue": enqueue}, ) diff --git a/pyproject.toml b/pyproject.toml index fd666f3..ebdd0d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.57" +version = "0.0.58" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 711f23d84f4b8e09b718d79c3004f814df224d50 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Mon, 6 May 2024 00:23:04 +0530 Subject: [PATCH 37/57] exception --- outpostkit/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/outpostkit/exceptions.py b/outpostkit/exceptions.py index 9827911..61a84c7 100644 --- a/outpostkit/exceptions.py +++ b/outpostkit/exceptions.py @@ -42,7 +42,7 @@ def __init__( self.data = data def __str__(self) -> str: - return f"status: {self.status_code}, message: {self.code + ' - '+ self.message if self.code else self.message}" + return f"status: {self.status_code}, data: {self.data}" class ModelError(Exception): From d13834f14013180a2b863a624286323683575e28 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Mon, 6 May 2024 00:23:34 +0530 Subject: [PATCH 38/57] 0.0.59 --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c413823..ade67e0 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.58` +`0.0.59` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 36067fb..87c2982 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.58" +__version__ = "0.0.59" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 68242e6..1a27b19 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.58", + "User-Agent": "outpost-python/0.0.59", } if ( diff --git a/pyproject.toml b/pyproject.toml index ebdd0d4..e24a8c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.58" +version = "0.0.59" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 2834baeb6526fef92e0dba294cd27ace72b4348e Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Mon, 6 May 2024 00:39:33 +0530 Subject: [PATCH 39/57] full name support --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- outpostkit/endpoints.py | 24 ++++++++++++++++++++---- pyproject.toml | 2 +- 5 files changed, 24 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index ade67e0..5883aba 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.59` +`0.0.60` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 87c2982..fdb6610 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.59" +__version__ = "0.0.60" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 1a27b19..b0499a0 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.59", + "User-Agent": "outpost-python/0.0.60", } if ( diff --git a/outpostkit/endpoints.py b/outpostkit/endpoints.py index 6d6f586..a09775e 100644 --- a/outpostkit/endpoints.py +++ b/outpostkit/endpoints.py @@ -47,10 +47,26 @@ class EndpointDeployResponse: class Endpoint(Namespace): - def __init__(self, client: Client, entity: str, name: str) -> None: - self.entity = entity - self.name = name - self.fullName = f"{entity}/{name}" + def __init__( + self, + client: Client, + entity: Optional[str], + name: Optional[str], + full_name: Optional[str] = None, + ) -> None: + if name and entity: + self.entity = entity + self.name = name + self.fullName = f"{entity}/{name}" + if full_name: + _split = full_name.split("/", 1) + assert len(_split) == 2, "Invalid Full Name" + self.entity = _split[0] + self.name = _split[1] + self.fullName = self.fullName + else: + raise OutpostError("Please provide identifiable information.") + super().__init__(client) def get(self) -> EndpointResource: diff --git a/pyproject.toml b/pyproject.toml index e24a8c8..b6c6703 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.59" +version = "0.0.60" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 43ea2b5c707ec28816b19838f5b619e1988f225d Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Mon, 6 May 2024 00:59:23 +0530 Subject: [PATCH 40/57] data on error --- outpostkit/endpoints.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/outpostkit/endpoints.py b/outpostkit/endpoints.py index a09775e..5a18a05 100644 --- a/outpostkit/endpoints.py +++ b/outpostkit/endpoints.py @@ -50,8 +50,8 @@ class Endpoint(Namespace): def __init__( self, client: Client, - entity: Optional[str], - name: Optional[str], + entity: Optional[str] = None, + name: Optional[str] = None, full_name: Optional[str] = None, ) -> None: if name and entity: @@ -63,7 +63,7 @@ def __init__( assert len(_split) == 2, "Invalid Full Name" self.entity = _split[0] self.name = _split[1] - self.fullName = self.fullName + self.fullName = full_name else: raise OutpostError("Please provide identifiable information.") From 51dce2f5ec2cc77daade17f53ac710d409368816 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Mon, 6 May 2024 01:09:51 +0530 Subject: [PATCH 41/57] new version --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5883aba..5ccbc30 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.60` +`0.0.61` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index fdb6610..b912555 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.60" +__version__ = "0.0.61" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index b0499a0..810a6dd 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.60", + "User-Agent": "outpost-python/0.0.61", } if ( diff --git a/pyproject.toml b/pyproject.toml index b6c6703..8c11aa1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.60" +version = "0.0.61" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 7cd16b5d16e8ed88af32b4de78a28516bd9ec7f2 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sat, 18 May 2024 11:05:01 +0530 Subject: [PATCH 42/57] feat/multipart --- outpostkit/git/__init__.py | 0 outpostkit/git/lfs/__init__.py | 0 outpostkit/git/lfs/client.py | 155 ++++++++++++++++++++++++++++++ outpostkit/git/lfs/exc.py | 11 +++ outpostkit/git/lfs/transfer.py | 168 +++++++++++++++++++++++++++++++++ outpostkit/git/lfs/types.py | 58 ++++++++++++ 6 files changed, 392 insertions(+) create mode 100644 outpostkit/git/__init__.py create mode 100644 outpostkit/git/lfs/__init__.py create mode 100644 outpostkit/git/lfs/client.py create mode 100644 outpostkit/git/lfs/exc.py create mode 100644 outpostkit/git/lfs/transfer.py create mode 100644 outpostkit/git/lfs/types.py diff --git a/outpostkit/git/__init__.py b/outpostkit/git/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/outpostkit/git/lfs/__init__.py b/outpostkit/git/lfs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/outpostkit/git/lfs/client.py b/outpostkit/git/lfs/client.py new file mode 100644 index 0000000..ad4d682 --- /dev/null +++ b/outpostkit/git/lfs/client.py @@ -0,0 +1,155 @@ +"""A simple Git LFS client +""" +import hashlib +import logging +from typing import Any, BinaryIO, Dict, List, Optional + +import requests +from six.moves import urllib_parse + +from . import exc, transfer, types + +FILE_READ_BUFFER_SIZE = 4 * 1024 * 1000 # 4mb, why not + +_log = logging.getLogger(__name__) + + +class LfsClient: + LFS_MIME_TYPE = "application/vnd.git-lfs+json" + + TRANSFER_ADAPTERS = { + "basic": transfer.BasicTransferAdapter, + "multipart-basic": transfer.MultipartTransferAdapter, + } + + TRANSFER_ADAPTER_PRIORITY = ("multipart-basic", "basic") + + def __init__( + self, + lfs_server_url: str, + auth_token: Optional[str] = None, + transfer_adapters: List[str] = TRANSFER_ADAPTER_PRIORITY, + ) -> None: + self._url = lfs_server_url.rstrip("/") + self._auth_token = auth_token + self._transfer_adapters = transfer_adapters + + def batch( + self, + prefix: str, + operation: str, + objects: List[Dict[str, Any]], + ref: Optional[str] = None, + transfers: Optional[List[str]] = None, + ): + # type: (str, str, List[Dict[str, Any]], Optional[str], Optional[List[str]]) -> Dict[str, Any] + """Send a batch request to the LFS server + + TODO: allow specifying more than one file for a single batch operation + """ + url = self._url_for(prefix, "objects", "batch") + if transfers is None: + transfers = self._transfer_adapters + + payload = {"transfers": transfers, "operation": operation, "objects": objects} + if ref: + payload["ref"] = ref + + headers = {"Content-type": self.LFS_MIME_TYPE, "Accept": self.LFS_MIME_TYPE} + if self._auth_token: + headers["Authorization"] = f"Bearer {self._auth_token}" + + response = requests.post(url, json=payload, headers=headers) + if response.status_code != 200: + raise exc.LfsError( + f"Unexpected response from LFS server: {response.status_code}", + status_code=response.status_code, + ) + _log.debug("Got reply for batch request: %s", response.json()) + return response.json() + + def upload( + self, file_obj: BinaryIO, organization: str, repo_type: str, repo: str, **extras + ) -> types.ObjectAttributes: + """Upload a file to LFS storage""" + object_attrs = self._get_object_attrs(file_obj) + self._add_extra_object_attributes(object_attrs, extras) + response = self.batch( + f"{organization}/{repo_type}/{repo}", "upload", [object_attrs] + ) + + try: + adapter = self.TRANSFER_ADAPTERS[response["transfer"]]() + except KeyError: + raise ValueError( + "Unsupported transfer adapter: {}".format(response["transfer"]) + ) + + adapter.upload(file_obj, response["objects"][0]) + return object_attrs + + def download( + self, + file_obj: BinaryIO, + object_sha256: str, + object_size: int, + organization: str, + repo_type: str, + repo: str, + **extras, + ) -> None: + """Download a file and save it to file_obj + + file_obj is expected to be an file-like object open for writing in binary mode + + TODO: allow specifying more than one file for a single batch operation + """ + object_attrs = {"oid": object_sha256, "size": object_size} + self._add_extra_object_attributes(object_attrs, extras) + + response = self.batch( + f"{organization}/{repo_type}/{repo}", "download", [object_attrs] + ) + + try: + adapter = self.TRANSFER_ADAPTERS[response["transfer"]]() + except KeyError: + raise ValueError( + "Unsupported transfer adapter: {}".format(response["transfer"]) + ) + + return adapter.download(file_obj, response["objects"][0]) + + def _url_for(self, *segments: str, **params: str): + path = "/".join(segments) + url = f"{self._url}/{path}" + if params: + url = f"{url}?{urllib_parse.urlencode(params)}" + return url + + @staticmethod + def _get_object_attrs(file_obj: BinaryIO, **extras) -> types.ObjectAttributes: + digest = hashlib.sha256() + try: + while True: + data = file_obj.read(FILE_READ_BUFFER_SIZE) + if data: + digest.update(data) + else: + break + + size = file_obj.tell() + oid = digest.hexdigest() + finally: + file_obj.seek(0) + + return types.ObjectAttributes(oid=oid, size=size) + + @staticmethod + def _add_extra_object_attributes( + attributes: types.ObjectAttributes, extras: Dict[str, str] + ): + # type: (types.ObjectAttributes, Dict[str, Any]) -> None + """Add Giftless-specific 'x-...' attributes to an object dict""" + for k, v in extras.items(): + attributes[f"x-{k}"] = v diff --git a/outpostkit/git/lfs/exc.py b/outpostkit/git/lfs/exc.py new file mode 100644 index 0000000..7fdac2d --- /dev/null +++ b/outpostkit/git/lfs/exc.py @@ -0,0 +1,11 @@ +"""Exception classes +""" + + +class LfsError(RuntimeError): + status_code = None + + def __init__(self, *args, **kwargs): + if "status_code" in kwargs: + self.status_code = kwargs.pop("status_code") + super(LfsError, self).__init__(*args, **kwargs) diff --git a/outpostkit/git/lfs/transfer.py b/outpostkit/git/lfs/transfer.py new file mode 100644 index 0000000..4ba1161 --- /dev/null +++ b/outpostkit/git/lfs/transfer.py @@ -0,0 +1,168 @@ + +import base64 +import hashlib +import logging +from typing import Any, BinaryIO, Dict, Optional, Union + +import requests + +from . import types + +_log = logging.getLogger(__name__) + + +class BasicTransferAdapter: + def upload( + self, file_obj: BinaryIO, upload_spec: types.UploadObjectAttributes + ) -> None: + try: + ul_action = upload_spec["actions"]["upload"] + except KeyError: # Object is already on the server + return + + reply = requests.put( + ul_action["href"], headers=ul_action.get("header", {}), data=file_obj + ) + if reply.status_code // 100 != 2: + raise RuntimeError( + "Unexpected reply from server for upload: {} {}".format( + reply.status_code, reply.text + ) + ) + + vfy_action = upload_spec["actions"].get("verify") + if vfy_action: + self._verify_object(vfy_action, upload_spec["oid"], upload_spec["size"]) + + def download( + self, file_obj: BinaryIO, download_spec: types.DownloadObjectAttributes + ) -> None: + """Download an object from LFS""" + dl_action = download_spec["actions"]["download"] + with requests.get( + dl_action["href"], headers=dl_action.get("header", {}), stream=True + ) as response: + for chunk in response.iter_content(1024 * 16): + file_obj.write(chunk) + + @staticmethod + def _verify_object( + verify_action: types.BasicActionAttributes, oid: str, size: int + ) -> None: + _log.info("Sending verify action to %s", verify_action["href"]) + response = requests.post( + verify_action["href"], + headers=verify_action.get("header", {}), + json={"oid": oid, "size": size}, + ) + if response.status_code // 100 != 2: + raise RuntimeError( + "verify failed with error status code: {}: {}".format( + response.status_code, response.text + ) + ) + + +class MultipartTransferAdapter(BasicTransferAdapter): + def upload( + self, file_obj: BinaryIO, upload_spec: types.MultipartUploadObjectAttributes + ): + """Do a multipart upload""" + actions = upload_spec.get("actions") + if not actions: + _log.info("No actions, file already exists") + return + + init_action = actions.get("init") + if init_action: + _log.info("Sending multipart init action to %s", init_action["href"]) + response = self._send_request( + init_action["href"], + method=init_action.get("method", "POST"), + headers=init_action.get("header", {}), + body=init_action.get("body"), + ) + if response.status_code // 100 != 2: + raise RuntimeError( + f"init failed with error status code: {response.status_code}" + ) + + for p, part in enumerate(actions.get("parts", [])): + _log.info("Uploading part %d/%d", p + 1, len(actions["parts"])) + self._send_part_request(file_obj, **part) + + commit_action = actions.get("commit") + if commit_action: + _log.info("Sending multipart commit action to %s", commit_action["href"]) + response = self._send_request( + commit_action["href"], + method=commit_action.get("method", "POST"), + headers=commit_action.get("header", {}), + body=commit_action.get("body"), + ) + if response.status_code // 100 != 2: + raise RuntimeError( + "commit failed with error status code: {}: {}".format( + response.status_code, response.text + ) + ) + + verify_action = actions.get("verify") + if verify_action: + self._verify_object(verify_action, upload_spec["oid"], upload_spec["size"]) + + def _send_part_request( + self, + file_obj: BinaryIO, + href: str, + method: str = "PUT", + pos: int = 0, + size: Optional[int] = None, + want_digest: Optional[str] = None, + header: Optional[Dict[str, Any]] = None, + **_, + ): + """Upload a part""" + file_obj.seek(pos) + if size: + data = file_obj.read(size) + else: + data = file_obj.read() + + if header is None: + header = {} + + if want_digest: + digest_headers = calculate_digest_header(data, want_digest) + header.update(digest_headers) + + reply = self._send_request(href, method=method, headers=header, body=data) + if reply.status_code // 100 != 2: + raise RuntimeError( + "Unexpected reply from server for part: {} {}".format( + reply.status_code, reply.text + ) + ) + + @staticmethod + def _send_request( + url: str, + method: str, + headers: Dict[str, str], + body: Optional[Union[bytes, str]] = None, + ) -> requests.Response: + """Send an arbitrary HTTP request""" + reply = requests.session().request( + method=method, url=url, headers=headers, data=body + ) + return reply + + +def calculate_digest_header(data: bytes, want_digest: str) -> Dict[str, str]: + # type: (bytes, str) -> Dict[str, str] + """TODO: Properly implement this""" + if want_digest == "contentMD5": + digest = base64.b64encode(hashlib.md5(data).digest()).decode("ascii") # type: str + return {"Content-MD5": digest} + else: + raise RuntimeError(f"Don't know how to handle want_digest value: {want_digest}") diff --git a/outpostkit/git/lfs/types.py b/outpostkit/git/lfs/types.py new file mode 100644 index 0000000..800aa1f --- /dev/null +++ b/outpostkit/git/lfs/types.py @@ -0,0 +1,58 @@ +"""Some useful type definitions for Git LFS API and transfer protocols +""" +import sys +from typing import Any, Dict, List, Optional + +if sys.version_info >= (3, 8): + from typing import TypedDict +else: + from typing_extensions import TypedDict + + +class ObjectAttributes(TypedDict): + oid: str + size: int + + +class BasicActionAttributes(TypedDict): + href: str + header: Optional[Dict[str, str]] + expires_in: int + + +class BasicUploadActions(TypedDict, total=False): + upload: BasicActionAttributes + verify: BasicActionAttributes + + +class BasicDownloadActions(TypedDict, total=False): + download: BasicActionAttributes + + +class UploadObjectAttributes(TypedDict, total=False): + actions: BasicUploadActions + oid: str + size: int + authenticated: Optional[bool] + + +class DownloadObjectAttributes(TypedDict, total=False): + actions: BasicDownloadActions + oid: str + size: int + authenticated: Optional[bool] + + +class MultipartUploadActions(TypedDict, total=False): + init: Dict[str, Any] + commit: Dict[str, Any] + parts: List[Dict[str, Any]] + abort: Dict[str, Any] + verify: Dict[str, Any] + + +class MultipartUploadObjectAttributes(TypedDict, total=False): + actions: MultipartUploadActions + oid: str + size: int + authenticated: Optional[bool] From e5611d9d64bc9e3ce02c58ea62cfbe4bf6e5e312 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sat, 18 May 2024 11:07:57 +0530 Subject: [PATCH 43/57] new version with lfs --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- outpostkit/git/lfs/__init__.py | 0 outpostkit/{git => repository/lfs}/__init__.py | 0 outpostkit/{git => repository}/lfs/client.py | 0 outpostkit/{git => repository}/lfs/exc.py | 0 outpostkit/{git => repository}/lfs/transfer.py | 0 outpostkit/{git => repository}/lfs/types.py | 0 pyproject.toml | 4 ++-- 10 files changed, 5 insertions(+), 5 deletions(-) delete mode 100644 outpostkit/git/lfs/__init__.py rename outpostkit/{git => repository/lfs}/__init__.py (100%) rename outpostkit/{git => repository}/lfs/client.py (100%) rename outpostkit/{git => repository}/lfs/exc.py (100%) rename outpostkit/{git => repository}/lfs/transfer.py (100%) rename outpostkit/{git => repository}/lfs/types.py (100%) diff --git a/README.md b/README.md index 5ccbc30..f122ef8 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.61` +`0.0.62` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index b912555..f2c93ec 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.61" +__version__ = "0.0.62" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 810a6dd..60b1741 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.61", + "User-Agent": "outpost-python/0.0.62", } if ( diff --git a/outpostkit/git/lfs/__init__.py b/outpostkit/git/lfs/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/outpostkit/git/__init__.py b/outpostkit/repository/lfs/__init__.py similarity index 100% rename from outpostkit/git/__init__.py rename to outpostkit/repository/lfs/__init__.py diff --git a/outpostkit/git/lfs/client.py b/outpostkit/repository/lfs/client.py similarity index 100% rename from outpostkit/git/lfs/client.py rename to outpostkit/repository/lfs/client.py diff --git a/outpostkit/git/lfs/exc.py b/outpostkit/repository/lfs/exc.py similarity index 100% rename from outpostkit/git/lfs/exc.py rename to outpostkit/repository/lfs/exc.py diff --git a/outpostkit/git/lfs/transfer.py b/outpostkit/repository/lfs/transfer.py similarity index 100% rename from outpostkit/git/lfs/transfer.py rename to outpostkit/repository/lfs/transfer.py diff --git a/outpostkit/git/lfs/types.py b/outpostkit/repository/lfs/types.py similarity index 100% rename from outpostkit/git/lfs/types.py rename to outpostkit/repository/lfs/types.py diff --git a/pyproject.toml b/pyproject.toml index 8c11aa1..3663c36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.61" +version = "0.0.62" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } @@ -34,7 +34,7 @@ repository = "https://github.com/outposthq/outpostkit-python" testpaths = "tests/" [tool.setuptools] -packages = ["outpostkit","outpostkit._types","outpostkit._utils","outpostkit.repository"] +packages = ["outpostkit","outpostkit._types","outpostkit._utils","outpostkit.repository","outpostkit.repository.lfs"] [tool.setuptools.package-data] "outpostkit" = ["py.typed"] From fc81ed7e0d14ad8203d4ab8b79c53256c55ef1ba Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sat, 18 May 2024 12:21:26 +0530 Subject: [PATCH 44/57] lfs client logging and progress callback --- outpostkit/repository/lfs/client.py | 21 ++++++++++++++++--- outpostkit/repository/lfs/transfer.py | 29 ++++++++++++++++++++------- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/outpostkit/repository/lfs/client.py b/outpostkit/repository/lfs/client.py index ad4d682..3997779 100644 --- a/outpostkit/repository/lfs/client.py +++ b/outpostkit/repository/lfs/client.py @@ -11,7 +11,16 @@ FILE_READ_BUFFER_SIZE = 4 * 1024 * 1000 # 4mb, why not + _log = logging.getLogger(__name__) +_log.handlers.clear() +file_handler = logging.FileHandler( + "lfs_client.log", # maybe create a config dir at home, ~/.outpost +) +file_handler.setFormatter( + logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") +) +_log.addHandler(file_handler) class LfsClient: @@ -22,7 +31,7 @@ class LfsClient: "multipart-basic": transfer.MultipartTransferAdapter, } - TRANSFER_ADAPTER_PRIORITY = ("multipart-basic", "basic") + TRANSFER_ADAPTER_PRIORITY = ["multipart-basic", "basic"] def __init__( self, @@ -69,7 +78,13 @@ def batch( return response.json() def upload( - self, file_obj: BinaryIO, organization: str, repo_type: str, repo: str, **extras + self, + file_obj: BinaryIO, + organization: str, + repo_type: str, + repo: str, + on_progress: Optional[Any] = None, + **extras, ) -> types.ObjectAttributes: """Upload a file to LFS storage""" object_attrs = self._get_object_attrs(file_obj) @@ -85,7 +100,7 @@ def upload( "Unsupported transfer adapter: {}".format(response["transfer"]) ) - adapter.upload(file_obj, response["objects"][0]) + adapter.upload(file_obj, response["objects"][0], on_progress) return object_attrs def download( diff --git a/outpostkit/repository/lfs/transfer.py b/outpostkit/repository/lfs/transfer.py index 4ba1161..c84dde3 100644 --- a/outpostkit/repository/lfs/transfer.py +++ b/outpostkit/repository/lfs/transfer.py @@ -2,7 +2,7 @@ import base64 import hashlib import logging -from typing import Any, BinaryIO, Dict, Optional, Union +from typing import Any, BinaryIO, Callable, Dict, Optional, Union import requests @@ -13,7 +13,10 @@ class BasicTransferAdapter: def upload( - self, file_obj: BinaryIO, upload_spec: types.UploadObjectAttributes + self, + file_obj: BinaryIO, + upload_spec: types.UploadObjectAttributes, + on_progress: Callable[[int], int], ) -> None: try: ul_action = upload_spec["actions"]["upload"] @@ -23,6 +26,7 @@ def upload( reply = requests.put( ul_action["href"], headers=ul_action.get("header", {}), data=file_obj ) + ul_action.get("header", {}) if reply.status_code // 100 != 2: raise RuntimeError( "Unexpected reply from server for upload: {} {}".format( @@ -65,7 +69,10 @@ def _verify_object( class MultipartTransferAdapter(BasicTransferAdapter): def upload( - self, file_obj: BinaryIO, upload_spec: types.MultipartUploadObjectAttributes + self, + file_obj: BinaryIO, + upload_spec: types.MultipartUploadObjectAttributes, + on_progress: Callable[[int], int], ): """Do a multipart upload""" actions = upload_spec.get("actions") @@ -86,10 +93,12 @@ def upload( raise RuntimeError( f"init failed with error status code: {response.status_code}" ) - + completed_parts = [] for p, part in enumerate(actions.get("parts", [])): _log.info("Uploading part %d/%d", p + 1, len(actions["parts"])) - self._send_part_request(file_obj, **part) + etag = self._send_part_request(file_obj, **part) + on_progress(part["size"]) + completed_parts.append({"ETag": etag, "PartNumber": p + 1}) commit_action = actions.get("commit") if commit_action: @@ -98,7 +107,7 @@ def upload( commit_action["href"], method=commit_action.get("method", "POST"), headers=commit_action.get("header", {}), - body=commit_action.get("body"), + json={"oid": upload_spec.get("oid"), "parts": completed_parts}, ) if response.status_code // 100 != 2: raise RuntimeError( @@ -143,6 +152,7 @@ def _send_part_request( reply.status_code, reply.text ) ) + return reply.headers.get("etag") @staticmethod def _send_request( @@ -150,10 +160,15 @@ def _send_request( method: str, headers: Dict[str, str], body: Optional[Union[bytes, str]] = None, + json: Optional[Dict] = None, ) -> requests.Response: """Send an arbitrary HTTP request""" reply = requests.session().request( - method=method, url=url, headers=headers, data=body + method=method, + url=url, + headers=headers, + data=body, + json=json, ) return reply From d778763fce3ab622fa71f2246d539ac457268fd6 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sat, 18 May 2024 12:22:04 +0530 Subject: [PATCH 45/57] version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f122ef8..85d6162 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.62` +`0.0.63` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index f2c93ec..9871575 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.62" +__version__ = "0.0.63" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 60b1741..99ceed6 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.62", + "User-Agent": "outpost-python/0.0.63", } if ( diff --git a/pyproject.toml b/pyproject.toml index 3663c36..f050084 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.62" +version = "0.0.63" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 6d7eb11e5371e09f4e8f4ff8f1e3589f41d7fe80 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sat, 18 May 2024 12:26:14 +0530 Subject: [PATCH 46/57] bug fixes on progress callback --- outpostkit/repository/lfs/client.py | 4 ++-- outpostkit/repository/lfs/transfer.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/outpostkit/repository/lfs/client.py b/outpostkit/repository/lfs/client.py index 3997779..3866308 100644 --- a/outpostkit/repository/lfs/client.py +++ b/outpostkit/repository/lfs/client.py @@ -2,7 +2,7 @@ """ import hashlib import logging -from typing import Any, BinaryIO, Dict, List, Optional +from typing import Any, BinaryIO, Callable, Dict, List, Optional import requests from six.moves import urllib_parse @@ -83,7 +83,7 @@ def upload( organization: str, repo_type: str, repo: str, - on_progress: Optional[Any] = None, + on_progress: Optional[Callable[[int], None]] = None, **extras, ) -> types.ObjectAttributes: """Upload a file to LFS storage""" diff --git a/outpostkit/repository/lfs/transfer.py b/outpostkit/repository/lfs/transfer.py index c84dde3..1c3f5a6 100644 --- a/outpostkit/repository/lfs/transfer.py +++ b/outpostkit/repository/lfs/transfer.py @@ -16,7 +16,7 @@ def upload( self, file_obj: BinaryIO, upload_spec: types.UploadObjectAttributes, - on_progress: Callable[[int], int], + on_progress: Optional[Callable[[int], None]] = None, ) -> None: try: ul_action = upload_spec["actions"]["upload"] @@ -72,7 +72,7 @@ def upload( self, file_obj: BinaryIO, upload_spec: types.MultipartUploadObjectAttributes, - on_progress: Callable[[int], int], + on_progress: Optional[Callable[[int], None]] = None, ): """Do a multipart upload""" actions = upload_spec.get("actions") @@ -97,7 +97,8 @@ def upload( for p, part in enumerate(actions.get("parts", [])): _log.info("Uploading part %d/%d", p + 1, len(actions["parts"])) etag = self._send_part_request(file_obj, **part) - on_progress(part["size"]) + if on_progress: + on_progress(part["size"]) completed_parts.append({"ETag": etag, "PartNumber": p + 1}) commit_action = actions.get("commit") From a997666ce8b57556139ee83a518095e1de5a0219 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sat, 18 May 2024 12:26:38 +0530 Subject: [PATCH 47/57] version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 85d6162..4fa7a25 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.63` +`0.0.64` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 9871575..32d52ea 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.63" +__version__ = "0.0.64" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 99ceed6..930b4b2 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.63", + "User-Agent": "outpost-python/0.0.64", } if ( diff --git a/pyproject.toml b/pyproject.toml index f050084..2b49e17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.63" +version = "0.0.64" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 6253b11da77a60df26d54a723d266a18b9e883a4 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sat, 18 May 2024 19:33:55 +0530 Subject: [PATCH 48/57] lfs logger --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- outpostkit/repository/lfs/client.py | 14 ++++---------- outpostkit/repository/lfs/logger.py | 19 +++++++++++++++++++ pyproject.toml | 2 +- 6 files changed, 27 insertions(+), 14 deletions(-) create mode 100644 outpostkit/repository/lfs/logger.py diff --git a/README.md b/README.md index 4fa7a25..dff5f8b 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.64` +`0.0.65` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 32d52ea..7ed7895 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.64" +__version__ = "0.0.65" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 930b4b2..5a60385 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.64", + "User-Agent": "outpost-python/0.0.65", } if ( diff --git a/outpostkit/repository/lfs/client.py b/outpostkit/repository/lfs/client.py index 3866308..50a441d 100644 --- a/outpostkit/repository/lfs/client.py +++ b/outpostkit/repository/lfs/client.py @@ -1,26 +1,20 @@ """A simple Git LFS client """ import hashlib -import logging from typing import Any, BinaryIO, Callable, Dict, List, Optional import requests from six.moves import urllib_parse +from outpostkit.repository.lfs.logger import create_lfs_logger + from . import exc, transfer, types FILE_READ_BUFFER_SIZE = 4 * 1024 * 1000 # 4mb, why not -_log = logging.getLogger(__name__) -_log.handlers.clear() -file_handler = logging.FileHandler( - "lfs_client.log", # maybe create a config dir at home, ~/.outpost -) -file_handler.setFormatter( - logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") -) -_log.addHandler(file_handler) + +_log = create_lfs_logger(__name__) class LfsClient: diff --git a/outpostkit/repository/lfs/logger.py b/outpostkit/repository/lfs/logger.py new file mode 100644 index 0000000..7094cd7 --- /dev/null +++ b/outpostkit/repository/lfs/logger.py @@ -0,0 +1,19 @@ +import logging +import os + +log_file_path = os.path.expanduser("~/.outpost/outpostkit.log") +outpost_folder = os.path.dirname(log_file_path) +if not os.path.exists(outpost_folder): + # Create the ~/.outpost folder if it doesn't exist + os.makedirs(outpost_folder) + + +def create_lfs_logger(name: str): + _log = logging.getLogger(name) + _log.handlers.clear() + file_handler = logging.FileHandler(log_file_path) + file_handler.setFormatter( + logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + ) + _log.addHandler(file_handler) + return _log diff --git a/pyproject.toml b/pyproject.toml index 2b49e17..39125de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.64" +version = "0.0.65" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From ea562ea785372907a2d0f29b7ea08ee779879e69 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sat, 18 May 2024 19:41:50 +0530 Subject: [PATCH 49/57] lfs logger --- outpostkit/repository/lfs/transfer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/outpostkit/repository/lfs/transfer.py b/outpostkit/repository/lfs/transfer.py index 1c3f5a6..af53884 100644 --- a/outpostkit/repository/lfs/transfer.py +++ b/outpostkit/repository/lfs/transfer.py @@ -1,14 +1,15 @@ import base64 import hashlib -import logging from typing import Any, BinaryIO, Callable, Dict, Optional, Union import requests +from outpostkit.repository.lfs.logger import create_lfs_logger + from . import types -_log = logging.getLogger(__name__) +_log = create_lfs_logger(__name__) class BasicTransferAdapter: From 709399b41d403c50c99b203237b8edddb6f75e11 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sat, 18 May 2024 19:42:10 +0530 Subject: [PATCH 50/57] version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index dff5f8b..00fec9c 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.65` +`0.0.66` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 7ed7895..9490eff 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.65" +__version__ = "0.0.66" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 5a60385..9f3e9de 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.65", + "User-Agent": "outpost-python/0.0.66", } if ( diff --git a/pyproject.toml b/pyproject.toml index 39125de..dffe559 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.65" +version = "0.0.66" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From b997042c19e1bdf6fad03e266a3702bd706eae51 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sun, 19 May 2024 11:45:10 +0530 Subject: [PATCH 51/57] match transfer adaptor --- outpostkit/repository/lfs/transfer.py | 15 +++++++++------ outpostkit/repository/lfs/types.py | 4 ++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/outpostkit/repository/lfs/transfer.py b/outpostkit/repository/lfs/transfer.py index af53884..5bce4b7 100644 --- a/outpostkit/repository/lfs/transfer.py +++ b/outpostkit/repository/lfs/transfer.py @@ -95,12 +95,15 @@ def upload( f"init failed with error status code: {response.status_code}" ) completed_parts = [] - for p, part in enumerate(actions.get("parts", [])): - _log.info("Uploading part %d/%d", p + 1, len(actions["parts"])) - etag = self._send_part_request(file_obj, **part) - if on_progress: - on_progress(part["size"]) - completed_parts.append({"ETag": etag, "PartNumber": p + 1}) + part_action = actions.get("part") + if part_action: + all_parts = part_action.get("parts", []) + for p, part in enumerate(all_parts): + _log.info("Uploading part %d/%d", p + 1, len(all_parts)) + etag = self._send_part_request(file_obj, **part) + if on_progress: + on_progress(part["size"]) + completed_parts.append({"ETag": etag, "PartNumber": p + 1}) commit_action = actions.get("commit") if commit_action: diff --git a/outpostkit/repository/lfs/types.py b/outpostkit/repository/lfs/types.py index 800aa1f..80687ee 100644 --- a/outpostkit/repository/lfs/types.py +++ b/outpostkit/repository/lfs/types.py @@ -1,7 +1,7 @@ """Some useful type definitions for Git LFS API and transfer protocols """ import sys -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Optional if sys.version_info >= (3, 8): from typing import TypedDict @@ -46,7 +46,7 @@ class DownloadObjectAttributes(TypedDict, total=False): class MultipartUploadActions(TypedDict, total=False): init: Dict[str, Any] commit: Dict[str, Any] - parts: List[Dict[str, Any]] + part: Dict[str, Any] abort: Dict[str, Any] verify: Dict[str, Any] From 315c1e7d016ade81ef0dfd9d1142f33898c96ca9 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sun, 19 May 2024 11:46:34 +0530 Subject: [PATCH 52/57] version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 00fec9c..14eca21 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.66` +`0.0.67` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 9490eff..8d3a914 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.66" +__version__ = "0.0.67" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 9f3e9de..c645cc6 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.66", + "User-Agent": "outpost-python/0.0.67", } if ( diff --git a/pyproject.toml b/pyproject.toml index dffe559..9a7dc7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.66" +version = "0.0.67" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From 7db138ca41393b37768c6a3bc82b63815a5d1bec Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sun, 19 May 2024 12:57:06 +0530 Subject: [PATCH 53/57] version bump --- outpostkit/repository/lfs/logger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/outpostkit/repository/lfs/logger.py b/outpostkit/repository/lfs/logger.py index 7094cd7..4b7eaaa 100644 --- a/outpostkit/repository/lfs/logger.py +++ b/outpostkit/repository/lfs/logger.py @@ -1,7 +1,7 @@ import logging import os -log_file_path = os.path.expanduser("~/.outpost/outpostkit.log") +log_file_path = os.path.expanduser("var/log/outpost/outpostkit.log") outpost_folder = os.path.dirname(log_file_path) if not os.path.exists(outpost_folder): # Create the ~/.outpost folder if it doesn't exist From 8a281c6ecdcc4bbe61b0ebde5f3019a8eb982be2 Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sun, 19 May 2024 12:57:30 +0530 Subject: [PATCH 54/57] version bump --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- outpostkit/repository/lfs/logger.py | 2 +- pyproject.toml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 14eca21..27ce8c9 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.67` +`0.0.68` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 8d3a914..564d85c 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.67" +__version__ = "0.0.68" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index c645cc6..1360394 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.67", + "User-Agent": "outpost-python/0.0.68", } if ( diff --git a/outpostkit/repository/lfs/logger.py b/outpostkit/repository/lfs/logger.py index 4b7eaaa..133ff3e 100644 --- a/outpostkit/repository/lfs/logger.py +++ b/outpostkit/repository/lfs/logger.py @@ -1,7 +1,7 @@ import logging import os -log_file_path = os.path.expanduser("var/log/outpost/outpostkit.log") +log_file_path = os.path.expanduser("/var/log/outpost/outpostkit.log") outpost_folder = os.path.dirname(log_file_path) if not os.path.exists(outpost_folder): # Create the ~/.outpost folder if it doesn't exist diff --git a/pyproject.toml b/pyproject.toml index 9a7dc7d..32102a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.67" +version = "0.0.68" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From e4ec34c3fb805f04da5f549a3bcafca96e75f03d Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sun, 19 May 2024 13:04:52 +0530 Subject: [PATCH 55/57] lfs logger --- outpostkit/repository/lfs/logger.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/outpostkit/repository/lfs/logger.py b/outpostkit/repository/lfs/logger.py index 133ff3e..c5e3b0c 100644 --- a/outpostkit/repository/lfs/logger.py +++ b/outpostkit/repository/lfs/logger.py @@ -1,8 +1,10 @@ import logging import os -log_file_path = os.path.expanduser("/var/log/outpost/outpostkit.log") +log_dir = "/tmp" +log_file_path = os.path.expanduser(f"{log_dir}/outpostkit.log") outpost_folder = os.path.dirname(log_file_path) + if not os.path.exists(outpost_folder): # Create the ~/.outpost folder if it doesn't exist os.makedirs(outpost_folder) @@ -11,6 +13,7 @@ def create_lfs_logger(name: str): _log = logging.getLogger(name) _log.handlers.clear() + _log.setLevel(10) file_handler = logging.FileHandler(log_file_path) file_handler.setFormatter( logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") From 24ac7b52fa38a10d97c9fab848140214f2d9ad2a Mon Sep 17 00:00:00 2001 From: Ajeya Bhat Date: Sun, 19 May 2024 13:05:02 +0530 Subject: [PATCH 56/57] lfs logger --- README.md | 2 +- outpostkit/__init__.py | 2 +- outpostkit/client.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 27ce8c9..f2ffa69 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ export OUTPOST_API_TOKEN= import outpostkit print(outpostkit.__version__) ``` -`0.0.68` +`0.0.69` ## Create a client diff --git a/outpostkit/__init__.py b/outpostkit/__init__.py index 564d85c..72feaab 100644 --- a/outpostkit/__init__.py +++ b/outpostkit/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.68" +__version__ = "0.0.69" from outpostkit.client import Client as Client from outpostkit.endpoints import Endpoint as Endpoint from outpostkit.endpoints import Endpoints as Endpoints diff --git a/outpostkit/client.py b/outpostkit/client.py index 1360394..87ae4ee 100644 --- a/outpostkit/client.py +++ b/outpostkit/client.py @@ -225,7 +225,7 @@ def _build_httpx_client( **kwargs, ) -> Union[httpx.Client, httpx.AsyncClient]: headers = { - "User-Agent": "outpost-python/0.0.68", + "User-Agent": "outpost-python/0.0.69", } if ( diff --git a/pyproject.toml b/pyproject.toml index 32102a6..e843779 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "outpostkit" -version = "0.0.68" +version = "0.0.69" description = "Python client for Outpost" readme = "README.md" license = { file = "LICENSE" } From c40e078e8a634ecd5de0f55c3fad5cdd90b5a706 Mon Sep 17 00:00:00 2001 From: Shubham Kaushal Date: Mon, 10 Jun 2024 16:36:23 +0530 Subject: [PATCH 57/57] merge main --- outpostkit/repository/lfs/client.py | 1 - outpostkit/repository/lfs/transfer.py | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/outpostkit/repository/lfs/client.py b/outpostkit/repository/lfs/client.py index 50a441d..fd637fb 100644 --- a/outpostkit/repository/lfs/client.py +++ b/outpostkit/repository/lfs/client.py @@ -13,7 +13,6 @@ FILE_READ_BUFFER_SIZE = 4 * 1024 * 1000 # 4mb, why not - _log = create_lfs_logger(__name__) diff --git a/outpostkit/repository/lfs/transfer.py b/outpostkit/repository/lfs/transfer.py index 5bce4b7..442b758 100644 --- a/outpostkit/repository/lfs/transfer.py +++ b/outpostkit/repository/lfs/transfer.py @@ -1,4 +1,3 @@ - import base64 import hashlib from typing import Any, BinaryIO, Callable, Dict, Optional, Union @@ -182,7 +181,9 @@ def calculate_digest_header(data: bytes, want_digest: str) -> Dict[str, str]: # type: (bytes, str) -> Dict[str, str] """TODO: Properly implement this""" if want_digest == "contentMD5": - digest = base64.b64encode(hashlib.md5(data).digest()).decode("ascii") # type: str + digest = base64.b64encode(hashlib.md5(data).digest()).decode( + "ascii" + ) # type: str return {"Content-MD5": digest} else: raise RuntimeError(f"Don't know how to handle want_digest value: {want_digest}")