Code envs#

The API offers methods to:

  • Create code envs

  • Read and write settings and packages of code envs

  • Update code envs

  • Reinstall

  • Set the environment variables of code environment resources

Creating a code env#

Python 3.6 code env, with Jupyter support#

# The snippet relies on the dataiku package, so import it explicitly
import dataiku

client = dataiku.api_client()

# Create the code env, explicitly requesting the Python 3.6 interpreter
code_env = client.create_code_env("PYTHON", "my_code_env_name", "DESIGN_MANAGED", {"pythonInterpreter": "PYTHON36"})

# Fetch the current definition so it can be edited and saved back
definition = code_env.get_definition()
definition["desc"]["installCorePackages"] = True
definition["desc"]["installJupyterSupport"] = True

# We want to install 2 packages (tabulate and nameparser), one per line
definition["specPackageList"] = "tabulate\nnameparser"

# Save the new settings
code_env.set_definition(definition)

# Actually perform the installation
code_env.update_packages()
code_env.set_jupyter_support(True)

Python 2.7 code env, without Jupyter support#

# The snippet relies on the dataiku package, so import it explicitly
import dataiku

client = dataiku.api_client()

# Create the code env, explicitly requesting the Python 2.7 interpreter
# rather than relying on whatever interpreter the instance picks by default
code_env = client.create_code_env("PYTHON", "my_code_env_name", "DESIGN_MANAGED", {"pythonInterpreter": "PYTHON27"})

# Fetch the current definition so it can be edited and saved back
definition = code_env.get_definition()
definition["desc"]["installCorePackages"] = True

# We want to install 2 packages (tabulate and nameparser), one per line
definition["specPackageList"] = "tabulate\nnameparser"

# Save the new settings
code_env.set_definition(definition)

# Actually perform the installation
code_env.update_packages()

Managing the code environment resources directory environment variables#

These methods may only be called from a resource initialization script. See Managed code environment resources directory.

from dataiku.code_env_resources import (
    clear_all_env_vars,
    delete_env_var,
    get_env_var,
    set_env_path,
    set_env_var,
)

# Remove every environment variable from the code environment runtime
clear_all_env_vars()

# Define a raw environment variable for the code environment runtime
set_env_var("ENV_VAR", "42")

# Define a path environment variable to be loaded at runtime; the path is
# relative to the code env resources directory
set_env_path("TFHUB_CACHE_DIR", "tensorflow")

# Read an environment variable back from the code environment runtime
cache_dir = get_env_var("TFHUB_CACHE_DIR")
print("TFHUB_CACHE_DIR:", cache_dir)

# Remove a single environment variable from the code environment runtime
delete_env_var("ENV_VAR")

# Then download pre-trained models in the resources directory, e.g.
# for TensorFlow
# import tensorflow_hub
# tensorflow_hub.KerasLayer("https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/classification/4")

(Advanced) The method dataiku.code_env_resources.fetch_from_backend allows you to fetch specific resource files or folders from the backend when running in containerized execution. It is meant to be called in a Python recipe or notebook when the resources were not already copied or initialized for containerized execution at build time (see Code environment resources directory).

from dataiku.code_env_resources import fetch_from_backend

# Resource files and folders to fetch from the backend
resource_paths = [
    "pytorch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth",
    "huggingface/",
]
fetch_from_backend(resource_paths)

# Load pre-trained models as usual

Detailed examples#

Get Recipes using specific Code Environments#

When editing a code environment, you may want to assess which code recipes are using that environment and could therefore be affected by the changes. The following code snippet builds such a mapping:

import dataiku


def get_instance_default_code_env(client):
    """Return the instance-level default code env for each language.

    The result maps "python" and "r" to the env name stored under the
    "codeEnvs" section of the general settings, falling back to
    "dss_builtin" when no (non-empty) default is set.
    """

    code_env_settings = client.get_general_settings().settings["codeEnvs"]
    settings_keys = {"python": "defaultPythonEnv", "r": "defaultREnv"}
    return {
        language: code_env_settings.get(key) or "dss_builtin"
        for language, key in settings_keys.items()
    }
        

def get_code_env_mapping(client, project):
    """Return a dict mapping code recipes to the code env they use.

    The result has one key per recipe language ("python" and "r"); each
    value is a list of {"name": <recipe name>, "code_env": <env name>}.

    A recipe's env is resolved from its own "envSelection":
      * "EXPLICIT_ENV": the env named by the recipe;
      * "USE_BUILTIN_MODE": "dss_builtin";
      * anything else: the project-level default for that language.

    The project-level default is resolved from the project settings with the
    same three modes, where "INHERIT" defers to the instance-level default.
    An unrecognised project mode yields None as the default.
    """

    builtin_env = "dss_builtin"
    mapping = {"python": [], "r": []}

    project_defaults = project.get_settings().get_raw()["settings"]["codeEnvs"]
    all_recipes = project.list_recipes()

    for rcp_type, entries in mapping.items():
        # Resolve the project-level default env for this language.
        default_mode = project_defaults[rcp_type]["mode"]
        if default_mode == "USE_BUILTIN_MODE":
            default_env = builtin_env
        elif default_mode == "INHERIT":
            default_env = get_instance_default_code_env(client).get(rcp_type)
        elif default_mode == "EXPLICIT_ENV":
            default_env = project_defaults[rcp_type]["envName"]
        else:
            default_env = None

        for recipe in all_recipes:
            if recipe["type"] != rcp_type:
                continue
            name = recipe["name"]
            env_select = recipe["params"]["envSelection"]
            env_mode = env_select["envMode"]
            if env_mode == "EXPLICIT_ENV":
                code_env = env_select["envName"]
            elif env_mode == "USE_BUILTIN_MODE":
                # The recipe forces the builtin env, whatever the project default is.
                code_env = builtin_env
            else:
                code_env = default_env
            entries.append({"name": name, "code_env": code_env})
    return mapping

# Build the recipe -> code env mapping for the default project and print it
client = dataiku.api_client()
project = client.get_default_project()
mapping = get_code_env_mapping(client, project)
print(mapping)

Reference documentation#

dataikuapi.dss.admin.DSSCodeEnv(client, ...)

A code env on the DSS instance.

dataikuapi.dss.admin.DSSDesignCodeEnvSettings(...)

Base settings class for a DSS code env on a design node.

dataikuapi.dss.admin.DSSAutomationCodeEnvSettings(...)

Base settings class for a DSS code env on an automation node.

dataikuapi.dss.admin.DSSAutomationCodeEnvVersionSettings(...)

Base settings class for a DSS code env version on an automation node.