Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RayCluster][Feature] add redis username to head pod from GcsFaultToleranceOptions #2760

Merged
merged 3 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion ray-operator/controllers/ray/common/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func configureGCSFaultTolerance(podTemplate *corev1.PodTemplateSpec, instance ra
container.Env = append(container.Env, gcsTimeout)
}

// Configure the Redis address and password for GCS FT.
// Configure the Redis address, username and password for GCS FT.
if rayNodeType == rayv1.HeadNode {
// Configure the external storage namespace for GCS FT.
storageNS := string(instance.UID)
Expand All @@ -125,6 +125,18 @@ func configureGCSFaultTolerance(podTemplate *corev1.PodTemplateSpec, instance ra
Name: utils.RAY_REDIS_ADDRESS,
Value: options.RedisAddress,
})
if options.RedisUsername != nil {
// Note that `redis-username` will be supported starting from Ray 2.41.
// If `GcsFaultToleranceOptions.RedisUsername` is set, it will be put into the
// `REDIS_USERNAME` environment variable later. Here, we use `$REDIS_USERNAME` in
// rayStartParams to refer to the environment variable.
win5923 marked this conversation as resolved.
Show resolved Hide resolved
instance.Spec.HeadGroupSpec.RayStartParams["redis-username"] = "$REDIS_USERNAME"
container.Env = append(container.Env, corev1.EnvVar{
Name: utils.REDIS_USERNAME,
Value: options.RedisUsername.Value,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think only one of Value or ValurFrom can be set, not both

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(same for redis password)

ValueFrom: options.RedisUsername.ValueFrom,
})
}
if options.RedisPassword != nil {
// If `GcsFaultToleranceOptions.RedisPassword` is set, it will be put into the
// `REDIS_PASSWORD` environment variable later. Here, we use `$REDIS_PASSWORD` in
Expand Down
83 changes: 82 additions & 1 deletion ray-operator/controllers/ray/common/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,9 @@ func TestConfigureGCSFaultToleranceWithAnnotations(t *testing.T) {
tests := []struct {
name string
storageNS string
redisUsernameEnv string
redisPasswordEnv string
redisUsernameRayStartParams string
redisPasswordRayStartParams string
isHeadPod bool
gcsFTEnabled bool
Expand All @@ -334,12 +336,26 @@ func TestConfigureGCSFaultToleranceWithAnnotations(t *testing.T) {
redisPasswordEnv: "test-password",
isHeadPod: true,
},
{
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add tests in TestConfigureGCSFaultToleranceWithGcsFTOptions too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

name: "GCS FT enabled with redis username and password env",
gcsFTEnabled: true,
redisUsernameEnv: "test-username",
redisPasswordEnv: "test-password",
isHeadPod: true,
},
{
name: "GCS FT enabled with redis password ray start params",
gcsFTEnabled: true,
redisPasswordRayStartParams: "test-password",
isHeadPod: true,
},
{
name: "GCS FT enabled with redis username and password ray start params",
gcsFTEnabled: true,
redisUsernameRayStartParams: "test-username",
redisPasswordRayStartParams: "test-password",
isHeadPod: true,
},
{
// The most common case.
name: "GCS FT enabled with redis password env and ray start params referring to env",
Expand All @@ -348,6 +364,15 @@ func TestConfigureGCSFaultToleranceWithAnnotations(t *testing.T) {
redisPasswordRayStartParams: "$REDIS_PASSWORD",
isHeadPod: false,
},
{
name: "GCS FT enabled with redis username and password env and ray start params referring to env",
gcsFTEnabled: true,
redisUsernameEnv: "test-username",
redisUsernameRayStartParams: "$REDIS_USERNAME",
redisPasswordEnv: "test-password",
redisPasswordRayStartParams: "$REDIS_PASSWORD",
isHeadPod: false,
},
{
name: "GCS FT enabled / worker Pod",
gcsFTEnabled: true,
Expand All @@ -363,6 +388,9 @@ func TestConfigureGCSFaultToleranceWithAnnotations(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
// Validate the test input
if test.redisUsernameEnv != "" && test.redisUsernameRayStartParams != "" {
assert.True(t, test.redisUsernameRayStartParams == "$REDIS_USERNAME")
}
if test.redisPasswordEnv != "" && test.redisPasswordRayStartParams != "" {
assert.True(t, test.redisPasswordRayStartParams == "$REDIS_PASSWORD")
}
Expand Down Expand Up @@ -410,12 +438,21 @@ func TestConfigureGCSFaultToleranceWithAnnotations(t *testing.T) {
if test.storageNS != "" {
cluster.Annotations[utils.RayExternalStorageNSAnnotationKey] = test.storageNS
}
if test.redisUsernameEnv != "" {
cluster.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex].Env = append(cluster.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex].Env, corev1.EnvVar{
Name: utils.REDIS_USERNAME,
Value: test.redisUsernameEnv,
})
}
if test.redisPasswordEnv != "" {
cluster.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex].Env = append(cluster.Spec.HeadGroupSpec.Template.Spec.Containers[utils.RayContainerIndex].Env, corev1.EnvVar{
Name: utils.REDIS_PASSWORD,
Value: test.redisPasswordEnv,
})
}
if test.redisUsernameRayStartParams != "" {
cluster.Spec.HeadGroupSpec.RayStartParams["redis-username"] = test.redisUsernameRayStartParams
}
if test.redisPasswordRayStartParams != "" {
cluster.Spec.HeadGroupSpec.RayStartParams["redis-password"] = test.redisPasswordRayStartParams
}
Expand All @@ -440,6 +477,10 @@ func TestConfigureGCSFaultToleranceWithAnnotations(t *testing.T) {
assert.Equal(t, podTemplate.Annotations[utils.RayExternalStorageNSAnnotationKey], test.storageNS)
assert.True(t, utils.EnvVarExists(utils.RAY_EXTERNAL_STORAGE_NS, container.Env))
}
if test.redisUsernameEnv != "" {
env := getEnvVar(container, utils.REDIS_USERNAME)
assert.Equal(t, env.Value, test.redisUsernameEnv)
}
if test.redisPasswordEnv != "" {
env := getEnvVar(container, utils.REDIS_PASSWORD)
assert.Equal(t, env.Value, test.redisPasswordEnv)
Expand Down Expand Up @@ -477,6 +518,19 @@ func TestConfigureGCSFaultToleranceWithGcsFTOptions(t *testing.T) {
},
isHeadPod: true,
},
{
name: "GCS FT enabled with redis username and password",
gcsFTOptions: &rayv1.GcsFaultToleranceOptions{
RedisAddress: "redis:6379",
RedisUsername: &rayv1.RedisCredential{
Value: "test-username",
},
RedisPassword: &rayv1.RedisCredential{
Value: "test-password",
},
},
isHeadPod: true,
},
{
name: "GCS FT enabled with redis password in secret",
gcsFTOptions: &rayv1.GcsFaultToleranceOptions{
Expand All @@ -492,7 +546,28 @@ func TestConfigureGCSFaultToleranceWithGcsFTOptions(t *testing.T) {
isHeadPod: true,
},
{
name: "GCS FT enabled with redis password in secret",
name: "GCS FT enabled with redis username and password in secret",
gcsFTOptions: &rayv1.GcsFaultToleranceOptions{
RedisAddress: "redis:6379",
RedisUsername: &rayv1.RedisCredential{
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "spec.redisUsername",
},
},
},
RedisPassword: &rayv1.RedisCredential{
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "spec.redisPassword",
},
},
},
},
isHeadPod: true,
},
{
name: "GCS FT enabled with redis external storage namespace",
gcsFTOptions: &rayv1.GcsFaultToleranceOptions{
RedisAddress: "redis:6379",
ExternalStorageNamespace: "test-ns",
Expand Down Expand Up @@ -552,6 +627,12 @@ func TestConfigureGCSFaultToleranceWithGcsFTOptions(t *testing.T) {
env := getEnvVar(container, utils.RAY_REDIS_ADDRESS)
assert.Equal(t, env.Value, "redis:6379")

if test.gcsFTOptions.RedisUsername != nil {
env := getEnvVar(container, utils.REDIS_USERNAME)
assert.Equal(t, env.Value, test.gcsFTOptions.RedisUsername.Value)
assert.Equal(t, env.ValueFrom, test.gcsFTOptions.RedisUsername.ValueFrom)
}

if test.gcsFTOptions.RedisPassword != nil {
env := getEnvVar(container, utils.REDIS_PASSWORD)
assert.Equal(t, env.Value, test.gcsFTOptions.RedisPassword.Value)
Expand Down
8 changes: 6 additions & 2 deletions ray-operator/controllers/ray/raycluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1234,8 +1234,12 @@ func (r *RayClusterReconciler) buildRedisCleanupJob(ctx context.Context, instanc
"import sys; " +
"redis_address = os.getenv('RAY_REDIS_ADDRESS', '').split(',')[0]; " +
"redis_address = redis_address if '://' in redis_address else 'redis://' + redis_address; " +
"parsed = urlparse(redis_address); " +
"sys.exit(1) if not cleanup_redis_storage(host=parsed.hostname, port=parsed.port, password=os.getenv('REDIS_PASSWORD', parsed.password or ''), use_ssl=parsed.scheme=='rediss', storage_namespace=os.getenv('RAY_external_storage_namespace')) else None\"",
"parsed = urlparse(redis_address); ",
}
if utils.EnvVarExists(utils.REDIS_USERNAME, pod.Spec.Containers[utils.RayContainerIndex].Env) {
win5923 marked this conversation as resolved.
Show resolved Hide resolved
pod.Spec.Containers[utils.RayContainerIndex].Args[0] += "sys.exit(1) if not cleanup_redis_storage(host=parsed.hostname, port=parsed.port, username=os.getenv('REDIS_USERNAME', parsed.username), password=os.getenv('REDIS_PASSWORD', parsed.password or ''), use_ssl=parsed.scheme=='rediss', storage_namespace=os.getenv('RAY_external_storage_namespace')) else None\""
} else {
pod.Spec.Containers[utils.RayContainerIndex].Args[0] += "sys.exit(1) if not cleanup_redis_storage(host=parsed.hostname, port=parsed.port, password=os.getenv('REDIS_PASSWORD', parsed.password or ''), use_ssl=parsed.scheme=='rediss', storage_namespace=os.getenv('RAY_external_storage_namespace')) else None\""
}

// Disable liveness and readiness probes because the Job will not launch processes like Raylet and GCS.
Expand Down
1 change: 1 addition & 0 deletions ray-operator/controllers/ray/utils/constant.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ const (
RAY_ADDRESS = "RAY_ADDRESS"
RAY_REDIS_ADDRESS = "RAY_REDIS_ADDRESS"
REDIS_PASSWORD = "REDIS_PASSWORD"
REDIS_USERNAME = "REDIS_USERNAME"
RAY_DASHBOARD_ENABLE_K8S_DISK_USAGE = "RAY_DASHBOARD_ENABLE_K8S_DISK_USAGE"
RAY_EXTERNAL_STORAGE_NS = "RAY_external_storage_namespace"
RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S = "RAY_gcs_rpc_server_reconnect_timeout_s"
Expand Down
Loading