diff --git a/Garnet.sln b/Garnet.sln index 668e5f3133..e0cffb087b 100644 --- a/Garnet.sln +++ b/Garnet.sln @@ -1,4 +1,4 @@ -Microsoft Visual Studio Solution File, Format Version 12.00 +Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 VisualStudioVersion = 17.0.31808.319 MinimumVisualStudioVersion = 10.0.40219.1 @@ -111,6 +111,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Garnet.resources", "libs\re EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NoOpModule", "playground\NoOpModule\NoOpModule.csproj", "{D4C9A1A0-7053-F072-21F5-4E0C5827136D}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ETag", "samples\ETag\ETag.csproj", "{4FBA1587-BAFC-49F8-803A-D1CF431A26F5}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -335,6 +337,14 @@ Global {D4C9A1A0-7053-F072-21F5-4E0C5827136D}.Release|Any CPU.Build.0 = Release|Any CPU {D4C9A1A0-7053-F072-21F5-4E0C5827136D}.Release|x64.ActiveCfg = Release|Any CPU {D4C9A1A0-7053-F072-21F5-4E0C5827136D}.Release|x64.Build.0 = Release|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Debug|Any CPU.Build.0 = Debug|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Debug|x64.ActiveCfg = Debug|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Debug|x64.Build.0 = Debug|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Release|Any CPU.ActiveCfg = Release|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Release|Any CPU.Build.0 = Release|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Release|x64.ActiveCfg = Release|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Release|x64.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -370,6 +380,7 @@ Global {DF2DD03E-87EE-482A-9FBA-6C8FBC23BDC5} = {697766CD-2046-46D9-958A-0FD3B46C98D4} {A48412B4-FD60-467E-A5D9-F155CAB4F907} = {147FCE31-EC09-4C90-8E4D-37CA87ED18C3} {D4C9A1A0-7053-F072-21F5-4E0C5827136D} = {69A71E2C-00E3-42F3-854E-BE157A24834E} + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5} = {7068BB97-1958-4060-B5F1-859464592E56} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {2C02C405-4798-41CA-AF98-61EDFEF6772E} diff --git a/samples/ETag/Caching.cs b/samples/ETag/Caching.cs new file mode 100644 index 0000000000..3009324e7a --- /dev/null +++ b/samples/ETag/Caching.cs @@ -0,0 +1,192 @@ +using System; +using System.Collections.Generic; +using System.Data.Common; +using System.Text; +using System.Text.Json; +using System.Text.Json.Serialization; +using System.Threading; +using System.Threading.Tasks; +using StackExchange.Redis; + +namespace ETag; + +public class Caching +{ + /* + The whole idea of using ETag based commands for caching purposes is to reduce network utilization by only sending and recieving + what is needed over the network. + + Scenario: + We are in an application, cache, and database setup. + In the read path the application always attempts to read from the cache and based on a hit or a miss it reaches into the database. + In the write path the application may use "write-through" or "write-back" to internally update the cache. The write path will be simulated in it's own thread. + The read path will be interacted with via a REPL. + + Anytime the client stores a local "copy" of data that may exist on the cache, it will first make a call to the Cache and based on a hit or miss it will reach into the database. + Everything till now describes your commonly used caching use-case. + + ETags help further speed up your Cache-Hit use case. When the client uses the Garnet GETIFNOTMATCH command they send their current ETag and only incur the extra network bandwidth + of recieving the entire payload when their local version is different from what is there on the server. With large payloads this can help reduce latency in your cache-hit route. + */ + + public static async Task RunSimulation() + { + // LocalApplicationState represents the data that you keep within your Client's reach + Dictionary localApplicationState = new Dictionary(); + + Console.WriteLine("Seeding server and local state..."); + await SeedCache(localApplicationState); + + Console.WriteLine("Booting up fake server threads..."); + // run fake server threads in the background that invalidates entries in the cache and changes things either in write-through or write-back manner + CancellationTokenSource cts = new CancellationTokenSource(); + Task srvThread1 = Task.Run(() => FakeServerThread(cts.Token), cts.Token); + Task srvThread2 = Task.Run(() => FakeServerThread(cts.Token), cts.Token); + Task srvThread3 = Task.Run(() => FakeServerThread(cts.Token), cts.Token); + + // Run interactive repl (application) + await InteractiveRepl(localApplicationState); + + cts.Cancel(); + try + { + await Task.WhenAll(srvThread1, srvThread2, srvThread3); + } + catch (OperationCanceledException) + { + Console.WriteLine("Server threads killed."); + } + } + + static async Task InteractiveRepl(Dictionary localApplicationState) + { + using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); + var db = redis.GetDatabase(0); + + while (true) + { + Console.WriteLine("Enter a review ID (0-19) to fetch or type 'exit' to quit:"); + string input = Console.ReadLine()!; + + if (input.ToLower() == "exit") + { + break; + } + + if (int.TryParse(input, out int reviewId) && reviewId >= 0 && reviewId <= 19) + { + var (existingEtag, existingItem) = localApplicationState[reviewId]; + var (etag, movieReview) = await ETagAbstractions.GetIfNotMatch(db, reviewId.ToString(), existingEtag, existingItem); + + if (movieReview != null) + { + // update local application state/in-memory cache + localApplicationState[reviewId] = (etag, movieReview); + Console.WriteLine($"Movie Name: {movieReview.MovieName}"); + Console.WriteLine($"Reviewer Name: {movieReview.ReviewerName}"); + Console.WriteLine($"Rating: {movieReview.Rating}"); + Console.WriteLine($"Review: {movieReview.Review.Substring(0, 50)}..."); + } + else + { + Console.WriteLine("Review not found."); + } + } + else + { + Console.WriteLine("Invalid input. Please enter a number between 0 and 19."); + } + } + } + + static async Task SeedCache(Dictionary localApplicationState) + { + Random random = new Random(); + using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); + var db = redis.GetDatabase(0); + // Add a bunch of things with sufficiently large payloads into your cache, the maximum size of your values depends on your pagesize config on Garnet + for (int i = 0; i < 20; i++) + { + string key = i.ToString(); + MovieReview movieReview = MovieReview.CreateRandomReview(random); + string value = JsonSerializer.Serialize(movieReview); + long etag = (long)await db.ExecuteAsync("SET", key, value, "WITHETAG"); + localApplicationState.Add(i, (etag, movieReview)); + Console.WriteLine($"Seeded {i}"); + } + } + + static async Task FakeServerThread(CancellationToken token) + { + Random random = new Random(); + using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); + var db = redis.GetDatabase(0); + + // Run a loop where you are updating the items every now and then + while (true) + { + token.ThrowIfCancellationRequested(); + // choose a random number [0 - 19] aka review ID in our database + // change the review and rating for it + string serverToMessWith = random.Next(19).ToString(); + var (etag, movieReview) = await ETagAbstractions.GetWithEtag(db, serverToMessWith); + await ETagAbstractions.PerformLockFreeSafeUpdate(db, serverToMessWith, etag, movieReview!, + (moviewReview) => + { + // the application server decides to reduce or increase the moview review rating + moviewReview.Review += random.Next(-2, 2); + }); + + // sleep anywhere from 10-60 seconds + await Task.Delay(TimeSpan.FromSeconds(random.Next(10, 60))); + } + } + + static string GarnetConnectionStr = "localhost:6379,connectTimeout=999999,syncTimeout=999999"; +} + +class MovieReview +{ + [JsonPropertyName("movie_name")] + public required string MovieName { get; set; } + + [JsonPropertyName("reviewer_name")] + public required string ReviewerName { get; set; } + + [JsonPropertyName("rating")] + public required int Rating { get; set; } + + [JsonPropertyName("review")] + public required string Review { get; set; } + + public static MovieReview CreateRandomReview(Random random) + { + var movieName = $"{CommonWords[random.Next(CommonWords.Length)]} {CommonWords[random.Next(CommonWords.Length)]}"; + var reviewerName = $"{CommonWords[random.Next(CommonWords.Length)]} {CommonWords[random.Next(CommonWords.Length)]}"; + var rating = random.Next(0, 101); + var review = GenerateLargeLoremIpsumText(1 * 1024 * 1024); // 1MB of text + + return new MovieReview + { + MovieName = movieName, + ReviewerName = reviewerName, + Rating = rating, + Review = review + }; + } + + private static string GenerateLargeLoremIpsumText(int sizeInBytes) + { + const string loremIpsum = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. "; + var stringBuilder = new StringBuilder(); + + while (Encoding.UTF8.GetByteCount(stringBuilder.ToString()) < sizeInBytes) + { + stringBuilder.Append(loremIpsum); + } + + return stringBuilder.ToString(); + } + + private static readonly string[] CommonWords = ["The", "Amazing", "Incredible", "Fantastic", "Journey", "Adventure", "Mystery", "Legend", "Quest", "Saga", "John", "Jane", "Smith", "Doe", "Alice", "Bob", "Charlie", "David", "Eve", "Frank"]; +} \ No newline at end of file diff --git a/samples/ETag/ETag.csproj b/samples/ETag/ETag.csproj new file mode 100644 index 0000000000..597e77b7f6 --- /dev/null +++ b/samples/ETag/ETag.csproj @@ -0,0 +1,14 @@ + + + + Exe + net8.0 + disable + enable + + + + + + + \ No newline at end of file diff --git a/samples/ETag/EtagAbstractions.cs b/samples/ETag/EtagAbstractions.cs new file mode 100644 index 0000000000..15441f74db --- /dev/null +++ b/samples/ETag/EtagAbstractions.cs @@ -0,0 +1,108 @@ +using System; +using System.Text.Json; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using StackExchange.Redis; + +namespace ETag; + +public static class ETagAbstractions +{ + /// + /// Performs a lock-free update on an item in the database using a compare-and-swap mechanism. + /// + /// The type of the item to be updated. + /// The database instance where the item is stored. + /// The key identifying the item in the database. + /// The initial ETag value of the item. + /// The initial state of the item. + /// The action to perform on the item before updating it in the database. + /// A task that represents the asynchronous operation. The task result contains a tuple with the final ETag value and the updated item. + public static async Task<(long, T)> PerformLockFreeSafeUpdate(IDatabase db, string key, long initialEtag, T initialItem, Action updateAction) + { + // Compare and Swap Updating + long etag = initialEtag; + T item = initialItem; + while (true) + { + // perform custom action, since item is updated to it's correct latest state by the server this action is performed exactly once on + // an item before it is finally updated on the server. + // NOTE: Based on your application's needs you can modify this method to update a pure function that returns a copy of the data and does not use mutations as side effects. + updateAction(item); + var (updatedSuccesful, newEtag, newItem) = await _updateItemIfMatch(db, etag, key, item); + etag = newEtag; + if (!updatedSuccesful) + item = newItem!; + else + break; + } + + return (etag, item); + } + + /// + /// Retrieves an item from the database if the provided ETag does not match the existing ETag. + /// Saves the network badwidth usage for cases where we have the right state in-memory already + /// + /// The type of the item to be retrieved. + /// The database instance to execute the command on. + /// The key of the item to be retrieved. + /// The existing ETag to compare against. + /// + /// A tuple containing the new ETag and the item if the ETag does not match; otherwise, a tuple with -1 and the default value of T. + /// + public static async Task<(long, T?)> GetIfNotMatch(IDatabase db, string key, long existingEtag, T existingItem, ILogger? logger = null) + { + RedisResult res = await db.ExecuteAsync("GETIFNOTMATCH", key, existingEtag); + if (res.IsNull) + return (-1, default); + + long etag = (long)res[0]; + + if (res[1].IsNull) + { + logger?.LogInformation("Network overhead saved, what we have is already good."); + return (etag, existingItem); + } + + logger?.LogInformation("Network overhead incurred, entire item retrieved over network."); + T item = JsonSerializer.Deserialize((string)res[1]!)!; + return (etag, item); + } + + /// + /// Retrieves an item from the database along with its ETag. + /// + /// The type of the item to retrieve. + /// The database instance to query. + /// The key of the item to retrieve. + /// + /// A tuple containing the ETag as a long and the item casted to type T. + /// If the database call returns null, the ETag will be -1 and the item will be null. + /// + public static async Task<(long, T?)> GetWithEtag(IDatabase db, string key) + { + var executeResult = await db.ExecuteAsync("GETWITHETAG", key); + // If key is not found we get null + if (executeResult.IsNull) + return (-1, default(T)); + + RedisResult[] result = (RedisResult[])executeResult!; + long etag = (long)result[0]; + T item = JsonSerializer.Deserialize((string)result[1]!)!; + return (etag, item); + } + + private static async Task<(bool updated, long etag, T)> _updateItemIfMatch(IDatabase db, long etag, string key, T value) + { + string serializedItem = JsonSerializer.Serialize(value); + RedisResult[] res = (RedisResult[])(await db.ExecuteAsync("SETIFMATCH", key, serializedItem, etag))!; + // successful update does not return updated value so we can just return what was passed for value. + if (res[1].IsNull) + return (true, (long)res[0], value); + + T deserializedItem = JsonSerializer.Deserialize((string)res[1]!)!; + + return (false, (long)res[0], deserializedItem); + } +} \ No newline at end of file diff --git a/samples/ETag/OccSimulation.cs b/samples/ETag/OccSimulation.cs new file mode 100644 index 0000000000..2e71f09184 --- /dev/null +++ b/samples/ETag/OccSimulation.cs @@ -0,0 +1,220 @@ +using System; +using System.Collections.Generic; +using System.Text.Json; +using System.Text.Json.Serialization; +using System.Threading; +using System.Threading.Tasks; +using StackExchange.Redis; + +namespace ETag +{ + /* + This code sample shows how to use ETags to implement lock-free synchronization for non-atomic operations + + OCC is like using a CAS loop to make sure the data we are writing has not had a change in between the time + we have read it and written back. + + Scenario 1: Lock free Json manipulation, we are using JSON as our value but this could essentially be + any data that you falls in the below category that is not provided by the objects API of Garnet. + + Granular Data Structure: Refers to data that is divided into small, independent parts that can be manipulated individually. For example, MongoDB documents allow granular updates on individual fields. + + Mutable Object: If the object allows you to modify its individual components without recreating the entire object, it’s referred to as mutable. For example, Python dictionaries and lists are mutable. + + Partial Updatable Data: This term is used in contexts like databases where updates can target specific fields without affecting the entire record. + + Modular Data Structure: If the object is designed to have independent, self-contained modules (like classes or subcomponents), you might describe it as modular. + + Composable Data: This term applies when different parts of the data can be independently composed, used, or updated, often seen in functional programming. + + Hierarchical Data Structure: Refers to objects with nested components, like JSON or XML, where parts of the hierarchy can be accessed and modified independently + + Simulation Description: + + We have 2 different clients that are updating the same value for a key, but different parts of it concurrently + We want to make sure we don't lose the updates between these 2 clients. + + Client 1: Updates the number of cats a user has + Client 2: Changes the flag for the user for whether or not the user has too many cats. + Client 2 only considers that a user has too many cats when the number of cats is divisible by 5, + otherwise it marks the user as false for not having too many cats + */ + class OccSimulation + { + public static async Task RunSimulation() + { + using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); + var db = redis.GetDatabase(0); + + ContosoUserInfo userInfo = new ContosoUserInfo + { + FirstName = "Hamdaan", + LastName = "Khalid", + NumberOfCats = 1, + TooManyCats = true, + Basket = new List() + }; + + string userKey = "hkhalid"; + string serializedUserInfo = JsonSerializer.Serialize(userInfo); + + // Seed the item in the database + long initialEtag = (long)await db.ExecuteAsync("SET", userKey, serializedUserInfo, "WITHETAG"); + + // Cancellation token is used to exit program on end of interactive repl + var cts = new CancellationTokenSource(); + // Clone user info so they are task local + var client1Task = Task.Run(() => Client1(userKey, initialEtag, (ContosoUserInfo)userInfo.Clone(), cts.Token)); + var client2Task = Task.Run(() => Client2(userKey, initialEtag, (ContosoUserInfo)userInfo.Clone(), cts.Token)); + + // Interactive REPL to change any property in the ContosoUserInfo + while (true) + { + Console.WriteLine("Enter the property to change (FirstName, LastName, NumberOfCats, TooManyCats, AddToBasket, RemoveFromBasket) or 'exit' to quit:"); + Console.WriteLine($"Initial User Info: {JsonSerializer.Serialize(userInfo)}"); + string input = Console.ReadLine()!; + + if (input.ToLower() == "exit") + { + cts.Cancel(); + break; + } + + Action userUpdateAction = (userInfo) => { }; + switch (input) + { + case "FirstName": + Console.WriteLine("Enter new FirstName:"); + string newFirstName = Console.ReadLine()!; + userUpdateAction = (info) => info.FirstName = newFirstName; + break; + case "LastName": + Console.WriteLine("Enter new LastName:"); + string newLastName = Console.ReadLine()!; + userUpdateAction = (info) => info.FirstName = newLastName; + break; + case "NumberOfCats": + Console.WriteLine("Enter new NumberOfCats:"); + if (int.TryParse(Console.ReadLine(), out int numberOfCats)) + { + userUpdateAction = (info) => info.NumberOfCats = numberOfCats; + } + else + { + Console.WriteLine("Invalid number."); + } + break; + case "TooManyCats": + Console.WriteLine("Enter new TooManyCats (true/false):"); + if (bool.TryParse(Console.ReadLine(), out bool tooManyCats)) + { + userUpdateAction = (info) => info.TooManyCats = tooManyCats; + } + else + { + Console.WriteLine("Invalid boolean."); + } + break; + case "AddToBasket": + Console.WriteLine("Enter item to add to basket:"); + string addItem = Console.ReadLine()!; + userUpdateAction = (info) => info.Basket.Add(addItem); + break; + case "RemoveFromBasket": + Console.WriteLine("Enter item to remove from basket:"); + string removeItem = Console.ReadLine()!; + userUpdateAction = (info) => info.Basket.Remove(removeItem); + break; + default: + Console.WriteLine("Unknown property."); + break; + } + + // Update the user info in the database, and then for the REPL + (initialEtag, userInfo) = await ETagAbstractions.PerformLockFreeSafeUpdate(db, userKey, initialEtag, userInfo, userUpdateAction); + Console.WriteLine($"Updated User Info: {JsonSerializer.Serialize(userInfo)}"); + } + + cts.Cancel(); + + try + { + await Task.WhenAll(client1Task, client2Task); + } + catch (OperationCanceledException) + { + Console.WriteLine("All clients killed."); + } + } + + static async Task Client1(string userKey, long initialEtag, ContosoUserInfo initialUserInfo, CancellationToken token) + { + Random random = new Random(); + using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); + var db = redis.GetDatabase(0); + + long etag = initialEtag; + ContosoUserInfo userInfo = initialUserInfo; + while (true) + { + token.ThrowIfCancellationRequested(); + (etag, userInfo) = await ETagAbstractions.PerformLockFreeSafeUpdate(db, userKey, etag, userInfo, (ContosoUserInfo userInfo) => + { + userInfo.NumberOfCats++; + }); + await Task.Delay(TimeSpan.FromSeconds(random.Next(0, 15)), token); + } + } + + static async Task Client2(string userKey, long initialEtag, ContosoUserInfo initialUserInfo, CancellationToken token) + { + Random random = new Random(); + using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); + var db = redis.GetDatabase(0); + + long etag = initialEtag; + ContosoUserInfo userInfo = initialUserInfo; + while (true) + { + token.ThrowIfCancellationRequested(); + (etag, userInfo) = await ETagAbstractions.PerformLockFreeSafeUpdate(db, userKey, etag, userInfo, (ContosoUserInfo userInfo) => + { + userInfo.TooManyCats = userInfo.NumberOfCats % 5 == 0; + }); + await Task.Delay(TimeSpan.FromSeconds(random.Next(0, 15)), token); + } + } + + static string GarnetConnectionStr = "localhost:6379,connectTimeout=999999,syncTimeout=999999"; + } + + class ContosoUserInfo : ICloneable + { + [JsonPropertyName("first_name")] + public required string FirstName { get; set; } + + [JsonPropertyName("last_name")] + public required string LastName { get; set; } + + [JsonPropertyName("number_of_cats")] + public required int NumberOfCats { get; set; } + + [JsonPropertyName("too_many_cats")] + public required bool TooManyCats { get; set; } + + [JsonPropertyName("basket")] + public required List Basket { get; set; } + + public object Clone() + { + return new ContosoUserInfo + { + FirstName = this.FirstName, + LastName = this.LastName, + NumberOfCats = this.NumberOfCats, + TooManyCats = this.TooManyCats, + Basket = new List(this.Basket) + }; + } + } +} \ No newline at end of file diff --git a/samples/ETag/Program.cs b/samples/ETag/Program.cs new file mode 100644 index 0000000000..e34c6502e1 --- /dev/null +++ b/samples/ETag/Program.cs @@ -0,0 +1,14 @@ +using System.Threading.Tasks; + +namespace ETag; + +class Program +{ + static async Task Main(string[] args) + { + // Uncomment whichever example you want to run + + await OccSimulation.RunSimulation(); + // await Caching.RunSimulation(); + } +} \ No newline at end of file diff --git a/website/blog/2025-01-18-etag-when-and-how.md b/website/blog/2025-01-18-etag-when-and-how.md new file mode 100644 index 0000000000..3a1c859385 --- /dev/null +++ b/website/blog/2025-01-18-etag-when-and-how.md @@ -0,0 +1,260 @@ +--- +slug: etags-when-and-how +title: ETags, When and How +authors: [hkhalid, badrishc] +tags: [garnet, concurrency, caching, lock-free, etags] +--- + +**Garnet recently announced native support for ETag-based commands.** +ETags are a powerful feature that enable techniques such as **Optimistic Concurrency Control**, **more efficient network bandwidth utilization for caching**, and **keeping your cache consistent with the database.** + +Currently, Garnet provides ETags as raw strings. This feature is available without requiring any migration, allowing your existing key-value pairs to start leveraging ETags immediately without impacting performance metrics. +You can find the [ETag API documentation here](/docs/commands/garnet-specific-commands#native-etag-support). + +This article explores when and how you can use this new Garnet feature for both your current and future applications. + + + +--- + +## Why Read This Article? +If you're looking to: + +1. **Keep your cache consistent with your Database for keys with contention** +2. **Reduce network bandwidth utilization for caching.** +3. **Avoid the cost of transactions when working with non-atomic values in your cache store.** + +We'll cover these scenarios case by case. + + +--- + +## Keeping Cache consistent state with Database for High Contention Keys with Concurrent Clients + +In a distributed environment it is common to have a cache being updated along with your main source of truth database. This is not a challenge when every client is interacting with keys with no contention. However, in cases where a key in your cache is being updated concurrently by multiple clients as they coordinate writing to the database and then updating the cache. In such a situation whichever client writes to the cache last decides the final state (last write wins), even if they were not the last client to update the database! + +To make the above scenario consider a cache-database setup where 2 clients are interacting with the pair. They both follow the same protocol where on writes they first update the database, and then update the cache. We will denote each client by c1 and c2, the request to update the datbase as D, and the request to update the cache as C. +All is good when the sequence of events is as follows: +```mermaid +sequenceDiagram + participant c1 as Client 1 + participant db as Database + participant cache as Cache + participant c2 as Client 2 + + c1->>db: D (older state) + c1->>cache: C (older state) + c2->>db: D (latest state) + c2->>cache: C (latest state) +``` + +If c1 finishes its database-cache update protocol strictly before c2 starts its protocol. Even the case of interleaving is not entirely bound to produce errors! In the below sequencing the cache will eventually be consistent with the database! + +```mermaid +sequenceDiagram + participant c1 as Client 1 + participant db as Database + participant cache as Cache + participant c2 as Client 2 + + c1->>db: D (older state) + c2->>db: D (latest state) + c1->>cache: C (older state) + c2->>cache: C (latest state) +``` + +However, in the case where between c1 updating the database and the cache, c2 happens to interleave such that it updates the database and the cache before c1, we hit a consistency issue! +```mermaid +sequenceDiagram + participant c1 as Client 1 + participant db as Database + participant cache as Cache + participant c2 as Client 2 + + c1->>db: D (older state) + c2->>db: D (latest state) + c2->>cache: C (latest state) + c1->>cache: C (older state) +``` +In the above sequencing you will see that c2 is the last write to the database but c1 has the last write to the cache. So the cache and datbase have gone out of sync. + +For handling such cases we can rely on the newly introduced ETag feature in Garnet to construct a logical clock around the updates that protect establishing cache consistency (*provided your database also supports ETags such as Cosmos DB). + +In such a scenario the client should use our `SETIFGREATER` API [here](/docs/commands/garnet-specific-commands#setifgreater), when interacting with the cache. `SETIFGREATER` sends a key-value pair along with an etag from the client, and only sets the value if the sent ETag is greater than what is set against the key-value pair already. + +Every client would now follow the following protocol: +- `SETIFGREATER` or `SETIFMATCH` [API](/docs/commands/garnet-specific-commands#setifmatch) based update to the source of truth first. +- Use the retrieved ETag from our previous call as an argument for SETIFGREATER to update the cache. + +If every client follows the above protocol. We can ensure that only the last/latest database write is reflected in the client as well. +The same sequencing of events but with the clients following our new updated protocol. +```mermaid +sequenceDiagram + participant c1 as Client 1 + participant db as Database + participant cache as Cache + participant c2 as Client 2 + + c1->>db: D (older state with etag 'x') + c2->>db: D (latest state with etag 'x+1') + c2->>cache: C (latest state with etag 'x+1') + c1->>cache: C (older state REJECTED because latest etag on server ('x+1' is higher than sent etag 'x') +``` + + +## Reducing Network Bandwidth Utilization for Caching + +Every network call incurs a cost: the amount of data transmitted and the distance over which it travels. In performance-sensitive scenarios, it's beneficial to fetch data only if it has changed in the cache, thereby reducing bandwidth usage and network latency. + +### Scenario: Cache Invalidation +Consider the following setup: + +#### High-Level Diagram +```mermaid +graph TD + S1[Server 1] -->|reads from cache| C[Cache] + S2[Server 2] -->|writes to cache, invalidating server 1's read| C +``` + +#### Sequence Diagram +```mermaid +sequenceDiagram + participant S1 as Server 1 + participant C as Cache + participant S2 as Server 2 + + S1->>C: initial read from cache for k1 + C-->>S1: Send Data and ETag + S2->>C: update value for k1 (invalidates k1) + S1->>C: second read to cache for k1 + C-->>S1: (What is sent back?) +``` + +In the absence of ETags, the entire payload for `k1` is returned on every read, regardless of whether the value associated with `k1` has changed. + +While this might not matter when transferring small payloads (e.g., 100 bytes of data within a high-bandwidth local network), it becomes significant when you have **multiple machines egressing larger payloads (e.g., 1MB each)** on a cloud provider. You pay the cost of egress, bandwidth usage, and experience delays due to the transmission of larger amounts of data. + +To address this, Garnet provides the `GETIFNOTMATCH` API [here](/docs/commands/garnet-specific-commands#getifnotmatch). +, allowing you to fetch data only if it has changed since your last retrieval. Server 1 can store the ETag received in the initial payload in application memory and use `GETIFNOTMATCH` to refresh the local copy only if the value has changed. + +This approach is particularly beneficial in read-heavy systems where data changes infrequently. However, for frequently updated keys, using the regular `GET` API may still be preferable, as updated data will always need to be transmitted. + +Take a look at the ETag caching sample to see the usage of the `GETIFNOTMATCH` API in action. + +--- + +## Avoiding Costly Transactions When Working with Non-Atomic Operations + +Databases with ACID compliance (ignoring the durability for this discussion) rely on synchronization mechanisms like locks to ensure isolation. Garnet employs **state-of-the-art transaction concurrency control** using two-phase locking. However, transactions in Garnet are not permitted during certain initial server states, such as when the checkpointing mechanism is active for durability. + +ETags offer an alternative to transactions when working with a single key for handling the update logic, enabling coordination between multiple clients without locking while ensuring no missed updates. + +### Scenario: Concurrent Updates to a Non-Atomic Value + +Imagine multiple clients concurrently modifying an XML document stored in Garnet. +For example: + +- Client 1 reads the XML, updates Field A, and writes it back. +- Client 2 reads the same XML, updates Field B, and writes it back concurrently. + +Without ETags, the following sequence of events might occur: + +1. Client 1 reads value `v0` for key `k1`. +2. Client 1 modifies Field A, creating a local copy `v1`. +3. Client 2 reads the same value `v0` before Client 1 writes `v1`. +4. Client 2 modifies Field B, creating another local copy `v2`. +5. Either Client 1 or Client 2 writes its version back to the server, potentially overwriting the other’s changes since `v1` and `v2` both don't have either's changes. + +This race condition results in lost updates. + +With ETags, you can use the `SETIFMATCH` API [here](/docs/commands/garnet-specific-commands#setifmatch) to implement a **compare-and-swap** mechanism that guarantees no updates are lost. The following code snippets demonstrate how this can be achieved. + +--- + +### Example Code + +```csharp +static async Task Client(string userKey) +{ + Random random = new Random(); + using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); + var db = redis.GetDatabase(0); + + // Initially read the latest ETag + var res = await EtagAbstractions.GetWithEtag(userKey); + long etag = res.Item1; + ContosoUserInfo userInfo = res.Item2; + + while (true) + { + token.ThrowIfCancellationRequested(); + (etag, userInfo) = await ETagAbstractions.PerformLockFreeSafeUpdate( + db, userKey, etag, userInfo, (ContosoUserInfo info) => + { + info.TooManyCats = info.NumberOfCats % 5 == 0; + }); + + await Task.Delay(TimeSpan.FromSeconds(random.Next(0, 15)), token); + } +} +``` + +#### Supporting Methods + +```csharp +public static async Task<(long, T?)> GetWithEtag(IDatabase db, string key) +{ + var executeResult = await db.ExecuteAsync("GETWITHETAG", key); + if (executeResult.IsNull) return (-1, default(T)); + + RedisResult[] result = (RedisResult[])executeResult!; + long etag = (long)result[0]; + T item = JsonSerializer.Deserialize((string)result[1]!)!; + return (etag, item); +} + +public static async Task<(long, T)> PerformLockFreeSafeUpdate(IDatabase db, string key, long initialEtag, T initialItem, Action updateAction) +{ + // Compare and Swap Updating + long etag = initialEtag; + T item = initialItem; + while (true) + { + // perform custom action, since item is updated to it's correct latest state by the server this action is performed exactly once on + // an item before it is finally updated on the server. + // NOTE: Based on your application's needs you can modify this method to update a pure function that returns a copy of the data and does not use mutations as side effects. + updateAction(item); + var (updatedSuccesful, newEtag, newItem) = await _updateItemIfMatch(db, etag, key, item); + etag = newEtag; + if (!updatedSuccesful) + item = newItem!; + else + break; + } + + return (etag, item); +} + +private static async Task<(bool updated, long etag, T?)> _updateItemIfMatch(IDatabase db, long etag, string key, T value) +{ + string serializedItem = JsonSerializer.Serialize(value); + RedisResult[] res = (RedisResult[])(await db.ExecuteAsync("SETIFMATCH", key, serializedItem, etag))!; + // successful update does not return updated value so we can just return what was passed for value. + if (res[1].IsNull) + return (true, (long)res[0], value); + + T deserializedItem = JsonSerializer.Deserialize((string)res[1]!)!; + + return (false, (long)res[0], deserializedItem); +} +``` + +Every read-(extra logic/modify)-write call starts by first reading the latest etag and value for a key using `GETWITHETAG` [here](/docs/commands/garnet-specific-commands#getwithetag), it then wraps it's update logic in a callback action and then calls the `PerformLockFreeSafeUpdate` method in `ETagAbstractions` to safely apply the update. + +Internally the `PerformLockFreeSafeUpdate` method runs a loop that retrieves the data that performs your update on the object and sends a `SETIFMATCH` request, the server only then updates the value if your ETag indicates that at the time of your decision you had performed your update on the latest copy of the data. If the server sees that between your read and write there were any updates the value, the server sends the latest copy of the data along with the updated etag, your client code then reapplies the changes on the latest copy and resends the request back to the server for the update, this form of update will guarantees that eventually all changes synchronize themselves on the server one after other. + +In a read-heavy system where contention is not high on the same key this update will be performed in the very first loop itself, and be easier to manage than having a custom transaction. However, in a heavy key contention scenario this could result in multiple attempts to write to the latest copy especially if the logic between your read and write is slow. + +--- + +ETags are more of a lower level primitives that you can use to build abstractions that let you build logical clocks, and lock free transactions tailored to your needs. If you find yourself in the above commonly found distributed scenarios, you now have another tool in your toolbag to help overcome your scaling needs. \ No newline at end of file diff --git a/website/blog/authors.yml b/website/blog/authors.yml index 49a0bbd4f6..a0e62117dc 100644 --- a/website/blog/authors.yml +++ b/website/blog/authors.yml @@ -4,3 +4,8 @@ badrishc: url: https://badrish.net image_url: https://badrish.net/assets/icons/badrish4.jpg +hkhalid: + name: Hamdaan Khalid + title: Software Engineer, Azure Resource Graph + url: https://hamdaan-rails-personal.herokuapp.com/ + image_url: https://media.licdn.com/dms/image/v2/D5603AQEB5k6B-_kYcg/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1713142460509?e=1743033600&v=beta&t=efEhRJq1SLgi09uCSUQJN3ssq-_cwljG0ysUc54GcSc diff --git a/website/docs/commands/garnet-specific.md b/website/docs/commands/garnet-specific.md index 699db52412..e22664f6b8 100644 --- a/website/docs/commands/garnet-specific.md +++ b/website/docs/commands/garnet-specific.md @@ -147,8 +147,9 @@ for details. Garnet provides support for ETags on raw strings. By using the ETag-related commands outlined below, you can associate any **string based key-value pair** inserted into Garnet with an automatically updated ETag. Compatibility with non-ETag commands and the behavior of data inserted with ETags are detailed at the end of this document. +To initialize a key value pair with an ETag you can use either the SET command with the newly added "WITHETAG" optional flag, or you can take any existing Key value pair and call SETIFMATCH with the ETag argument as 0 (Any key value pair without an explicit ETag has an ETag of 0 implicitly). Read more about Etag use cases and patterns [here](../../blog/etags-when-and-how) + -To initialize a key value pair with an ETag you can use either the SET command with the newly added "WITHETAG" optional flag, or you can take any existing Key value pair and call SETIFMATCH with the ETag argument as 0 (Any key value pair without an explicit ETag has an ETag of 0 implicitly). **You can read more about setting an initial ETag via SET [here](../commands/raw-string#set)** --- ### **SET (WITHETAG)** @@ -213,8 +214,6 @@ Sets/updates a key value pair with the given etag only if (1) the etag given in #### **Response** -One of the following: - - **Array reply**: If the sent etag matches the existing etag the reponse will be an array where the first item is the updated etag, and the second value is nil. If the etags do not match then the response array will hold the latest etag, and the latest value in order. --- @@ -235,8 +234,6 @@ Sets/updates a key value pair with the given etag only if (1) the etag given in #### **Response** -One of the following: - - **Array reply**: If the sent etag is greater than the existing etag then an array where the first item is the updated etag, and the second value is nil is returned. If the sentEtag is less than or equal to the existing etag then the response array will hold the latest etag, and the latest value in order. ---