From 9486b67685ddc88a34c34985001d72fdcbdbe340 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Sat, 18 Jan 2025 21:36:14 -0800 Subject: [PATCH 01/15] Add OCC sample and begin documentation --- Garnet.sln | 13 +- .../Storage/Functions/MainStore/RMWMethods.cs | 2 +- playground/ETag/ETag.csproj | 14 + playground/ETag/OccSimulation.cs | 254 ++++++++++++++++++ playground/ETag/Program.cs | 11 + website/blog/2025-01-18-etag-when-and-how | 23 ++ website/blog/authors.yml | 5 + 7 files changed, 320 insertions(+), 2 deletions(-) create mode 100644 playground/ETag/ETag.csproj create mode 100644 playground/ETag/OccSimulation.cs create mode 100644 playground/ETag/Program.cs create mode 100644 website/blog/2025-01-18-etag-when-and-how diff --git a/Garnet.sln b/Garnet.sln index 668e5f3133..cdb5aa6472 100644 --- a/Garnet.sln +++ b/Garnet.sln @@ -1,4 +1,4 @@ -Microsoft Visual Studio Solution File, Format Version 12.00 +Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 VisualStudioVersion = 17.0.31808.319 MinimumVisualStudioVersion = 10.0.40219.1 @@ -111,6 +111,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Garnet.resources", "libs\re EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NoOpModule", "playground\NoOpModule\NoOpModule.csproj", "{D4C9A1A0-7053-F072-21F5-4E0C5827136D}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ETag", "playground\ETag\ETag.csproj", "{4FBA1587-BAFC-49F8-803A-D1CF431A26F5}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -335,6 +337,14 @@ Global {D4C9A1A0-7053-F072-21F5-4E0C5827136D}.Release|Any CPU.Build.0 = Release|Any CPU {D4C9A1A0-7053-F072-21F5-4E0C5827136D}.Release|x64.ActiveCfg = Release|Any CPU {D4C9A1A0-7053-F072-21F5-4E0C5827136D}.Release|x64.Build.0 = Release|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Debug|Any 
CPU.Build.0 = Debug|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Debug|x64.ActiveCfg = Debug|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Debug|x64.Build.0 = Debug|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Release|Any CPU.ActiveCfg = Release|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Release|Any CPU.Build.0 = Release|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Release|x64.ActiveCfg = Release|Any CPU + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5}.Release|x64.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -370,6 +380,7 @@ Global {DF2DD03E-87EE-482A-9FBA-6C8FBC23BDC5} = {697766CD-2046-46D9-958A-0FD3B46C98D4} {A48412B4-FD60-467E-A5D9-F155CAB4F907} = {147FCE31-EC09-4C90-8E4D-37CA87ED18C3} {D4C9A1A0-7053-F072-21F5-4E0C5827136D} = {69A71E2C-00E3-42F3-854E-BE157A24834E} + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5} = {69A71E2C-00E3-42F3-854E-BE157A24834E} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {2C02C405-4798-41CA-AF98-61EDFEF6772E} diff --git a/libs/server/Storage/Functions/MainStore/RMWMethods.cs b/libs/server/Storage/Functions/MainStore/RMWMethods.cs index 1cd66baf85..c863471647 100644 --- a/libs/server/Storage/Functions/MainStore/RMWMethods.cs +++ b/libs/server/Storage/Functions/MainStore/RMWMethods.cs @@ -352,8 +352,8 @@ private bool InPlaceUpdaterWorker(ref SpanByte key, ref RawStringInput input, re long newEtag = functionsState.etagState.etag + 1; value.ShrinkSerializedLength(metadataSize + inputValue.Length + EtagConstants.EtagSize); - rmwInfo.SetUsedValueLength(ref recordInfo, ref value, value.TotalSize); rmwInfo.ClearExtraValueLength(ref recordInfo, ref value, value.TotalSize); + rmwInfo.SetUsedValueLength(ref recordInfo, ref value, value.TotalSize); value.SetEtagInPayload(newEtag); diff --git a/playground/ETag/ETag.csproj b/playground/ETag/ETag.csproj new file mode 100644 index 0000000000..597e77b7f6 --- /dev/null +++ 
b/playground/ETag/ETag.csproj @@ -0,0 +1,14 @@ + + + + Exe + net8.0 + disable + enable + + + + + + + \ No newline at end of file diff --git a/playground/ETag/OccSimulation.cs b/playground/ETag/OccSimulation.cs new file mode 100644 index 0000000000..68d6d81413 --- /dev/null +++ b/playground/ETag/OccSimulation.cs @@ -0,0 +1,254 @@ +using StackExchange.Redis; +using System; +using System.Collections.Generic; +using System.Text.Json; +using System.Text.Json.Serialization; +using System.Threading; +using System.Threading.Tasks; + +namespace ETag +{ + /* + This code sample shows how to use ETags to implement lock-free synchronization for non-atomic operations + + OCC is like using a CAS loop to make sure the data we are writing has not had a change in between the time + we have read it and written back. + + Scenario 1: Lock free Json manipulation, we are using JSON as our value but this could essentially be + any data that you falls in the below category that is not provided by the objects API of Garnet. + + Granular Data Structure: Refers to data that is divided into small, independent parts that can be manipulated individually. For example, MongoDB documents allow granular updates on individual fields. + + Mutable Object: If the object allows you to modify its individual components without recreating the entire object, it’s referred to as mutable. For example, Python dictionaries and lists are mutable. + + Partial Updatable Data: This term is used in contexts like databases where updates can target specific fields without affecting the entire record. + + Modular Data Structure: If the object is designed to have independent, self-contained modules (like classes or subcomponents), you might describe it as modular. + + Composable Data: This term applies when different parts of the data can be independently composed, used, or updated, often seen in functional programming. 
+ + Hierarchical Data Structure: Refers to objects with nested components, like JSON or XML, where parts of the hierarchy can be accessed and modified independently + + Simulation Description: + + We have 2 different clients that are updating the same value for a key, but different parts of it concurrently + We want to make sure we don't lose the updates between these 2 clients. + + Client 1: Updates the number of cats a user has + Client 2: Changes the flag for the user for whether or not the user has too many cats. + Client 2 only considers that a user has too many cats when the number of cats is divisible by 5, + otherwise it marks the user as false for not having too many cats + */ + class OccSimulation + { + public static async Task RunSimulation() + { + using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); + var db = redis.GetDatabase(0); + + ContosoUserInfo userInfo = new ContosoUserInfo + { + FirstName = "Hamdaan", + LastName = "Khalid", + NumberOfCats = 1, + TooManyCats = true, + Basket = new List() + }; + + string userKey = "hkhalid"; + string serializedUserInfo = JsonSerializer.Serialize(userInfo); + + // Seed the item in the database + long initialEtag = (long)await db.ExecuteAsync("SET", userKey, serializedUserInfo, "WITHETAG"); + + // Cancellation token is used to exit program on end of interactive repl + var cts = new CancellationTokenSource(); + // Clone user info so they are task local + var client1Task = Task.Run(() => Client1(userKey, initialEtag, (ContosoUserInfo)userInfo.Clone(), cts.Token)); + var client2Task = Task.Run(() => Client2(userKey, initialEtag, (ContosoUserInfo)userInfo.Clone(), cts.Token)); + + // Interactive REPL to change any property in the ContosoUserInfo + while (true) + { + Console.WriteLine("Enter the property to change (FirstName, LastName, NumberOfCats, TooManyCats, AddToBasket, RemoveFromBasket) or 'exit' to quit:"); + Console.WriteLine($"Initial User Info: 
{JsonSerializer.Serialize(userInfo)}"); + string input = Console.ReadLine()!; + + if (input.ToLower() == "exit") + { + cts.Cancel(); + break; + } + + Action userUpdateAction = (userInfo) => {}; + switch (input) + { + case "FirstName": + Console.WriteLine("Enter new FirstName:"); + string newFirstName = Console.ReadLine()!; + userUpdateAction = (info) => info.FirstName = newFirstName; + break; + case "LastName": + Console.WriteLine("Enter new LastName:"); + string newLastName = Console.ReadLine()!; + userUpdateAction = (info) => info.LastName = newLastName; + break; + case "NumberOfCats": + Console.WriteLine("Enter new NumberOfCats:"); + if (int.TryParse(Console.ReadLine(), out int numberOfCats)) + { + userUpdateAction = (info) => info.NumberOfCats = numberOfCats; + } + else + { + Console.WriteLine("Invalid number."); + } + break; + case "TooManyCats": + Console.WriteLine("Enter new TooManyCats (true/false):"); + if (bool.TryParse(Console.ReadLine(), out bool tooManyCats)) + { + userUpdateAction = (info) => info.TooManyCats = tooManyCats; + } + else + { + Console.WriteLine("Invalid boolean."); + } + break; + case "AddToBasket": + Console.WriteLine("Enter item to add to basket:"); + string addItem = Console.ReadLine()!; + userUpdateAction = (info) => info.Basket.Add(addItem); + break; + case "RemoveFromBasket": + Console.WriteLine("Enter item to remove from basket:"); + string removeItem = Console.ReadLine()!; + userUpdateAction = (info) => info.Basket.Remove(removeItem); + break; + default: + Console.WriteLine("Unknown property."); + break; + } + + // Update the user info in the database, and then for the REPL + (initialEtag, userInfo) = await LockFreeUpdateUserInfo(db, userKey, initialEtag, userInfo, userUpdateAction); + Console.WriteLine($"Updated User Info: {JsonSerializer.Serialize(userInfo)}"); + } + + cts.Cancel(); + + try + { + await Task.WhenAll(client1Task, client2Task); + } + catch (OperationCanceledException) + { + Console.WriteLine("Tasks were 
cancelled."); + } + } + + static async Task Client1(string userKey, long initialEtag, ContosoUserInfo initialUserInfo, CancellationToken token) + { + Random random = new Random(); + using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); + var db = redis.GetDatabase(0); + + long etag = initialEtag; + ContosoUserInfo userInfo = initialUserInfo; + while (true) + { + token.ThrowIfCancellationRequested(); + (etag, userInfo) = await LockFreeUpdateUserInfo(db, userKey, etag, userInfo, (ContosoUserInfo userInfo) => + { + userInfo.NumberOfCats++; + }); + await Task.Delay(TimeSpan.FromSeconds(random.Next(0, 15)), token); + } + } + + static async Task Client2(string userKey, long initialEtag, ContosoUserInfo initialUserInfo, CancellationToken token) + { + Random random = new Random(); + using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); + var db = redis.GetDatabase(0); + + long etag = initialEtag; + ContosoUserInfo userInfo = initialUserInfo; + while (true) + { + token.ThrowIfCancellationRequested(); + (etag, userInfo) = await LockFreeUpdateUserInfo(db, userKey, etag, userInfo, (ContosoUserInfo userInfo) => + { + userInfo.TooManyCats = userInfo.NumberOfCats % 5 == 0; + }); + await Task.Delay(TimeSpan.FromSeconds(random.Next(0, 15)), token); + } + } + + static async Task<(long, ContosoUserInfo)> LockFreeUpdateUserInfo(IDatabase db, string userKey, long initialEtag, ContosoUserInfo initialUserInfo, Action updateAction) + { + // Compare and Swap Updating + long etag = initialEtag; + ContosoUserInfo userInfo = initialUserInfo; + while (true) + { + // perform invoker passed update on userInfo + updateAction(userInfo); + + var (updatedSuccesful, newEtag, newUserInfo) = await UpdateUserIfMatch(db, etag, userKey, userInfo); + etag = newEtag; + userInfo = newUserInfo; + + if (updatedSuccesful) + break; + } + + return (etag, userInfo); + } + + static async Task<(bool updated, long etag, ContosoUserInfo)> 
UpdateUserIfMatch(IDatabase db, long etag, string key, ContosoUserInfo value) + { + // You may notice the "!" that is because we know that SETIFMATCH doesn't return null + string serializedUserInfo = JsonSerializer.Serialize(value); + RedisResult[] res = (RedisResult[])(await db.ExecuteAsync("SETIFMATCH", key, serializedUserInfo, etag))!; + + if (res[1].IsNull) + return (true, (long)res[0], value); + + ContosoUserInfo deserializedUserInfo = JsonSerializer.Deserialize((string)res[1]!)!; + return (false, (long)res[0], deserializedUserInfo); + } + + static string GarnetConnectionStr = "localhost:6379,connectTimeout=999999,syncTimeout=999999"; + } + + class ContosoUserInfo : ICloneable + { + [JsonPropertyName("first_name")] + public required string FirstName { get; set; } + + [JsonPropertyName("last_name")] + public required string LastName { get; set; } + + [JsonPropertyName("number_of_cats")] + public required int NumberOfCats { get; set; } + + [JsonPropertyName("too_many_cats")] + public required bool TooManyCats { get; set; } + + [JsonPropertyName("basket")] + public required List Basket { get; set; } + + public object Clone() + { + return new ContosoUserInfo + { + FirstName = this.FirstName, + LastName = this.LastName, + NumberOfCats = this.NumberOfCats, + TooManyCats = this.TooManyCats, + Basket = new List(this.Basket) + }; + } + } +} \ No newline at end of file diff --git a/playground/ETag/Program.cs b/playground/ETag/Program.cs new file mode 100644 index 0000000000..e44669c96d --- /dev/null +++ b/playground/ETag/Program.cs @@ -0,0 +1,11 @@ +using System.Threading.Tasks; + +namespace ETag; + +class Program +{ + static async Task Main(string[] args) + { + await OccSimulation.RunSimulation(); + } +} diff --git a/website/blog/2025-01-18-etag-when-and-how b/website/blog/2025-01-18-etag-when-and-how new file mode 100644 index 0000000000..6ac79bfb16 --- /dev/null +++ b/website/blog/2025-01-18-etag-when-and-how @@ -0,0 +1,23 @@ +--- +slug: etags-when-and-how +title: 
ETags, When and How +authors: hkhalid +tags: [garnet, concurrency, caching, lock-free] +--- + +Garnet recently announced native support for ETag based commands. ETags are a primitive that give users access to sophisticated techniques such as Optimistic Concurrency Control as well as more efficient network bandwidth utilization for caching. + +ETags are currently provided over Raw Strings in Garnet. + +ETags are available to users without the need of any migration; this means your existing key-value pairs can start using ETags on the fly without affecting existing performance metrics. +You can find the [ETag API documentation here](HK TODO). + +This article will help you explore when and how you can use this new shiny Garnet feature for your current and future applications. + +**Read this article further if you need or might want**: +1. Reduced network bandwidth utilization for caching. +2. Avoid the cost of transactions for working with non-atomic values in your cache-store + +Both the above cases are common cases for folks using Caches, lets go case by case. 
+ +## \ No newline at end of file diff --git a/website/blog/authors.yml b/website/blog/authors.yml index 49a0bbd4f6..a0e62117dc 100644 --- a/website/blog/authors.yml +++ b/website/blog/authors.yml @@ -4,3 +4,8 @@ badrishc: url: https://badrish.net image_url: https://badrish.net/assets/icons/badrish4.jpg +hkhalid: + name: Hamdaan Khalid + title: Software Engineer, Azure Resource Graph + url: https://hamdaan-rails-personal.herokuapp.com/ + image_url: https://media.licdn.com/dms/image/v2/D5603AQEB5k6B-_kYcg/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1713142460509?e=1743033600&v=beta&t=efEhRJq1SLgi09uCSUQJN3ssq-_cwljG0ysUc54GcSc From f1c65bbd75827f96f23f6be8b178148aa4db9f54 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Sun, 19 Jan 2025 11:42:20 -0800 Subject: [PATCH 02/15] first draft of code samples --- playground/ETag/Caching.cs | 196 ++++++++++++++++++++++++++++ playground/ETag/EtagAbstractions.cs | 110 ++++++++++++++++ playground/ETag/OccSimulation.cs | 42 +----- playground/ETag/Program.cs | 5 +- 4 files changed, 314 insertions(+), 39 deletions(-) create mode 100644 playground/ETag/Caching.cs create mode 100644 playground/ETag/EtagAbstractions.cs diff --git a/playground/ETag/Caching.cs b/playground/ETag/Caching.cs new file mode 100644 index 0000000000..17ad412424 --- /dev/null +++ b/playground/ETag/Caching.cs @@ -0,0 +1,196 @@ +using System; +using System.Collections.Generic; +using System.Data.Common; +using System.Text; +using System.Text.Json; +using System.Text.Json.Serialization; +using System.Threading; +using System.Threading.Tasks; +using StackExchange.Redis; + +namespace ETag; + +public class Caching +{ + /* + The whole idea of using ETag based commands for caching purposes is to reduce network utilization by only sending and recieving + what is needed over the network. + + Scenario: + We are in an application, cache, and database setup. 
+ In the read path the application always attempts to read from the cache and based on a hit or a miss it reaches into the database. + In the write path the application may use "write-through" or "write-back" to internally update the cache. The write path will be simulated in it's own thread. + The read path will be interacted with via a REPL. + + Anytime the client stores a local "copy" of data that may exist on the cache, it will first make a call to the Cache and based on a hit or miss it will reach into the database. + Everything till now describes your commonly used caching use-case. + + ETags help further speed up your Cache-Hit use case. When the client uses the Garnet GETIFNOTMATCH command they send their current ETag and only incur the extra network bandwidth + of recieving the entire payload when their local version is different from what is there on the server. With large payloads this can help reduce latency in your cache-hit route. + */ + + public static async Task RunSimulation() + { + // LocalApplicationState represents the data that you keep within your Client's reach + Dictionary localApplicationState = new Dictionary(); + + Console.WriteLine("Seeding server and local state..."); + await SeedCache(localApplicationState); + + Console.WriteLine("Booting up fake server threads..."); + // run fake server threads in the background that invalidates entries in the cache and changes things either in write-through or write-back manner + CancellationTokenSource cts = new CancellationTokenSource(); + Task srvThread1 = Task.Run(() => FakeServerThread(cts.Token), cts.Token); + Task srvThread2 = Task.Run(() => FakeServerThread(cts.Token), cts.Token); + Task srvThread3 = Task.Run(() => FakeServerThread(cts.Token), cts.Token); + + // Run interactive repl (application) + await InteractiveRepl(localApplicationState); + + cts.Cancel(); + try + { + await Task.WhenAll(srvThread1, srvThread2, srvThread3); + } + catch (OperationCanceledException) + { + 
Console.WriteLine("Server threads killed."); + } + } + + static async Task InteractiveRepl(Dictionary localApplicationState) + { + using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); + var db = redis.GetDatabase(0); + + while (true) + { + Console.WriteLine("Enter a review ID (0-19) to fetch or type 'exit' to quit:"); + string input = Console.ReadLine()!; + + if (input.ToLower() == "exit") + { + break; + } + + if (int.TryParse(input, out int reviewId) && reviewId >= 0 && reviewId <= 19) + { + var (existingEtag, existingItem) = localApplicationState[reviewId]; + var (etag, movieReview) = await ETagAbstractions.GetIfNotMatch(db, reviewId.ToString(), existingEtag, existingItem); + + if (movieReview != null) + { + // update local application state/in-memory cache + localApplicationState[reviewId] = (etag, movieReview); + Console.WriteLine($"Movie Name: {movieReview.MovieName}"); + Console.WriteLine($"Reviewer Name: {movieReview.ReviewerName}"); + Console.WriteLine($"Rating: {movieReview.Rating}"); + Console.WriteLine($"Review: {movieReview.Review.Substring(0, 50)}..."); + } + else + { + Console.WriteLine("Review not found."); + } + } + else + { + Console.WriteLine("Invalid input. 
Please enter a number between 0 and 19."); + } + } + } + + static async Task SeedCache(Dictionary localApplicationState) + { + Random random = new Random(); + using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); + var db = redis.GetDatabase(0); + // Add 100 things with sufficiently large payloads into your cache, the maximum size of your values depends on your pagesize config on Garnet + for (int i = 0; i < 20; i++) + { + string key = i.ToString(); + MovieReview movieReview = MovieReview.CreateRandomReview(random); + string value = JsonSerializer.Serialize(movieReview); + long etag = (long)await db.ExecuteAsync("SET", key, value, "WITHETAG"); + localApplicationState.Add(i, (etag, movieReview)); + Console.WriteLine($"Seeded {i}"); + } + } + + static async Task FakeServerThread(CancellationToken token) + { + Random random = new Random(); + using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); + var db = redis.GetDatabase(0); + + // Run a loop where you are updating the items every now and then + while (true) + { + token.ThrowIfCancellationRequested(); + // choose a random number [0 - 19] aka review ID in our database + // change the review and rating for it + string serverToMessWith = random.Next(20).ToString(); + var (etag, movieReview) = await ETagAbstractions.GetWithEtag(db, serverToMessWith); + await ETagAbstractions.PerformLockFreeSafeUpdate(db, serverToMessWith, etag, movieReview!, + (moviewReview) => + { + // the application server decides to reduce or increase the moview review rating + moviewReview.Rating += random.Next(-2, 2); + }); + + // sleep anywhere from 10-60 seconds + await Task.Delay(TimeSpan.FromSeconds(random.Next(10, 60)), token); + } + } + + static string GarnetConnectionStr = "localhost:6379,connectTimeout=999999,syncTimeout=999999"; +} + +class MovieReview +{ + [JsonPropertyName("movie_name")] + public required string MovieName { get; set; } + + [JsonPropertyName("reviewer_name")] + public 
required string ReviewerName { get; set; } + + [JsonPropertyName("rating")] + public required int Rating { get; set; } + + [JsonPropertyName("review")] + public required string Review { get; set; } + + public static MovieReview CreateRandomReview(Random random) + { + var movieName = $"{CommonWords[random.Next(CommonWords.Length)]} {CommonWords[random.Next(CommonWords.Length)]}"; + var reviewerName = $"{CommonWords[random.Next(CommonWords.Length)]} {CommonWords[random.Next(CommonWords.Length)]}"; + var rating = random.Next(0, 101); + var review = GenerateLargeLoremIpsumText(1 * 1024 * 1024); // 1MB of text + + return new MovieReview + { + MovieName = movieName, + ReviewerName = reviewerName, + Rating = rating, + Review = review + }; + } + + private static string GenerateLargeLoremIpsumText(int sizeInBytes) + { + const string loremIpsum = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. "; + var stringBuilder = new StringBuilder(); + + while (stringBuilder.Length < sizeInBytes) // loremIpsum is pure ASCII, so char count == UTF-8 byte count; avoids re-materializing the whole string every pass + { + stringBuilder.Append(loremIpsum); + } + + return stringBuilder.ToString(); + } + + private static readonly string[] CommonWords = + [ + "The", "Amazing", "Incredible", "Fantastic", "Journey", "Adventure", "Mystery", "Legend", "Quest", "Saga", + "John", "Jane", "Smith", "Doe", "Alice", "Bob", "Charlie", "David", "Eve", "Frank" + ]; +} \ No newline at end of file diff --git a/playground/ETag/EtagAbstractions.cs b/playground/ETag/EtagAbstractions.cs new file mode 100644 index 0000000000..513a625f16 --- /dev/null +++ b/playground/ETag/EtagAbstractions.cs @@ -0,0 +1,110 @@ +using System; +using System.Text.Json; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using StackExchange.Redis; + +namespace ETag; + +public static class ETagAbstractions +{ + /// + /// Performs a lock-free update on an item in the database using a compare-and-swap mechanism. 
+ /// + /// The type of the item to be updated. + /// The database instance where the item is stored. + /// The key identifying the item in the database. + /// The initial ETag value of the item. + /// The initial state of the item. + /// The action to perform on the item before updating it in the database. + /// A task that represents the asynchronous operation. The task result contains a tuple with the final ETag value and the updated item. + public static async Task<(long, T)> PerformLockFreeSafeUpdate(IDatabase db, string key, long initialEtag, T initialItem, Action updateAction) + { + // Compare and Swap Updating + long etag = initialEtag; + T item = initialItem; + while (true) + { + // perform custom action, since item is updated to it's correct latest state by the server this action is performed exactly once on + // an item before it is finally updated on the server + updateAction(item); + + var (updatedSuccesful, newEtag, newItem) = await _updateItemIfMatch(db, etag, key, item); + etag = newEtag; + item = newItem; + + if (updatedSuccesful) + break; + } + + return (etag, item); + } + + /// + /// Retrieves an item from the database if the provided ETag does not match the existing ETag. + /// Saves the network bandwidth usage for cases where we have the right state in-memory already + /// + /// The type of the item to be retrieved. + /// The database instance to execute the command on. + /// The key of the item to be retrieved. + /// The existing ETag to compare against. + /// + /// A tuple containing the current ETag and the item: the freshly retrieved item if the ETag does not match, or the supplied existing item if it matches. If the command returns null, a tuple with -1 and the default value of T. + /// + public static async Task<(long, T?)> GetIfNotMatch(IDatabase db, string key, long existingEtag, T existingItem, ILogger? 
logger = null) + { + RedisResult res = await db.ExecuteAsync("GETIFNOTMATCH", key, existingEtag); + if (res.IsNull) + return (-1, default); + + long etag = (long)res[0]; + + if (res[1].IsNull) + { + logger?.LogInformation("Network overhead saved, what we have is already good."); + return (etag, existingItem); + } + + logger?.LogInformation("Network overhead incurred, entire item retrieved over network."); + T item = JsonSerializer.Deserialize((string)res[1]!)!; + return (etag, item); + } + + /// + /// Retrieves an item from the database along with its ETag. + /// + /// The type of the item to retrieve. + /// The database instance to query. + /// The key of the item to retrieve. + /// + /// A tuple containing the ETag as a long and the item casted to type T. + /// If the database call returns null, the ETag will be -1 and the item will be null. + /// + public static async Task<(long, T?)> GetWithEtag(IDatabase db, string key) + { + var executeResult = await db.ExecuteAsync("GETWITHETAG", key); + // If key is not found we get null + if (executeResult.IsNull) + { + return (-1, default(T)); + } + + RedisResult[] result = (RedisResult[])executeResult!; + long etag = (long)result[0]; + T item = JsonSerializer.Deserialize((string)result[1]!)!; + return (etag, item); + } + + private static async Task<(bool updated, long etag, T)> _updateItemIfMatch(IDatabase db, long etag, string key, T value) + { + // You may notice the "!" 
that is because we know that SETIFMATCH doesn't return null + string serializedItem = JsonSerializer.Serialize(value); + RedisResult[] res = (RedisResult[])(await db.ExecuteAsync("SETIFMATCH", key, serializedItem, etag))!; + + if (res[1].IsNull) + return (true, (long)res[0], value); + + T deserializedItem = JsonSerializer.Deserialize((string)res[1]!)!; + return (false, (long)res[0], deserializedItem); + } +} diff --git a/playground/ETag/OccSimulation.cs b/playground/ETag/OccSimulation.cs index 68d6d81413..354d59b779 100644 --- a/playground/ETag/OccSimulation.cs +++ b/playground/ETag/OccSimulation.cs @@ -131,7 +131,7 @@ public static async Task RunSimulation() } // Update the user info in the database, and then for the REPL - (initialEtag, userInfo) = await LockFreeUpdateUserInfo(db, userKey, initialEtag, userInfo, userUpdateAction); + (initialEtag, userInfo) = await ETagAbstractions.PerformLockFreeSafeUpdate(db, userKey, initialEtag, userInfo, userUpdateAction); Console.WriteLine($"Updated User Info: {JsonSerializer.Serialize(userInfo)}"); } @@ -143,7 +143,7 @@ public static async Task RunSimulation() } catch (OperationCanceledException) { - Console.WriteLine("Tasks were cancelled."); + Console.WriteLine("All clients killed."); } } @@ -158,7 +158,7 @@ static async Task Client1(string userKey, long initialEtag, ContosoUserInfo init while (true) { token.ThrowIfCancellationRequested(); - (etag, userInfo) = await LockFreeUpdateUserInfo(db, userKey, etag, userInfo, (ContosoUserInfo userInfo) => + (etag, userInfo) = await ETagAbstractions.PerformLockFreeSafeUpdate(db, userKey, etag, userInfo, (ContosoUserInfo userInfo) => { userInfo.NumberOfCats++; }); @@ -177,7 +177,7 @@ static async Task Client2(string userKey, long initialEtag, ContosoUserInfo init while (true) { token.ThrowIfCancellationRequested(); - (etag, userInfo) = await LockFreeUpdateUserInfo(db, userKey, etag, userInfo, (ContosoUserInfo userInfo) => + (etag, userInfo) = await 
ETagAbstractions.PerformLockFreeSafeUpdate(db, userKey, etag, userInfo, (ContosoUserInfo userInfo) => { userInfo.TooManyCats = userInfo.NumberOfCats % 5 == 0; }); @@ -185,40 +185,6 @@ static async Task Client2(string userKey, long initialEtag, ContosoUserInfo init } } - static async Task<(long, ContosoUserInfo)> LockFreeUpdateUserInfo(IDatabase db, string userKey, long initialEtag, ContosoUserInfo initialUserInfo, Action updateAction) - { - // Compare and Swap Updating - long etag = initialEtag; - ContosoUserInfo userInfo = initialUserInfo; - while (true) - { - // perform invoker passed update on userInfo - updateAction(userInfo); - - var (updatedSuccesful, newEtag, newUserInfo) = await UpdateUserIfMatch(db, etag, userKey, userInfo); - etag = newEtag; - userInfo = newUserInfo; - - if (updatedSuccesful) - break; - } - - return (etag, userInfo); - } - - static async Task<(bool updated, long etag, ContosoUserInfo)> UpdateUserIfMatch(IDatabase db, long etag, string key, ContosoUserInfo value) - { - // You may notice the "!" 
that is because we know that SETIFMATCH doesn't return null - string serializedUserInfo = JsonSerializer.Serialize(value); - RedisResult[] res = (RedisResult[])(await db.ExecuteAsync("SETIFMATCH", key, serializedUserInfo, etag))!; - - if (res[1].IsNull) - return (true, (long)res[0], value); - - ContosoUserInfo deserializedUserInfo = JsonSerializer.Deserialize((string)res[1]!)!; - return (false, (long)res[0], deserializedUserInfo); - } - static string GarnetConnectionStr = "localhost:6379,connectTimeout=999999,syncTimeout=999999"; } diff --git a/playground/ETag/Program.cs b/playground/ETag/Program.cs index e44669c96d..ae6b3e1a45 100644 --- a/playground/ETag/Program.cs +++ b/playground/ETag/Program.cs @@ -6,6 +6,9 @@ class Program { static async Task Main(string[] args) { - await OccSimulation.RunSimulation(); + // Uncomment whichever example you want to run + + // await OccSimulation.RunSimulation(); + await Caching.RunSimulation(); } } From 0a65d16c64c5f0b6208d274b8816ac3c501c5bf9 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Mon, 20 Jan 2025 17:53:53 -0800 Subject: [PATCH 03/15] finish draft of blog --- playground/ETag/Caching.cs | 4 +- playground/ETag/EtagAbstractions.cs | 3 +- website/blog/2025-01-18-etag-when-and-how | 23 --- website/blog/2025-01-18-etag-when-and-how.md | 197 +++++++++++++++++++ 4 files changed, 201 insertions(+), 26 deletions(-) delete mode 100644 website/blog/2025-01-18-etag-when-and-how create mode 100644 website/blog/2025-01-18-etag-when-and-how.md diff --git a/playground/ETag/Caching.cs b/playground/ETag/Caching.cs index 17ad412424..e5942d87cb 100644 --- a/playground/ETag/Caching.cs +++ b/playground/ETag/Caching.cs @@ -15,7 +15,7 @@ public class Caching /* The whole idea of using ETag based commands for caching purposes is to reduce network utilization by only sending and recieving what is needed over the network. - + Scenario: We are in an application, cache, and database setup. 
In the read path the application always attempts to read from the cache and based on a hit or a miss it reaches into the database. @@ -104,7 +104,7 @@ static async Task SeedCache(Dictionary localApplicatio Random random = new Random(); using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); var db = redis.GetDatabase(0); - // Add 100 things with sufficiently large payloads into your cache, the maximum size of your values depends on your pagesize config on Garnet + // Add a bunch of things with sufficiently large payloads into your cache, the maximum size of your values depends on your pagesize config on Garnet for (int i = 0; i < 20; i++) { string key = i.ToString(); diff --git a/playground/ETag/EtagAbstractions.cs b/playground/ETag/EtagAbstractions.cs index 513a625f16..7f3a1d05df 100644 --- a/playground/ETag/EtagAbstractions.cs +++ b/playground/ETag/EtagAbstractions.cs @@ -26,7 +26,8 @@ public static class ETagAbstractions while (true) { // perform custom action, since item is updated to it's correct latest state by the server this action is performed exactly once on - // an item before it is finally updated on the server + // an item before it is finally updated on the server. + // NOTE: Based on your application's needs you can modify this method to update a pure function that returns a copy of the data and does not use mutations as side effects. updateAction(item); var (updatedSuccesful, newEtag, newItem) = await _updateItemIfMatch(db, etag, key, item); diff --git a/website/blog/2025-01-18-etag-when-and-how b/website/blog/2025-01-18-etag-when-and-how deleted file mode 100644 index 6ac79bfb16..0000000000 --- a/website/blog/2025-01-18-etag-when-and-how +++ /dev/null @@ -1,23 +0,0 @@ ---- -slug: etags-when-and-how -title: ETags, When and How -authors: hkhalid -tags: [garnet, concurrency, caching, lock-free] ---- - -Garnet recently announced native support for ETag based commands. 
ETags are a primitive that give users access to sophisticated techniques such as Optimistic Concurrency Control as well as more efficient network bandwidth utilization for caching. - -ETags are currently provided over Raw Strings in Garnet. - -ETags are available to users without the need of any migration; this means your existing key-value pairs can start using ETags on the fly without affecting existing performance metrics. -You can find the [ETag API documentation here](HK TODO). - -This article will help you explore when and how you can use this new shiny Garnet feature for your current and future applications. - -**Read this article further if you need or might want**: -1. Reduced network bandwidth utilization for caching. -2. Avoid the cost of transactions for working with non-atomic values in your cache-store - -Both the above cases are common cases for folks using Caches, lets go case by case. - -## \ No newline at end of file diff --git a/website/blog/2025-01-18-etag-when-and-how.md b/website/blog/2025-01-18-etag-when-and-how.md new file mode 100644 index 0000000000..a43291a1d9 --- /dev/null +++ b/website/blog/2025-01-18-etag-when-and-how.md @@ -0,0 +1,197 @@ +--- +slug: etags-when-and-how +title: ETags, When and How +authors: hkhalid +tags: [garnet, concurrency, caching, lock-free, etags] +--- + +Garnet recently announced native support for ETag-based commands. ETags are a powerful feature that enable techniques such as **Optimistic Concurrency Control** and **more efficient network bandwidth utilization for caching**. + +Currently, Garnet provides ETags as raw strings. The feature is available without requiring any migration, meaning your existing key-value pairs can start leveraging ETags on the fly without impacting performance metrics. You can find the [ETag API documentation here](../commands/garnet-specific-commands#native-etag-support). 
+ +This article explores when and how you can use this new Garnet feature for both your current and future applications. + +--- + +## Why Read This Article? +If you need or want: + +1. **Reduced network bandwidth utilization for caching**. +2. **A way to avoid the cost of transactions when working with non-atomic values in your cache store**. + +These are common scenarios that we'll cover case by case. + +--- + +## Reducing Network Bandwidth Utilization for Caching + +Every network call incurs a cost: the amount of data transmitted and the distance over which it travels. In performance-sensitive scenarios, it's beneficial to fetch data only if it has changed in the cache, thereby reducing bandwidth usage and network latency. + +### Scenario: Server-Cache-Database Interaction +Consider the following setup: + +#### High-Level Diagram +``` +(server 1)----[server 1 reads from cache]----(cache) + | + | + [server 2 writes to the cache] + | + | + (server 2) +``` + +### Sequence Diagram +``` + server 1 cache 1 server 2 +1. initial read from cache for k1--------------------> +2. <----------------------------------------Send Data and ETag + <-------------------------update value for k1 , that invalidates k1 +3. second read to cache for k1 ------------------------> + <--------------------------------------(What is sent back?) +```` + +Reading through the above scenario will bring you to the last item in our sequence, the second read and what is returned for it. +In the situation where no ETag is used, the entire payload is returned for k1 everytime. Regardless of whether or not value associated with k1 was changed you have incurred the cost of sending the entire value. + +While this may not be a concern when working with a single machine transfering 100 Bytes of data where the network bandwidth is 10Gbps over fiber optic. 
This matters when you have 50 machines egressing 1MB of data from your network on your favorite cloud provider from Garnet (or any other software), you are paying with your cost of egressing, bandwidth usage, and you are incurring the delay of recieving more bits over the internet. + +This is where in our above sequence diagram we can use the `GETIFNOTMATCH` [here](../commands/garnet-specific-commands#getifnotmatch) Garnet API to only retrieve the entire payload from the server when the value on the server has changed from what we had retrieved last. Server-1 can use application memory to store the value of the ETag it recieved in the initial payload, and employ the `GETIFNOTMATCH` api and refresh the latest in-memory and etag of the object in it's local state. + +In a read heavy systems where the items are read very frequently but not updated as frequently using the above technique can help reduce your network bandwidth utilization. I recommend that in the case where you know the value of your key will be changed very heavily, you should continue to use the regular `GET` call since the updated the data will always need to be sent by the cache. + +Take a look at our ETag caching sample to see the usage of the `GETIFNOTMATCH` API in action. + +## Avoiding Costly Transactions When Working With Non-Atomic Operations + +Any ACI( ignore the D) compliant database that gives you the ability to data transactions has some level of non-trivial synchronization that it creates. Garnet too while having state of the art transaction concurrency control does depend on Locks via 2 phase locking to be able to isolate transactions from each other. On top of the use of locks Transactions are not allowed to run for the initial phase of a server when concurrently using the Checkpointing mechanism for durability. 
+ +ETags provide a way to handle logic that could previously only been provided either by you making a Garnet server side change to create custom commands or to employ the use of Transactions and Transaction Procedures in Garnet. ETag semantics let multiple clients co-ordinate their update logic without the use of locking and ensuring no-missed updates. Let's see how. + +Imagine a scenario where we have multiple clients actively mutating an XML(could be any non-atomic value like JSON or even protobufs) field that they store in Garnet. + +Client 1 read the XML value for a key, updates field A and writes back. Client 2 also concurrently reads the XML from Garnet for the same key, updates a different field and writes back. + +Now in our concurrent scenario we have multiple different interleavings that exist between the time we read the value, modify it, and write back. Depending on how these interleavings happen we could overwrite, double write, or by chance even end up with the correct value for the XML. This is by definition a race condition. Below is an example sequence showing an unwanted interleaving. + +1. Client 1 read value for k1 at v0 +2. Client 1 mutate value at v0 for field A and now we v0 is still on server but locally we have v1` +3. Client 2 reads the value for k1 at v0 since client 1 has still not written it's value at v1 to Garnet +4. Client 2 mutates value for v0 for field B and now we have v0 at v1`` but there are client 1 and client 2 hold 2 different copies of data. The mutated copy of v0 on client 1 does not have the mutation on field A that client 1 makes, and vice versa for client 2. +5. Now based on who writes back the server first, the last write on the server will be the v1 that exists but either way we will miss the update from eiter client 1 or client 2! + +To eliminate this race condition we can make use of the ETag semantics, let's create an ETag for our existing key value pair in Garnet. 
+ +Using the `SETIFMATCH`[here](../docs/commands/garnet-specific-commands#setifmatch) API for our updates to the value we can create a Compare And Swap like loop that guarantees no lost updates. + +The code snippets shown at the bottom, taken from the ETag samples illustrate this well. + +Every read-(extra logic/modify)-write call starts by first reading the latest etag and value for a key, it then wraps it's update logic in a callback action and then calls the `PerformLockFreeSafeUpdate` method in ETagAbstractions to safely apply the update. + +Internally the `PerformLockFreeSafeUpdate` runs a loop that retrieves the data that performs your update on the object and sends a `SETIFMATCH` request, the server only then updates the value if your ETag indicates that at the time of your decision you had performed your update on the latest copy of the data. If the server sees that between your read and write there were any updates the value, the server sends the latest copy of the data along with the updated etag, your client code then reapplies the changes on the latest copy and resends the request back to the server for the update, this form of update will guarantees that eventually all changes synchronize themselves on the server one after other. + +In a read-heavy system where contention is not high on the same key this update will be performed in the very first loop itself, and be easier to manage than having a custom transaction. However, in a heavy key contention scenario this could result in multiple attempts to write to the latest copy especially if the logic between your read and write is slow. 
+ +``` +static async Task Client(string userKey) +{ + Random random = new Random(); + using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); + var db = redis.GetDatabase(0); + + // initially read latest etag + var res = await EtagAbstractions.GetWithEtag(userKey) + long etag = (long)res[0]; + ContosoUserInfo userInfo = res[1]; + while (true) + { + token.ThrowIfCancellationRequested(); + // + (etag, userInfo) = await ETagAbstractions.PerformLockFreeSafeUpdate(db, userKey, etag, userInfo, (ContosoUserInfo info) => + { + info.TooManyCats = info.NumberOfCats % 5 == 0; + }); + await Task.Delay(TimeSpan.FromSeconds(random.Next(0, 15)), token); + } +} + +.... +.... +.... +.... + +/// +/// Retrieves an item from the database along with its ETag. +/// +/// The type of the item to retrieve. +/// The database instance to query. +/// The key of the item to retrieve. +/// +/// A tuple containing the ETag as a long and the item casted to type T. +/// If the database call returns null, the ETag will be -1 and the item will be null. +/// +public static async Task<(long, T?)> GetWithEtag(IDatabase db, string key) +{ + var executeResult = await db.ExecuteAsync("GETWITHETAG", key); + // If key is not found we get null + if (executeResult.IsNull) + { + return (-1, default(T)); + } + + RedisResult[] result = (RedisResult[])executeResult!; + long etag = (long)result[0]; + T item = JsonSerializer.Deserialize((string)result[1]!)!; + return (etag, item); +} + +/// +/// Performs a lock-free update on an item in the database using a compare-and-swap mechanism. +/// +/// The type of the item to be updated. +/// The database instance where the item is stored. +/// The key identifying the item in the database. +/// The initial ETag value of the item. +/// The initial state of the item. +/// The action to perform on the item before updating it in the database. +/// A task that represents the asynchronous operation. 
The task result contains a tuple with the final ETag value and the updated item. +public static async Task<(long, T)> PerformLockFreeSafeUpdate(IDatabase db, string key, long initialEtag, T initialItem, Action updateAction) +{ + // Compare and Swap Updating + long etag = initialEtag; + T item = initialItem; + while (true) + { + // perform custom action, since item is updated to it's correct latest state by the server this action is performed exactly once on + // an item before it is finally updated on the server + updateAction(item); + + var (updatedSuccesful, newEtag, newItem) = await _updateItemIfMatch(db, etag, key, item); + etag = newEtag; + item = newItem; + + if (updatedSuccesful) + break; + } + + return (etag, item); +} + +private static async Task<(bool updated, long etag, T)> _updateItemIfMatch(IDatabase db, long etag, string key, T value) +{ + // You may notice the "!" that is because we know that SETIFMATCH doesn't return null + string serializedItem = JsonSerializer.Serialize(value); + RedisResult[] res = (RedisResult[])(await db.ExecuteAsync("SETIFMATCH", key, serializedItem, etag))!; + + if (res[1].IsNull) + return (true, (long)res[0], value); + + T deserializedItem = JsonSerializer.Deserialize((string)res[1]!)!; + return (false, (long)res[0], deserializedItem); +} + +``` + +Please refer to the Garnet Samples to see complete examples. + +ETags are not a silver bullet to your transaction overhead. However, in a system where the lock contention on the same key is low, ETags can be used to avoid transaction locking overhead as well as reduce network bandwidth usage. 
From 815f2c5ce2ab7bb4d7899a931c88f8ff442c0c03 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Tue, 21 Jan 2025 11:53:12 -0800 Subject: [PATCH 04/15] Finish blog --- website/blog/2025-01-18-etag-when-and-how.md | 159 ++++++++----------- 1 file changed, 70 insertions(+), 89 deletions(-) diff --git a/website/blog/2025-01-18-etag-when-and-how.md b/website/blog/2025-01-18-etag-when-and-how.md index a43291a1d9..ae39d07196 100644 --- a/website/blog/2025-01-18-etag-when-and-how.md +++ b/website/blog/2025-01-18-etag-when-and-how.md @@ -5,21 +5,23 @@ authors: hkhalid tags: [garnet, concurrency, caching, lock-free, etags] --- -Garnet recently announced native support for ETag-based commands. ETags are a powerful feature that enable techniques such as **Optimistic Concurrency Control** and **more efficient network bandwidth utilization for caching**. +**Garnet recently announced native support for ETag-based commands.** +ETags are a powerful feature that enables techniques such as **Optimistic Concurrency Control** and **more efficient network bandwidth utilization for caching.** -Currently, Garnet provides ETags as raw strings. The feature is available without requiring any migration, meaning your existing key-value pairs can start leveraging ETags on the fly without impacting performance metrics. You can find the [ETag API documentation here](../commands/garnet-specific-commands#native-etag-support). +Currently, Garnet supports ETags on raw strings. This feature is available without requiring any migration, allowing your existing key-value pairs to start leveraging ETags immediately without impacting performance metrics. +You can find the [ETag API documentation here](./docs/commands/garnet-specific-commands#native-etag-support). -This article explores when and how you can use this new Garnet feature for both your current and future applications. +This article explores when and how you can use this new Garnet feature for both your current and future applications. 
--- ## Why Read This Article? -If you need or want: +If you're looking to: -1. **Reduced network bandwidth utilization for caching**. -2. **A way to avoid the cost of transactions when working with non-atomic values in your cache store**. +1. **Reduce network bandwidth utilization for caching.** +2. **Avoid the cost of transactions when working with non-atomic values in your cache store.** -These are common scenarios that we'll cover case by case. +We'll cover these scenarios case by case. --- @@ -27,7 +29,7 @@ These are common scenarios that we'll cover case by case. Every network call incurs a cost: the amount of data transmitted and the distance over which it travels. In performance-sensitive scenarios, it's beneficial to fetch data only if it has changed in the cache, thereby reducing bandwidth usage and network latency. -### Scenario: Server-Cache-Database Interaction +### Scenario: Cache Invalidation Consider the following setup: #### High-Level Diagram @@ -35,109 +37,98 @@ Consider the following setup: (server 1)----[server 1 reads from cache]----(cache) | | - [server 2 writes to the cache] + [server 2 writes to the cache, invalidating whatever server 1 had read] | | (server 2) ``` -### Sequence Diagram +#### Sequence Diagram ``` - server 1 cache 1 server 2 -1. initial read from cache for k1--------------------> + server 1 cache server 2 +1. initial read from cache for k1-----------------------> 2. <----------------------------------------Send Data and ETag - <-------------------------update value for k1 , that invalidates k1 -3. second read to cache for k1 ------------------------> + <-------------------------update value for k1 (invalidates k1) +3. second read to cache for k1 -------------------------> <--------------------------------------(What is sent back?) -```` +``` -Reading through the above scenario will bring you to the last item in our sequence, the second read and what is returned for it. 
-In the situation where no ETag is used, the entire payload is returned for k1 everytime. Regardless of whether or not value associated with k1 was changed you have incurred the cost of sending the entire value. +In the absence of ETags, the entire payload for `k1` is returned on every read, regardless of whether the value associated with `k1` has changed. -While this may not be a concern when working with a single machine transfering 100 Bytes of data where the network bandwidth is 10Gbps over fiber optic. This matters when you have 50 machines egressing 1MB of data from your network on your favorite cloud provider from Garnet (or any other software), you are paying with your cost of egressing, bandwidth usage, and you are incurring the delay of recieving more bits over the internet. +While this might not matter when transferring small payloads (e.g., 100 bytes of data within a high-bandwidth local network), it becomes significant when you have **multiple machines egressing larger payloads (e.g., 1MB each)** on a cloud provider. You pay for egress and bandwidth usage, and you experience delays due to the transmission of larger amounts of data. -This is where in our above sequence diagram we can use the `GETIFNOTMATCH` [here](../commands/garnet-specific-commands#getifnotmatch) Garnet API to only retrieve the entire payload from the server when the value on the server has changed from what we had retrieved last. Server-1 can use application memory to store the value of the ETag it recieved in the initial payload, and employ the `GETIFNOTMATCH` api and refresh the latest in-memory and etag of the object in it's local state. +To address this, Garnet provides the `GETIFNOTMATCH` API [here](./docs/commands/garnet-specific-commands#getifnotmatch), +allowing you to fetch data only if it has changed since your last retrieval. 
Server 1 can store the ETag received in the initial payload in application memory and use `GETIFNOTMATCH` to refresh the local copy only if the value has changed. -In a read heavy systems where the items are read very frequently but not updated as frequently using the above technique can help reduce your network bandwidth utilization. I recommend that in the case where you know the value of your key will be changed very heavily, you should continue to use the regular `GET` call since the updated the data will always need to be sent by the cache. +This approach is particularly beneficial in read-heavy systems where data changes infrequently. However, for frequently updated keys, using the regular `GET` API may still be preferable, as updated data will always need to be transmitted. -Take a look at our ETag caching sample to see the usage of the `GETIFNOTMATCH` API in action. +Take a look at the ETag caching sample to see the usage of the `GETIFNOTMATCH` API in action. -## Avoiding Costly Transactions When Working With Non-Atomic Operations +--- -Any ACI( ignore the D) compliant database that gives you the ability to data transactions has some level of non-trivial synchronization that it creates. Garnet too while having state of the art transaction concurrency control does depend on Locks via 2 phase locking to be able to isolate transactions from each other. On top of the use of locks Transactions are not allowed to run for the initial phase of a server when concurrently using the Checkpointing mechanism for durability. +## Avoiding Costly Transactions When Working with Non-Atomic Operations -ETags provide a way to handle logic that could previously only been provided either by you making a Garnet server side change to create custom commands or to employ the use of Transactions and Transaction Procedures in Garnet. ETag semantics let multiple clients co-ordinate their update logic without the use of locking and ensuring no-missed updates. Let's see how. 
+Databases with ACID compliance (ignoring the durability for this discussion) rely on synchronization mechanisms like locks to ensure isolation. Garnet employs **state-of-the-art transaction concurrency control** using two-phase locking. However, transactions in Garnet are not permitted during certain initial server states, such as when the checkpointing mechanism is active for durability. -Imagine a scenario where we have multiple clients actively mutating an XML(could be any non-atomic value like JSON or even protobufs) field that they store in Garnet. +ETags offer an alternative to transactions when working with a single key for handling the update logic, enabling coordination between multiple clients without locking while ensuring no missed updates. -Client 1 read the XML value for a key, updates field A and writes back. Client 2 also concurrently reads the XML from Garnet for the same key, updates a different field and writes back. +### Scenario: Concurrent Updates to a Non-Atomic Value -Now in our concurrent scenario we have multiple different interleavings that exist between the time we read the value, modify it, and write back. Depending on how these interleavings happen we could overwrite, double write, or by chance even end up with the correct value for the XML. This is by definition a race condition. Below is an example sequence showing an unwanted interleaving. +Imagine multiple clients concurrently modifying an XML document stored in Garnet. +For example: -1. Client 1 read value for k1 at v0 -2. Client 1 mutate value at v0 for field A and now we v0 is still on server but locally we have v1` -3. Client 2 reads the value for k1 at v0 since client 1 has still not written it's value at v1 to Garnet -4. Client 2 mutates value for v0 for field B and now we have v0 at v1`` but there are client 1 and client 2 hold 2 different copies of data. 
The mutated copy of v0 on client 1 does not have the mutation on field A that client 1 makes, and vice versa for client 2. -5. Now based on who writes back the server first, the last write on the server will be the v1 that exists but either way we will miss the update from eiter client 1 or client 2! +- Client 1 reads the XML, updates Field A, and writes it back. +- Client 2 reads the same XML, updates Field B, and writes it back concurrently. -To eliminate this race condition we can make use of the ETag semantics, let's create an ETag for our existing key value pair in Garnet. +Without ETags, the following sequence of events might occur: -Using the `SETIFMATCH`[here](../docs/commands/garnet-specific-commands#setifmatch) API for our updates to the value we can create a Compare And Swap like loop that guarantees no lost updates. +1. Client 1 reads value `v0` for key `k1`. +2. Client 1 modifies Field A, creating a local copy `v1`. +3. Client 2 reads the same value `v0` before Client 1 writes `v1`. +4. Client 2 modifies Field B, creating another local copy `v2`. +5. Both clients write their versions back to the server; whichever write lands last overwrites the other, because neither `v1` nor `v2` contains the other client's change. -The code snippets shown at the bottom, taken from the ETag samples illustrate this well. +This race condition results in lost updates. -Every read-(extra logic/modify)-write call starts by first reading the latest etag and value for a key, it then wraps it's update logic in a callback action and then calls the `PerformLockFreeSafeUpdate` method in ETagAbstractions to safely apply the update. +With ETags, you can use the `SETIFMATCH` API [here](./docs/commands/garnet-specific-commands#setifmatch) to implement a **compare-and-swap** mechanism that guarantees no updates are lost. The following code snippets demonstrate how this can be achieved. 
-Internally the `PerformLockFreeSafeUpdate` runs a loop that retrieves the data that performs your update on the object and sends a `SETIFMATCH` request, the server only then updates the value if your ETag indicates that at the time of your decision you had performed your update on the latest copy of the data. If the server sees that between your read and write there were any updates the value, the server sends the latest copy of the data along with the updated etag, your client code then reapplies the changes on the latest copy and resends the request back to the server for the update, this form of update will guarantees that eventually all changes synchronize themselves on the server one after other. +--- -In a read-heavy system where contention is not high on the same key this update will be performed in the very first loop itself, and be easier to manage than having a custom transaction. However, in a heavy key contention scenario this could result in multiple attempts to write to the latest copy especially if the logic between your read and write is slow. 
+### Example Code -``` +```csharp static async Task Client(string userKey) { Random random = new Random(); using var redis = await ConnectionMultiplexer.ConnectAsync(GarnetConnectionStr); var db = redis.GetDatabase(0); - // initially read latest etag - var res = await EtagAbstractions.GetWithEtag(userKey) - long etag = (long)res[0]; - ContosoUserInfo userInfo = res[1]; + // Initially read the latest ETag + var res = await EtagAbstractions.GetWithEtag(userKey); + long etag = res.Item1; + ContosoUserInfo userInfo = res.Item2; + while (true) { token.ThrowIfCancellationRequested(); - // - (etag, userInfo) = await ETagAbstractions.PerformLockFreeSafeUpdate(db, userKey, etag, userInfo, (ContosoUserInfo info) => + (etag, userInfo) = await ETagAbstractions.PerformLockFreeSafeUpdate( + db, userKey, etag, userInfo, (ContosoUserInfo info) => { info.TooManyCats = info.NumberOfCats % 5 == 0; }); + await Task.Delay(TimeSpan.FromSeconds(random.Next(0, 15)), token); } } +``` -.... -.... -.... -.... - -/// -/// Retrieves an item from the database along with its ETag. -/// -/// The type of the item to retrieve. -/// The database instance to query. -/// The key of the item to retrieve. -/// -/// A tuple containing the ETag as a long and the item casted to type T. -/// If the database call returns null, the ETag will be -1 and the item will be null. -/// +#### Supporting Methods + +```csharp public static async Task<(long, T?)> GetWithEtag(IDatabase db, string key) { var executeResult = await db.ExecuteAsync("GETWITHETAG", key); - // If key is not found we get null - if (executeResult.IsNull) - { - return (-1, default(T)); - } + if (executeResult.IsNull) return (-1, default(T)); RedisResult[] result = (RedisResult[])executeResult!; long etag = (long)result[0]; @@ -145,53 +136,43 @@ public static async Task<(long, T?)> GetWithEtag(IDatabase db, string key) return (etag, item); } -/// -/// Performs a lock-free update on an item in the database using a compare-and-swap mechanism. 
-/// -/// The type of the item to be updated. -/// The database instance where the item is stored. -/// The key identifying the item in the database. -/// The initial ETag value of the item. -/// The initial state of the item. -/// The action to perform on the item before updating it in the database. -/// A task that represents the asynchronous operation. The task result contains a tuple with the final ETag value and the updated item. public static async Task<(long, T)> PerformLockFreeSafeUpdate(IDatabase db, string key, long initialEtag, T initialItem, Action updateAction) { - // Compare and Swap Updating long etag = initialEtag; T item = initialItem; + while (true) { - // perform custom action, since item is updated to it's correct latest state by the server this action is performed exactly once on - // an item before it is finally updated on the server updateAction(item); - var (updatedSuccesful, newEtag, newItem) = await _updateItemIfMatch(db, etag, key, item); + var (updated, newEtag, newItem) = await _updateItemIfMatch(db, etag, key, item); etag = newEtag; item = newItem; - if (updatedSuccesful) - break; + if (updated) break; } return (etag, item); } -private static async Task<(bool updated, long etag, T)> _updateItemIfMatch(IDatabase db, long etag, string key, T value) +private static async Task<(bool, long, T)> _updateItemIfMatch(IDatabase db, long etag, string key, T value) { - // You may notice the "!" 
that is because we know that SETIFMATCH doesn't return null - string serializedItem = JsonSerializer.Serialize(value); + string serializedItem = JsonSerializer.Serialize(value); RedisResult[] res = (RedisResult[])(await db.ExecuteAsync("SETIFMATCH", key, serializedItem, etag))!; - if (res[1].IsNull) - return (true, (long)res[0], value); + if (res[1].IsNull) return (true, (long)res[0], value); T deserializedItem = JsonSerializer.Deserialize((string)res[1]!)!; return (false, (long)res[0], deserializedItem); } - ``` -Please refer to the Garnet Samples to see complete examples. +Every read-(extra logic/modify)-write call starts by reading the latest ETag and value for a key using `GETWITHETAG` [here](./docs/commands/garnet-specific-commands#getwithetag). It then wraps its update logic in a callback action and calls the `PerformLockFreeSafeUpdate` method in `ETagAbstractions` to safely apply the update. + +Internally, the `PerformLockFreeSafeUpdate` method runs a loop that applies your update to the object and sends a `SETIFMATCH` request. The server updates the value only if your ETag shows that you performed your update on the latest copy of the data. If the value was updated between your read and your write, the server instead returns the latest copy of the data along with its updated ETag; your client code then reapplies the change to that latest copy and resends the request. This guarantees that all changes are eventually applied on the server, one after another. + +In a read-heavy system where contention on the same key is low, this update will usually succeed on the very first iteration of the loop, and is easier to manage than a custom transaction. 
However, in a heavy key contention scenario this could result in multiple attempts to write to the latest copy especially if the logic between your read and write is slow. + +--- -ETags are not a silver bullet to your transaction overhead. However, in a system where the lock contention on the same key is low, ETags can be used to avoid transaction locking overhead as well as reduce network bandwidth usage. +ETags are not a silver bullet. However, in low contention scenarios, they reduce transaction overhead and network bandwidth usage, offering a lightweight alternative to traditional locking mechanisms. \ No newline at end of file From 8b26437efe3d7478913a1d79bc34e341f8a57ea6 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Tue, 21 Jan 2025 11:58:51 -0800 Subject: [PATCH 05/15] Move code to samples dir --- Garnet.sln | 4 ++-- {playground => samples}/ETag/Caching.cs | 0 {playground => samples}/ETag/ETag.csproj | 0 {playground => samples}/ETag/EtagAbstractions.cs | 0 {playground => samples}/ETag/OccSimulation.cs | 2 +- {playground => samples}/ETag/Program.cs | 0 6 files changed, 3 insertions(+), 3 deletions(-) rename {playground => samples}/ETag/Caching.cs (100%) rename {playground => samples}/ETag/ETag.csproj (100%) rename {playground => samples}/ETag/EtagAbstractions.cs (100%) rename {playground => samples}/ETag/OccSimulation.cs (99%) rename {playground => samples}/ETag/Program.cs (100%) diff --git a/Garnet.sln b/Garnet.sln index cdb5aa6472..e0cffb087b 100644 --- a/Garnet.sln +++ b/Garnet.sln @@ -111,7 +111,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Garnet.resources", "libs\re EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NoOpModule", "playground\NoOpModule\NoOpModule.csproj", "{D4C9A1A0-7053-F072-21F5-4E0C5827136D}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ETag", "playground\ETag\ETag.csproj", "{4FBA1587-BAFC-49F8-803A-D1CF431A26F5}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ETag", 
"samples\ETag\ETag.csproj", "{4FBA1587-BAFC-49F8-803A-D1CF431A26F5}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -380,7 +380,7 @@ Global {DF2DD03E-87EE-482A-9FBA-6C8FBC23BDC5} = {697766CD-2046-46D9-958A-0FD3B46C98D4} {A48412B4-FD60-467E-A5D9-F155CAB4F907} = {147FCE31-EC09-4C90-8E4D-37CA87ED18C3} {D4C9A1A0-7053-F072-21F5-4E0C5827136D} = {69A71E2C-00E3-42F3-854E-BE157A24834E} - {4FBA1587-BAFC-49F8-803A-D1CF431A26F5} = {69A71E2C-00E3-42F3-854E-BE157A24834E} + {4FBA1587-BAFC-49F8-803A-D1CF431A26F5} = {7068BB97-1958-4060-B5F1-859464592E56} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {2C02C405-4798-41CA-AF98-61EDFEF6772E} diff --git a/playground/ETag/Caching.cs b/samples/ETag/Caching.cs similarity index 100% rename from playground/ETag/Caching.cs rename to samples/ETag/Caching.cs diff --git a/playground/ETag/ETag.csproj b/samples/ETag/ETag.csproj similarity index 100% rename from playground/ETag/ETag.csproj rename to samples/ETag/ETag.csproj diff --git a/playground/ETag/EtagAbstractions.cs b/samples/ETag/EtagAbstractions.cs similarity index 100% rename from playground/ETag/EtagAbstractions.cs rename to samples/ETag/EtagAbstractions.cs diff --git a/playground/ETag/OccSimulation.cs b/samples/ETag/OccSimulation.cs similarity index 99% rename from playground/ETag/OccSimulation.cs rename to samples/ETag/OccSimulation.cs index 354d59b779..4a1c09ea20 100644 --- a/playground/ETag/OccSimulation.cs +++ b/samples/ETag/OccSimulation.cs @@ -80,7 +80,7 @@ public static async Task RunSimulation() break; } - Action userUpdateAction = (userInfo) => {}; + Action userUpdateAction = (userInfo) => { }; switch (input) { case "FirstName": diff --git a/playground/ETag/Program.cs b/samples/ETag/Program.cs similarity index 100% rename from playground/ETag/Program.cs rename to samples/ETag/Program.cs From 32e6a3a5d7181572d6c98b6b8f7d505dc2708170 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Tue, 21 Jan 2025 12:38:14 
-0800 Subject: [PATCH 06/15] fmt --- samples/ETag/Caching.cs | 6 +----- samples/ETag/EtagAbstractions.cs | 2 +- samples/ETag/OccSimulation.cs | 2 +- samples/ETag/Program.cs | 2 +- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/samples/ETag/Caching.cs b/samples/ETag/Caching.cs index e5942d87cb..3009324e7a 100644 --- a/samples/ETag/Caching.cs +++ b/samples/ETag/Caching.cs @@ -188,9 +188,5 @@ private static string GenerateLargeLoremIpsumText(int sizeInBytes) return stringBuilder.ToString(); } - private static readonly string[] CommonWords = - [ - "The", "Amazing", "Incredible", "Fantastic", "Journey", "Adventure", "Mystery", "Legend", "Quest", "Saga", - "John", "Jane", "Smith", "Doe", "Alice", "Bob", "Charlie", "David", "Eve", "Frank" - ]; + private static readonly string[] CommonWords = ["The", "Amazing", "Incredible", "Fantastic", "Journey", "Adventure", "Mystery", "Legend", "Quest", "Saga", "John", "Jane", "Smith", "Doe", "Alice", "Bob", "Charlie", "David", "Eve", "Frank"]; } \ No newline at end of file diff --git a/samples/ETag/EtagAbstractions.cs b/samples/ETag/EtagAbstractions.cs index 7f3a1d05df..c0ad17cad0 100644 --- a/samples/ETag/EtagAbstractions.cs +++ b/samples/ETag/EtagAbstractions.cs @@ -108,4 +108,4 @@ public static class ETagAbstractions T deserializedItem = JsonSerializer.Deserialize((string)res[1]!)!; return (false, (long)res[0], deserializedItem); } -} +} \ No newline at end of file diff --git a/samples/ETag/OccSimulation.cs b/samples/ETag/OccSimulation.cs index 4a1c09ea20..2e71f09184 100644 --- a/samples/ETag/OccSimulation.cs +++ b/samples/ETag/OccSimulation.cs @@ -1,10 +1,10 @@ -using StackExchange.Redis; using System; using System.Collections.Generic; using System.Text.Json; using System.Text.Json.Serialization; using System.Threading; using System.Threading.Tasks; +using StackExchange.Redis; namespace ETag { diff --git a/samples/ETag/Program.cs b/samples/ETag/Program.cs index ae6b3e1a45..c1f324a5b0 100644 --- 
a/samples/ETag/Program.cs +++ b/samples/ETag/Program.cs @@ -11,4 +11,4 @@ static async Task Main(string[] args) // await OccSimulation.RunSimulation(); await Caching.RunSimulation(); } -} +} \ No newline at end of file From b30b002a88d38f0e30638454eca209e0ee1cfb89 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Tue, 21 Jan 2025 12:45:33 -0800 Subject: [PATCH 07/15] fix links --- website/blog/2025-01-18-etag-when-and-how.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/blog/2025-01-18-etag-when-and-how.md b/website/blog/2025-01-18-etag-when-and-how.md index ae39d07196..00b7e590c6 100644 --- a/website/blog/2025-01-18-etag-when-and-how.md +++ b/website/blog/2025-01-18-etag-when-and-how.md @@ -9,7 +9,7 @@ tags: [garnet, concurrency, caching, lock-free, etags] ETags are a powerful feature that enable techniques such as **Optimistic Concurrency Control** and **more efficient network bandwidth utilization for caching.** Currently, Garnet provides ETags as raw strings. This feature is available without requiring any migration, allowing your existing key-value pairs to start leveraging ETags immediately without impacting performance metrics. -You can find the [ETag API documentation here](./docs/commands/garnet-specific-commands#native-etag-support). +You can find the [ETag API documentation here](/docs/commands/garnet-specific-commands#native-etag-support). This article explores when and how you can use this new Garnet feature for both your current and future applications. @@ -57,7 +57,7 @@ In the absence of ETags, the entire payload for `k1` is returned on every read, While this might not matter when transferring small payloads (e.g., 100 bytes of data within a high-bandwidth local network), it becomes significant when you have **multiple machines egressing larger payloads (e.g., 1MB each)** on a cloud provider. You pay the cost of egress, bandwidth usage, and experience delays due to the transmission of larger amounts of data. 
-To address this, Garnet provides the `GETIFNOTMATCH` API [here](./docs/commands/garnet-specific-commands#getifnotmatch). +To address this, Garnet provides the `GETIFNOTMATCH` API [here](/docs/commands/garnet-specific-commands#getifnotmatch). , allowing you to fetch data only if it has changed since your last retrieval. Server 1 can store the ETag received in the initial payload in application memory and use `GETIFNOTMATCH` to refresh the local copy only if the value has changed. This approach is particularly beneficial in read-heavy systems where data changes infrequently. However, for frequently updated keys, using the regular `GET` API may still be preferable, as updated data will always need to be transmitted. @@ -90,7 +90,7 @@ Without ETags, the following sequence of events might occur: This race condition results in lost updates. -With ETags, you can use the `SETIFMATCH` API [here](./docs/commands/garnet-specific-commands#setifmatch) to implement a **compare-and-swap** mechanism that guarantees no updates are lost. The following code snippets demonstrate how this can be achieved. +With ETags, you can use the `SETIFMATCH` API [here](/docs/commands/garnet-specific-commands#setifmatch) to implement a **compare-and-swap** mechanism that guarantees no updates are lost. The following code snippets demonstrate how this can be achieved. 
--- From 2346cbe3f01ce108efdfa7c38dbd7407f601d428 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Tue, 21 Jan 2025 12:52:15 -0800 Subject: [PATCH 08/15] fix last link --- website/blog/2025-01-18-etag-when-and-how.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/website/blog/2025-01-18-etag-when-and-how.md b/website/blog/2025-01-18-etag-when-and-how.md index 00b7e590c6..68ac0d6eab 100644 --- a/website/blog/2025-01-18-etag-when-and-how.md +++ b/website/blog/2025-01-18-etag-when-and-how.md @@ -13,6 +13,8 @@ You can find the [ETag API documentation here](/docs/commands/garnet-specific-co This article explores when and how you can use this new Garnet feature for both your current and future applications. + + --- ## Why Read This Article? @@ -23,6 +25,7 @@ If you're looking to: We'll cover these scenarios case by case. + --- ## Reducing Network Bandwidth Utilization for Caching @@ -167,7 +170,7 @@ private static async Task<(bool, long, T)> _updateItemIfMatch(IDatabase db, l } ``` -Every read-(extra logic/modify)-write call starts by first reading the latest etag and value for a key using `GETWITHETAG` [here](./docs/commands/garnet-specific-commands#getwithetag), it then wraps it's update logic in a callback action and then calls the `PerformLockFreeSafeUpdate` method in `ETagAbstractions` to safely apply the update. +Every read-(extra logic/modify)-write call starts by first reading the latest etag and value for a key using `GETWITHETAG` [here](/docs/commands/garnet-specific-commands#getwithetag), it then wraps it's update logic in a callback action and then calls the `PerformLockFreeSafeUpdate` method in `ETagAbstractions` to safely apply the update. 
Internally the `PerformLockFreeSafeUpdate` method runs a loop that retrieves the data that performs your update on the object and sends a `SETIFMATCH` request, the server only then updates the value if your ETag indicates that at the time of your decision you had performed your update on the latest copy of the data. If the server sees that between your read and write there were any updates the value, the server sends the latest copy of the data along with the updated etag, your client code then reapplies the changes on the latest copy and resends the request back to the server for the update, this form of update will guarantees that eventually all changes synchronize themselves on the server one after other. From 96e5b495bc1635a6a66a7468c34486def986bcda Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Tue, 21 Jan 2025 13:58:24 -0800 Subject: [PATCH 09/15] fix rmwmethod --- libs/server/Storage/Functions/MainStore/RMWMethods.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/server/Storage/Functions/MainStore/RMWMethods.cs b/libs/server/Storage/Functions/MainStore/RMWMethods.cs index c863471647..eb8b5baa41 100644 --- a/libs/server/Storage/Functions/MainStore/RMWMethods.cs +++ b/libs/server/Storage/Functions/MainStore/RMWMethods.cs @@ -351,8 +351,9 @@ private bool InPlaceUpdaterWorker(ref SpanByte key, ref RawStringInput input, re // Increment the ETag long newEtag = functionsState.etagState.etag + 1; - value.ShrinkSerializedLength(metadataSize + inputValue.Length + EtagConstants.EtagSize); rmwInfo.ClearExtraValueLength(ref recordInfo, ref value, value.TotalSize); + value.UnmarkExtraMetadata(); + value.ShrinkSerializedLength(metadataSize + inputValue.Length + EtagConstants.EtagSize); rmwInfo.SetUsedValueLength(ref recordInfo, ref value, value.TotalSize); value.SetEtagInPayload(newEtag); From 5028eebbb05639ec0838cc28a28327c2ad652ff2 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Fri, 21 Feb 2025 22:32:50 -0800 Subject: [PATCH 10/15] 
Update doc and blog --- samples/ETag/EtagAbstractions.cs | 17 ++- samples/ETag/Program.cs | 4 +- website/blog/2025-01-18-etag-when-and-how.md | 110 ++++++++++++++++--- website/docs/commands/garnet-specific.md | 2 +- 4 files changed, 106 insertions(+), 27 deletions(-) diff --git a/samples/ETag/EtagAbstractions.cs b/samples/ETag/EtagAbstractions.cs index c0ad17cad0..25d3526304 100644 --- a/samples/ETag/EtagAbstractions.cs +++ b/samples/ETag/EtagAbstractions.cs @@ -18,7 +18,7 @@ public static class ETagAbstractions /// The initial state of the item. /// The action to perform on the item before updating it in the database. /// A task that represents the asynchronous operation. The task result contains a tuple with the final ETag value and the updated item. - public static async Task<(long, T)> PerformLockFreeSafeUpdate(IDatabase db, string key, long initialEtag, T initialItem, Action updateAction) + public static async Task<(long, T?)> PerformLockFreeSafeUpdate(IDatabase db, string key, long initialEtag, T initialItem, Action updateAction) { // Compare and Swap Updating long etag = initialEtag; @@ -31,6 +31,12 @@ public static class ETagAbstractions updateAction(item); var (updatedSuccesful, newEtag, newItem) = await _updateItemIfMatch(db, etag, key, item); + if (newItem == null) + { + // item was deleted, break out of the loop + return (newEtag, newItem); + } + etag = newEtag; item = newItem; @@ -96,16 +102,15 @@ public static class ETagAbstractions return (etag, item); } - private static async Task<(bool updated, long etag, T)> _updateItemIfMatch(IDatabase db, long etag, string key, T value) + private static async Task<(bool updated, long etag, T?)> _updateItemIfMatch(IDatabase db, long etag, string key, T value) { - // You may notice the "!" that is because we know that SETIFMATCH doesn't return null string serializedItem = JsonSerializer.Serialize(value); + // You may notice the "!" 
that is because we know that SETIFMATCH doesn't return null RedisResult[] res = (RedisResult[])(await db.ExecuteAsync("SETIFMATCH", key, serializedItem, etag))!; - if (res[1].IsNull) - return (true, (long)res[0], value); + T? deserializedItem = res[1].IsNull ? default(T) : + JsonSerializer.Deserialize((string)res[1]!)!; - T deserializedItem = JsonSerializer.Deserialize((string)res[1]!)!; return (false, (long)res[0], deserializedItem); } } \ No newline at end of file diff --git a/samples/ETag/Program.cs b/samples/ETag/Program.cs index c1f324a5b0..e34c6502e1 100644 --- a/samples/ETag/Program.cs +++ b/samples/ETag/Program.cs @@ -8,7 +8,7 @@ static async Task Main(string[] args) { // Uncomment whichever example you want to run - // await OccSimulation.RunSimulation(); - await Caching.RunSimulation(); + await OccSimulation.RunSimulation(); + // await Caching.RunSimulation(); } } \ No newline at end of file diff --git a/website/blog/2025-01-18-etag-when-and-how.md b/website/blog/2025-01-18-etag-when-and-how.md index 68ac0d6eab..a16ad09533 100644 --- a/website/blog/2025-01-18-etag-when-and-how.md +++ b/website/blog/2025-01-18-etag-when-and-how.md @@ -6,7 +6,7 @@ tags: [garnet, concurrency, caching, lock-free, etags] --- **Garnet recently announced native support for ETag-based commands.** -ETags are a powerful feature that enable techniques such as **Optimistic Concurrency Control** and **more efficient network bandwidth utilization for caching.** +ETags are a powerful feature that enable techniques such as **Optimistic Concurrency Control**, **more efficient network bandwidth utilization for caching**, and **keeping cache consistent with the database.** Currently, Garnet provides ETags as raw strings. This feature is available without requiring any migration, allowing your existing key-value pairs to start leveraging ETags immediately without impacting performance metrics. 
You can find the [ETag API documentation here](/docs/commands/garnet-specific-commands#native-etag-support). @@ -20,14 +20,88 @@ This article explores when and how you can use this new Garnet feature for both ## Why Read This Article? If you're looking to: -1. **Reduce network bandwidth utilization for caching.** -2. **Avoid the cost of transactions when working with non-atomic values in your cache store.** +1. **Keep your cache consistent with your Database for contentended keys** +2. **Reduce network bandwidth utilization for caching.** +3. **Avoid the cost of transactions when working with non-atomic values in your cache store.** We'll cover these scenarios case by case. --- +## Keeping Cache consistent state with Database for High Contention Keys with Concurrent Clients + +In a distributed environment it is common to have a cache being updated along with your main source of truth database. This is not a challenge when every client is interacting with keys with no contention. However, in cases where a key in your cache is being updated concurrently by multiple clients as they coordinate writing to the database and then updating the cache, whichever client writes to the cache last decides the final state (last write wins), even if they were not the last client to update the database! + +To make the above scenario concrete, consider a cache-database setup where 2 clients are interacting with the pair. They both follow the same protocol where on writes they first update the database, and then update the cache. We will denote each client by c1 and c2, the request to update the database as D, and the request to update the cache as C. 
+All is good when the sequence of events is as follows: +```mermaid +sequenceDiagram + participant c1 as Client 1 + participant db as Database + participant cache as Cache + participant c2 as Client 2 + + c1->>db: D (older state) + c1->>cache: C (older state) + c2->>db: D (latest state) + c2->>cache: C (latest state) +``` + +That is, c1 finishes its database-cache update protocol strictly before c2 starts its protocol. Even the case of interleaving is not entirely bound to produce errors! In the below sequencing the cache will eventually be consistent with the database! + +```mermaid +sequenceDiagram + participant c1 as Client 1 + participant db as Database + participant cache as Cache + participant c2 as Client 2 + + c1->>db: D (older state) + c2->>db: D (latest state) + c1->>cache: C (older state) + c2->>cache: C (latest state) +``` + +However, in the case where between c1 updating the database and the cache, c2 happens to interleave such that it updates the database and the cache before c1, we hit a consistency issue! +```mermaid +sequenceDiagram + participant c1 as Client 1 + participant db as Database + participant cache as Cache + participant c2 as Client 2 + + c1->>db: D (older state) + c2->>db: D (latest state) + c2->>cache: C (latest state) + c1->>cache: C (older state) +``` +In the above sequencing you will see that c2 is the last write to the database but c1 has the last write to the cache. So the cache and database have gone out of sync. + +For handling such cases we can rely on the newly introduced ETag feature in Garnet to construct a logical clock around the updates that protects cache consistency (*provided your database also supports ETags, such as Cosmos DB). + +In such a scenario the client should use our `SETIFGREATER` API [here](/docs/commands/garnet-specific-commands#setifgreater), when interacting with the cache.
`SETIFGREATER` sends a key-value pair along with an etag from the client, and only sets the value if the sent ETag is greater than what is set against the key-value pair already. + +Every client would now follow the following protocol: +- `SETIFGREATER` or `SETIFMATCH` [API](/docs/commands/garnet-specific-commands#setifmatch) based update to the source of truth first. +- Use the retrieved ETag from our previous call as an argument for SETIFGREATER to update the cache. + +If every client follows the above protocol, we can ensure that only the latest database write is reflected in the cache as well. +Below is the same sequence of events, but with the clients following the updated protocol. +```mermaid +sequenceDiagram + participant c1 as Client 1 + participant db as Database + participant cache as Cache + participant c2 as Client 2 + + c1->>db: D (older state with etag 'x') + c2->>db: D (latest state with etag 'x+1') + c2->>cache: C (latest state with etag 'x+1') + c1->>cache: C (older state REJECTED because latest etag on server 'x+1' is higher than sent etag 'x') +``` + + ## Reducing Network Bandwidth Utilization for Caching Every network call incurs a cost: the amount of data transmitted and the distance over which it travels. In performance-sensitive scenarios, it's beneficial to fetch data only if it has changed in the cache, thereby reducing bandwidth usage and network latency. @@ -36,24 +110,24 @@ Every network call incurs a cost: the amount of data transmitted and the distanc Consider the following setup: #### High-Level Diagram -``` -(server 1)----[server 1 reads from cache]----(cache) - | - | - [server 2 writes to the cache, invalidating whatever server 1 had read] - | - | - (server 2) +```mermaid +graph TD + S1[Server 1] -->|reads from cache| C[Cache] + S2[Server 2] -->|writes to cache, invalidating server 1's read| C ``` #### Sequence Diagram -``` - server 1 cache server 2 -1. initial read from cache for k1-----------------------> -2.
<----------------------------------------Send Data and ETag - <-------------------------update value for k1 (invalidates k1) -3. second read to cache for k1 -------------------------> - <--------------------------------------(What is sent back?) +```mermaid +sequenceDiagram + participant S1 as Server 1 + participant C as Cache + participant S2 as Server 2 + + S1->>C: initial read from cache for k1 + C-->>S1: Send Data and ETag + S2->>C: update value for k1 (invalidates k1) + S1->>C: second read to cache for k1 + C-->>S1: (What is sent back?) ``` In the absence of ETags, the entire payload for `k1` is returned on every read, regardless of whether the value associated with `k1` has changed. diff --git a/website/docs/commands/garnet-specific.md b/website/docs/commands/garnet-specific.md index 699db52412..556b88b9ab 100644 --- a/website/docs/commands/garnet-specific.md +++ b/website/docs/commands/garnet-specific.md @@ -147,8 +147,8 @@ for details. Garnet provides support for ETags on raw strings. By using the ETag-related commands outlined below, you can associate any **string based key-value pair** inserted into Garnet with an automatically updated ETag. Compatibility with non-ETag commands and the behavior of data inserted with ETags are detailed at the end of this document. - To initialize a key value pair with an ETag you can use either the SET command with the newly added "WITHETAG" optional flag, or you can take any existing Key value pair and call SETIFMATCH with the ETag argument as 0 (Any key value pair without an explicit ETag has an ETag of 0 implicitly). 
**You can read more about setting an initial ETag via SET [here](../commands/raw-string#set)** + --- ### **SET (WITHETAG)** From 9abee772011133bd1a8f4ebf9da3dc2fecca8932 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Fri, 21 Feb 2025 22:46:04 -0800 Subject: [PATCH 11/15] doc updates --- website/blog/2025-01-18-etag-when-and-how.md | 4 ++-- website/docs/commands/garnet-specific.md | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/website/blog/2025-01-18-etag-when-and-how.md b/website/blog/2025-01-18-etag-when-and-how.md index a16ad09533..f20b502b99 100644 --- a/website/blog/2025-01-18-etag-when-and-how.md +++ b/website/blog/2025-01-18-etag-when-and-how.md @@ -6,7 +6,7 @@ tags: [garnet, concurrency, caching, lock-free, etags] --- **Garnet recently announced native support for ETag-based commands.** -ETags are a powerful feature that enable techniques such as **Optimistic Concurrency Control**, **more efficient network bandwidth utilization for caching**, and **keeping cache consistent with the database.** +ETags are a powerful feature that enable techniques such as **Optimistic Concurrency Control**, **more efficient network bandwidth utilization for caching**, and **keeping your cache consistent with the database.** Currently, Garnet provides ETags as raw strings. This feature is available without requiring any migration, allowing your existing key-value pairs to start leveraging ETags immediately without impacting performance metrics. You can find the [ETag API documentation here](/docs/commands/garnet-specific-commands#native-etag-support). @@ -252,4 +252,4 @@ In a read-heavy system where contention is not high on the same key this update --- -ETags are not a silver bullet. However, in low contention scenarios, they reduce transaction overhead and network bandwidth usage, offering a lightweight alternative to traditional locking mechanisms. 
\ No newline at end of file +ETags are lower-level primitives that you can use to build abstractions such as logical clocks and lock-free transactions tailored to your needs. If you find yourself in one of the common distributed scenarios described above, you now have another tool in your toolbox to help meet your scaling needs. \ No newline at end of file diff --git a/website/docs/commands/garnet-specific.md b/website/docs/commands/garnet-specific.md index 556b88b9ab..6cc802ffd3 100644 --- a/website/docs/commands/garnet-specific.md +++ b/website/docs/commands/garnet-specific.md @@ -147,7 +147,8 @@ for details. Garnet provides support for ETags on raw strings. By using the ETag-related commands outlined below, you can associate any **string based key-value pair** inserted into Garnet with an automatically updated ETag. Compatibility with non-ETag commands and the behavior of data inserted with ETags are detailed at the end of this document. -To initialize a key value pair with an ETag you can use either the SET command with the newly added "WITHETAG" optional flag, or you can take any existing Key value pair and call SETIFMATCH with the ETag argument as 0 (Any key value pair without an explicit ETag has an ETag of 0 implicitly). **You can read more about setting an initial ETag via SET [here](../commands/raw-string#set)** +To initialize a key value pair with an ETag you can use either the SET command with the newly added "WITHETAG" optional flag, or you can take any existing Key value pair and call SETIFMATCH with the ETag argument as 0 (Any key value pair without an explicit ETag has an ETag of 0 implicitly). 
Read more about Etag use cases and patterns [here](../../blog/etags-when-and-how) + --- From 91987e8320af4f6bf874e1de01d9355a085f048f Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Sat, 22 Feb 2025 13:21:48 -0800 Subject: [PATCH 12/15] Fmt --- samples/ETag/EtagAbstractions.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/ETag/EtagAbstractions.cs b/samples/ETag/EtagAbstractions.cs index 25d3526304..021cf2e649 100644 --- a/samples/ETag/EtagAbstractions.cs +++ b/samples/ETag/EtagAbstractions.cs @@ -108,7 +108,7 @@ public static class ETagAbstractions // You may notice the "!" that is because we know that SETIFMATCH doesn't return null RedisResult[] res = (RedisResult[])(await db.ExecuteAsync("SETIFMATCH", key, serializedItem, etag))!; - T? deserializedItem = res[1].IsNull ? default(T) : + T? deserializedItem = res[1].IsNull ? default(T) : JsonSerializer.Deserialize((string)res[1]!)!; return (false, (long)res[0], deserializedItem); From dc2fd031c925af7bb03be9d6d116328cd7358dec Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Sun, 23 Feb 2025 09:11:52 -0800 Subject: [PATCH 13/15] update code and docs --- samples/ETag/EtagAbstractions.cs | 26 +++++++------------- website/blog/2025-01-18-etag-when-and-how.md | 25 +++++++++++-------- website/docs/commands/garnet-specific.md | 4 --- 3 files changed, 24 insertions(+), 31 deletions(-) diff --git a/samples/ETag/EtagAbstractions.cs b/samples/ETag/EtagAbstractions.cs index 021cf2e649..15441f74db 100644 --- a/samples/ETag/EtagAbstractions.cs +++ b/samples/ETag/EtagAbstractions.cs @@ -18,7 +18,7 @@ public static class ETagAbstractions /// The initial state of the item. /// The action to perform on the item before updating it in the database. /// A task that represents the asynchronous operation. The task result contains a tuple with the final ETag value and the updated item. 
- public static async Task<(long, T?)> PerformLockFreeSafeUpdate(IDatabase db, string key, long initialEtag, T initialItem, Action updateAction) + public static async Task<(long, T)> PerformLockFreeSafeUpdate(IDatabase db, string key, long initialEtag, T initialItem, Action updateAction) { // Compare and Swap Updating long etag = initialEtag; @@ -29,18 +29,11 @@ public static class ETagAbstractions // an item before it is finally updated on the server. // NOTE: Based on your application's needs you can modify this method to update a pure function that returns a copy of the data and does not use mutations as side effects. updateAction(item); - var (updatedSuccesful, newEtag, newItem) = await _updateItemIfMatch(db, etag, key, item); - if (newItem == null) - { - // item was deleted, break out of the loop - return (newEtag, newItem); - } - etag = newEtag; - item = newItem; - - if (updatedSuccesful) + if (!updatedSuccesful) + item = newItem!; + else break; } @@ -92,9 +85,7 @@ public static class ETagAbstractions var executeResult = await db.ExecuteAsync("GETWITHETAG", key); // If key is not found we get null if (executeResult.IsNull) - { return (-1, default(T)); - } RedisResult[] result = (RedisResult[])executeResult!; long etag = (long)result[0]; @@ -102,14 +93,15 @@ public static class ETagAbstractions return (etag, item); } - private static async Task<(bool updated, long etag, T?)> _updateItemIfMatch(IDatabase db, long etag, string key, T value) + private static async Task<(bool updated, long etag, T)> _updateItemIfMatch(IDatabase db, long etag, string key, T value) { string serializedItem = JsonSerializer.Serialize(value); - // You may notice the "!" that is because we know that SETIFMATCH doesn't return null RedisResult[] res = (RedisResult[])(await db.ExecuteAsync("SETIFMATCH", key, serializedItem, etag))!; + // successful update does not return updated value so we can just return what was passed for value. 
+ if (res[1].IsNull) + return (true, (long)res[0], value); - T? deserializedItem = res[1].IsNull ? default(T) : - JsonSerializer.Deserialize((string)res[1]!)!; + T deserializedItem = JsonSerializer.Deserialize((string)res[1]!)!; return (false, (long)res[0], deserializedItem); } diff --git a/website/blog/2025-01-18-etag-when-and-how.md b/website/blog/2025-01-18-etag-when-and-how.md index f20b502b99..d94bd81a0b 100644 --- a/website/blog/2025-01-18-etag-when-and-how.md +++ b/website/blog/2025-01-18-etag-when-and-how.md @@ -215,31 +215,36 @@ public static async Task<(long, T?)> GetWithEtag(IDatabase db, string key) public static async Task<(long, T)> PerformLockFreeSafeUpdate(IDatabase db, string key, long initialEtag, T initialItem, Action updateAction) { + // Compare and Swap Updating long etag = initialEtag; T item = initialItem; - while (true) { + // perform custom action, since item is updated to it's correct latest state by the server this action is performed exactly once on + // an item before it is finally updated on the server. + // NOTE: Based on your application's needs you can modify this method to update a pure function that returns a copy of the data and does not use mutations as side effects. 
updateAction(item); - - var (updated, newEtag, newItem) = await _updateItemIfMatch(db, etag, key, item); + var (updatedSuccesful, newEtag, newItem) = await _updateItemIfMatch(db, etag, key, item); etag = newEtag; - item = newItem; - - if (updated) break; + if (!updatedSuccesful) + item = newItem!; + else + break; } return (etag, item); } -private static async Task<(bool, long, T)> _updateItemIfMatch(IDatabase db, long etag, string key, T value) +private static async Task<(bool updated, long etag, T?)> _updateItemIfMatch(IDatabase db, long etag, string key, T value) { - string serializedItem = JsonSerializer.Serialize(value); + string serializedItem = JsonSerializer.Serialize(value); RedisResult[] res = (RedisResult[])(await db.ExecuteAsync("SETIFMATCH", key, serializedItem, etag))!; - - if (res[1].IsNull) return (true, (long)res[0], value); + // successful update does not return updated value so we can just return what was passed for value. + if (res[1].IsNull) + return (true, (long)res[0], value); T deserializedItem = JsonSerializer.Deserialize((string)res[1]!)!; + return (false, (long)res[0], deserializedItem); } ``` diff --git a/website/docs/commands/garnet-specific.md b/website/docs/commands/garnet-specific.md index 6cc802ffd3..e22664f6b8 100644 --- a/website/docs/commands/garnet-specific.md +++ b/website/docs/commands/garnet-specific.md @@ -214,8 +214,6 @@ Sets/updates a key value pair with the given etag only if (1) the etag given in #### **Response** -One of the following: - - **Array reply**: If the sent etag matches the existing etag the reponse will be an array where the first item is the updated etag, and the second value is nil. If the etags do not match then the response array will hold the latest etag, and the latest value in order. 
--- @@ -236,8 +234,6 @@ Sets/updates a key value pair with the given etag only if (1) the etag given in #### **Response** -One of the following: - - **Array reply**: If the sent etag is greater than the existing etag then an array where the first item is the updated etag, and the second value is nil is returned. If the sentEtag is less than or equal to the existing etag then the response array will hold the latest etag, and the latest value in order. --- From b44815e02dd7dd6a423699c66842317507ff7824 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Sun, 23 Feb 2025 09:16:48 -0800 Subject: [PATCH 14/15] Fmt --- website/blog/2025-01-18-etag-when-and-how.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/2025-01-18-etag-when-and-how.md b/website/blog/2025-01-18-etag-when-and-how.md index d94bd81a0b..562549d7e6 100644 --- a/website/blog/2025-01-18-etag-when-and-how.md +++ b/website/blog/2025-01-18-etag-when-and-how.md @@ -20,7 +20,7 @@ This article explores when and how you can use this new Garnet feature for both ## Why Read This Article? If you're looking to: -1. **Keep your cache consistent with your Database for contentended keys** +1. **Keep your cache consistent with your Database for keys with contention** 2. **Reduce network bandwidth utilization for caching.** 3. 
**Avoid the cost of transactions when working with non-atomic values in your cache store.** From f62c5f14a10753ecbb217e10101a1cef17f61622 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Sun, 23 Feb 2025 09:32:51 -0800 Subject: [PATCH 15/15] co-auth badrishc --- website/blog/2025-01-18-etag-when-and-how.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/2025-01-18-etag-when-and-how.md b/website/blog/2025-01-18-etag-when-and-how.md index 562549d7e6..3a1c859385 100644 --- a/website/blog/2025-01-18-etag-when-and-how.md +++ b/website/blog/2025-01-18-etag-when-and-how.md @@ -1,7 +1,7 @@ --- slug: etags-when-and-how title: ETags, When and How -authors: hkhalid +authors: [hkhalid, badrishc] tags: [garnet, concurrency, caching, lock-free, etags] ---