Skip to content

Commit

Permalink
fix(pageserver): allow incomplete history in btm-gc-compaction (neond…
Browse files Browse the repository at this point in the history
…atabase#8500)

This pull request (should) fix the failure of test_gc_feedback. See the
explanation in the newly-added test case.

Part of neondatabase#8002

Allow incomplete history for the compaction algorithm.

Signed-off-by: Alex Chi Z <[email protected]>
  • Loading branch information
skyzh authored Jul 25, 2024
1 parent 3977e0a commit bea0468
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 9 deletions.
121 changes: 118 additions & 3 deletions pageserver/src/tenant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7309,7 +7309,9 @@ mod tests {
(
key,
Lsn(0x80),
Value::WalRecord(NeonWalRecord::wal_append(";0x80")),
Value::Image(Bytes::copy_from_slice(
b"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80",
)),
),
(
key,
Expand Down Expand Up @@ -7371,7 +7373,9 @@ mod tests {
),
(
Lsn(0x80),
Value::WalRecord(NeonWalRecord::wal_append(";0x80")),
Value::Image(Bytes::copy_from_slice(
b"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80",
)),
),
(
Lsn(0x90),
Expand All @@ -7380,7 +7384,118 @@ mod tests {
]),
};
assert_eq!(res, expected_res);
// TODO: more tests with mixed image + delta, adding with k-merge test cases; e2e compaction test

// We expect GC-compaction to run with the original GC. This would create a situation that
// the original GC algorithm removes some delta layers b/c there are full image coverage,
// therefore causing some keys to have an incomplete history below the lowest retain LSN.
// For example, we have
// ```plain
// init delta @ 0x10, image @ 0x20, delta @ 0x30 (gc_horizon), image @ 0x40.
// ```
// Now the GC horizon moves up, and we have
// ```plain
// init delta @ 0x10, image @ 0x20, delta @ 0x30, image @ 0x40 (gc_horizon)
// ```
// The original GC algorithm kicks in, and removes delta @ 0x10, image @ 0x20.
// We will end up with
// ```plain
// delta @ 0x30, image @ 0x40 (gc_horizon)
// ```
// Now we run the GC-compaction, and this key does not have a full history.
// We should be able to handle this partial history and drop everything before the
// gc_horizon image.

let history = vec![
(
key,
Lsn(0x20),
Value::WalRecord(NeonWalRecord::wal_append(";0x20")),
),
(
key,
Lsn(0x30),
Value::WalRecord(NeonWalRecord::wal_append(";0x30")),
),
(
key,
Lsn(0x40),
Value::Image(Bytes::copy_from_slice(b"0x10;0x20;0x30;0x40")),
),
(
key,
Lsn(0x50),
Value::WalRecord(NeonWalRecord::wal_append(";0x50")),
),
(
key,
Lsn(0x60),
Value::WalRecord(NeonWalRecord::wal_append(";0x60")),
),
(
key,
Lsn(0x70),
Value::WalRecord(NeonWalRecord::wal_append(";0x70")),
),
(
key,
Lsn(0x80),
Value::Image(Bytes::copy_from_slice(
b"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80",
)),
),
(
key,
Lsn(0x90),
Value::WalRecord(NeonWalRecord::wal_append(";0x90")),
),
];
let res = tline
.generate_key_retention(key, &history, Lsn(0x60), &[Lsn(0x40), Lsn(0x50)], 3)
.await
.unwrap();
let expected_res = KeyHistoryRetention {
below_horizon: vec![
(
Lsn(0x40),
KeyLogAtLsn(vec![(
Lsn(0x40),
Value::Image(Bytes::copy_from_slice(b"0x10;0x20;0x30;0x40")),
)]),
),
(
Lsn(0x50),
KeyLogAtLsn(vec![(
Lsn(0x50),
Value::WalRecord(NeonWalRecord::wal_append(";0x50")),
)]),
),
(
Lsn(0x60),
KeyLogAtLsn(vec![(
Lsn(0x60),
Value::WalRecord(NeonWalRecord::wal_append(";0x60")),
)]),
),
],
above_horizon: KeyLogAtLsn(vec![
(
Lsn(0x70),
Value::WalRecord(NeonWalRecord::wal_append(";0x70")),
),
(
Lsn(0x80),
Value::Image(Bytes::copy_from_slice(
b"0x10;0x20;0x30;0x40;0x50;0x60;0x70;0x80",
)),
),
(
Lsn(0x90),
Value::WalRecord(NeonWalRecord::wal_append(";0x90")),
),
]),
};
assert_eq!(res, expected_res);

Ok(())
}

Expand Down
13 changes: 7 additions & 6 deletions pageserver/src/tenant/timeline/compaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1122,9 +1122,10 @@ impl Timeline {
);
}
}
if let Value::WalRecord(rec) = &history[0].2 {
assert!(rec.will_init(), "no base image");
}
// There was an assertion for no base image that checks if the first
// record in the history is `will_init` before, but it was removed.
// This is explained in the test cases for generate_key_retention.
// Search "incomplete history" for more information.
for lsn in retain_lsn_below_horizon {
assert!(lsn < &horizon, "retain lsn must be below horizon")
}
Expand Down Expand Up @@ -1200,16 +1201,16 @@ impl Timeline {
false
};
replay_history.extend(split_for_lsn.iter().map(|x| (*x).clone()));
if let Some((_, _, val)) = replay_history.first() {
assert!(val.will_init(), "invalid history, no base image");
}
// Only retain the items after the last image record
for idx in (0..replay_history.len()).rev() {
if replay_history[idx].2.will_init() {
replay_history = replay_history[idx..].to_vec();
break;
}
}
if let Some((_, _, val)) = replay_history.first() {
assert!(val.will_init(), "invalid history, no base image");
}
if generate_image && records_since_last_image > 0 {
records_since_last_image = 0;
let history = std::mem::take(&mut replay_history);
Expand Down

0 comments on commit bea0468

Please sign in to comment.