Skip to content

Commit

Permalink
GH-986 Use code hash instead of executing action id to avoid issue with action id not matching currently running contract code
Browse files Browse the repository at this point in the history
  • Loading branch information
heifner committed Nov 21, 2024
1 parent be4ee80 commit 966273e
Show file tree
Hide file tree
Showing 9 changed files with 49 additions and 56 deletions.
4 changes: 2 additions & 2 deletions libraries/chain/include/eosio/chain/wasm_interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ namespace eosio { namespace chain {
// returns true if EOS VM OC is enabled
bool is_eos_vm_oc_enabled() const;

// return internal executing action id, used for testing
uint64_t get_executing_action_id() const;
// return the number of wasm executions interrupted by an eos vm oc compile completing, used for testing
uint64_t get_eos_vm_oc_compile_interrupt_count() const;
#endif

//call before dtor to skip what can be minutes of dtor overhead with some runtimes; can cause leaks
Expand Down
52 changes: 27 additions & 25 deletions libraries/chain/include/eosio/chain/wasm_interface_private.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <eosio/chain/code_object.hpp>
#include <eosio/chain/global_property_object.hpp>
#include <eosio/chain/exceptions.hpp>
#include <eosio/chain/thread_utils.hpp>
#include <fc/scoped_exit.hpp>

#include "IR/Module.h"
Expand Down Expand Up @@ -100,8 +101,8 @@ struct eosvmoc_tier {
#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
if(eosvmoc_tierup != wasm_interface::vm_oc_enable::oc_none) {
EOS_ASSERT(vm != wasm_interface::vm_type::eos_vm_oc, wasm_exception, "You can't use EOS VM OC as the base runtime when tier up is activated");
eosvmoc = std::make_unique<eosvmoc_tier>(data_dir, eosvmoc_config, d, [this](boost::asio::io_context& ctx, uint64_t executing_action_id, fc::time_point queued_time) {
async_compile_complete(ctx, executing_action_id, queued_time);
eosvmoc = std::make_unique<eosvmoc_tier>(data_dir, eosvmoc_config, d, [this](boost::asio::io_context& ctx, const digest_type& code_id, fc::time_point queued_time) {
async_compile_complete(ctx, code_id, queued_time);
});
}
#endif
Expand All @@ -111,19 +112,18 @@ struct eosvmoc_tier {

#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
// called from async thread
void async_compile_complete(boost::asio::io_context& ctx, uint64_t exec_action_id, fc::time_point queued_time) {
if (exec_action_id == executing_action_id) { // is action still executing?
void async_compile_complete(boost::asio::io_context& ctx, const digest_type& code_id, fc::time_point queued_time) {
if (executing_code_hash.load() == code_id) { // is action still executing?
auto elapsed = fc::time_point::now() - queued_time;
ilog("EOS VM OC tier up for ${id} compile complete ${t}ms",
("id", exec_action_id)("t", elapsed.count()/1000));
ilog("EOS VM OC tier up for ${id} compile complete ${t}ms", ("id", code_id)("t", elapsed.count()/1000));
auto expire_in = std::max(fc::microseconds(0), fc::milliseconds(500) - elapsed);
std::shared_ptr<boost::asio::steady_timer> timer = std::make_shared<boost::asio::steady_timer>(ctx);
timer->expires_from_now(std::chrono::microseconds(expire_in.count()));
timer->async_wait([timer, this, exec_action_id](const boost::system::error_code& ec) {
timer->async_wait([timer, this, code_id](const boost::system::error_code& ec) {
if (ec)
return;
if (exec_action_id == executing_action_id) {
ilog("EOS VM OC tier up interrupting ${id}", ("id", exec_action_id));
if (executing_code_hash.load() == code_id) {
ilog("EOS VM OC tier up interrupting ${id}", ("id", code_id));
eos_vm_oc_compile_interrupt = true;
main_thread_timer.expire_now();
}
Expand All @@ -135,14 +135,9 @@ struct eosvmoc_tier {
void apply( const digest_type& code_hash, const uint8_t& vm_type, const uint8_t& vm_version, apply_context& context ) {
bool attempt_tierup = false;
#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
auto ex = fc::make_scoped_exit([&]() {
eos_vm_oc_compile_interrupt = false;
++executing_action_id; // indicate no longer executing
});
attempt_tierup = eosvmoc && (eosvmoc_tierup == wasm_interface::vm_oc_enable::oc_all || context.should_use_eos_vm_oc());
const bool allow_oc_interrupt = attempt_tierup && context.is_applying_block() && context.trx_context.has_undo();
if (attempt_tierup) {
const bool allow_oc_interrupt = context.is_applying_block() && context.trx_context.has_undo();
const uint32_t exec_action_id = allow_oc_interrupt ? executing_action_id.load() : 0;
const chain::eosvmoc::code_descriptor* cd = nullptr;
chain::eosvmoc::code_cache_base::get_cd_failure failure = chain::eosvmoc::code_cache_base::get_cd_failure::temporary;
try {
Expand All @@ -158,7 +153,7 @@ struct eosvmoc_tier {
context.trx_context.resume_billing_timer();
});
context.trx_context.pause_billing_timer();
cd = eosvmoc->cc.get_descriptor_for_code(m, exec_action_id, code_hash, vm_version, failure);
cd = eosvmoc->cc.get_descriptor_for_code(m, code_hash, vm_version, failure);
} catch (...) {
// swallow errors here, if EOS VM OC has gone in to the weeds we shouldn't bail: continue to try and run baseline
// In the future, consider moving bits of EOS VM that can fire exceptions and such out of this call path
Expand All @@ -175,23 +170,29 @@ struct eosvmoc_tier {
}
}
#endif
auto ex = fc::make_scoped_exit([&]() {
eos_vm_oc_compile_interrupt = false;
executing_code_hash.store({}); // indicate no longer executing
});
executing_code_hash.store(code_hash);
try {
get_instantiated_module(code_hash, vm_type, vm_version, context.trx_context)->apply(context);
} catch (const interrupt_exception& e) {
if (eos_vm_oc_compile_interrupt) {
wlog("EOS VM OC compile complete id: ${id}, interrupt of ${r} <= ${a}::${act} code ${h}",
("id", executing_action_id.load())("r", context.get_receiver())("a", context.get_action().account)
("act", context.get_action().name)("h", code_hash));
EOS_THROW(interrupt_oc_exception, "EOS VM OC compile complete id: ${id}, interrupt of ${r} <= ${a}::${act} code ${h}",
("id", executing_action_id.load())("r", context.get_receiver())("a", context.get_action().account)
("act", context.get_action().name)("h", code_hash));
if (allow_oc_interrupt && eos_vm_oc_compile_interrupt) {
++eos_vm_oc_compile_interrupt_count;
wlog("EOS VM OC compile complete interrupt of ${r} <= ${a}::${act} code ${h}, interrupt #${c}",
("r", context.get_receiver())("a", context.get_action().account)
("act", context.get_action().name)("h", code_hash)("c", eos_vm_oc_compile_interrupt_count));
EOS_THROW(interrupt_oc_exception, "EOS VM OC compile complete interrupt of ${r} <= ${a}::${act} code ${h}, interrupt #${c}",
("r", context.get_receiver())("a", context.get_action().account)
("act", context.get_action().name)("h", code_hash)("c", eos_vm_oc_compile_interrupt_count));
}
throw;
}
}

// used for testing
uint64_t get_executing_action_id() const { return executing_action_id; }
uint64_t get_eos_vm_oc_compile_interrupt_count() const { return eos_vm_oc_compile_interrupt_count; }

bool is_code_cached(const digest_type& code_hash, const uint8_t& vm_type, const uint8_t& vm_version) const {
// This method is only called from tests; performance is not critical.
Expand Down Expand Up @@ -305,8 +306,9 @@ struct eosvmoc_tier {
platform_timer& main_thread_timer;
const wasm_interface::vm_type wasm_runtime_time;
const wasm_interface::vm_oc_enable eosvmoc_tierup;
std::atomic<uint64_t> executing_action_id{1}; // monotonic increasing for each action apply
large_atomic<digest_type> executing_code_hash{};
std::atomic<bool> eos_vm_oc_compile_interrupt{false};
uint32_t eos_vm_oc_compile_interrupt_count{0}; // for testing

#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
std::unique_ptr<struct eosvmoc_tier> eosvmoc{nullptr}; // used by all threads
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ class code_cache_base {

class code_cache_async : public code_cache_base {
public:
// called from async thread, provides executing_action_id of any compiles spawned by get_descriptor_for_code
using compile_complete_callback = std::function<void(boost::asio::io_context&, uint64_t, fc::time_point)>;
// called from async thread, provides code_id of any compiles spawned by get_descriptor_for_code
using compile_complete_callback = std::function<void(boost::asio::io_context&, const digest_type&, fc::time_point)>;

code_cache_async(const std::filesystem::path& data_dir, const eosvmoc::config& eosvmoc_config,
const chainbase::database& db, compile_complete_callback cb);
Expand All @@ -123,9 +123,8 @@ class code_cache_async : public code_cache_base {
//If code is in cache: returns pointer & bumps to front of MRU list
//If code is not in cache, and not blacklisted, and not currently compiling: return nullptr and kick off compile
//otherwise: return nullptr
const code_descriptor* const
get_descriptor_for_code(mode m, uint64_t executing_action_id,
const digest_type& code_id, const uint8_t& vm_version, get_cd_failure& failure);
const code_descriptor* const get_descriptor_for_code(mode m, const digest_type& code_id, const uint8_t& vm_version,
get_cd_failure& failure);

private:
compile_complete_callback _compile_complete_func; // called from async thread, provides code_id of the compile that completed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ struct code_tuple {

struct compile_wasm_message {
code_tuple code;
uint64_t executing_action_id{0}; // action id that initiated the compilation
fc::time_point queued_time; // when compilation was queued to begin
std::optional<eosvmoc::subjective_compile_limits> limits;
//Two sent fd: 1) communication socket for result, 2) the wasm to compile
Expand All @@ -37,7 +36,6 @@ struct code_compilation_result_message {
unsigned apply_offset;
int starting_memory_pages;
unsigned initdata_prologue_size;
uint64_t executing_action_id{0}; // action id that initiated the compilation
fc::time_point queued_time; // when compilation was queued to begin
//Two sent fds: 1) wasm code, 2) initial memory snapshot
};
Expand All @@ -54,7 +52,6 @@ struct wasm_compilation_result_message {
code_tuple code;
wasm_compilation_result result;
size_t cache_free_bytes;
uint64_t executing_action_id{0}; // action id that initiated the compilation, copied from compile_wasm_message
fc::time_point queued_time; // when compilation was queued to begin, copied from compile_wasm_message
};

Expand All @@ -69,9 +66,9 @@ using eosvmoc_message = std::variant<initialize_message,
FC_REFLECT(eosio::chain::eosvmoc::initialize_message, )
FC_REFLECT(eosio::chain::eosvmoc::initalize_response_message, (error_message))
FC_REFLECT(eosio::chain::eosvmoc::code_tuple, (code_id)(vm_version))
FC_REFLECT(eosio::chain::eosvmoc::compile_wasm_message, (code)(executing_action_id)(queued_time)(limits))
FC_REFLECT(eosio::chain::eosvmoc::compile_wasm_message, (code)(queued_time)(limits))
FC_REFLECT(eosio::chain::eosvmoc::evict_wasms_message, (codes))
FC_REFLECT(eosio::chain::eosvmoc::code_compilation_result_message, (start)(apply_offset)(starting_memory_pages)(initdata_prologue_size)(executing_action_id)(queued_time))
FC_REFLECT(eosio::chain::eosvmoc::code_compilation_result_message, (start)(apply_offset)(starting_memory_pages)(initdata_prologue_size)(queued_time))
FC_REFLECT(eosio::chain::eosvmoc::compilation_result_unknownfailure, )
FC_REFLECT(eosio::chain::eosvmoc::compilation_result_toofull, )
FC_REFLECT(eosio::chain::eosvmoc::wasm_compilation_result_message, (code)(result)(cache_free_bytes)(executing_action_id)(queued_time))
FC_REFLECT(eosio::chain::eosvmoc::wasm_compilation_result_message, (code)(result)(cache_free_bytes)(queued_time))
4 changes: 2 additions & 2 deletions libraries/chain/wasm_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ namespace eosio { namespace chain {
return my->is_eos_vm_oc_enabled();
}

uint64_t wasm_interface::get_executing_action_id() const {
return my->get_executing_action_id();
uint64_t wasm_interface::get_eos_vm_oc_compile_interrupt_count() const {
return my->get_eos_vm_oc_compile_interrupt_count();
}
#endif

Expand Down
6 changes: 2 additions & 4 deletions libraries/chain/webassembly/runtimes/eos-vm-oc/code_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ void code_cache_async::wait_on_compile_monitor_message() {
const auto& msg = std::get<wasm_compilation_result_message>(message);
_result_queue.push(msg);

_compile_complete_func(_ctx, msg.executing_action_id, msg.queued_time);
_compile_complete_func(_ctx, msg.code.code_id, msg.queued_time);

process_queued_compiles();

Expand Down Expand Up @@ -151,7 +151,7 @@ std::tuple<size_t, size_t> code_cache_async::consume_compile_thread_queue() {


const code_descriptor* const
code_cache_async::get_descriptor_for_code(mode m, uint64_t executing_action_id, const digest_type& code_id, const uint8_t& vm_version, get_cd_failure& failure) {
code_cache_async::get_descriptor_for_code(mode m, const digest_type& code_id, const uint8_t& vm_version, get_cd_failure& failure) {
//if there are any outstanding compiles, process the result queue now
//When app is in write window, all tasks are running sequentially and read-only threads
//are not running. Safe to update cache entries.
Expand Down Expand Up @@ -202,7 +202,6 @@ code_cache_async::get_descriptor_for_code(mode m, uint64_t executing_action_id,

auto msg = compile_wasm_message{
.code = { code_id, vm_version },
.executing_action_id = executing_action_id,
.queued_time = fc::time_point::now(),
.limits = !m.whitelisted ? _eosvmoc_config.non_whitelisted_limits : std::optional<subjective_compile_limits>{}
};
Expand Down Expand Up @@ -253,7 +252,6 @@ const code_descriptor* const code_cache_sync::get_descriptor_for_code_sync(mode

auto msg = compile_wasm_message{
.code = { code_id, vm_version },
.executing_action_id = 0,
.queued_time = fc::time_point{}, // could use now() if compile time measurement desired
.limits = !m.whitelisted ? _eosvmoc_config.non_whitelisted_limits : std::optional<subjective_compile_limits>{}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ struct compile_monitor_session {

eosvmoc_message trampoline_compile_request = msg;
if(write_message_with_fds(_trampoline_socket, trampoline_compile_request, fds_pass_to_trampoline) == false) {
wasm_compilation_result_message reply{msg.code, compilation_result_unknownfailure{}, _allocator->get_free_memory(), msg.executing_action_id, msg.queued_time};
wasm_compilation_result_message reply{msg.code, compilation_result_unknownfailure{}, _allocator->get_free_memory(), msg.queued_time};
write_message_with_fds(_nodeos_instance_socket, reply);
return;
}
Expand All @@ -123,8 +123,8 @@ struct compile_monitor_session {
return;
auto& [code, socket] = *current_compile_it;
auto [success, message, fds] = read_message_with_fds(socket);
wasm_compilation_result_message reply{code, compilation_result_unknownfailure{}, _allocator->get_free_memory(), 0, fc::time_point{}};

wasm_compilation_result_message reply{code, compilation_result_unknownfailure{}, _allocator->get_free_memory(), fc::time_point{}};

void* code_ptr = nullptr;
void* mem_ptr = nullptr;
Expand All @@ -143,7 +143,6 @@ struct compile_monitor_session {
copy_memfd_contents_to_pointer(code_ptr, fds[0]);
copy_memfd_contents_to_pointer(mem_ptr, fds[1]);

reply.executing_action_id = result.executing_action_id;
reply.queued_time = result.queued_time;
reply.result = code_descriptor {
code.code_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ using namespace IR;
namespace eosio { namespace chain { namespace eosvmoc {

void run_compile(wrapped_fd&& response_sock, wrapped_fd&& wasm_code, uint64_t stack_size_limit,
size_t generated_code_size_limit, uint64_t executing_action_id, fc::time_point queued_time) noexcept { //noexcept; we'll just blow up if anything tries to cross this boundry
size_t generated_code_size_limit, fc::time_point queued_time) noexcept { //noexcept; we'll just blow up if anything tries to cross this boundry
std::vector<uint8_t> wasm = vector_for_memfd(wasm_code);

//ideally we catch exceptions and sent them upstream as strings for easier reporting
Expand All @@ -34,7 +34,6 @@ void run_compile(wrapped_fd&& response_sock, wrapped_fd&& wasm_code, uint64_t st
instantiated_code code = LLVMJIT::instantiateModule(module, stack_size_limit, generated_code_size_limit);

code_compilation_result_message result_message;
result_message.executing_action_id = executing_action_id;
result_message.queued_time = queued_time;

const std::map<unsigned, uintptr_t>& function_to_offsets = code.function_offsets;
Expand Down Expand Up @@ -197,7 +196,7 @@ void run_compile_trampoline(int fd) {
struct rlimit core_limits = {0u, 0u};
setrlimit(RLIMIT_CORE, &core_limits);

run_compile(std::move(fds[0]), std::move(fds[1]), stack_size, generated_code_size_limit, msg.executing_action_id, msg.queued_time);
run_compile(std::move(fds[0]), std::move(fds[1]), stack_size, generated_code_size_limit, msg.queued_time);
_exit(0);
}
else if(pid == -1)
Expand Down
9 changes: 4 additions & 5 deletions unittests/eosvmoc_interrupt_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ BOOST_AUTO_TEST_CASE( wasm_interrupt_test ) { try {
t.set_code( "testapi"_n, test_contracts::test_api_wasm() );
t.produce_block();

auto pre_id = t.control->get_wasm_interface().get_executing_action_id();
auto pre_count = t.control->get_wasm_interface().get_eos_vm_oc_compile_interrupt_count();

// Use an infinite executing action. When oc compile completes it will kill the action and restart it under
// eosvmoc. That action will then fail when it hits the 5000ms deadline.
Expand All @@ -46,11 +46,10 @@ BOOST_AUTO_TEST_CASE( wasm_interrupt_test ) { try {
0, 150, 5000, true, fc::raw::pack(10000000000000000000ULL) ),
deadline_exception );

auto post_id = t.control->get_wasm_interface().get_executing_action_id();
auto post_count = t.control->get_wasm_interface().get_eos_vm_oc_compile_interrupt_count();

// each action uses 1 id, 2 if retried because of oc compile completion interruption
// if post_id == pre_id + 1, then likely that 5000ms above was not long enough for oc compile to complete
BOOST_TEST(post_id == pre_id + 2);
// if post_count == pre_count, then it is likely that the 5000ms above was not long enough for the oc compile to complete
BOOST_TEST(post_count == pre_count + 1);

BOOST_REQUIRE_EQUAL( t.validate(), true );
#endif
Expand Down

0 comments on commit 966273e

Please sign in to comment.