Skip to content

Commit

Permalink
Merge pull request #1027 from AntelopeIO/GH-986-retry-with-oc
Browse files Browse the repository at this point in the history
Restart long-running transaction with OC
  • Loading branch information
heifner authored Dec 4, 2024
2 parents 08f01d6 + c482721 commit 5be1d8b
Show file tree
Hide file tree
Showing 17 changed files with 447 additions and 229 deletions.
2 changes: 1 addition & 1 deletion libraries/chain/controller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1288,7 +1288,7 @@ struct controller_impl {
thread_pool(),
my_finalizers(cfg.finalizers_dir / config::safety_filename),
main_thread_timer(timer), // assumes constructor is called from main thread
wasmif( conf.wasm_runtime, conf.eosvmoc_tierup, db, conf.state_dir, conf.eosvmoc_config, !conf.profile_accounts.empty() )
wasmif( conf.wasm_runtime, conf.eosvmoc_tierup, db, main_thread_timer, conf.state_dir, conf.eosvmoc_config, !conf.profile_accounts.empty() )
{
assert(cfg.chain_thread_pool_size > 0);
thread_pool.start( cfg.chain_thread_pool_size, [this]( const fc::exception& e ) {
Expand Down
2 changes: 2 additions & 0 deletions libraries/chain/include/eosio/chain/exceptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,8 @@ namespace eosio { namespace chain {
3080010, "Read-only transaction eos-vm-oc compile permanent failure" )
FC_DECLARE_DERIVED_EXCEPTION( interrupt_exception, resource_exhausted_exception,
3080011, "Transaction interrupted by signal" )
FC_DECLARE_DERIVED_EXCEPTION( interrupt_oc_exception, resource_exhausted_exception,
3080012, "Transaction interrupted by oc compile" )

FC_DECLARE_DERIVED_EXCEPTION( authorization_exception, chain_exception,
3090000, "Authorization exception" )
Expand Down
2 changes: 1 addition & 1 deletion libraries/chain/include/eosio/chain/trace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ namespace eosio::chain {
block_timestamp_type block_time;
std::optional<block_id_type> producer_block_id;
std::optional<transaction_receipt_header> receipt;
fc::microseconds elapsed;
fc::microseconds elapsed{};
uint64_t net_usage = 0;
bool scheduled = false;
vector<action_trace> action_traces;
Expand Down
15 changes: 10 additions & 5 deletions libraries/chain/include/eosio/chain/transaction_context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ namespace eosio::chain {
std::optional<digests_t> digests_l; // legacy
std::optional<digests_t> digests_s; // savanna

action_digests_t(store_which_t sw) {
explicit action_digests_t(store_which_t sw) {
if (sw == store_which_t::legacy || sw == store_which_t::both)
digests_l = digests_t{};
if (sw == store_which_t::savanna || sw == store_which_t::both)
Expand Down Expand Up @@ -100,6 +100,10 @@ namespace eosio::chain {

class transaction_context {
private:
// construction/reset initialization
void initialize();
void reset();
// common init called by init_for_* methods below
void init( uint64_t initial_net_usage);

public:
Expand All @@ -113,7 +117,7 @@ namespace eosio::chain {
transaction_metadata::trx_type type = transaction_metadata::trx_type::input);
~transaction_context();

void init_for_implicit_trx( uint64_t initial_net_usage = 0 );
void init_for_implicit_trx();

void init_for_input_trx( uint64_t packed_trx_unprunable_size,
uint64_t packed_trx_prunable_size );
Expand All @@ -125,7 +129,7 @@ namespace eosio::chain {
void squash();
void undo();

inline void add_net_usage( uint64_t u ) { net_usage += u; check_net_usage(); }
inline void add_net_usage( uint64_t u ) { trace->net_usage += u; check_net_usage(); }

void check_net_usage()const;

Expand All @@ -146,7 +150,7 @@ namespace eosio::chain {
}

void pause_billing_timer();
void resume_billing_timer();
void resume_billing_timer(fc::time_point resume_from = fc::time_point{});

uint32_t update_billed_cpu_time( fc::time_point now );

Expand All @@ -157,6 +161,7 @@ namespace eosio::chain {
bool is_dry_run()const { return trx_type == transaction_metadata::trx_type::dry_run; };
bool is_read_only()const { return trx_type == transaction_metadata::trx_type::read_only; };
bool is_transient()const { return trx_type == transaction_metadata::trx_type::read_only || trx_type == transaction_metadata::trx_type::dry_run; };
bool has_undo()const;

int64_t set_proposed_producers(vector<producer_authority> producers);
void set_proposed_finalizers(finalizer_policy&& fin_pol);
Expand Down Expand Up @@ -237,7 +242,7 @@ namespace eosio::chain {
bool net_limit_due_to_block = true;
bool net_limit_due_to_greylist = false;
uint64_t eager_net_limit = 0;
uint64_t& net_usage; /// reference to trace->net_usage
uint64_t init_net_usage = 0;

bool cpu_limit_due_to_greylist = false;

Expand Down
9 changes: 5 additions & 4 deletions libraries/chain/include/eosio/chain/wasm_interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

namespace eosio { namespace chain {

struct platform_timer;
class apply_context;
class wasm_runtime_interface;
class controller;
Expand Down Expand Up @@ -46,9 +47,7 @@ namespace eosio { namespace chain {
oc_none
};

inline static bool test_disable_tierup = false; // set by unittests to test tierup failing

wasm_interface(vm_type vm, vm_oc_enable eosvmoc_tierup, const chainbase::database& d, const std::filesystem::path data_dir, const eosvmoc::config& eosvmoc_config, bool profile);
wasm_interface(vm_type vm, vm_oc_enable eosvmoc_tierup, const chainbase::database& d, platform_timer& main_thread_timer, const std::filesystem::path data_dir, const eosvmoc::config& eosvmoc_config, bool profile);
~wasm_interface();

#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
Expand All @@ -57,6 +56,9 @@ namespace eosio { namespace chain {

// returns true if EOS VM OC is enabled
bool is_eos_vm_oc_enabled() const;

// return number of wasm execution interrupted by eos vm oc compile completing, used for testing
uint64_t get_eos_vm_oc_compile_interrupt_count() const;
#endif

//call before dtor to skip what can be minutes of dtor overhead with some runtimes; can cause leaks
Expand All @@ -82,7 +84,6 @@ namespace eosio { namespace chain {
std::function<bool(const digest_type& code_hash, uint8_t vm_type, uint8_t vm_version, apply_context& context)> substitute_apply;

private:
vm_oc_enable eosvmoc_tierup;
unique_ptr<struct wasm_interface_impl> my;
};

Expand Down
107 changes: 103 additions & 4 deletions libraries/chain/include/eosio/chain/wasm_interface_private.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <eosio/chain/code_object.hpp>
#include <eosio/chain/global_property_object.hpp>
#include <eosio/chain/exceptions.hpp>
#include <eosio/chain/thread_utils.hpp>
#include <fc/scoped_exit.hpp>

#include "IR/Module.h"
Expand Down Expand Up @@ -48,8 +49,9 @@ namespace eosio { namespace chain {
#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
struct eosvmoc_tier {
// Called from main thread
eosvmoc_tier(const std::filesystem::path& d, const eosvmoc::config& c, const chainbase::database& db)
: cc(d, c, db) {
eosvmoc_tier(const std::filesystem::path& d, const eosvmoc::config& c, const chainbase::database& db,
eosvmoc::code_cache_async::compile_complete_callback cb)
: cc(d, c, db, std::move(cb)) {
// Construct exec and mem for the main thread
exec = std::make_unique<eosvmoc::executor>(cc);
mem = std::make_unique<eosvmoc::memory>(wasm_constraints::maximum_linear_memory/wasm_constraints::wasm_page_size);
Expand All @@ -70,9 +72,12 @@ struct eosvmoc_tier {
#endif

wasm_interface_impl(wasm_interface::vm_type vm, wasm_interface::vm_oc_enable eosvmoc_tierup, const chainbase::database& d,
const std::filesystem::path data_dir, const eosvmoc::config& eosvmoc_config, bool profile)
platform_timer& main_thread_timer, const std::filesystem::path data_dir,
const eosvmoc::config& eosvmoc_config, bool profile)
: db(d)
, main_thread_timer(main_thread_timer)
, wasm_runtime_time(vm)
, eosvmoc_tierup(eosvmoc_tierup)
{
#ifdef EOSIO_EOS_VM_RUNTIME_ENABLED
if(vm == wasm_interface::vm_type::eos_vm)
Expand All @@ -96,13 +101,102 @@ struct eosvmoc_tier {
#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
if(eosvmoc_tierup != wasm_interface::vm_oc_enable::oc_none) {
EOS_ASSERT(vm != wasm_interface::vm_type::eos_vm_oc, wasm_exception, "You can't use EOS VM OC as the base runtime when tier up is activated");
eosvmoc = std::make_unique<eosvmoc_tier>(data_dir, eosvmoc_config, d);
eosvmoc = std::make_unique<eosvmoc_tier>(data_dir, eosvmoc_config, d, [this](boost::asio::io_context& ctx, const digest_type& code_id, fc::time_point queued_time) {
async_compile_complete(ctx, code_id, queued_time);
});
}
#endif
}

~wasm_interface_impl() = default;

#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
// called from async thread
void async_compile_complete(boost::asio::io_context& ctx, const digest_type& code_id, fc::time_point queued_time) {
if (executing_code_hash.load() == code_id) { // is action still executing?
auto elapsed = fc::time_point::now() - queued_time;
ilog("EOS VM OC tier up for ${id} compile complete ${t}ms", ("id", code_id)("t", elapsed.count()/1000));
auto expire_in = std::max(fc::microseconds(0), fc::milliseconds(500) - elapsed);
std::shared_ptr<boost::asio::steady_timer> timer = std::make_shared<boost::asio::steady_timer>(ctx);
timer->expires_from_now(std::chrono::microseconds(expire_in.count()));
timer->async_wait([timer, this, code_id](const boost::system::error_code& ec) {
if (ec)
return;
if (executing_code_hash.load() == code_id) {
ilog("EOS VM OC tier up interrupting ${id}", ("id", code_id));
eos_vm_oc_compile_interrupt = true;
main_thread_timer.expire_now();
}
});
}
}
#endif

void apply( const digest_type& code_hash, const uint8_t& vm_type, const uint8_t& vm_version, apply_context& context ) {
bool attempt_tierup = false;
#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
attempt_tierup = eosvmoc && (eosvmoc_tierup == wasm_interface::vm_oc_enable::oc_all || context.should_use_eos_vm_oc());
if (attempt_tierup) {
const chain::eosvmoc::code_descriptor* cd = nullptr;
chain::eosvmoc::code_cache_base::get_cd_failure failure = chain::eosvmoc::code_cache_base::get_cd_failure::temporary;
try {
// Ideally all validator nodes would switch to using oc before block producer nodes so that validators
// are never overwhelmed. Compile whitelisted account contracts first on non-produced blocks. This makes
// it more likely that validators will switch to the oc compiled contract before the block producer runs
// an action for the contract with oc.
chain::eosvmoc::code_cache_async::mode m;
m.whitelisted = context.is_eos_vm_oc_whitelisted();
m.high_priority = m.whitelisted && context.is_applying_block();
m.write_window = context.control.is_write_window();
auto timer_pause = fc::make_scoped_exit([&](){
context.trx_context.resume_billing_timer();
});
context.trx_context.pause_billing_timer();
cd = eosvmoc->cc.get_descriptor_for_code(m, code_hash, vm_version, failure);
} catch (...) {
// swallow errors here, if EOS VM OC has gone in to the weeds we shouldn't bail: continue to try and run baseline
// In the future, consider moving bits of EOS VM that can fire exceptions and such out of this call path
static bool once_is_enough;
if (!once_is_enough)
elog("EOS VM OC has encountered an unexpected failure");
once_is_enough = true;
}
if (cd) {
if (!context.is_applying_block()) // read_only_trx_test.py looks for this log statement
tlog("${a} speculatively executing ${h} with eos vm oc", ("a", context.get_receiver())("h", code_hash));
eosvmoc->exec->execute(*cd, *eosvmoc->mem, context);
return;
}
}
#endif
const bool allow_oc_interrupt = attempt_tierup && context.is_applying_block() && context.trx_context.has_undo();
auto ex = fc::make_scoped_exit([&]() {
if (allow_oc_interrupt) {
eos_vm_oc_compile_interrupt = false;
executing_code_hash.store({}); // indicate no longer executing
}
});
if (allow_oc_interrupt)
executing_code_hash.store(code_hash);
try {
get_instantiated_module(code_hash, vm_type, vm_version, context.trx_context)->apply(context);
} catch (const interrupt_exception& e) {
if (allow_oc_interrupt && eos_vm_oc_compile_interrupt) {
++eos_vm_oc_compile_interrupt_count;
dlog("EOS VM OC compile complete interrupt of ${r} <= ${a}::${act} code ${h}, interrupt #${c}",
("r", context.get_receiver())("a", context.get_action().account)
("act", context.get_action().name)("h", code_hash)("c", eos_vm_oc_compile_interrupt_count));
EOS_THROW(interrupt_oc_exception, "EOS VM OC compile complete interrupt of ${r} <= ${a}::${act} code ${h}, interrupt #${c}",
("r", context.get_receiver())("a", context.get_action().account)
("act", context.get_action().name)("h", code_hash)("c", eos_vm_oc_compile_interrupt_count));
}
throw;
}
}

// used for testing
uint64_t get_eos_vm_oc_compile_interrupt_count() const { return eos_vm_oc_compile_interrupt_count; }

bool is_code_cached(const digest_type& code_hash, const uint8_t& vm_type, const uint8_t& vm_version) const {
// This method is only called from tests; performance is not critical.
// No need for an additional check if we should lock or not.
Expand Down Expand Up @@ -212,7 +306,12 @@ struct eosvmoc_tier {
wasm_cache_index wasm_instantiation_cache;

const chainbase::database& db;
platform_timer& main_thread_timer;
const wasm_interface::vm_type wasm_runtime_time;
const wasm_interface::vm_oc_enable eosvmoc_tierup;
large_atomic<digest_type> executing_code_hash{};
std::atomic<bool> eos_vm_oc_compile_interrupt{false};
uint32_t eos_vm_oc_compile_interrupt_count{0}; // for testing

#ifdef EOSIO_EOS_VM_OC_RUNTIME_ENABLED
std::unique_ptr<struct eosvmoc_tier> eosvmoc{nullptr}; // used by all threads
Expand Down
Loading

0 comments on commit 5be1d8b

Please sign in to comment.