diff --git a/src/grpcbox_socket.erl b/src/grpcbox_socket.erl index 7a72e9e..ffdd281 100644 --- a/src/grpcbox_socket.erl +++ b/src/grpcbox_socket.erl @@ -16,19 +16,19 @@ start_link(Pool, ListenOpts, AcceptorOpts) -> gen_server:start_link(?MODULE, [Pool, ListenOpts, AcceptorOpts], []). - %% gen_server api init([Pool, ListenOpts, PoolOpts]) -> - %% Trapping exit so can close socket in terminate/2 - _ = process_flag(trap_exit, true), Port = maps:get(port, ListenOpts, 8080), IPAddress = maps:get(ip, ListenOpts, {0, 0, 0, 0}), AcceptorPoolSize = maps:get(size, PoolOpts, 10), SocketOpts = maps:get(socket_options, ListenOpts, [{reuseaddr, true}, {nodelay, true}, + {reuseaddr, true}, {backlog, 32768}, {keepalive, true}]), + %% Trapping exit so can close socket in terminate/2 + _ = process_flag(trap_exit, true), Opts = [{active, false}, {mode, binary}, {packet, raw}, {ip, IPAddress} | SocketOpts], case gen_tcp:listen(Port, Opts) of {ok, Socket} -> @@ -36,18 +36,23 @@ init([Pool, ListenOpts, PoolOpts]) -> MRef = monitor(port, Socket), {ok, _} = grpcbox_pool:accept_socket(Pool, Socket, AcceptorPoolSize), {ok, {Socket, MRef}}; - {error, eaddrinuse} -> + {error, eaddrinuse} = Error -> %% our desired port is already in use - %% its likely this grpcbox_socket server is restarting + %% its likely this grpcbox_socket server has been killed ( for reason unknown ) and is restarting %% previously it would have bound to the port before passing control to our acceptor pool + %% the socket remains open %% in the restart scenario, the socket process would attempt to bind again %% to the port and then stop, the sup would keep restarting it %% and we would end up breaching the restart strategy of the parent sup %% eventually taking down the entire tree %% result of which is we have no active listener and grpcbox is effectively down - %% so now if we hit eaddrinuse, we check if our acceptor pool is already the - %% controlling process, if so we reuse the port from its state and - %% allow grpcbox_socket to start cleanly + %% so now if we hit eaddrinuse, we check if our acceptor pool using it + %% if so we close the port here and stop this process + %% NOTE: issuing stop in init wont trigger terminate and so cant rely on + %% the socket being closed there + %% This allows the sup to restart things cleanly + %% We could try to reuse the exising port rather than closing it + %% but side effects were encountered there, so deliberately avoiding %% NOTE: acceptor_pool has a grace period for connections before it terminates %% grpcbox_pool sets this to a default of 5 secs @@ -55,31 +60,8 @@ init([Pool, ListenOpts, PoolOpts]) -> %% AND keep in mind the acceptor pool will continue accepting new connections %% during this grace period - %% Other possible fixes here include changing the grpcbox_services_sup from its - %% rest_for_one to a one_for_all strategy. This ensures the pool and thus the - %% current controlling process of the socket is terminated - %% and allows things to restart cleanly if grpcbox_socket dies - %% the disadvantage there however is we will drop all existing grpc connections - - %% Another possible fix is to play with the restart strategy intensity and periods - %% and ensure the top level sup doesnt get breached but... - %% a requirement will be to ensure the grpcbox_service_sup forces a restart - %% of grpcbox_pool and therefore the acceptor_pool process - %% as only by doing that will be free up the socket and allow grpcbox_socket to rebind - %% thus we end up terminating any existing grpc connections - - %% Yet another possible fix is to move the cleanup of closing the socket - %% out of grpcbox_socket's terminate and into acceptor_pool's terminate - %% that however puts two way co-ordination between two distinct libs - %% which is far from ideal and in addition will also result in existing grpc connection - %% being dropped - - %% my view is, if at all possible, its better to restart the grpcbox_socket process without - %% impacting existing connections, the fix below allows for that, albeit a lil messy - %% there is most likely a better solution to all of this, TODO: revisit - %% get the current sockets in use by the acceptor pool - %% if one is bound to our target port then reuse + %% if one is bound to our target port then close it %% need to allow for possibility of multiple services, each with its own socket %% so we need to identify our interested socket via port number PoolSockets = grpcbox_pool:pool_sockets(Pool), @@ -89,15 +71,15 @@ init([Pool, ListenOpts, PoolOpts]) -> {ok, Socket}; (_, Acc) -> Acc - end, {error, eaddrinuse}, PoolSockets), + end, socket_not_found, PoolSockets), case MaybeHaveExistingSocket of {ok, Socket} -> - MRef = monitor(port, Socket), - {ok, {Socket, MRef}}; - {error, Reason} -> - {stop, Reason} - end; - {error, Reason}-> + gen_tcp:close(Socket); + socket_not_found -> + noop + end, + Error; + {error, Reason} -> {stop, Reason} end. @@ -115,9 +97,10 @@ handle_info(_, State) -> code_change(_, State, _) -> {ok, State}. -terminate(_, {Socket, MRef}) -> +terminate(_Reason, {Socket, MRef}) -> %% Socket may already be down but need to ensure it is closed to avoid %% eaddrinuse error on restart + %% this takes care of that, unless of course this process is killed... case demonitor(MRef, [flush, info]) of true -> gen_tcp:close(Socket); false -> ok