From aab6a35c0dbebc0660ce463c18e25b116383751a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Fri, 3 Jan 2025 17:18:33 +0100 Subject: [PATCH] WIP --- src/cow_http2_machine.erl | 23 +++- src/cow_ws.erl | 256 +++++++++++++++++++++++++++++++++----- 2 files changed, 250 insertions(+), 29 deletions(-) diff --git a/src/cow_http2_machine.erl b/src/cow_http2_machine.erl index 808c6cf..b08538b 100644 --- a/src/cow_http2_machine.erl +++ b/src/cow_http2_machine.erl @@ -293,17 +293,38 @@ init_upgrade_stream(Method, State=#http2_machine{mode=server, remote_streamid=0, | {error, {stream_error, cow_http2:streamid(), cow_http2:error(), atom()}, State} | {error, {connection_error, cow_http2:error(), atom()}, State} when State::http2_machine(). + +%-define(HTTP2_MACHINE_DEBUG, 1). +-ifdef(HTTP2_MACHINE_DEBUG). +-define(LOG_FRAME(Frame, State), + begin + Frame2 = case Frame of + {data,_,_,_} -> setelement(4, Frame, {'BINARY-DATA', byte_size(element(4, Frame))}); + {continuation,_,_,_} -> setelement(4, Frame, {'BINARY-DATA', byte_size(element(4, Frame))}); + _ -> Frame + end, + io:format(user, "~p rcv: ~p~n", [State#http2_machine.mode, Frame2]) + end +). +-else. +-define(LOG_FRAME(Frame, State), _ = Frame). +-endif. 
+ frame(Frame, State=#http2_machine{state=settings, preface_timer=TRef}) -> + ?LOG_FRAME(Frame, State), ok = case TRef of undefined -> ok; _ -> erlang:cancel_timer(TRef, [{async, true}, {info, false}]) end, settings_frame(Frame, State#http2_machine{state=normal, preface_timer=undefined}); frame(Frame, State=#http2_machine{state={continuation, _, _}}) -> + ?LOG_FRAME(Frame, State), maybe_discard_result(continuation_frame(Frame, State)); -frame(settings_ack, State=#http2_machine{state=normal}) -> +frame(Frame = settings_ack, State=#http2_machine{state=normal}) -> + ?LOG_FRAME(Frame, State), settings_ack_frame(State); frame(Frame, State=#http2_machine{state=normal}) -> + ?LOG_FRAME(Frame, State), Result = case element(1, Frame) of data -> data_frame(Frame, State); headers -> headers_frame(Frame, State); diff --git a/src/cow_ws.erl b/src/cow_ws.erl index 27c7c87..8ff23f7 100644 --- a/src/cow_ws.erl +++ b/src/cow_ws.erl @@ -72,6 +72,8 @@ -type utf8_state() :: 0..8 | undefined. -export_type([utf8_state/0]). +-compile({inline, [utf8t/0, utf8s/0, utf8s0/0]}). + %% @doc Generate a key for the Websocket handshake request. -spec key() -> binary(). @@ -559,14 +561,14 @@ validate_payload(Payload, Rest, undefined, _, _, _, true) -> {ok, Payload, undefined, Rest}; %% Text frames and close control frames MUST have a payload that is valid UTF-8. 
validate_payload(Payload, Rest, Utf8State, _, Type, _, Eof) when Type =:= text; Type =:= close -> - case validate_utf8(Payload, Utf8State) of + case validate_text(Payload, Utf8State) of 1 -> {error, badencoding}; Utf8State2 when not Eof -> {more, Payload, Utf8State2}; 0 when Eof -> {ok, Payload, 0, Rest}; _ -> {error, badencoding} end; validate_payload(Payload, Rest, Utf8State, _, fragment, {Fin, text, _}, Eof) -> - case validate_utf8(Payload, Utf8State) of + case validate_text(Payload, Utf8State) of 1 -> {error, badencoding}; 0 when Eof -> {ok, Payload, 0, Rest}; Utf8State2 when Eof, Fin =:= nofin -> {ok, Payload, Utf8State2, Rest}; @@ -585,32 +587,230 @@ validate_payload(Payload, Rest, Utf8State, _, _, _, true) -> %% each with a clause. The common clauses were then grouped together. %% %% This function returns 0 on success, 1 on error, and 2..8 on incomplete data. -validate_utf8(<<>>, State) -> State; -validate_utf8(<< C, Rest/bits >>, 0) when C < 128 -> validate_utf8(Rest, 0); -validate_utf8(<< C, Rest/bits >>, 2) when C >= 128, C < 144 -> validate_utf8(Rest, 0); -validate_utf8(<< C, Rest/bits >>, 3) when C >= 128, C < 144 -> validate_utf8(Rest, 2); -validate_utf8(<< C, Rest/bits >>, 5) when C >= 128, C < 144 -> validate_utf8(Rest, 2); -validate_utf8(<< C, Rest/bits >>, 7) when C >= 128, C < 144 -> validate_utf8(Rest, 3); -validate_utf8(<< C, Rest/bits >>, 8) when C >= 128, C < 144 -> validate_utf8(Rest, 3); -validate_utf8(<< C, Rest/bits >>, 2) when C >= 144, C < 160 -> validate_utf8(Rest, 0); -validate_utf8(<< C, Rest/bits >>, 3) when C >= 144, C < 160 -> validate_utf8(Rest, 2); -validate_utf8(<< C, Rest/bits >>, 5) when C >= 144, C < 160 -> validate_utf8(Rest, 2); -validate_utf8(<< C, Rest/bits >>, 6) when C >= 144, C < 160 -> validate_utf8(Rest, 3); -validate_utf8(<< C, Rest/bits >>, 7) when C >= 144, C < 160 -> validate_utf8(Rest, 3); -validate_utf8(<< C, Rest/bits >>, 2) when C >= 160, C < 192 -> validate_utf8(Rest, 0); -validate_utf8(<< C, Rest/bits >>, 
3) when C >= 160, C < 192 -> validate_utf8(Rest, 2); -validate_utf8(<< C, Rest/bits >>, 4) when C >= 160, C < 192 -> validate_utf8(Rest, 2); -validate_utf8(<< C, Rest/bits >>, 6) when C >= 160, C < 192 -> validate_utf8(Rest, 3); -validate_utf8(<< C, Rest/bits >>, 7) when C >= 160, C < 192 -> validate_utf8(Rest, 3); -validate_utf8(<< C, Rest/bits >>, 0) when C >= 194, C < 224 -> validate_utf8(Rest, 2); -validate_utf8(<< 224, Rest/bits >>, 0) -> validate_utf8(Rest, 4); -validate_utf8(<< C, Rest/bits >>, 0) when C >= 225, C < 237 -> validate_utf8(Rest, 3); -validate_utf8(<< 237, Rest/bits >>, 0) -> validate_utf8(Rest, 5); -validate_utf8(<< C, Rest/bits >>, 0) when C =:= 238; C =:= 239 -> validate_utf8(Rest, 3); -validate_utf8(<< 240, Rest/bits >>, 0) -> validate_utf8(Rest, 6); -validate_utf8(<< C, Rest/bits >>, 0) when C =:= 241; C =:= 242; C =:= 243 -> validate_utf8(Rest, 7); -validate_utf8(<< 244, Rest/bits >>, 0) -> validate_utf8(Rest, 8); -validate_utf8(_, _) -> 1. +validate_text(Text, 0) -> validate_plain(Text); +%% @todo +%validate_text(Text, State) -> validate_utf8(Text, State). +validate_text(Text, 2) -> validate_s2(Text); +validate_text(Text, 3) -> validate_s3(Text); +validate_text(Text, 4) -> validate_s4(Text); +validate_text(Text, 5) -> validate_s5(Text); +validate_text(Text, 6) -> validate_s6(Text); +validate_text(Text, 7) -> validate_s7(Text); +validate_text(Text, 8) -> validate_s8(Text). + +validate_plain(<<>>) -> 0; %% @todo Move this last? +validate_plain(<>) when C1 < 128, C2 < 128, C3 < 128, C4 < 128 -> validate_plain(R); +validate_plain(<>) when C1 < 128 -> validate_plain(R); +validate_plain(Text) -> validate_s0(Text). + +%% @todo This step is useful only for text with lots of UTF-8. +%validate_plain(<>) -> +% State = element(C - 127, utf8s0()), +% case State of +% 0 -> validate_plain(R); +% 12 -> 12; +% _ -> validate_utf8(R, State) +% end. +% +%%% @todo Not much better than the other functions so far. 
+%validate_utf8(<>, State0) -> +% Type = element(C - 127, utf8t()), +% State = element(State0 + Type, utf8s()), +% case State of +% 0 -> validate_plain(R); +% 12 -> 12; +% _ -> validate_utf8(R, State) +% end; +%validate_utf8(<<>>, State) -> +% State. + + +%% @todo If we find UTF-8 there's a good chance that we'll find more UTF-8 characters right after. +%% Just like currently if we find plain we expect more plain. +%% This is done below. NOTE(review): validate_s2..validate_s8 call element(C - 127, utf8t()) without a C >= 128 guard (validate_s0 has one); confirm that a byte below 128 in those states returns 1 rather than raising badarg. + +validate_s0(<>) when C >= 128 -> + Class = element(C - 127, utf8t()), + case Class of + 2 -> validate_s2(R); + 3 -> validate_s3(R); + 4 -> validate_s5(R); + 5 -> validate_s8(R); + 6 -> validate_s7(R); + 10 -> validate_s4(R); + 11 -> validate_s6(R); + _ -> 1 + end; +validate_s0(Text) -> + validate_plain(Text). + +validate_s2(<>) -> + Class = element(C - 127, utf8t()), + case Class of + 1 -> validate_s0(R); + 7 -> validate_s0(R); + 9 -> validate_s0(R); + _ -> 1 + end; +validate_s2(<<>>) -> + 2. + +validate_s3(<>) -> + Class = element(C - 127, utf8t()), + case Class of + 1 -> validate_s2(R); + 7 -> validate_s2(R); + 9 -> validate_s2(R); + _ -> 1 + end; +validate_s3(<<>>) -> + 3. + +validate_s4(<>) -> + Class = element(C - 127, utf8t()), + case Class of + 7 -> validate_s2(R); + _ -> 1 + end; +validate_s4(<<>>) -> + 4. + +validate_s5(<>) -> + Class = element(C - 127, utf8t()), + case Class of + 1 -> validate_s2(R); + 9 -> validate_s2(R); + _ -> 1 + end; +validate_s5(<<>>) -> + 5. + +validate_s6(<>) -> + Class = element(C - 127, utf8t()), + case Class of + 7 -> validate_s3(R); + 9 -> validate_s3(R); + _ -> 1 + end; +validate_s6(<<>>) -> + 6. + +validate_s7(<>) -> + Class = element(C - 127, utf8t()), + case Class of + 1 -> validate_s3(R); + 7 -> validate_s3(R); + 9 -> validate_s3(R); + _ -> 1 + end; +validate_s7(<<>>) -> + 7. + +validate_s8(<>) -> + Class = element(C - 127, utf8t()), + case Class of + 1 -> validate_s3(R); + _ -> 1 + end; +validate_s8(<<>>) -> + 8. 
+ + + +%% @todo Perhaps we can have one function per state and see if it's even faster. + +utf8t() -> + { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8 + }. + +utf8s() -> + { + 12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12 + }. + +utf8s0() -> + { + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, + 12,12,24,24,24,24,24,24,24,24,24,24,24,24,24,24, + 24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, + 48,36,36,36,36,36,36,36,36,36,36,36,36,60,36,36, + 72,84,84,84,96,12,12,12,12,12,12,12,12,12,12,12 + }. 
+ + +%validate_utf8(<<>>, State) -> State; +% +%validate_utf8(<< C, Rest/bits >>, 2) when C < 144 -> validate_plain(Rest); +%validate_utf8(<< C, Rest/bits >>, 3) when C < 144 -> validate_utf8(Rest, 2); +%validate_utf8(<< C, Rest/bits >>, 5) when C < 144 -> validate_utf8(Rest, 2); +%validate_utf8(<< C, Rest/bits >>, 7) when C < 144 -> validate_utf8(Rest, 3); +%validate_utf8(<< C, Rest/bits >>, 8) when C < 144 -> validate_utf8(Rest, 3); +% +%validate_utf8(<< C, Rest/bits >>, 2) when C < 160 -> validate_plain(Rest); +%validate_utf8(<< C, Rest/bits >>, 3) when C < 160 -> validate_utf8(Rest, 2); +%validate_utf8(<< C, Rest/bits >>, 5) when C < 160 -> validate_utf8(Rest, 2); +%validate_utf8(<< C, Rest/bits >>, 6) when C < 160 -> validate_utf8(Rest, 3); +%validate_utf8(<< C, Rest/bits >>, 7) when C < 160 -> validate_utf8(Rest, 3); +% +%validate_utf8(<< C, Rest/bits >>, 2) when C < 192 -> validate_plain(Rest); +%validate_utf8(<< C, Rest/bits >>, 3) when C < 192 -> validate_utf8(Rest, 2); +%validate_utf8(<< C, Rest/bits >>, 4) when C < 192 -> validate_utf8(Rest, 2); +%validate_utf8(<< C, Rest/bits >>, 6) when C < 192 -> validate_utf8(Rest, 3); +%validate_utf8(<< C, Rest/bits >>, 7) when C < 192 -> validate_utf8(Rest, 3); +% +%validate_utf8(<< C, _/bits >>, _) when C < 194 -> 1; +% +%validate_utf8(<< C, Rest/bits >>, 0) when C < 224 -> validate_utf8(Rest, 2); +%validate_utf8(<< 224, Rest/bits >>, 0) -> validate_utf8(Rest, 4); +%validate_utf8(<< C, Rest/bits >>, 0) when C < 237 -> validate_utf8(Rest, 3); +%validate_utf8(<< 237, Rest/bits >>, 0) -> validate_utf8(Rest, 5); +%validate_utf8(<< C, Rest/bits >>, 0) when C < 240 -> validate_utf8(Rest, 3); +%validate_utf8(<< 240, Rest/bits >>, 0) -> validate_utf8(Rest, 6); +%validate_utf8(<< C, Rest/bits >>, 0) when C < 244 -> validate_utf8(Rest, 7); +%validate_utf8(<< 244, Rest/bits >>, 0) -> validate_utf8(Rest, 8); +% +%validate_utf8(_, _) -> 1. 
+ + +%validate_utf8(<<>>, State) -> State; +%validate_utf8(<< C, Rest/bits >>, 0) when C < 128 -> validate_plain(Rest); +%validate_utf8(<< C, Rest/bits >>, 2) when C >= 128, C < 144 -> validate_plain(Rest); +%validate_utf8(<< C, Rest/bits >>, 3) when C >= 128, C < 144 -> validate_utf8(Rest, 2); +%validate_utf8(<< C, Rest/bits >>, 5) when C >= 128, C < 144 -> validate_utf8(Rest, 2); +%validate_utf8(<< C, Rest/bits >>, 7) when C >= 128, C < 144 -> validate_utf8(Rest, 3); +%validate_utf8(<< C, Rest/bits >>, 8) when C >= 128, C < 144 -> validate_utf8(Rest, 3); +%validate_utf8(<< C, Rest/bits >>, 2) when C >= 144, C < 160 -> validate_plain(Rest); +%validate_utf8(<< C, Rest/bits >>, 3) when C >= 144, C < 160 -> validate_utf8(Rest, 2); +%validate_utf8(<< C, Rest/bits >>, 5) when C >= 144, C < 160 -> validate_utf8(Rest, 2); +%validate_utf8(<< C, Rest/bits >>, 6) when C >= 144, C < 160 -> validate_utf8(Rest, 3); +%validate_utf8(<< C, Rest/bits >>, 7) when C >= 144, C < 160 -> validate_utf8(Rest, 3); +%validate_utf8(<< C, Rest/bits >>, 2) when C >= 160, C < 192 -> validate_plain(Rest); +%validate_utf8(<< C, Rest/bits >>, 3) when C >= 160, C < 192 -> validate_utf8(Rest, 2); +%validate_utf8(<< C, Rest/bits >>, 4) when C >= 160, C < 192 -> validate_utf8(Rest, 2); +%validate_utf8(<< C, Rest/bits >>, 6) when C >= 160, C < 192 -> validate_utf8(Rest, 3); +%validate_utf8(<< C, Rest/bits >>, 7) when C >= 160, C < 192 -> validate_utf8(Rest, 3); +%validate_utf8(<< C, Rest/bits >>, 0) when C >= 194, C < 224 -> validate_utf8(Rest, 2); +%validate_utf8(<< 224, Rest/bits >>, 0) -> validate_utf8(Rest, 4); +%validate_utf8(<< C, Rest/bits >>, 0) when C >= 225, C < 237 -> validate_utf8(Rest, 3); +%validate_utf8(<< 237, Rest/bits >>, 0) -> validate_utf8(Rest, 5); +%validate_utf8(<< C, Rest/bits >>, 0) when C =:= 238; C =:= 239 -> validate_utf8(Rest, 3); +%validate_utf8(<< 240, Rest/bits >>, 0) -> validate_utf8(Rest, 6); +%validate_utf8(<< C, Rest/bits >>, 0) when C =:= 241; C =:= 242; C =:= 243 
-> validate_utf8(Rest, 7); +%validate_utf8(<< 244, Rest/bits >>, 0) -> validate_utf8(Rest, 8); +%validate_utf8(_, _) -> 1. + + %% @doc Return a frame tuple from parsed state and data.