Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(stdlib): Add main log format to parse_nginx_log function #1202

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions benches/stdlib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1969,6 +1969,22 @@ bench_function! {
})),
}

main {
args: func_args![
value: r#"172.24.0.3 - - [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.11.1" "1.2.3.4, 10.10.1.1""#,
format: "main"
],
want: Ok(value!({
"remote_addr": "172.24.0.3",
"timestamp": (DateTime::parse_from_rfc3339("2024-12-31T17:32:06Z").unwrap().with_timezone(&Utc)),
"request": "GET / HTTP/1.1",
"status": 200,
"body_bytes_size": 615,
"http_user_agent": "curl/8.11.1",
"http_x_forwarded_for": "1.2.3.4, 10.10.1.1",
})),
}

error {
args: func_args![value: r#"2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "POST /not-found HTTP/1.1", host: "localhost:8081""#,
format: "error"
Expand Down
1 change: 1 addition & 0 deletions changelog.d/1202.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add `main` log format for `parse_nginx_log`.
24 changes: 24 additions & 0 deletions src/stdlib/log_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,30 @@ pub(crate) static REGEX_INGRESS_NGINX_UPSTREAMINFO_LOG: Lazy<Regex> = Lazy::new(
.expect("failed compiling regex for Ingress Nginx upstreaminfo log")
});

/// Regex matching one line of the Nginx `main` access-log format, i.e. the
/// `log_format main` declared in the default `nginx.conf` referenced below:
///
/// - <https://nginx.org/en/linux_packages.html>
/// - <https://hg.nginx.org/pkg-oss/file/tip/alpine/alpine/nginx.conf>
/// - <https://hg.nginx.org/pkg-oss/file/tip/debian/debian/nginx.conf>
/// - <https://hg.nginx.org/pkg-oss/file/tip/rpm/SOURCES/nginx.conf>
///
/// Named capture groups: `remote_addr`, `remote_user`, `timestamp`, `request`,
/// `status`, `body_bytes_size`, `http_referer`, `http_user_agent` and
/// `http_x_forwarded_for`. A field logged as a lone `-` leaves its group
/// unmatched instead of capturing the dash.
///
/// The three quoted header fields all accept an empty value between the
/// quotes. The user-agent and forwarded-for groups previously required at
/// least one character, so a line with an empty quoted value there failed to
/// match even though the same value was accepted for the referer.
///
/// The pattern is compiled lazily on first use; the `expect` can only fire if
/// the literal below is not a valid regex, which is a programming error.
pub(crate) static REGEX_NGINX_MAIN_LOG: Lazy<Regex> = Lazy::new(|| {
    Regex::new(
        r#"(?x)                                 # Ignore whitespace and comments in the regex expression.
        ^\s*                                    # Start with any number of whitespaces
        (-|(?P<remote_addr>\S+))\s+             # Match `-` or a run of non-space characters
        \-\s+                                   # Always a dash
        (-|(?P<remote_user>\S+))\s+             # Match `-` or a run of non-space characters
        \[(?P<timestamp>[^\]]+)\]\s+            # Match date between brackets
        "(?P<request>[^"]*)"\s+                 # Match any run of non double-quote characters (may be empty)
        (?P<status>\d+)\s+                      # Match numbers
        (?P<body_bytes_size>\d+)\s+             # Match numbers
        "(-|(?P<http_referer>[^"]*))"\s+        # Match `-` or any run of non double-quote characters (may be empty)
        "(-|(?P<http_user_agent>[^"]*))"\s+     # Match `-` or any run of non double-quote characters (may be empty)
        "(-|(?P<http_x_forwarded_for>[^"]*))"   # Match `-` or any run of non double-quote characters (may be empty)
        \s*$                                    # Match any number of whitespaces (to be discarded).
    "#)
    .expect("failed compiling regex for Nginx main log")
});

pub(crate) static REGEX_NGINX_ERROR_LOG: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r#"(?x) # Ignore whitespace and comments in the regex expression.
Expand Down
103 changes: 103 additions & 0 deletions src/stdlib/parse_nginx_log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ fn variants() -> Vec<Value> {
value!("combined"),
value!("error"),
value!("ingress_upstreaminfo"),
value!("main"),
]
}

Expand Down Expand Up @@ -90,6 +91,13 @@ impl Function for ParseNginxLog {
r#"s'{"agent":"curl/7.75.0","client":"172.17.0.1","referer":"-","request":"GET / HTTP/1.1","size":612,"status":200,"timestamp":"2021-03-31T12:04:07Z"}'"#,
),
},
Example {
title: "parse nginx main log",
source: r#"encode_json(parse_nginx_log!(s'172.24.0.1 - alice [03/Jan/2025:16:42:58 +0000] "GET / HTTP/1.1" 200 615 "http://domain.tld/path" "curl/8.11.1" "1.2.3.4, 10.10.1.1"', "main"))"#,
result: Ok(
r#"s'{"body_bytes_size":615,"http_referer":"http://domain.tld/path","http_user_agent":"curl/8.11.1","http_x_forwarded_for":"1.2.3.4, 10.10.1.1","remote_addr":"172.24.0.1","remote_user":"alice","request":"GET / HTTP/1.1","status":200,"timestamp":"2025-01-03T16:42:58Z"}'"#,
),
},
Example {
title: "parse nginx error log",
source: r#"encode_json(parse_nginx_log!(s'2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "POST /not-found HTTP/1.1", host: "localhost:8081"', "error"))"#,
Expand All @@ -105,6 +113,7 @@ fn regex_for_format(format: &[u8]) -> &Regex {
match format {
b"combined" => &log_util::REGEX_NGINX_COMBINED_LOG,
b"ingress_upstreaminfo" => &log_util::REGEX_INGRESS_NGINX_UPSTREAMINFO_LOG,
b"main" => &log_util::REGEX_NGINX_MAIN_LOG,
b"error" => &log_util::REGEX_NGINX_ERROR_LOG,
_ => unreachable!(),
}
Expand All @@ -114,6 +123,7 @@ fn time_format_for_format(format: &[u8]) -> String {
match format {
b"combined" => "%d/%b/%Y:%T %z".to_owned(),
b"ingress_upstreaminfo" => "%d/%b/%Y:%T %z".to_owned(),
b"main" => "%d/%b/%Y:%T %z".to_owned(),
b"error" => "%Y/%m/%d %H:%M:%S".to_owned(),
_ => unreachable!(),
}
Expand Down Expand Up @@ -152,6 +162,7 @@ impl FunctionExpression for ParseNginxLogFn {
TypeDef::object(match self.format.as_ref() {
b"combined" => kind_combined(),
b"ingress_upstreaminfo" => kind_ingress_upstreaminfo(),
b"main" => kind_main(),
b"error" => kind_error(),
_ => unreachable!(),
})
Expand Down Expand Up @@ -198,6 +209,20 @@ fn kind_ingress_upstreaminfo() -> BTreeMap<Field, Kind> {
])
}

/// Describes the object produced when parsing with the `main` format.
///
/// `timestamp`, `request`, `status` and `body_bytes_size` are always present;
/// every other field is bytes or undefined.
fn kind_main() -> BTreeMap<Field, Kind> {
    // Shared kind for the fields that may be missing from the result.
    let optional_bytes = || Kind::bytes().or_undefined();

    [
        ("remote_addr", optional_bytes()),
        ("remote_user", optional_bytes()),
        ("timestamp", Kind::timestamp()),
        ("request", Kind::bytes()),
        ("status", Kind::integer()),
        ("body_bytes_size", Kind::integer()),
        ("http_referer", optional_bytes()),
        ("http_user_agent", optional_bytes()),
        ("http_x_forwarded_for", optional_bytes()),
    ]
    .into_iter()
    .map(|(name, kind)| (name.into(), kind))
    .collect()
}

fn kind_error() -> BTreeMap<Field, Kind> {
BTreeMap::from([
("timestamp".into(), Kind::timestamp()),
Expand Down Expand Up @@ -427,6 +452,84 @@ mod tests {
tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(),
}

main_line_valid_no_proxy {
args: func_args![
value: r#"172.24.0.3 - - [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.11.1" "-""#,
format: "main"
],
want: Ok(btreemap! {
"remote_addr" => "172.24.0.3",
"timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2024-12-31T17:32:06Z").unwrap().into()),
"request" => "GET / HTTP/1.1",
"status" => 200,
"body_bytes_size" => 615,
"http_user_agent" => "curl/8.11.1",
}),
tdef: TypeDef::object(kind_main()).fallible(),
}

main_line_valid_single_proxy {
args: func_args![
value: r#"172.24.0.3 - - [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.11.1" "172.24.0.1""#,
format: "main"
],
want: Ok(btreemap! {
"remote_addr" => "172.24.0.3",
"timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2024-12-31T17:32:06Z").unwrap().into()),
"request" => "GET / HTTP/1.1",
"status" => 200,
"body_bytes_size" => 615,
"http_user_agent" => "curl/8.11.1",
"http_x_forwarded_for" => "172.24.0.1",
}),
tdef: TypeDef::object(kind_main()).fallible(),
}

main_line_valid_two_proxies {
args: func_args![
value: r#"172.24.0.3 - - [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.11.1" "1.2.3.4, 10.10.1.1""#,
format: "main"
],
want: Ok(btreemap! {
"remote_addr" => "172.24.0.3",
"timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2024-12-31T17:32:06Z").unwrap().into()),
"request" => "GET / HTTP/1.1",
"status" => 200,
"body_bytes_size" => 615,
"http_user_agent" => "curl/8.11.1",
"http_x_forwarded_for" => "1.2.3.4, 10.10.1.1",
}),
tdef: TypeDef::object(kind_main()).fallible(),
}

main_line_valid_all_fields {
args: func_args![
value: r#"172.24.0.2 - alice [03/Jan/2025:16:42:58 +0000] "GET / HTTP/1.1" 200 615 "http://domain.tld/path" "curl/8.11.1" "1.2.3.4, 10.10.1.1""#,
format: "main"
],
want: Ok(btreemap! {
"remote_addr" => "172.24.0.2",
"remote_user" => "alice",
"timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2025-01-03T16:42:58Z").unwrap().into()),
"request" => "GET / HTTP/1.1",
"status" => 200,
"body_bytes_size" => 615,
"http_referer" => "http://domain.tld/path",
"http_user_agent" => "curl/8.11.1",
"http_x_forwarded_for" => "1.2.3.4, 10.10.1.1",
}),
tdef: TypeDef::object(kind_main()).fallible(),
}

main_line_invalid {
args: func_args![
value: r#"2025/01/03 16:41:26 [error] 31#31: *3 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 172.24.0.2, server: localhost, request: "GET /favicon.ico HTTP/1.1", host: "localhost:4080", referrer: "http://localhost:4080/""#,
format: "main"
],
want: Err("failed parsing log line"),
tdef: TypeDef::object(kind_main()).fallible(),
}

error_line_valid {
args: func_args![
value: r#"2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "POST /not-found HTTP/1.1", host: "localhost:8081""#,
Expand Down
Loading