Skip to content

Commit

Permalink
feat(stdlib): Add main log format to parse_nginx_log function
Browse files Browse the repository at this point in the history
Signed-off-by: VirtualTam <[email protected]>
  • Loading branch information
virtualtam committed Jan 3, 2025
1 parent ebee7f3 commit e58d4b8
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 0 deletions.
16 changes: 16 additions & 0 deletions benches/stdlib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1969,6 +1969,22 @@ bench_function! {
})),
}

// Benchmark case for the `main` format. The input line carries `-` for the
// remote user and the referer, so neither key appears in the expected object,
// and the last quoted field holds two comma-separated addresses that are
// expected back as a single string.
main {
args: func_args![
value: r#"172.24.0.3 - - [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.11.1" "1.2.3.4, 10.10.1.1""#,
format: "main"
],
want: Ok(value!({
"remote_addr": "172.24.0.3",
"timestamp": (DateTime::parse_from_rfc3339("2024-12-31T17:32:06Z").unwrap().with_timezone(&Utc)),
"request": "GET / HTTP/1.1",
"status": 200,
"body_bytes_size": 615,
"http_user_agent": "curl/8.11.1",
"http_x_forwarded_for": "1.2.3.4, 10.10.1.1",
})),
}

error {
args: func_args![value: r#"2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "POST /not-found HTTP/1.1", host: "localhost:8081""#,
format: "error"
Expand Down
1 change: 1 addition & 0 deletions changelog.d/1202.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add `main` log format for `parse_nginx_log`.
24 changes: 24 additions & 0 deletions src/stdlib/log_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,30 @@ pub(crate) static REGEX_INGRESS_NGINX_UPSTREAMINFO_LOG: Lazy<Regex> = Lazy::new(
.expect("failed compiling regex for Ingress Nginx upstreaminfo log")
});

// Regex for the nginx `main` access log format. Reference configuration:
// - https://nginx.org/en/linux_packages.html
// - https://hg.nginx.org/pkg-oss/file/tip/alpine/alpine/nginx.conf
// - https://hg.nginx.org/pkg-oss/file/tip/debian/debian/nginx.conf
// - https://hg.nginx.org/pkg-oss/file/tip/rpm/SOURCES/nginx.conf
//
// Named capture groups, in line order: `remote_addr`, `remote_user`,
// `timestamp`, `request`, `status`, `body_bytes_size`, `http_referer`,
// `http_user_agent` and `http_x_forwarded_for`.
//
// Fields written as a bare `-` are matched by the first branch of their
// alternation, so the corresponding named group does not participate in the
// match. The three trailing quoted fields all accept an empty value (`""`):
// previously only `http_referer` did, which made a line with an empty
// user agent or forwarded-for field fail to match at all.
pub(crate) static REGEX_NGINX_MAIN_LOG: Lazy<Regex> = Lazy::new(|| {
    Regex::new(
        r#"(?x)                                 # Ignore whitespace and comments in the regex expression.
        ^\s*                                    # Start with any number of whitespaces
        (-|(?P<remote_addr>\S+))\s+             # Match `-` or any non space character
        \-\s+                                   # Always a dash
        (-|(?P<remote_user>\S+))\s+             # Match `-` or any non space character
        \[(?P<timestamp>[^\]]+)\]\s+            # Match date between brackets
        "(?P<request>[^"]*)"\s+                 # Match any non double-quote character
        (?P<status>\d+)\s+                      # Match numbers
        (?P<body_bytes_size>\d+)\s+             # Match numbers
        "(-|(?P<http_referer>[^"]*))"\s+        # Match `-` or any non double-quote character (may be empty)
        "(-|(?P<http_user_agent>[^"]*))"\s+     # Match `-` or any non double-quote character (may be empty)
        "(-|(?P<http_x_forwarded_for>[^"]*))"   # Match `-` or any non double-quote character (may be empty)
        \s*$                                    # Match any number of whitespaces (to be discarded).
    "#)
    .expect("failed compiling regex for Nginx main log")
});

pub(crate) static REGEX_NGINX_ERROR_LOG: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r#"(?x) # Ignore whitespace and comments in the regex expression.
Expand Down
103 changes: 103 additions & 0 deletions src/stdlib/parse_nginx_log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ fn variants() -> Vec<Value> {
value!("combined"),
value!("error"),
value!("ingress_upstreaminfo"),
value!("main"),
]
}

Expand Down Expand Up @@ -90,6 +91,13 @@ impl Function for ParseNginxLog {
r#"s'{"agent":"curl/7.75.0","client":"172.17.0.1","referer":"-","request":"GET / HTTP/1.1","size":612,"status":200,"timestamp":"2021-03-31T12:04:07Z"}'"#,
),
},
// Documentation example for the `main` format: every field of the input line
// is populated (including a remote user and a referer), so the JSON-encoded
// result contains all nine keys.
Example {
title: "parse nginx main log",
source: r#"encode_json(parse_nginx_log!(s'172.24.0.1 - alice [03/Jan/2025:16:42:58 +0000] "GET / HTTP/1.1" 200 615 "http://domain.tld/path" "curl/8.11.1" "1.2.3.4, 10.10.1.1"', "main"))"#,
result: Ok(
r#"s'{"body_bytes_size":615,"http_referer":"http://domain.tld/path","http_user_agent":"curl/8.11.1","http_x_forwarded_for":"1.2.3.4, 10.10.1.1","remote_addr":"172.24.0.1","remote_user":"alice","request":"GET / HTTP/1.1","status":200,"timestamp":"2025-01-03T16:42:58Z"}'"#,
),
},
Example {
title: "parse nginx error log",
source: r#"encode_json(parse_nginx_log!(s'2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "POST /not-found HTTP/1.1", host: "localhost:8081"', "error"))"#,
Expand All @@ -105,6 +113,7 @@ fn regex_for_format(format: &[u8]) -> &Regex {
match format {
b"combined" => &log_util::REGEX_NGINX_COMBINED_LOG,
b"ingress_upstreaminfo" => &log_util::REGEX_INGRESS_NGINX_UPSTREAMINFO_LOG,
b"main" => &log_util::REGEX_NGINX_MAIN_LOG,
b"error" => &log_util::REGEX_NGINX_ERROR_LOG,
_ => unreachable!(),
}
Expand All @@ -114,6 +123,7 @@ fn time_format_for_format(format: &[u8]) -> String {
match format {
b"combined" => "%d/%b/%Y:%T %z".to_owned(),
b"ingress_upstreaminfo" => "%d/%b/%Y:%T %z".to_owned(),
b"main" => "%d/%b/%Y:%T %z".to_owned(),
b"error" => "%Y/%m/%d %H:%M:%S".to_owned(),
_ => unreachable!(),
}
Expand Down Expand Up @@ -152,6 +162,7 @@ impl FunctionExpression for ParseNginxLogFn {
TypeDef::object(match self.format.as_ref() {
b"combined" => kind_combined(),
b"ingress_upstreaminfo" => kind_ingress_upstreaminfo(),
b"main" => kind_main(),
b"error" => kind_error(),
_ => unreachable!(),
})
Expand Down Expand Up @@ -198,6 +209,20 @@ fn kind_ingress_upstreaminfo() -> BTreeMap<Field, Kind> {
])
}

/// Describes the object produced when parsing with the `main` format.
///
/// `timestamp`, `request`, `status` and `body_bytes_size` are always typed as
/// present; every other field is typed as bytes that may be undefined.
fn kind_main() -> BTreeMap<Field, Kind> {
    // Shared kind for the fields that may be missing from the result.
    let optional_bytes = || Kind::bytes().or_undefined();

    let mut fields = BTreeMap::new();
    fields.insert("remote_addr".into(), optional_bytes());
    fields.insert("remote_user".into(), optional_bytes());
    fields.insert("timestamp".into(), Kind::timestamp());
    fields.insert("request".into(), Kind::bytes());
    fields.insert("status".into(), Kind::integer());
    fields.insert("body_bytes_size".into(), Kind::integer());
    fields.insert("http_referer".into(), optional_bytes());
    fields.insert("http_user_agent".into(), optional_bytes());
    fields.insert("http_x_forwarded_for".into(), optional_bytes());
    fields
}

fn kind_error() -> BTreeMap<Field, Kind> {
BTreeMap::from([
("timestamp".into(), Kind::timestamp()),
Expand Down Expand Up @@ -427,6 +452,84 @@ mod tests {
tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(),
}

// `main` format, no forwarded-for information: remote user, referer and the
// last quoted field are all `-`, so none of `remote_user`, `http_referer` or
// `http_x_forwarded_for` is expected in the resulting map.
main_line_valid_no_proxy {
args: func_args![
value: r#"172.24.0.3 - - [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.11.1" "-""#,
format: "main"
],
want: Ok(btreemap! {
"remote_addr" => "172.24.0.3",
"timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2024-12-31T17:32:06Z").unwrap().into()),
"request" => "GET / HTTP/1.1",
"status" => 200,
"body_bytes_size" => 615,
"http_user_agent" => "curl/8.11.1",
}),
tdef: TypeDef::object(kind_main()).fallible(),
}

// `main` format with a single address in the last quoted field, which is
// expected back verbatim as `http_x_forwarded_for`. Remote user and referer
// are `-` and therefore absent from the expected map.
main_line_valid_single_proxy {
args: func_args![
value: r#"172.24.0.3 - - [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.11.1" "172.24.0.1""#,
format: "main"
],
want: Ok(btreemap! {
"remote_addr" => "172.24.0.3",
"timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2024-12-31T17:32:06Z").unwrap().into()),
"request" => "GET / HTTP/1.1",
"status" => 200,
"body_bytes_size" => 615,
"http_user_agent" => "curl/8.11.1",
"http_x_forwarded_for" => "172.24.0.1",
}),
tdef: TypeDef::object(kind_main()).fallible(),
}

// `main` format with two comma-separated addresses in the last quoted field.
// The expected `http_x_forwarded_for` value is the whole quoted content as one
// string, including the comma and the space; it is not split into a list.
main_line_valid_two_proxies {
args: func_args![
value: r#"172.24.0.3 - - [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.11.1" "1.2.3.4, 10.10.1.1""#,
format: "main"
],
want: Ok(btreemap! {
"remote_addr" => "172.24.0.3",
"timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2024-12-31T17:32:06Z").unwrap().into()),
"request" => "GET / HTTP/1.1",
"status" => 200,
"body_bytes_size" => 615,
"http_user_agent" => "curl/8.11.1",
"http_x_forwarded_for" => "1.2.3.4, 10.10.1.1",
}),
tdef: TypeDef::object(kind_main()).fallible(),
}

// `main` format with every field populated: a named remote user (`alice`), a
// referer URL, a user agent and a two-address forwarded-for value. All nine
// keys are expected in the resulting map.
main_line_valid_all_fields {
args: func_args![
value: r#"172.24.0.2 - alice [03/Jan/2025:16:42:58 +0000] "GET / HTTP/1.1" 200 615 "http://domain.tld/path" "curl/8.11.1" "1.2.3.4, 10.10.1.1""#,
format: "main"
],
want: Ok(btreemap! {
"remote_addr" => "172.24.0.2",
"remote_user" => "alice",
"timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2025-01-03T16:42:58Z").unwrap().into()),
"request" => "GET / HTTP/1.1",
"status" => 200,
"body_bytes_size" => 615,
"http_referer" => "http://domain.tld/path",
"http_user_agent" => "curl/8.11.1",
"http_x_forwarded_for" => "1.2.3.4, 10.10.1.1",
}),
tdef: TypeDef::object(kind_main()).fallible(),
}

// Negative case: the input is shaped like an nginx error-log line, but it is
// parsed with the `main` format, so the call is expected to fail with
// "failed parsing log line".
main_line_invalid {
args: func_args![
value: r#"2025/01/03 16:41:26 [error] 31#31: *3 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 172.24.0.2, server: localhost, request: "GET /favicon.ico HTTP/1.1", host: "localhost:4080", referrer: "http://localhost:4080/""#,
format: "main"
],
want: Err("failed parsing log line"),
tdef: TypeDef::object(kind_main()).fallible(),
}

error_line_valid {
args: func_args![
value: r#"2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "POST /not-found HTTP/1.1", host: "localhost:8081""#,
Expand Down

0 comments on commit e58d4b8

Please sign in to comment.