Skip to content

Commit

Permalink
trurl: canonicalize the path
Browse files Browse the repository at this point in the history
trurl now URL-decodes and then URL-encodes each segment of the path, so
that %-sequences for characters that do not need encoding are shown as
plain ASCII, and the remaining %-sequences are unified to lowercase hex etc.

Add test cases to verify

Fixes #329
Closes #331
  • Loading branch information
bagder committed Aug 27, 2024
1 parent dcea239 commit ee9ab20
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 0 deletions.
26 changes: 26 additions & 0 deletions tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -2650,5 +2650,31 @@
"stderr": "trurl note: URL decode error, most likely because of rubbish in the input (path)\n",
"returncode": 0
}
},
{
"input": {
"arguments": [
"https://example.com/one/t%61o/%2F%42/"
]
},
"expected": {
"stdout": "https://example.com/one/tao/%2fB/\n",
"stderr": "",
"returncode": 0
}
},
{
"input": {
"arguments": [
"https://example.com/one/t%61o/%2F%42/",
"--append",
"path=%61"
]
},
"expected": {
"stdout": "https://example.com/one/tao/%2fB/%2561\n",
"stderr": "",
"returncode": 0
}
}
]
66 changes: 66 additions & 0 deletions trurl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1573,6 +1573,59 @@ static CURLUcode seturl(struct option *o, CURLU *uh, const char *url)
CURLU_URLENCODE);
}

/*
 * canonical_path() returns a newly allocated copy of 'path' in which every
 * slash-separated segment has been URL-decoded and then URL-encoded again.
 * The literal slashes in the input are kept as segment separators; anything
 * that decodes to a slash inside a segment is handed to curl_easy_escape()
 * as ordinary segment data.
 *
 * 'path' must be a null-terminated string. An empty 'path' yields an
 * allocated empty string.
 *
 * Returns NULL if any libcurl decode, encode or allocation call fails. The
 * caller owns the returned string and must release it with curl_free().
 */
static char *canonical_path(const char *path)
{
  size_t len = strlen(path); /* bytes of 'path' not yet processed */
  const char *sl;
  char *dupe = NULL;         /* the canonical path built so far */

  do {
    char *ndupe;
    size_t partlen;

    /* find the end of the current segment */
    sl = memchr(path, '/', len);
    partlen = sl ? (size_t)(sl - path) : len;

    if(partlen) {
      char *opath;
      char *npath;
      int olen;

      /* First URL decode the part. A zero length would make libcurl call
         strlen() on the input, which is why empty segments never get
         here. */
      opath = curl_easy_unescape(NULL, path, (int)partlen, &olen);
      if(!opath)
        goto fail;

      /* Then URL encode it again. The decoded copy is released before the
         result is checked so that it cannot leak on failure. */
      npath = curl_easy_escape(NULL, opath, olen);
      curl_free(opath);
      if(!npath)
        goto fail;

      ndupe = curl_maprintf("%s%s%s", dupe ? dupe : "", npath,
                            sl ? "/" : "");
      curl_free(npath);
    }
    else if(sl) {
      /* zero length part but a slash */
      ndupe = curl_maprintf("%s/", dupe ? dupe : "");
    }
    else {
      /* no part, no slash */
      break;
    }

    /* the old accumulated string is replaced by the extended one */
    curl_free(dupe);
    dupe = ndupe;
    if(!dupe)
      return NULL;

    if(sl) {
      /* continue right after the slash */
      path = sl + 1;
      len -= partlen + 1;
    }

  } while(sl);

  if(!dupe)
    /* only reachable for an empty input: return an empty string rather than
       NULL so that it is not mistaken for an allocation failure */
    dupe = curl_maprintf("%s", "");

  return dupe;

fail:
  /* drop the partial result so that no error path leaks memory */
  curl_free(dupe);
  return NULL;
}

static void singleurl(struct option *o,
const char *url, /* might be NULL */
struct iterinfo *iinfo,
Expand Down Expand Up @@ -1687,6 +1740,7 @@ static void singleurl(struct option *o,
if(first_lap) {
/* extract the current path */
char *opath;
char *cpath;
bool path_is_modified = false;
if(curl_url_get(uh, CURLUPART_PATH, &opath, 0))
errorf(o, ERROR_ITER, "out of memory");
Expand All @@ -1709,6 +1763,18 @@ static void singleurl(struct option *o,
opath = npath;
path_is_modified = true;
}
cpath = canonical_path(opath);
if(!cpath)
errorf(o, ERROR_MEM, "out of memory");

if(strcmp(cpath, opath)) {
/* updated */
path_is_modified = true;
curl_free(opath);
opath = cpath;
}
else
curl_free(cpath);
if(path_is_modified) {
/* set the new path */
if(curl_url_set(uh, CURLUPART_PATH, opath, 0))
Expand Down

0 comments on commit ee9ab20

Please sign in to comment.