feat(na): add manual link checker scripts
1 parent 65711ed, commit d531040
Showing 3 changed files with 108 additions and 0 deletions.
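For context, a minimal sketch of how the two scripts are meant to be used together, pieced together from the confirmation prompts inside them; only get.urls.sh is named in the source, so the checker's file name (check.urls.sh) and the branch names are assumptions:

# Hypothetical workflow sketch; names other than get.urls.sh and urls.txt are assumptions.
hugo server &              # serve the site on http://localhost:1313
git checkout main
./get.urls.sh              # crawl the running site and save the URL list to urls.txt
git checkout test          # switch to the branch under review
./check.urls.sh            # report non-200 responses and meta-refresh redirects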
@@ -0,0 +1,47 @@
#!/bin/bash

# Function to get the HTTP response code of a URL
get_response_code() {
    local url=$1
    local response_code=$(curl -s -o /dev/null -w "%{http_code}" "$url")
    echo "$response_code"
}

# Function to check for a meta refresh tag in HTML content
check_meta_refresh() {
    local html_content=$1
    local url=$2
    if grep -q '<meta http-equiv="refresh"' <<< "$html_content"; then
        local redirect_url=$(grep -oP 'url=[^"]+' <<< "$html_content" | cut -d'=' -f2)
        local redirect_response_code=$(get_response_code "$redirect_url")
        echo "$url is redirected! Result is:"
        echo " -> $redirect_url $redirect_response_code"
    fi
}

run_checks() {
    # Loop through each URL in the file
    while IFS= read -r url; do
        # Get the HTTP response code; if it is not 200, report it
        response_code=$(get_response_code "$url")
        if [ "$response_code" -ne 200 ]; then
            echo "$url $response_code"
        fi

        # If the response code is 200, check for a meta refresh tag
        if [ "$response_code" -eq 200 ]; then
            html_content=$(curl -s "$url")
            check_meta_refresh "$html_content" "$url"
        fi
    done < urls.txt
}

echo; echo "Are you on the test branch, running hugo on http://localhost:1313, and have you already run get.urls.sh?"; echo
read -p "\"y\" to proceed, \"n\" to cancel " -n 1 -r
echo # move to a new line
if [[ ! $REPLY =~ ^[nN]$ ]]; then
    run_checks
    echo; echo "Done!"; echo
fi
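As a quick sanity check outside the wrapper, the same curl invocations the script relies on can be run by hand against a single page; a minimal sketch, where the /docs/ path is only a placeholder:

# Print only the HTTP status code for one page (same flags as get_response_code)
curl -s -o /dev/null -w "%{http_code}\n" "http://localhost:1313/docs/"

# Dump the page and look for a meta refresh tag (what check_meta_refresh greps for)
curl -s "http://localhost:1313/docs/" | grep -o '<meta http-equiv="refresh"[^>]*>'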
@@ -0,0 +1,60 @@
#!/bin/bash

# Function to crawl URLs recursively.
# Arguments: base URL, path prefix to stay under, URL to fetch, then the list of visited URLs.
function crawl_urls {
    local base_url="$1"
    local path="$2"
    local url="$3"
    local visited_urls=("${@:4}")

    # Check if the URL has already been visited
    if [[ " ${visited_urls[*]} " =~ " $url " ]]; then
        return
    fi

    # Add the current URL to the visited list
    visited_urls+=("$url")

    # Fetch the HTML content of the URL and suppress all output
    html_content=$(wget -qO- "$url" 2>/dev/null)
    wget_exit_status=$?

    # Stop if the wget command failed
    if [ $wget_exit_status -ne 0 ]; then
        return
    fi

    # Extract the href attribute of every anchor tag, skipping fragment-only links
    local links=$(echo "$html_content" | grep -oE '<a [^>]+>' | grep -oE 'href="([^"#]+)"' | sed -e 's/^href="//' -e 's/"$//')

    # Output each URL found under the current URL
    for link in $links; do
        # Construct an absolute URL if the link is relative
        if [[ $link == /* ]]; then
            link="$base_url$link"
        fi

        # Follow the link only if it is under the specified path and has not been visited before
        if [[ $link == "$base_url$path/"* && ! " ${visited_urls[*]} " =~ " $link " ]]; then
            echo "$link"
            # Recursively crawl the linked page
            crawl_urls "$base_url" "$path" "$link" "${visited_urls[@]}"
        fi
    done
}

echo; echo "Are you on the 'main' branch and running hugo on http://localhost:1313?"; echo
read -p "\"y\" to proceed, \"n\" to cancel " -n 1 -r
echo # move to a new line
if [[ ! $REPLY =~ ^[nN]$ ]]; then
    # Start crawling from the base URL with the specified path
    base_url="http://localhost:1313"
    path=""
    declare -a visited_urls=()
    crawl_urls "$base_url" "$path" "$base_url$path" "${visited_urls[@]}" | sort -u > urls.txt
    count=$(wc -l < urls.txt)
    echo "Saved $count URLs in urls.txt"
fi
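To preview what the extraction step produces for a single page, the wget/grep/sed pipeline from crawl_urls can be run on its own; a sketch, assuming hugo is serving the site on http://localhost:1313:

# List the href targets of every anchor on the front page, fragment-only links excluded
wget -qO- "http://localhost:1313" \
    | grep -oE '<a [^>]+>' \
    | grep -oE 'href="([^"#]+)"' \
    | sed -e 's/^href="//' -e 's/"$//' \
    | sort -u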
@@ -9,3 +9,4 @@ tech-doc-hugo
.DS_Store
.idea
.hugo_build.lock
urls.txt |