#!/bin/sh
#
# @author Kurt Micheli <kurt.micheli@libelektra.org>
# @brief Checks whether http, https and ftp links are broken.
# @date 05.09.2016
# @tags validation

# The link file is mandatory. Calling the script without it is a usage error,
# so the hint goes to stderr and the exit status is non-zero.
if [ -z "$1" ]; then
	echo "Usage: link-checker <linkfile>" >&2
	exit 1
fi

# Upper bound on the number of check() jobs that are started in the
# background before the script waits for them to finish.
NUMTHREADS=10

# Checks the link contained in one line of the link file.
#
# Arguments: $1 - one line of the link file; everything from the first
#                 "http:", "https:" or "ftp:" up to the end of the line is
#                 treated as the link
# Globals:   link (written; POSIX sh has no function-local variables)
# Outputs:   stdout - a hint if the line contains no http/https/ftp link
#            stderr - the unmodified line if the link could not be fetched
# Returns:   always 0; broken links are reported via stderr only
check() {
	case "$1" in
	# Ignore links that contain version placeholder
	*'doc/news/_preparation_next_release.md'*'<<VERSION>>'*) return 0 ;;
	# The following link only works if you are logged into the build server
	# See also: https://github.com/ElektraInitiative/libelektra/issues/2674
	*'https://build.libelektra.org/jenkins/job/libelektra/pipeline-syntax'*) return 0 ;;
	esac

	# printf instead of echo: echo may interpret backslash sequences in "$1"
	link=$(printf '%s\n' "$1" | grep -oE '(https|http|ftp):.*')
	if [ -z "$link" ]; then
		printf 'check the link format of %s\n' "$1"
		return 0
	fi

	# First only probe the link without downloading it. If the probe fails,
	# retry with a real download (discarding the data) before the link is
	# reported as broken.
	if ! wget --spider --quiet "$link" && ! wget -O - --quiet "$link" > /dev/null; then
		printf '%s\n' "$1" >&2
	fi
}

# Run check() on every line of the link file. The checks are started as
# background jobs in batches of NUMTHREADS, and a progress counter of the
# form "<started>/<total>" is rewritten in place on stdout.
COUNTLINKS=0
THREADCOUNT=0
# grep -c '' also counts a final line that lacks a trailing newline, which
# wc -l would miss; the loop below processes such a line as well.
NUMLINKS=$(grep -c '' < "$1")

# "|| [ -n "$line" ]" keeps the last line if the file does not end in a newline
while read -r line || [ -n "$line" ]; do
	check "$line" &
	THREADCOUNT=$((THREADCOUNT + 1))
	# Count first, then print, so that the final progress reads N/N
	COUNTLINKS=$((COUNTLINKS + 1))
	printf "%i/%i\r" "$COUNTLINKS" "$NUMLINKS"
	if [ "$THREADCOUNT" -ge "$NUMTHREADS" ]; then
		# Batch is full: block until all running checks have finished
		wait
		THREADCOUNT=0
	fi
done < "$1"

# Wait for the checks of the last, possibly incomplete batch
wait

# Terminate the progress line, which so far only ended in a carriage return
echo ""
