#!/usr/bin/env perl
#***************************************************************************
#                                  _   _ ____  _
#  Project                     ___| | | |  _ \| |
#                             / __| | | | |_) | |
#                            | (__| |_| |  _ <| |___
#                             \___|\___/|_| \_\_____|
#
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://curl.se/docs/copyright.html.
#
# You may opt to use, copy, modify, merge, publish, distribute and/or sell
# copies of the Software, and permit persons to whom the Software is
# furnished to do so, under the terms of the COPYING file.
#
# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
# KIND, either express or implied.
#
# SPDX-License-Identifier: curl
#
###########################################################################

# Scan every markdown file tracked by git, collect the targets of inline
# links ("](target)"), and verify them: http(s) URLs are probed with curl
# and everything else is treated as a file path that must be readable.
# Exits with status 1 if at least one broken link was reported.

use strict;
use warnings;

# URLs that are accepted without being probed.
my %whitelist = (
    'https://curl.se/' => 1,
    'https://curl.se/changes.html' => 1,
    'https://curl.se/dev/advisory.html' => 1,
    'https://curl.se/dev/builds.html' => 1,
    'https://curl.se/dev/code-style.html' => 1,
    'https://curl.se/dev/contribute.html' => 1,
    'https://curl.se/dev/internals.html' => 1,
    'https://curl.se/dev/secprocess.html' => 1,
    'https://curl.se/dev/sourceactivity.html' => 1,
    'https://curl.se/docs/' => 1,
    'https://curl.se/docs/bugbounty.html' => 1,
    'https://curl.se/docs/caextract.html' => 1,
    'https://curl.se/docs/copyright.html' => 1,
    'https://curl.se/docs/install.html' => 1,
    'https://curl.se/docs/knownbugs.html' => 1,
    'https://curl.se/docs/manpage.html' => 1,
    'https://curl.se/docs/security.html' => 1,
    'https://curl.se/docs/sslcerts.html' => 1,
    'https://curl.se/docs/thanks.html' => 1,
    'https://curl.se/docs/todo.html' => 1,
    'https://curl.se/docs/vulnerabilities.html' => 1,
    'https://curl.se/libcurl/' => 1,
    'https://curl.se/libcurl/c/CURLOPT_SSLVERSION.html' => 1,
    'https://curl.se/libcurl/c/CURLOPT_SSL_CIPHER_LIST.html' => 1,
    'https://curl.se/libcurl/c/CURLOPT_TLS13_CIPHERS.html' => 1,
    'https://curl.se/libcurl/c/libcurl.html' => 1,
    'https://curl.se/logo/curl-logo.svg' => 1,
    'https://curl.se/mail/' => 1,
    'https://curl.se/mail/etiquette.html' => 1,
    'https://curl.se/mail/list.cgi?list=curl-distros' => 1,
    'https://curl.se/mail/list.cgi?list=curl-library' => 1,
    'https://curl.se/rfc/cookie_spec.html' => 1,
    'https://curl.se/rfc/rfc2255.txt' => 1,
    'https://curl.se/sponsors.html' => 1,
    'https://curl.se/support.html' => 1,

    'https://github.com/curl/curl' => 1,
    'https://github.com/curl/curl-fuzzer' => 1,
    'https://github.com/curl/curl-www' => 1,
    'https://github.com/curl/curl/discussions' => 1,
    'https://github.com/curl/curl/issues' => 1,
    'https://github.com/curl/curl/labels/help%20wanted' => 1,
    'https://github.com/curl/curl/pulls' => 1,

    );

# URL => space-separated list of "file:line" spots that link to it
my %url;

# local file path => space-separated list of "file:line" spots linking to it
my %flink;

# list all .md files in the repo
my @files=`git ls-files '**.md'`;

# Record a single link found in a markdown file.
#
# $f    - name of the markdown file holding the link
# $line - line number of the link within $f
# $link - the link target exactly as written between the parentheses
#
# http(s) targets are stored in %url. Other targets are stored in %flink as
# paths resolved against the directory of $f. Links that only consist of an
# anchor are ignored.
sub storelink {
    my ($f, $line, $link) = @_;

    if($link =~ /^\#/) {
        # ignore local-only links
        return;
    }
    # cut off any anchor
    $link =~ s:\#.*\z::;

    if($link =~ /^(https|http):/) {
        $url{$link} .= "$f:$line ";
        return;
    }

    # a file link: start from the directory part of the linking file
    my $dir = $f;
    $dir =~ s:([^/]*\z)::;

    # every leading "../" in the link strips one trailing directory level
    while($link =~ s:^\.\.\/::) {
        $dir =~ s:([^/]*)\/\z::;
    }

    $flink{"./$dir$link"} .= "$f:$line ";
}

# Read the markdown file $f and pass every inline link in it to storelink().
# A file that cannot be opened is silently skipped.
sub findlinks {
    my ($f) = @_;
    my $line = 1;
    open(my $fh, "<:crlf", $f) ||
        return;

    while(my $text = <$fh>) {
        # /g in a loop: pick up every link on the line, not just the first
        while($text =~ /\]\(([^)]*)/g) {
            my $link = $1;
            #print "$f:$line $link\n";
            storelink($f, $line, $link);
        }
        $line++;
    }
    close($fh);
}

# Probe $url with a curl HEAD request.
#
# Returns 0 when the URL is whitelisted or curl produced output for it,
# 1 when curl could not be started or returned nothing.
sub checkurl {
    my ($url) = @_;

    if($whitelist{$url}) {
        #print "$url is whitelisted\n";
        return 0;
    }

    print "check $url\n";

    # Run curl through a list-form pipe open so that no shell is involved:
    # the URL comes from markdown content and must not be able to break out
    # of quoting or inject commands.
    my @curlcmd = ('curl', '-ILfsm10', '--retry', '2', '--retry-delay', '5',
                   '-A', 'Mozilla/curl.se link-probe', $url);
    my $pipe;
    if(!open($pipe, '-|', @curlcmd)) {
        print STDERR "FAIL\n";
        return 1; # fail
    }
    my @content = <$pipe>;
    close($pipe);

    if(!$content[0]) {
        print STDERR "FAIL\n";
        return 1; # fail
    }
    return 0; # ok
}

for my $f (@files) {
    chomp $f;
    findlinks($f);
}

my $error = 0;

for my $u (sort keys %url) {
    my $r = checkurl($u);

    if($r) {
        # report every place that refers to this URL
        for my $f (split(/ /, $url{$u})) {
            printf "%s ERROR links to missing URL %s\n", $f, $u;
            $error++;
        }
    }
}

for my $path (sort keys %flink) {
    if(! -r $path) {
        for my $f (split(/ /, $flink{$path})) {
            printf "%s ERROR links to missing file %s\n", $f, $path;
            $error++;
        }
    }
}

exit 1 if($error);