xref: /curl/scripts/mdlinkcheck (revision 28dd14aa)
1#!/usr/bin/env perl
2#***************************************************************************
3#                                  _   _ ____  _
4#  Project                     ___| | | |  _ \| |
5#                             / __| | | | |_) | |
6#                            | (__| |_| |  _ <| |___
7#                             \___|\___/|_| \_\_____|
8#
9# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
10#
11# This software is licensed as described in the file COPYING, which
12# you should have received as part of this distribution. The terms
13# are also available at https://curl.se/docs/copyright.html.
14#
15# You may opt to use, copy, modify, merge, publish, distribute and/or sell
16# copies of the Software, and permit persons to whom the Software is
17# furnished to do so, under the terms of the COPYING file.
18#
19# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20# KIND, either express or implied.
21#
22# SPDX-License-Identifier: curl
23#
24###########################################################################
25
# URLs that are accepted as valid without being probed over the network.
# Each listed URL becomes a key with a true value.
my %whitelist = map { ($_ => 1) } (
    'https://curl.se/',
    'https://curl.se/changes.html',
    'https://curl.se/dev/advisory.html',
    'https://curl.se/dev/builds.html',
    'https://curl.se/dev/code-style.html',
    'https://curl.se/dev/contribute.html',
    'https://curl.se/dev/internals.html',
    'https://curl.se/dev/secprocess.html',
    'https://curl.se/dev/sourceactivity.html',
    'https://curl.se/docs/',
    'https://curl.se/docs/bugbounty.html',
    'https://curl.se/docs/caextract.html',
    'https://curl.se/docs/copyright.html',
    'https://curl.se/docs/install.html',
    'https://curl.se/docs/knownbugs.html',
    'https://curl.se/docs/manpage.html',
    'https://curl.se/docs/security.html',
    'https://curl.se/docs/sslcerts.html',
    'https://curl.se/docs/thanks.html',
    'https://curl.se/docs/todo.html',
    'https://curl.se/docs/vulnerabilities.html',
    'https://curl.se/libcurl/',
    'https://curl.se/libcurl/c/CURLOPT_SSLVERSION.html',
    'https://curl.se/libcurl/c/CURLOPT_SSL_CIPHER_LIST.html',
    'https://curl.se/libcurl/c/CURLOPT_TLS13_CIPHERS.html',
    'https://curl.se/libcurl/c/libcurl.html',
    'https://curl.se/logo/curl-logo.svg',
    'https://curl.se/mail/',
    'https://curl.se/mail/etiquette.html',
    'https://curl.se/mail/list.cgi?list=curl-distros',
    'https://curl.se/mail/list.cgi?list=curl-library',
    'https://curl.se/rfc/cookie_spec.html',
    'https://curl.se/rfc/rfc2255.txt',
    'https://curl.se/sponsors.html',
    'https://curl.se/support.html',

    'https://github.com/curl/curl',
    'https://github.com/curl/curl-fuzzer',
    'https://github.com/curl/curl-www',
    'https://github.com/curl/curl/discussions',
    'https://github.com/curl/curl/issues',
    'https://github.com/curl/curl/labels/help%20wanted',
    'https://github.com/curl/curl/pulls',
    );

# Ask git for every tracked markdown file. Each element is one line of
# the command output and still carries its trailing newline.
my @files = qx(git ls-files '**.md');
75
# Remember one link target together with the place it was found.
#
#   $f    - name of the markdown file containing the link
#   $line - line number of the link within $f
#   $link - the link target exactly as written in the markdown
#
# Targets that are only a "#fragment" are dropped. http/https targets are
# appended to %url, everything else is resolved relative to the directory
# of $f and appended to %flink. In both hashes the value is a string of
# "file:line " entries, one per reference.
sub storelink {
    my ($f, $line, $link) = @_;
    my $where = "$f:$line ";

    # a bare fragment stays inside the same document, nothing to verify
    return if($link =~ /^\#/);

    # only the document is verified, never the fragment within it
    $link =~ s:\#.*\z::;

    if($link =~ /^(https|http):/) {
        # remote target
        $url{$link} .= $where;
        return;
    }

    # Local target: start from the directory part of the linking file,
    # which is the file name with its last path component removed.
    my $base = $f;
    $base =~ s:([^/]*\z)::;

    # each leading "../" in the link climbs one directory level
    while($link =~ s:^\.\.\/::) {
        $base =~ s:([^/]*)\/\z::;
    }

    $flink{"./$base$link"} .= $where;
}
102
# Scan the file $f for inline markdown links and pass every target, i.e.
# the text between the parentheses of "[text](target)", to storelink()
# along with the file name and the 1-based number of the line it is on.
#
# A file that cannot be opened is silently skipped.
sub findlinks {
    my ($f) = @_;
    my $line = 1;

    # a lexical handle cannot collide with any other use of a global F
    open(my $fh, "<:crlf", $f) ||
        return;

    while(<$fh>) {
        # iterate with /g so that every link on the line is collected,
        # not only the first one
        while(/\]\(([^)]*)/g) {
            # copy the capture before any other regex can overwrite it
            my $link = $1;
            #print "$f:$line $link\n";
            storelink($f, $line, $link);
        }
        $line++;
    }
    close($fh);
}
119
# Verify that $url can be fetched.
#
# Whitelisted URLs are accepted without any network traffic. All others
# are probed by running curl: a HEAD request (-I) that follows redirects
# (-L), treats HTTP errors as failures (-f), stays silent (-s) and gives
# up after 10 seconds (-m10), retried up to two times.
#
# Returns 0 if the URL is fine, 1 if the probe produced no output.
sub checkurl {
    my ($url) = @_;

    if($whitelist{$url}) {
        #print "$url is whitelisted\n";
        return 0;
    }

    print "check $url\n";

    # The URL originates from markdown content and may hold characters
    # that are special to a shell (backticks, $, quotes). Passing the
    # command as a list makes perl execute curl directly, without a shell,
    # so the URL always arrives as a single literal argument.
    my @curlcmd = ('curl', '-ILfsm10', '--retry', '2', '--retry-delay', '5',
                   '-A', 'Mozilla/curl.se link-probe', $url);
    my @content;
    if(open(my $curl, '-|', @curlcmd)) {
        @content = <$curl>;
        close($curl);
    }

    if(!$content[0]) {
        print STDERR "FAIL\n";
        return 1; # fail
    }
    return 0; # ok
}
137
# Collect the links of every markdown file into %url and %flink.
for my $f (@files) {
    chomp $f; # drop the newline left over from the git output
    findlinks($f);
}

# Number of broken references found; a non-zero count fails the run.
my $error = 0;

# Probe every distinct URL once and report each place that refers to a
# URL that could not be fetched.
for my $u (sort keys %url) {
    my $r = checkurl($u);

    if($r) {
        # $url{$u} holds the space-separated "file:line" locations that
        # link to this URL
        for my $f (split(/ /, $url{$u})) {
            printf "%s ERROR links to missing URL %s\n", $f, $u;
            $error++;
        }
    }
}

# A linked local file has to exist and be readable; the stored paths are
# relative to the current directory.
for my $path (sort keys %flink) {
    if(! -r $path) {
        for my $f (split(/ /, $flink{$path})) {
            printf "%s ERROR links to missing file %s\n", $f, $path;
            $error++;
        }
    }
}

exit 1 if($error);
166