xref: /curl/scripts/cd2nroff (revision a787442e)
1#!/usr/bin/env perl
2#***************************************************************************
3#                                  _   _ ____  _
4#  Project                     ___| | | |  _ \| |
5#                             / __| | | | |_) | |
6#                            | (__| |_| |  _ <| |___
7#                             \___|\___/|_| \_\_____|
8#
9# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
10#
11# This software is licensed as described in the file COPYING, which
12# you should have received as part of this distribution. The terms
13# are also available at https://curl.se/docs/copyright.html.
14#
15# You may opt to use, copy, modify, merge, publish, distribute and/or sell
16# copies of the Software, and permit persons to whom the Software is
17# furnished to do so, under the terms of the COPYING file.
18#
19# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20# KIND, either express or implied.
21#
22# SPDX-License-Identifier: curl
23#
24###########################################################################
25
26=begin comment
27
28Converts a curldown file to nroff (manpage).
29
30=end comment
31=cut
32
33use strict;
34use warnings;
35
36my $cd2nroff = "0.1"; # to keep check
37my $dir;
38my $extension;
39my $keepfilename;
40
41while(@ARGV) {
42    if($ARGV[0] eq "-d") {
43        shift @ARGV;
44        $dir = shift @ARGV;
45    }
46    elsif($ARGV[0] eq "-e") {
47        shift @ARGV;
48        $extension = shift @ARGV;
49    }
50    elsif($ARGV[0] eq "-k") {
51        shift @ARGV;
52        $keepfilename = 1;
53    }
54    elsif($ARGV[0] eq "-h") {
55        print <<HELP
56Usage: cd2nroff [options] [file.md]
57
58-d <dir> Write the output to the file name from the meta-data in the
59         specified directory, instead of writing to stdout
60-e <ext> If -d is used, this option can provide an added "extension", arbitrary
61         text really, to append to the file name.
62-h       This help text,
63-v       Show version then exit
64HELP
65            ;
66        exit 0;
67    }
68    elsif($ARGV[0] eq "-v") {
69        print "cd2nroff version $cd2nroff\n";
70        exit 0;
71    }
72    else {
73        last;
74    }
75}
76
77use POSIX qw(strftime);
78my @ts;
79if (defined($ENV{SOURCE_DATE_EPOCH})) {
80    @ts = gmtime($ENV{SOURCE_DATE_EPOCH});
81} else {
82    @ts = localtime;
83}
84my $date = strftime "%Y-%m-%d", @ts;
85
86sub outseealso {
87    my (@sa) = @_;
88    my $comma = 0;
89    my @o;
90    push @o, ".SH SEE ALSO\n";
91    for my $s (sort @sa) {
92        push @o, sprintf "%s.BR $s", $comma ? ",\n": "";
93        $comma = 1;
94    }
95    push @o, "\n";
96    return @o;
97}
98
99sub outprotocols {
100    my (@p) = @_;
101    my $comma = 0;
102    my @o;
103    push @o, ".SH PROTOCOLS\n";
104
105    if($p[0] eq "TLS") {
106        push @o, "This functionality affects all TLS based protocols: HTTPS, FTPS, IMAPS, POP3S, SMTPS etc.";
107    }
108    else {
109        my @s = sort @p;
110        push @o, "This functionality affects ";
111        for my $e (sort @s) {
112            push @o, sprintf "%s%s",
113                $comma ? (($e eq $s[-1]) ? " and " : ", "): "",
114                lc($e);
115            $comma = 1;
116        }
117        if($#s == 0) {
118            if($s[0] eq "All") {
119                push @o, " supported protocols";
120            }
121            else {
122                push @o, " only";
123            }
124        }
125    }
126    push @o, "\n";
127    return @o;
128}
129
130sub outtls {
131    my (@t) = @_;
132    my $comma = 0;
133    my @o;
134    if($t[0] eq "All") {
135        push @o, "\nAll TLS backends support this option.";
136    }
137    elsif($t[0] eq "none") {
138        push @o, "\nNo TLS backend supports this option.";
139    }
140    else {
141        push @o, "\nThis option works only with the following TLS backends:\n";
142        my @s = sort @t;
143        for my $e (@s) {
144            push @o, sprintf "%s$e",
145                $comma ? (($e eq $s[-1]) ? " and " : ", "): "";
146            $comma = 1;
147        }
148    }
149    push @o, "\n";
150    return @o;
151}
152
153my %knownprotos = (
154    'DICT' => 1,
155    'FILE' => 1,
156    'FTP' => 1,
157    'FTPS' => 1,
158    'GOPHER' => 1,
159    'GOPHERS' => 1,
160    'HTTP' => 1,
161    'HTTPS' => 1,
162    'IMAP' => 1,
163    'IMAPS' => 1,
164    'LDAP' => 1,
165    'LDAPS' => 1,
166    'MQTT' => 1,
167    'POP3' => 1,
168    'POP3S' => 1,
169    'RTMP' => 1,
170    'RTMPS' => 1,
171    'RTSP' => 1,
172    'SCP' => 1,
173    'SFTP' => 1,
174    'SMB' => 1,
175    'SMBS' => 1,
176    'SMTP' => 1,
177    'SMTPS' => 1,
178    'TELNET' => 1,
179    'TFTP' => 1,
180    'WS' => 1,
181    'WSS' => 1,
182    'TLS' => 1,
183    'TCP' => 1,
184    'QUIC' => 1,
185    'All' => 1
186    );
187
188my %knowntls = (
189    'BearSSL' => 1,
190    'GnuTLS' => 1,
191    'mbedTLS' => 1,
192    'OpenSSL' => 1,
193    'rustls' => 1,
194    'Schannel' => 1,
195    'Secure Transport' => 1,
196    'wolfSSL' => 1,
197    'All' => 1,
198    'none' => 1,
199    );
200
201sub single {
202    my @seealso;
203    my @proto;
204    my @tls;
205    my $d;
206    my ($f)=@_;
207    my $copyright;
208    my $errors = 0;
209    my $fh;
210    my $line;
211    my $list;
212    my $tlslist;
213    my $section;
214    my $source;
215    my $addedin;
216    my $spdx;
217    my $start = 0;
218    my $title;
219
220    if(defined($f)) {
221        if(!open($fh, "<:crlf", "$f")) {
222            print STDERR "cd2nroff failed to open '$f' for reading: $!\n";
223            return 1;
224        }
225    }
226    else {
227        $f = "STDIN";
228        $fh = \*STDIN;
229        binmode($fh, ":crlf");
230    }
231    while(<$fh>) {
232        $line++;
233        if(!$start) {
234            if(/^---/) {
235                # header starts here
236                $start = 1;
237            }
238            next;
239        }
240        if(/^Title: *(.*)/i) {
241            $title=$1;
242        }
243        elsif(/^Section: *(.*)/i) {
244            $section=$1;
245        }
246        elsif(/^Source: *(.*)/i) {
247            $source=$1;
248        }
249        elsif(/^See-also: +(.*)/i) {
250            $list = 1; # 1 for see-also
251            push @seealso, $1;
252        }
253        elsif(/^See-also: */i) {
254            if($seealso[0]) {
255                print STDERR "$f:$line:1:ERROR: bad See-Also, needs list\n";
256                return 2;
257            }
258            $list = 1; # 1 for see-also
259        }
260        elsif(/^Protocol:/i) {
261            $list = 2; # 2 for protocol
262        }
263        elsif(/^TLS-backend:/i) {
264            $list = 3; # 3 for TLS backend
265        }
266        elsif(/^Added-in: *(.*)/i) {
267            $addedin=$1;
268            if(($addedin !~ /^[0-9.]+[0-9]\z/) &&
269               ($addedin ne "n/a")) {
270                print STDERR "$f:$line:1:ERROR: invalid version number in Added-in line: $addedin\n";
271                return 2;
272            }
273        }
274        elsif(/^ +- (.*)/i) {
275            # the only lists we support are see-also and protocol
276            if($list == 1) {
277                push @seealso, $1;
278            }
279            elsif($list == 2) {
280                push @proto, $1;
281            }
282            elsif($list == 3) {
283                push @tls, $1;
284            }
285            else {
286                print STDERR "$f:$line:1:ERROR: list item without owner?\n";
287                return 2;
288            }
289        }
290        # REUSE-IgnoreStart
291        elsif(/^C: (.*)/i) {
292            $copyright=$1;
293        }
294        elsif(/^SPDX-License-Identifier: (.*)/i) {
295            $spdx=$1;
296        }
297        # REUSE-IgnoreEnd
298        elsif(/^---/) {
299            # end of the header section
300            if(!$title) {
301                print STDERR "$f:$line:1:ERROR: no 'Title:' in $f\n";
302                return 1;
303            }
304            if(!$section) {
305                print STDERR "$f:$line:1:ERROR: no 'Section:' in $f\n";
306                return 2;
307            }
308            if(!$source) {
309                print STDERR "$f:$line:1:ERROR: no 'Source:' in $f\n";
310                return 2;
311            }
312            if(($source eq "libcurl") && !$addedin) {
313                print STDERR "$f:$line:1:ERROR: no 'Added-in:' in $f\n";
314                return 2;
315            }
316            if(!$seealso[0]) {
317                print STDERR "$f:$line:1:ERROR: no 'See-also:' present\n";
318                return 2;
319            }
320            if(!$copyright) {
321                print STDERR "$f:$line:1:ERROR: no 'C:' field present\n";
322                return 2;
323            }
324            if(!$spdx) {
325                print STDERR "$f:$line:1:ERROR: no 'SPDX-License-Identifier:' field present\n";
326                return 2;
327            }
328            if($section == 3) {
329                if(!$proto[0]) {
330                    printf STDERR "$f:$line:1:ERROR: missing Protocol:\n";
331                    exit 2;
332                }
333                my $tls = 0;
334                for my $p (@proto) {
335                    if($p eq "TLS") {
336                        $tls = 1;
337                    }
338                    if(!$knownprotos{$p}) {
339                        printf STDERR "$f:$line:1:ERROR: invalid protocol used: $p:\n";
340                        exit 2;
341                    }
342                }
343                # This is for TLS, require TLS-backend:
344                if($tls) {
345                    if(!$tls[0]) {
346                        printf STDERR "$f:$line:1:ERROR: missing TLS-backend:\n";
347                        exit 2;
348                    }
349                    for my $t (@tls) {
350                        if(!$knowntls{$t}) {
351                            printf STDERR "$f:$line:1:ERROR: invalid TLS backend: $t:\n";
352                            exit 2;
353                        }
354                    }
355                }
356            }
357            last;
358        }
359        else {
360            chomp;
361            print STDERR "$f:$line:1:ERROR: unrecognized header keyword: '$_'\n";
362            $errors++;
363        }
364    }
365
366    if(!$start) {
367        print STDERR "$f:$line:1:ERROR: no header present\n";
368        return 2;
369    }
370
371    my @desc;
372    my $quote = 0;
373    my $blankline = 0;
374    my $header = 0;
375
376    # cut off the leading path from the file name, if any
377    $f =~ s/^(.*[\\\/])//;
378
379    push @desc, ".\\\" generated by cd2nroff $cd2nroff from $f\n";
380    push @desc, ".TH $title $section \"$date\" $source\n";
381    while(<$fh>) {
382        $line++;
383
384        $d = $_;
385
386        if($quote) {
387            if($quote == 4) {
388                # remove the indentation
389                if($d =~ /^    (.*)/) {
390                    push @desc, "$1\n";
391                    next;
392                }
393                else {
394                    # end of quote
395                    $quote = 0;
396                    push @desc, ".fi\n";
397                    next;
398                }
399            }
400            if(/^~~~/) {
401                # end of quote
402                $quote = 0;
403                push @desc, ".fi\n";
404                next;
405            }
406            # convert single backslahes to doubles
407            $d =~ s/\\/\\\\/g;
408            # lines starting with a period needs it escaped
409            $d =~ s/^\./\\&./;
410            push @desc, $d;
411            next;
412        }
413
414        # remove single line HTML comments
415        $d =~ s/<!--.*?-->//g;
416
417        # **bold**
418        $d =~ s/\*\*(\S.*?)\*\*/\\fB$1\\fP/g;
419        # *italics*
420        $d =~ s/\*(\S.*?)\*/\\fI$1\\fP/g;
421
422        if($d =~ /[^\\][\<\>]/) {
423            print STDERR "$f:$line:1:ERROR: un-escaped < or > used\n";
424            $errors++;
425        }
426        # convert backslash-'<' or '> to just the second character
427        $d =~ s/\\([<>])/$1/g;
428
429        # mentions of curl symbols with manpages use italics by default
430        $d =~ s/((lib|)curl([^ ]*\(3\)))/\\fI$1\\fP/gi;
431
432        # backticked becomes italics
433        $d =~ s/\`(.*?)\`/\\fI$1\\fP/g;
434
435        if(/^## (.*)/) {
436            my $word = $1;
437            # if there are enclosing quotes, remove them first
438            $word =~ s/[\"\'\`](.*)[\"\'\`]\z/$1/;
439
440            # enclose in double quotes if there is a space present
441            if($word =~ / /) {
442                push @desc, ".IP \"$word\"\n";
443            }
444            else {
445                push @desc, ".IP $word\n";
446            }
447            $header = 1;
448        }
449        elsif(/^##/) {
450            # end of IP sequence
451            push @desc, ".PP\n";
452            $header = 1;
453        }
454        elsif(/^# (.*)/) {
455            my $word = $1;
456            # if there are enclosing quotes, remove them first
457            $word =~ s/[\"\'](.*)[\"\']\z/$1/;
458
459            if($word eq "PROTOCOLS") {
460                print STDERR "$f:$line:1:WARN: PROTOCOLS section in source file\n";
461            }
462            elsif($word eq "AVAILABILITY") {
463                print STDERR "$f:$line:1:WARN: AVAILABILITY section in source file\n";
464            }
465            elsif($word eq "%PROTOCOLS%") {
466                # insert the generated PROTOCOLS section
467                push @desc, outprotocols(@proto);
468
469                if($proto[0] eq "TLS") {
470                    push @desc, outtls(@tls);
471                }
472                $header = 1;
473                next;
474            }
475            elsif($word eq "%AVAILABILITY%") {
476                if($addedin ne "n/a") {
477                    # insert the generated AVAILABILITY section
478                    push @desc, ".SH AVAILABILITY\n";
479                    push @desc, "Added in curl $addedin\n";
480                }
481                $header = 1;
482                next;
483            }
484            push @desc, ".SH $word\n";
485            $header = 1;
486        }
487        elsif(/^~~~c/) {
488            # start of a code section, not indented
489            $quote = 1;
490            push @desc, "\n" if($blankline && !$header);
491            $header = 0;
492            push @desc, ".nf\n";
493        }
494        elsif(/^~~~/) {
495            # start of a quote section; not code, not indented
496            $quote = 1;
497            push @desc, "\n" if($blankline && !$header);
498            $header = 0;
499            push @desc, ".nf\n";
500        }
501        elsif(/^    (.*)/) {
502            # quoted, indented by 4 space
503            $quote = 4;
504            push @desc, "\n" if($blankline && !$header);
505            $header = 0;
506            push @desc, ".nf\n$1\n";
507        }
508        elsif(/^[ \t]*\n/) {
509            # count and ignore blank lines
510            $blankline++;
511        }
512        else {
513            # don't output newlines if this is the first content after a
514            # header
515            push @desc, "\n" if($blankline && !$header);
516            $blankline = 0;
517            $header = 0;
518
519            # quote minuses in the output
520            $d =~ s/([^\\])-/$1\\-/g;
521            # replace single quotes
522            $d =~ s/\'/\\(aq/g;
523            # handle double quotes first on the line
524            $d =~ s/^(\s*)\"/$1\\&\"/;
525
526            # lines starting with a period needs it escaped
527            $d =~ s/^\./\\&./;
528
529            if($d =~ /^(.*)  /) {
530                printf STDERR "$f:$line:%d:ERROR: 2 spaces detected\n",
531                    length($1);
532                $errors++;
533            }
534            if($d =~ /^[ \t]*\n/) {
535                # replaced away all contents
536                $blankline= 1;
537            }
538            else {
539                push @desc, $d;
540            }
541        }
542    }
543    if($fh != \*STDIN) {
544        close($fh);
545    }
546    push @desc, outseealso(@seealso);
547    if($dir) {
548        if($keepfilename) {
549            $title = $f;
550            $title =~ s/\.[^.]*$//;
551        }
552        my $outfile = "$dir/$title.$section";
553        if(defined($extension)) {
554            $outfile .= $extension;
555        }
556        if(!open(O, ">", $outfile)) {
557            print STDERR "Failed to open $outfile : $!\n";
558            return 1;
559        }
560        print O @desc;
561        close(O);
562    }
563    else {
564        print @desc;
565    }
566    return $errors;
567}
568
569if(@ARGV) {
570    for my $f (@ARGV) {
571        my $r = single($f);
572        if($r) {
573            exit $r;
574        }
575    }
576}
577else {
578    exit single();
579}
580