xref: /curl/scripts/cd2nroff (revision 28ca199d)
1#!/usr/bin/env perl
2#***************************************************************************
3#                                  _   _ ____  _
4#  Project                     ___| | | |  _ \| |
5#                             / __| | | | |_) | |
6#                            | (__| |_| |  _ <| |___
7#                             \___|\___/|_| \_\_____|
8#
9# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
10#
11# This software is licensed as described in the file COPYING, which
12# you should have received as part of this distribution. The terms
13# are also available at https://curl.se/docs/copyright.html.
14#
15# You may opt to use, copy, modify, merge, publish, distribute and/or sell
16# copies of the Software, and permit persons to whom the Software is
17# furnished to do so, under the terms of the COPYING file.
18#
19# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20# KIND, either express or implied.
21#
22# SPDX-License-Identifier: curl
23#
24###########################################################################
25
26=begin comment
27
28Converts a curldown file to nroff (manpage).
29
30=end comment
31=cut
32
33use strict;
34use warnings;
35
36my $cd2nroff = "0.1"; # to keep check
37my $dir;
38my $extension;
39my $keepfilename;
40
41while(@ARGV) {
42    if($ARGV[0] eq "-d") {
43        shift @ARGV;
44        $dir = shift @ARGV;
45    }
46    elsif($ARGV[0] eq "-e") {
47        shift @ARGV;
48        $extension = shift @ARGV;
49    }
50    elsif($ARGV[0] eq "-k") {
51        shift @ARGV;
52        $keepfilename = 1;
53    }
54    elsif($ARGV[0] eq "-h") {
55        print <<HELP
56Usage: cd2nroff [options] [file.md]
57
58-d <dir> Write the output to the file name from the meta-data in the
59         specified directory, instead of writing to stdout
60-e <ext> If -d is used, this option can provide an added "extension", arbitrary
61         text really, to append to the file name.
62-h       This help text,
63-v       Show version then exit
64HELP
65            ;
66        exit 0;
67    }
68    elsif($ARGV[0] eq "-v") {
69        print "cd2nroff version $cd2nroff\n";
70        exit 0;
71    }
72    else {
73        last;
74    }
75}
76
77use POSIX qw(strftime);
78my @ts;
79if (defined($ENV{SOURCE_DATE_EPOCH})) {
80    @ts = gmtime($ENV{SOURCE_DATE_EPOCH});
81} else {
82    @ts = localtime;
83}
84my $date = strftime "%Y-%m-%d", @ts;
85
86sub outseealso {
87    my (@sa) = @_;
88    my $comma = 0;
89    my @o;
90    push @o, ".SH SEE ALSO\n";
91    for my $s (sort @sa) {
92        push @o, sprintf "%s.BR $s", $comma ? ",\n": "";
93        $comma = 1;
94    }
95    push @o, "\n";
96    return @o;
97}
98
99sub outprotocols {
100    my (@p) = @_;
101    my $comma = 0;
102    my @o;
103    push @o, ".SH PROTOCOLS\n";
104
105    if($p[0] eq "TLS") {
106        push @o, "This functionality affects all TLS based protocols: HTTPS, FTPS, IMAPS, POP3S, SMTPS etc.";
107    }
108    else {
109        my @s = sort @p;
110        push @o, "This functionality affects ";
111        for my $e (sort @s) {
112            push @o, sprintf "%s%s",
113                $comma ? (($e eq $s[-1]) ? " and " : ", "): "",
114                lc($e);
115            $comma = 1;
116        }
117        if($#s == 0) {
118            if($s[0] eq "All") {
119                push @o, " supported protocols";
120            }
121            else {
122                push @o, " only";
123            }
124        }
125    }
126    push @o, "\n";
127    return @o;
128}
129
130sub outtls {
131    my (@t) = @_;
132    my $comma = 0;
133    my @o;
134    if($t[0] eq "All") {
135        push @o, "\nAll TLS backends support this option.";
136    }
137    else {
138        push @o, "\nThis option works only with the following TLS backends:\n";
139        my @s = sort @t;
140        for my $e (@s) {
141            push @o, sprintf "%s$e",
142                $comma ? (($e eq $s[-1]) ? " and " : ", "): "";
143            $comma = 1;
144        }
145    }
146    push @o, "\n";
147    return @o;
148}
149
150my %knownprotos = (
151    'DICT' => 1,
152    'FILE' => 1,
153    'FTP' => 1,
154    'FTPS' => 1,
155    'GOPHER' => 1,
156    'GOPHERS' => 1,
157    'HTTP' => 1,
158    'HTTPS' => 1,
159    'IMAP' => 1,
160    'IMAPS' => 1,
161    'LDAP' => 1,
162    'LDAPS' => 1,
163    'MQTT' => 1,
164    'POP3' => 1,
165    'POP3S' => 1,
166    'RTMP' => 1,
167    'RTMPS' => 1,
168    'RTSP' => 1,
169    'SCP' => 1,
170    'SFTP' => 1,
171    'SMB' => 1,
172    'SMBS' => 1,
173    'SMTP' => 1,
174    'SMTPS' => 1,
175    'TELNET' => 1,
176    'TFTP' => 1,
177    'WS' => 1,
178    'WSS' => 1,
179    'TLS' => 1,
180    'TCP' => 1,
181    'QUIC' => 1,
182    'All' => 1
183    );
184
185my %knowntls = (
186    'BearSSL' => 1,
187    'GnuTLS' => 1,
188    'mbedTLS' => 1,
189    'OpenSSL' => 1,
190    'rustls' => 1,
191    'Schannel' => 1,
192    'Secure Transport' => 1,
193    'wolfSSL' => 1,
194    'All' => 1,
195    );
196
197sub single {
198    my @seealso;
199    my @proto;
200    my @tls;
201    my $d;
202    my ($f)=@_;
203    my $copyright;
204    my $errors = 0;
205    my $fh;
206    my $line;
207    my $list;
208    my $tlslist;
209    my $section;
210    my $source;
211    my $addedin;
212    my $spdx;
213    my $start = 0;
214    my $title;
215
216    if(defined($f)) {
217        if(!open($fh, "<:crlf", "$f")) {
218            print STDERR "cd2nroff failed to open '$f' for reading: $!\n";
219            return 1;
220        }
221    }
222    else {
223        $f = "STDIN";
224        $fh = \*STDIN;
225        binmode($fh, ":crlf");
226    }
227    while(<$fh>) {
228        $line++;
229        if(!$start) {
230            if(/^---/) {
231                # header starts here
232                $start = 1;
233            }
234            next;
235        }
236        if(/^Title: *(.*)/i) {
237            $title=$1;
238        }
239        elsif(/^Section: *(.*)/i) {
240            $section=$1;
241        }
242        elsif(/^Source: *(.*)/i) {
243            $source=$1;
244        }
245        elsif(/^See-also: +(.*)/i) {
246            $list = 1; # 1 for see-also
247            push @seealso, $1;
248        }
249        elsif(/^See-also: */i) {
250            if($seealso[0]) {
251                print STDERR "$f:$line:1:ERROR: bad See-Also, needs list\n";
252                return 2;
253            }
254            $list = 1; # 1 for see-also
255        }
256        elsif(/^Protocol:/i) {
257            $list = 2; # 2 for protocol
258        }
259        elsif(/^TLS-backend:/i) {
260            $list = 3; # 3 for TLS backend
261        }
262        elsif(/^Added-in: *(.*)/i) {
263            $addedin=$1;
264            if(($addedin !~ /^[0-9.]+[0-9]\z/) &&
265               ($addedin ne "n/a")) {
266                print STDERR "$f:$line:1:ERROR: invalid version number in Added-in line: $addedin\n";
267                return 2;
268            }
269        }
270        elsif(/^ +- (.*)/i) {
271            # the only lists we support are see-also and protocol
272            if($list == 1) {
273                push @seealso, $1;
274            }
275            elsif($list == 2) {
276                push @proto, $1;
277            }
278            elsif($list == 3) {
279                push @tls, $1;
280            }
281            else {
282                print STDERR "$f:$line:1:ERROR: list item without owner?\n";
283                return 2;
284            }
285        }
286        # REUSE-IgnoreStart
287        elsif(/^C: (.*)/i) {
288            $copyright=$1;
289        }
290        elsif(/^SPDX-License-Identifier: (.*)/i) {
291            $spdx=$1;
292        }
293        # REUSE-IgnoreEnd
294        elsif(/^---/) {
295            # end of the header section
296            if(!$title) {
297                print STDERR "$f:$line:1:ERROR: no 'Title:' in $f\n";
298                return 1;
299            }
300            if(!$section) {
301                print STDERR "$f:$line:1:ERROR: no 'Section:' in $f\n";
302                return 2;
303            }
304            if(!$source) {
305                print STDERR "$f:$line:1:ERROR: no 'Source:' in $f\n";
306                return 2;
307            }
308            if(($source eq "libcurl") && !$addedin) {
309                print STDERR "$f:$line:1:ERROR: no 'Added-in:' in $f\n";
310                return 2;
311            }
312            if(!$seealso[0]) {
313                print STDERR "$f:$line:1:ERROR: no 'See-also:' present\n";
314                return 2;
315            }
316            if(!$copyright) {
317                print STDERR "$f:$line:1:ERROR: no 'C:' field present\n";
318                return 2;
319            }
320            if(!$spdx) {
321                print STDERR "$f:$line:1:ERROR: no 'SPDX-License-Identifier:' field present\n";
322                return 2;
323            }
324            if($section == 3) {
325                if(!$proto[0]) {
326                    printf STDERR "$f:$line:1:ERROR: missing Protocol:\n";
327                    exit 2;
328                }
329                my $tls = 0;
330                for my $p (@proto) {
331                    if($p eq "TLS") {
332                        $tls = 1;
333                    }
334                    if(!$knownprotos{$p}) {
335                        printf STDERR "$f:$line:1:ERROR: invalid protocol used: $p:\n";
336                        exit 2;
337                    }
338                }
339                # This is for TLS, require TLS-backend:
340                if($tls) {
341                    if(!$tls[0]) {
342                        printf STDERR "$f:$line:1:ERROR: missing TLS-backend:\n";
343                        exit 2;
344                    }
345                    for my $t (@tls) {
346                        if(!$knowntls{$t}) {
347                            printf STDERR "$f:$line:1:ERROR: invalid TLS backend: $t:\n";
348                            exit 2;
349                        }
350                    }
351                }
352            }
353            last;
354        }
355        else {
356            chomp;
357            print STDERR "$f:$line:1:ERROR: unrecognized header keyword: '$_'\n";
358            $errors++;
359        }
360    }
361
362    if(!$start) {
363        print STDERR "$f:$line:1:ERROR: no header present\n";
364        return 2;
365    }
366
367    my @desc;
368    my $quote = 0;
369    my $blankline = 0;
370    my $header = 0;
371
372    # cut off the leading path from the file name, if any
373    $f =~ s/^(.*[\\\/])//;
374
375    push @desc, ".\\\" generated by cd2nroff $cd2nroff from $f\n";
376    push @desc, ".TH $title $section \"$date\" $source\n";
377    while(<$fh>) {
378        $line++;
379
380        $d = $_;
381
382        if($quote) {
383            if($quote == 4) {
384                # remove the indentation
385                if($d =~ /^    (.*)/) {
386                    push @desc, "$1\n";
387                    next;
388                }
389                else {
390                    # end of quote
391                    $quote = 0;
392                    push @desc, ".fi\n";
393                    next;
394                }
395            }
396            if(/^~~~/) {
397                # end of quote
398                $quote = 0;
399                push @desc, ".fi\n";
400                next;
401            }
402            # convert single backslahes to doubles
403            $d =~ s/\\/\\\\/g;
404            # lines starting with a period needs it escaped
405            $d =~ s/^\./\\&./;
406            push @desc, $d;
407            next;
408        }
409
410        # remove single line HTML comments
411        $d =~ s/<!--.*?-->//g;
412
413        # **bold**
414        $d =~ s/\*\*(\S.*?)\*\*/\\fB$1\\fP/g;
415        # *italics*
416        $d =~ s/\*(\S.*?)\*/\\fI$1\\fP/g;
417
418        if($d =~ /[^\\][\<\>]/) {
419            print STDERR "$f:$line:1:ERROR: un-escaped < or > used\n";
420            $errors++;
421        }
422        # convert backslash-'<' or '> to just the second character
423        $d =~ s/\\([<>])/$1/g;
424
425        # mentions of curl symbols with manpages use italics by default
426        $d =~ s/((lib|)curl([^ ]*\(3\)))/\\fI$1\\fP/gi;
427
428        # backticked becomes italics
429        $d =~ s/\`(.*?)\`/\\fI$1\\fP/g;
430
431        if(/^## (.*)/) {
432            my $word = $1;
433            # if there are enclosing quotes, remove them first
434            $word =~ s/[\"\'\`](.*)[\"\'\`]\z/$1/;
435
436            # enclose in double quotes if there is a space present
437            if($word =~ / /) {
438                push @desc, ".IP \"$word\"\n";
439            }
440            else {
441                push @desc, ".IP $word\n";
442            }
443            $header = 1;
444        }
445        elsif(/^##/) {
446            # end of IP sequence
447            push @desc, ".PP\n";
448            $header = 1;
449        }
450        elsif(/^# (.*)/) {
451            my $word = $1;
452            # if there are enclosing quotes, remove them first
453            $word =~ s/[\"\'](.*)[\"\']\z/$1/;
454
455            if($word eq "PROTOCOLS") {
456                print STDERR "$f:$line:1:WARN: PROTOCOLS section in source file\n";
457            }
458            elsif($word eq "AVAILABILITY") {
459                print STDERR "$f:$line:1:WARN: AVAILABILITY section in source file\n";
460            }
461            elsif($word eq "%PROTOCOLS%") {
462                # insert the generated PROTOCOLS section
463                push @desc, outprotocols(@proto);
464
465                if($proto[0] eq "TLS") {
466                    push @desc, outtls(@tls);
467                }
468                $header = 1;
469                next;
470            }
471            elsif($word eq "%AVAILABILITY%") {
472                if($addedin ne "n/a") {
473                    # insert the generated AVAILABILITY section
474                    push @desc, ".SH AVAILABILITY\n";
475                    push @desc, "Added in curl $addedin\n";
476                }
477                $header = 1;
478                next;
479            }
480            push @desc, ".SH $word\n";
481            $header = 1;
482        }
483        elsif(/^~~~c/) {
484            # start of a code section, not indented
485            $quote = 1;
486            push @desc, "\n" if($blankline && !$header);
487            $header = 0;
488            push @desc, ".nf\n";
489        }
490        elsif(/^~~~/) {
491            # start of a quote section; not code, not indented
492            $quote = 1;
493            push @desc, "\n" if($blankline && !$header);
494            $header = 0;
495            push @desc, ".nf\n";
496        }
497        elsif(/^    (.*)/) {
498            # quoted, indented by 4 space
499            $quote = 4;
500            push @desc, "\n" if($blankline && !$header);
501            $header = 0;
502            push @desc, ".nf\n$1\n";
503        }
504        elsif(/^[ \t]*\n/) {
505            # count and ignore blank lines
506            $blankline++;
507        }
508        else {
509            # don't output newlines if this is the first content after a
510            # header
511            push @desc, "\n" if($blankline && !$header);
512            $blankline = 0;
513            $header = 0;
514
515            # quote minuses in the output
516            $d =~ s/([^\\])-/$1\\-/g;
517            # replace single quotes
518            $d =~ s/\'/\\(aq/g;
519            # handle double quotes first on the line
520            $d =~ s/^(\s*)\"/$1\\&\"/;
521
522            # lines starting with a period needs it escaped
523            $d =~ s/^\./\\&./;
524
525            if($d =~ /^(.*)  /) {
526                printf STDERR "$f:$line:%d:ERROR: 2 spaces detected\n",
527                    length($1);
528                $errors++;
529            }
530            if($d =~ /^[ \t]*\n/) {
531                # replaced away all contents
532                $blankline= 1;
533            }
534            else {
535                push @desc, $d;
536            }
537        }
538    }
539    if($fh != \*STDIN) {
540        close($fh);
541    }
542    push @desc, outseealso(@seealso);
543    if($dir) {
544        if($keepfilename) {
545            $title = $f;
546            $title =~ s/\.[^.]*$//;
547        }
548        my $outfile = "$dir/$title.$section";
549        if(defined($extension)) {
550            $outfile .= $extension;
551        }
552        if(!open(O, ">", $outfile)) {
553            print STDERR "Failed to open $outfile : $!\n";
554            return 1;
555        }
556        print O @desc;
557        close(O);
558    }
559    else {
560        print @desc;
561    }
562    return $errors;
563}
564
565if(@ARGV) {
566    for my $f (@ARGV) {
567        my $r = single($f);
568        if($r) {
569            exit $r;
570        }
571    }
572}
573else {
574    exit single();
575}
576