xref: /curl/scripts/cd2nroff (revision afdd1129)
1#!/usr/bin/env perl
2#***************************************************************************
3#                                  _   _ ____  _
4#  Project                     ___| | | |  _ \| |
5#                             / __| | | | |_) | |
6#                            | (__| |_| |  _ <| |___
7#                             \___|\___/|_| \_\_____|
8#
9# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
10#
11# This software is licensed as described in the file COPYING, which
12# you should have received as part of this distribution. The terms
13# are also available at https://curl.se/docs/copyright.html.
14#
15# You may opt to use, copy, modify, merge, publish, distribute and/or sell
16# copies of the Software, and permit persons to whom the Software is
17# furnished to do so, under the terms of the COPYING file.
18#
19# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20# KIND, either express or implied.
21#
22# SPDX-License-Identifier: curl
23#
24###########################################################################
25
26=begin comment
27
28Converts a curldown file to nroff (man page).
29
30=end comment
31=cut
32
33use strict;
34use warnings;
35
36my $cd2nroff = "0.1"; # to keep check
37my $dir;
38my $extension;
39my $keepfilename;
40
41while(@ARGV) {
42    if($ARGV[0] eq "-d") {
43        shift @ARGV;
44        $dir = shift @ARGV;
45    }
46    elsif($ARGV[0] eq "-e") {
47        shift @ARGV;
48        $extension = shift @ARGV;
49    }
50    elsif($ARGV[0] eq "-k") {
51        shift @ARGV;
52        $keepfilename = 1;
53    }
54    elsif($ARGV[0] eq "-h") {
55        print <<HELP
56Usage: cd2nroff [options] [file.md]
57
58-d <dir> Write the output to the file name from the meta-data in the
59         specified directory, instead of writing to stdout
60-e <ext> If -d is used, this option can provide an added "extension", arbitrary
61         text really, to append to the file name.
62-h       This help text,
63-v       Show version then exit
64HELP
65            ;
66        exit 0;
67    }
68    elsif($ARGV[0] eq "-v") {
69        print "cd2nroff version $cd2nroff\n";
70        exit 0;
71    }
72    else {
73        last;
74    }
75}
76
77use POSIX qw(strftime);
78my @ts;
79if (defined($ENV{SOURCE_DATE_EPOCH})) {
80    @ts = gmtime($ENV{SOURCE_DATE_EPOCH});
81} else {
82    @ts = localtime;
83}
84my $date = strftime "%Y-%m-%d", @ts;
85
86sub outseealso {
87    my (@sa) = @_;
88    my $comma = 0;
89    my @o;
90    push @o, ".SH SEE ALSO\n";
91    for my $s (sort @sa) {
92        push @o, sprintf "%s.BR $s", $comma ? ",\n": "";
93        $comma = 1;
94    }
95    push @o, "\n";
96    return @o;
97}
98
99sub outprotocols {
100    my (@p) = @_;
101    my $comma = 0;
102    my @o;
103    push @o, ".SH PROTOCOLS\n";
104
105    if($p[0] eq "TLS") {
106        push @o, "All TLS based protocols: HTTPS, FTPS, IMAPS, POP3S, SMTPS etc.";
107    }
108    else {
109        my @s = sort @p;
110        for my $e (sort @s) {
111            push @o, sprintf "%s$e",
112                $comma ? (($e eq $s[-1]) ? " and " : ", "): "";
113            $comma = 1;
114        }
115    }
116    push @o, "\n";
117    return @o;
118}
119
120sub outtls {
121    my (@t) = @_;
122    my $comma = 0;
123    my @o;
124    if($t[0] eq "All") {
125        push @o, "\nAll TLS backends support this option.";
126    }
127    else {
128        push @o, "\nThis option works only with the following TLS backends:\n";
129        my @s = sort @t;
130        for my $e (@s) {
131            push @o, sprintf "%s$e",
132                $comma ? (($e eq $s[-1]) ? " and " : ", "): "";
133            $comma = 1;
134        }
135    }
136    push @o, "\n";
137    return @o;
138}
139
140my %knownprotos = (
141    'DICT' => 1,
142    'FILE' => 1,
143    'FTP' => 1,
144    'FTPS' => 1,
145    'GOPHER' => 1,
146    'GOPHERS' => 1,
147    'HTTP' => 1,
148    'HTTPS' => 1,
149    'IMAP' => 1,
150    'IMAPS' => 1,
151    'LDAP' => 1,
152    'LDAPS' => 1,
153    'MQTT' => 1,
154    'POP3' => 1,
155    'POP3S' => 1,
156    'RTMP' => 1,
157    'RTMPS' => 1,
158    'RTSP' => 1,
159    'SCP' => 1,
160    'SFTP' => 1,
161    'SMB' => 1,
162    'SMBS' => 1,
163    'SMTP' => 1,
164    'SMTPS' => 1,
165    'TELNET' => 1,
166    'TFTP' => 1,
167    'WS' => 1,
168    'WSS' => 1,
169    'TLS' => 1,
170    'TCP' => 1,
171    'All' => 1
172    );
173
174my %knowntls = (
175    'BearSSL' => 1,
176    'GnuTLS' => 1,
177    'mbedTLS' => 1,
178    'OpenSSL' => 1,
179    'rustls' => 1,
180    'Schannel' => 1,
181    'Secure Transport' => 1,
182    'wolfSSL' => 1,
183    'All' => 1,
184    );
185
186sub single {
187    my @seealso;
188    my @proto;
189    my @tls;
190    my $d;
191    my ($f)=@_;
192    my $copyright;
193    my $errors = 0;
194    my $fh;
195    my $line;
196    my $list;
197    my $tlslist;
198    my $section;
199    my $source;
200    my $spdx;
201    my $start = 0;
202    my $title;
203
204    if(defined($f)) {
205        if(!open($fh, "<:crlf", "$f")) {
206            print STDERR "cd2nroff failed to open '$f' for reading: $!\n";
207            return 1;
208        }
209    }
210    else {
211        $f = "STDIN";
212        $fh = \*STDIN;
213        binmode($fh, ":crlf");
214    }
215    while(<$fh>) {
216        $line++;
217        if(!$start) {
218            if(/^---/) {
219                # header starts here
220                $start = 1;
221            }
222            next;
223        }
224        if(/^Title: *(.*)/i) {
225            $title=$1;
226        }
227        elsif(/^Section: *(.*)/i) {
228            $section=$1;
229        }
230        elsif(/^Source: *(.*)/i) {
231            $source=$1;
232        }
233        elsif(/^See-also: +(.*)/i) {
234            $list = 1; # 1 for see-also
235            push @seealso, $1;
236        }
237        elsif(/^See-also: */i) {
238            if($seealso[0]) {
239                print STDERR "$f:$line:1:ERROR: bad See-Also, needs list\n";
240                return 2;
241            }
242            $list = 1; # 1 for see-also
243        }
244        elsif(/^Protocol:/i) {
245            $list = 2; # 2 for protocol
246        }
247        elsif(/^TLS-backend:/i) {
248            $list = 3; # 3 for TLS backend
249        }
250        elsif(/^ +- (.*)/i) {
251            # the only lists we support are see-also and protocol
252            if($list == 1) {
253                push @seealso, $1;
254            }
255            elsif($list == 2) {
256                push @proto, $1;
257            }
258            elsif($list == 3) {
259                push @tls, $1;
260            }
261            else {
262                print STDERR "$f:$line:1:ERROR: list item without owner?\n";
263                return 2;
264            }
265        }
266        # REUSE-IgnoreStart
267        elsif(/^C: (.*)/i) {
268            $copyright=$1;
269        }
270        elsif(/^SPDX-License-Identifier: (.*)/i) {
271            $spdx=$1;
272        }
273        # REUSE-IgnoreEnd
274        elsif(/^---/) {
275            # end of the header section
276            if(!$title) {
277                print STDERR "ERROR: no 'Title:' in $f\n";
278                return 1;
279            }
280            if(!$section) {
281                print STDERR "ERROR: no 'Section:' in $f\n";
282                return 2;
283            }
284            if(!$seealso[0]) {
285                print STDERR "$f:$line:1:ERROR: no 'See-also:' present\n";
286                return 2;
287            }
288            if(!$copyright) {
289                print STDERR "$f:$line:1:ERROR: no 'C:' field present\n";
290                return 2;
291            }
292            if(!$spdx) {
293                print STDERR "$f:$line:1:ERROR: no 'SPDX-License-Identifier:' field present\n";
294                return 2;
295            }
296            if($section == 3) {
297                if(!$proto[0]) {
298                    printf STDERR "$f:$line:1:ERROR: missing Protocol:\n";
299                    exit 2;
300                }
301                my $tls = 0;
302                for my $p (@proto) {
303                    if($p eq "TLS") {
304                        $tls = 1;
305                    }
306                    if(!$knownprotos{$p}) {
307                        printf STDERR "$f:$line:1:ERROR: invalid protocol used: $p:\n";
308                        exit 2;
309                    }
310                }
311                # This is for TLS, require TLS-backend:
312                if($tls) {
313                    if(!$tls[0]) {
314                        printf STDERR "$f:$line:1:ERROR: missing TLS-backend:\n";
315                        exit 2;
316                    }
317                    for my $t (@tls) {
318                        if(!$knowntls{$t}) {
319                            printf STDERR "$f:$line:1:ERROR: invalid TLS backend: $t:\n";
320                            exit 2;
321                        }
322                    }
323                }
324            }
325            last;
326        }
327        else {
328            chomp;
329            print STDERR "WARN: unrecognized line in $f, ignoring:\n:'$_';"
330        }
331    }
332
333    if(!$start) {
334        print STDERR "$f:$line:1:ERROR: no header present\n";
335        return 2;
336    }
337
338    my @desc;
339    my $quote = 0;
340    my $blankline = 0;
341    my $header = 0;
342
343    # cut off the leading path from the file name, if any
344    $f =~ s/^(.*[\\\/])//;
345
346    push @desc, ".\\\" generated by cd2nroff $cd2nroff from $f\n";
347    push @desc, ".TH $title $section \"$date\" $source\n";
348    while(<$fh>) {
349        $line++;
350
351        $d = $_;
352
353        if($quote) {
354            if($quote == 4) {
355                # remove the indentation
356                if($d =~ /^    (.*)/) {
357                    push @desc, "$1\n";
358                    next;
359                }
360                else {
361                    # end of quote
362                    $quote = 0;
363                    push @desc, ".fi\n";
364                    next;
365                }
366            }
367            if(/^~~~/) {
368                # end of quote
369                $quote = 0;
370                push @desc, ".fi\n";
371                next;
372            }
373            # convert single backslahes to doubles
374            $d =~ s/\\/\\\\/g;
375            # lines starting with a period needs it escaped
376            $d =~ s/^\./\\&./;
377            push @desc, $d;
378            next;
379        }
380
381        # remove single line HTML comments
382        $d =~ s/<!--.*?-->//g;
383
384        # **bold**
385        $d =~ s/\*\*(\S.*?)\*\*/\\fB$1\\fP/g;
386        # *italics*
387        $d =~ s/\*(\S.*?)\*/\\fI$1\\fP/g;
388
389        if($d =~ /[^\\][\<\>]/) {
390            print STDERR "$f:$line:1:WARN: un-escaped < or > used\n";
391        }
392        # convert backslash-'<' or '> to just the second character
393        $d =~ s/\\([<>])/$1/g;
394
395        # mentions of curl symbols with man pages use italics by default
396        $d =~ s/((lib|)curl([^ ]*\(3\)))/\\fI$1\\fP/gi;
397
398        # backticked becomes italics
399        $d =~ s/\`(.*?)\`/\\fI$1\\fP/g;
400
401        if(/^## (.*)/) {
402            my $word = $1;
403            # if there are enclosing quotes, remove them first
404            $word =~ s/[\"\'\`](.*)[\"\'\`]\z/$1/;
405
406            # enclose in double quotes if there is a space present
407            if($word =~ / /) {
408                push @desc, ".IP \"$word\"\n";
409            }
410            else {
411                push @desc, ".IP $word\n";
412            }
413            $header = 1;
414        }
415        elsif(/^# (.*)/) {
416            my $word = $1;
417            # if there are enclosing quotes, remove them first
418            $word =~ s/[\"\'](.*)[\"\']\z/$1/;
419
420            if($word eq "PROTOCOLS") {
421                print STDERR "$f:$line:1:WARN: PROTOCOLS section in source file\n";
422            }
423            elsif($word eq "EXAMPLE") {
424                # insert the generated PROTOCOLS section before EXAMPLE
425                push @desc, outprotocols(@proto);
426
427                if($proto[0] eq "TLS") {
428                    push @desc, outtls(@tls);
429                }
430            }
431            push @desc, ".SH $word\n";
432            $header = 1;
433        }
434        elsif(/^~~~c/) {
435            # start of a code section, not indented
436            $quote = 1;
437            push @desc, "\n" if($blankline && !$header);
438            $header = 0;
439            push @desc, ".nf\n";
440        }
441        elsif(/^~~~/) {
442            # start of a quote section; not code, not indented
443            $quote = 1;
444            push @desc, "\n" if($blankline && !$header);
445            $header = 0;
446            push @desc, ".nf\n";
447        }
448        elsif(/^    (.*)/) {
449            # quoted, indented by 4 space
450            $quote = 4;
451            push @desc, "\n" if($blankline && !$header);
452            $header = 0;
453            push @desc, ".nf\n$1\n";
454        }
455        elsif(/^[ \t]*\n/) {
456            # count and ignore blank lines
457            $blankline++;
458        }
459        else {
460            # don't output newlines if this is the first content after a
461            # header
462            push @desc, "\n" if($blankline && !$header);
463            $blankline = 0;
464            $header = 0;
465
466            # quote minuses in the output
467            $d =~ s/([^\\])-/$1\\-/g;
468            # replace single quotes
469            $d =~ s/\'/\\(aq/g;
470            # handle double quotes first on the line
471            $d =~ s/^(\s*)\"/$1\\&\"/;
472
473            # lines starting with a period needs it escaped
474            $d =~ s/^\./\\&./;
475
476            if($d =~ /^(.*)  /) {
477                printf STDERR "$f:$line:%d:ERROR: 2 spaces detected\n",
478                    length($1);
479                $errors++;
480            }
481            if($d =~ /^[ \t]*\n/) {
482                # replaced away all contents
483                $blankline= 1;
484            }
485            else {
486                push @desc, $d;
487            }
488        }
489    }
490    if($fh != \*STDIN) {
491        close($fh);
492    }
493    push @desc, outseealso(@seealso);
494    if($dir) {
495        if($keepfilename) {
496            $title = $f;
497            $title =~ s/\.[^.]*$//;
498        }
499        my $outfile = "$dir/$title.$section";
500        if(defined($extension)) {
501            $outfile .= $extension;
502        }
503        if(!open(O, ">", $outfile)) {
504            print STDERR "Failed to open $outfile : $!\n";
505            return 1;
506        }
507        print O @desc;
508        close(O);
509    }
510    else {
511        print @desc;
512    }
513    return $errors;
514}
515
516if(@ARGV) {
517    for my $f (@ARGV) {
518        my $r = single($f);
519        if($r) {
520            exit $r;
521        }
522    }
523}
524else {
525    exit single();
526}
527