xref: /curl/.github/scripts/binarycheck.pl (revision e3240db0)
1#!/usr/bin/env perl
2#***************************************************************************
3#                                  _   _ ____  _
4#  Project                     ___| | | |  _ \| |
5#                             / __| | | | |_) | |
6#                            | (__| |_| |  _ <| |___
7#                             \___|\___/|_| \_\_____|
8#
9# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
10#
11# This software is licensed as described in the file COPYING, which
12# you should have received as part of this distribution. The terms
13# are also available at https://curl.se/docs/copyright.html.
14#
15# You may opt to use, copy, modify, merge, publish, distribute and/or sell
16# copies of the Software, and permit persons to whom the Software is
17# furnished to do so, under the terms of the COPYING file.
18#
19# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20# KIND, either express or implied.
21#
22# SPDX-License-Identifier: curl
23#
24###########################################################################
25
# This script scans the entire git repository for binary files.
27#
28# All files in the git repo that contain signs of being binary are then
29# collected and a sha256sum is generated for all of them. That summary is then
30# compared to the list of pre-vetted files so that only the exact copies of
31# already scrutinized files are deemed okay to "appear binary".
32#
33
34use strict;
35use warnings;
36
# Directory to scan. The first command line argument, when given, overrides
# the default of the current directory.
my $root = $ARGV[0] || ".";

# Checksum list naming the binary files that have already been vetted.
my $sumsfile = ".github/scripts/binarycheck.sums";

my $error = 0;  # number of problems found so far
my %known;      # file name => 1 for every file listed in $sumsfile
my @bin;        # every scanned file that contains binary-looking bytes
46
# Load the list of pre-vetted binary files from $sumsfile into %known.
#
# Each entry is expected to look like a line of sha256sum output: a lowercase
# hex digest, two spaces, then the file name. Only the file name is stored
# here (as a key in %known); the digest itself is not inspected by this sub.
# Lines starting with '#' are comments. Every other line is reported on
# stderr together with its line number and counted in $error.
#
# Takes no arguments and returns nothing useful. Dies if $sumsfile cannot be
# opened.
sub knownbins {
    open(my $mh, "<", $sumsfile) ||
        die "can't read known binaries from $sumsfile: $!";
    my $lineno = 0;
    # a lexical loop variable avoids clobbering the caller's $_
    while(my $l = <$mh>) {
        $lineno++;
        chomp $l;
        if($l =~ /^[a-f0-9]+  (.*)/) {
            # the digest is matched but deliberately not captured
            $known{$1} = 1;
        }
        elsif($l =~ /^#/) {
            # skip comments
        }
        else {
            # point at the exact line so the sums file is easy to fix
            print STDERR "suspicious line in $sumsfile:$lineno\n";
            $error++;
        }
    }
    close($mh);
}
67
# Scan one file for bytes that suggest binary content.
#
# $file is the path to open. The file is read line by line; the first line
# holding a control byte other than TAB, LF or CR (or a DEL byte) marks the
# file as binary and ends the scan. A binary file is always appended to @bin.
# If its path is not a key in %known, a message with the path and line number
# is printed to stderr and $error is incremented.
#
# Dies if the file cannot be opened.
sub checkfile {
    my ($file) = @_;
    open(my $mh, "<", $file) || die "can't read $file: $!";
    # the scan is byte oriented, so read raw bytes whatever the default layers
    binmode($mh);
    my $line = 0;
    while(my $l = <$mh>) {
        $line++;
        next if($l !~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/);

        push @bin, $file;
        if(!$known{$file}) {
            # print, not printf: the file name must never be used as a
            # format string, or a '%' in a path would garble the message
            print STDERR "$file:$line has unknown binary contents\n";
            $error++;
        }
        # one hit is enough to classify the file
        last;
    }
    close($mh);
}
87
# Ask git for every tracked file below $root.
#
# The list form of open() runs git without a shell, so $root (which comes
# from the command line) is handed over verbatim even if it holds spaces or
# shell metacharacters. With -z git emits NUL-terminated path names and does
# not quote unusual characters, so every name can be opened as printed.
my @files;
{
    local $/ = "\0";
    if(open(my $gh, "-|", "git", "ls-files", "-z", "--", $root)) {
        @files = <$gh>;
        chomp @files;   # strips the trailing NUL from each name
        close($gh);
    }
    # if git could not be started, @files stays empty and the sanity check
    # below reports the problem
}

if(scalar(@files) < 3000) {
    # far fewer files than the source tree holds: either this is not the
    # expected git repository or git does not work, so give up
    print STDERR "too few files in the git repository!\n";
    exit 1;
}

knownbins();

if(scalar(keys %known) < 10) {
    # the sums file is missing most of its entries; refuse to continue
    print STDERR "too few known binaries in $sumsfile\n";
    exit 2;
}

# NOTE(review): git prints paths relative to the current directory, so
# prefixing $root looks correct only for the default "." - confirm before
# relying on a custom root.
for my $f (@files) {
    checkfile("$root/$f");
}

# Verify that every vetted file still matches its recorded checksum. The
# list form avoids passing the command through a shell.
my $check = system("sha256sum", "-c", $sumsfile);
if($check) {
    print STDERR "sha256sum detected a problem\n";
    $error++;
}

# Exit codes are truncated to 8 bits by the OS, so an error count of exactly
# 256 would otherwise be reported as success. Clamp instead of wrapping.
exit($error > 255 ? 255 : $error);
116