xref: /openssl/util/check-format-commit.sh (revision 4c29044a)
1#!/bin/bash
2# Copyright 2020-2024 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License").
5# You may not use this file except in compliance with the License.
6# You can obtain a copy in the file LICENSE in the source distribution
7# or at https://www.openssl.org/source/license.html
8#
9# This script is a wrapper around check-format.pl.
10# It accepts the same commit revision range as 'git diff' as arguments,
11# or just a single commit id, and uses it to identify the files and line ranges
12# that were changed in that commit range, filtering check-format.pl output
13# only to lines that fall into the change ranges of the changed files.
14# examples:
15# check-format-commit.sh       # check unstaged changes
16# check-format-commit.sh HEAD
17# check-format-commit.sh @~3..
18# check-format-commit.sh f5981c9629667a5a5d6
19# check-format-commit.sh f5981c9629667a5a5d6..ee0bf38e8709bf71888
20
# Allowlist of files to scan.
# Each entry is a grep basic regular expression that is matched against
# the end of a file name.
# Currently this is any .c or .h file (with an optional .in suffix).
# The dot of the ".in" suffix is escaped so that only a literal ".in" is
# accepted; an unescaped dot would also let names like "foo.cxin" through.
FILE_NAME_END_ALLOWLIST=("\.[ch]\(\.in\)\?")
24
# Global vars

# TOPDIR always points to the root of the git tree we are working in
# used to locate the check-format.pl script.
# It is determined first, so that nothing has to be cleaned up yet when we
# have to give up because we are not inside a git work tree.
TOPDIR=$(git rev-parse --show-toplevel) || {
    echo "unable to determine the root of the git tree" >&2
    exit 1
}

# TEMPDIR is used to hold any files this script creates
# And is cleaned on EXIT with a trap function.
# Stop right away when it cannot be created: all later steps write into it.
TEMPDIR=$(mktemp -d /tmp/checkformat.XXXXXX) || {
    echo "unable to create a temporary directory" >&2
    exit 1
}
34
35
# cleanup handler function, registered for EXIT below: returns us to the
# root of the git tree and erases our temp directory.
# Globals: TOPDIR (read), TEMPDIR (read)
cleanup() {
    # Leave the temp directory before it is deleted.  The path is quoted
    # because the location of the work tree may contain whitespace.
    # A failing cd must not prevent the removal below.
    cd "$TOPDIR" || true

    # Never call rm -rf with an empty operand
    if [ -n "$TEMPDIR" ]; then
        rm -rf -- "$TEMPDIR"
    fi
}

trap cleanup EXIT
44
# Get the list of ids of the commits we are checking,
# or empty for unstaged changes.
# This lets us pass in symbolic ref names like master/etc and
# resolve them to commit ids easily.
# COMMIT_RANGE holds the argument as given, COMMIT_LAST the output of
# 'git rev-parse' for it (one commit id per line).
COMMIT_RANGE="$*"
COMMIT_LAST=""

# Only a single range or commit id is handled by the steps that follow
if [ $# -gt 1 ]
then
    echo "expected at most one commit range or commit id, got: $COMMIT_RANGE" >&2
    exit 1
fi

# Without an argument there is nothing to resolve: unstaged changes are
# checked.  Otherwise fail gracefully if git rev-parse doesn't produce a
# valid commit; its exit status is tested directly, so no other command
# can be mistaken for it.
if [ -n "$COMMIT_RANGE" ]
then
    if ! COMMIT_LAST=$(git rev-parse "$COMMIT_RANGE")
    then
        echo "$1 is not a valid commit range or commit id" >&2
        exit 1
    fi
fi
58
# For exactly one revision, git rev-parse prints just that one commit id.
# 'git diff' would read such a lone id as "all commits up to HEAD, plus any
# unstaged changes", which is not the desired result, so the lone revision
# is turned into the explicit range <rev>^..<rev>.
# For anything else only the first line of the git rev-parse output is
# kept as COMMIT_LAST.
rev_ids=($COMMIT_LAST)
if [ "${#rev_ids[@]}" -eq 1 ]; then
    # $COMMIT_RANGE names a single commit: build a real range from it
    COMMIT_RANGE="${COMMIT_RANGE}^..${COMMIT_RANGE}"
else
    COMMIT_LAST=${COMMIT_LAST%%$'\n'*}
fi
70
# Create an iterable list of files to check formatting on,
# including the line ranges that are changed by the commits
# It produces output of this format:
# <file name> <change start line>[,<change line count>]
# (the line count is absent when a hunk covers exactly one line)

# Convert a unified diff into the list described above.
# Inputs:  a 'git diff -U0' style diff on stdin
# Outputs: one "<file name> <range>" line per hunk on stdout
diff_to_ranges() {
    awk '
        # A new per-file header starts here.  Hunk lines always begin
        # with "+", "-", " " or "\", so this cannot match inside a hunk.
        /^diff / { in_header = 1; next }

        # The new-side file name.  It is only taken from the header: an
        # added line whose text starts with "++ " looks the same.
        in_header && /^\+\+\+ / { sub(/^b\//, "", $2); file = $2; next }

        # Hunk header "@@ -old +new @@": field 3 is the new-side range.
        # The name is passed as data, never as the printf format.
        /^@@ / {
            in_header = 0
            sub(/^\+/, "", $3)
            printf "%s %s\n", file, $3
        }
    '
}

# The prefixes are forced to a/ and b/, and colour and external diff
# drivers are disabled, so that the output does not depend on the user's
# git configuration.  An empty COMMIT_RANGE (unstaged changes) must
# disappear entirely instead of becoming an empty argument.
git diff -U0 --no-color --no-ext-diff --src-prefix=a/ --dst-prefix=b/ \
    ${COMMIT_RANGE:+"$COMMIT_RANGE"} | diff_to_ranges > "$TEMPDIR/ranges.txt"
80
# filter in anything that matches on a filter regex

# Print the lines of a ranges file whose file name ends in one of the
# suffixes from FILE_NAME_END_ALLOWLIST.
# Globals:   FILE_NAME_END_ALLOWLIST (read)
# Arguments: $1 - ranges file, lines of "<file path> <range start>[,<range length>]"
# Outputs:   the matching lines on stdout
filter_ranges() {
    local suffix

    # The array is expanded quoted: each entry is a regex that contains
    # characters such as [ ] and ?, which must not be touched by word
    # splitting or pathname expansion.
    for suffix in "${FILE_NAME_END_ALLOWLIST[@]}"
    do
        # Note the space after the $suffix below.  This is done because we
        # want to match on file name suffixes, but each input line has the
        # form "<file path> <range>", so we can't just match on end of
        # line.  The additional space here lets us match on suffixes
        # followed by the expected space in the input file.
        # grep finding nothing is not an error here.
        grep -- "$suffix " "$1" || true
    done
}

filter_ranges "$TEMPDIR/ranges.txt" >> "$TEMPDIR/ranges.filter"
92
# Stop early, and successfully, when nothing is left after filtering.
# REMAINING_FILES holds the number of lines in the filtered ranges list.
REMAINING_FILES=$(wc -l < "$TEMPDIR/ranges.filter")
if test $REMAINING_FILES -eq 0; then
    printf '%s\n' "The given commit range has no C source file changes that require checking"
    exit 0
fi
99
# unless checking the format of unstaged changes,
# check out the files from the commit range.
if [ -n "$COMMIT_RANGE" ]
then
    # For each file name in ranges, we show that file at the commit range
    # we are checking, and redirect it to the same path,
    # relative to $TEMPDIR/check-format.
    # This give us the full file path to run check-format.pl on
    # with line numbers matching the ranges in the $TEMPDIR/ranges.filter file
    # The names are read line by line, so that they are not subject to
    # pathname expansion.
    while IFS= read -r j
    do
        mkdir -p "$TEMPDIR/check-format/$(dirname "$j")"

        # Give up when the file cannot be extracted: checking the empty
        # file left behind by the redirection would report no findings.
        if ! git show "$COMMIT_LAST:$j" > "$TEMPDIR/check-format/$j"
        then
            echo "unable to extract $j from commit $COMMIT_LAST" >&2
            exit 1
        fi
    done < <(awk '{print $1}' "$TEMPDIR/ranges.filter" | sort -u)
fi
116
# Print the check-format.pl findings that lie inside the changed ranges
# of one file.
# Arguments: $1 - file name, exactly as listed in the ranges file
#            $2 - ranges file, lines of "<file name> <start>[,<count>]"
#            $3 - check-format.pl output, lines of
#                 <file path>:<line number>:<error text>:<offending line contents>
# Outputs:   the lines of $3 whose line number is in a range, on stdout
# NOTE(review): a range ends at start + count and that end is compared
# inclusively, so the line right after each hunk is covered too.  This is
# the behaviour of the original filter and is kept; confirm it is intended.
findings_in_ranges() {
    awk -v file="$1" '
        # First input, the ranges: keep those of exactly this file.  The
        # name is compared as a whole string, so the ranges of "x.h.in"
        # are never applied to "x.h".
        pass == 1 {
            if ($1 == file) {
                n = split($2, part, ",")
                # when the hunk is just one line, its length is implied
                len = (n > 1) ? part[2] : 1
                lo[++cnt] = part[1] + 0
                hi[cnt] = part[1] + len
            }
            next
        }

        # Second input, the findings: field 2 is the offending line
        # number.  Lines without a numeric field 2 are not findings.
        $2 ~ /^[ \t]*[0-9]+[ \t]*$/ {
            for (i = 1; i <= cnt; i++)
                if (lo[i] <= $2 + 0 && $2 + 0 <= hi[i]) {
                    # print each finding once, even if two ranges touch
                    print
                    break
                }
        }
    ' pass=1 "$2" pass=2 FS=: "$3"
}

# Now for each file in $TEMPDIR/ranges.filter, run check-format.pl
# The list is read on file descriptor 3 to keep it apart from stdin.
while IFS= read -r j <&3
do
    # Go to our checked out tree.  When checking unstaged changes go to
    # the root of the work tree instead: git diff reports file names
    # relative to that root, wherever the script was started.
    if [ -n "$COMMIT_RANGE" ]
    then
        cd "$TEMPDIR/check-format" || exit 1
    else
        cd "$TOPDIR" || exit 1
    fi

    # Actually run check-format.pl on the file, capturing the output
    # in a temporary file.
    "$TOPDIR/util/check-format.pl" "$j" > "$TEMPDIR/results.txt"

    # Now we filter the check-format.pl output based on the changed lines
    findings_in_ranges "$j" "$TEMPDIR/ranges.filter" "$TEMPDIR/results.txt" \
        >> "$TEMPDIR/results-filtered.txt"
done 3< <(awk '{print $1}' "$TEMPDIR/ranges.filter" | sort -u)
# Show all findings that were collected for the changed lines
cat "$TEMPDIR/results-filtered.txt"

# A non-empty findings file means that at least one finding was in range:
# signal that with a different error code
[ ! -s "$TEMPDIR/results-filtered.txt" ] || exit 2
171