jaulib v1.3.0
Jau Support Library (C++, Java, ..)
SHASum.java
Go to the documentation of this file.
1/**
2 * Author: Sven Gothel <sgothel@jausoft.com>
3 * Copyright (c) 2021 Gothel Software e.K.
4 * Copyright (c) 2019 Gothel Software e.K.
5 * Copyright (c) 2019 JogAmp Community.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27package org.jau.sec;
28
29import java.io.BufferedInputStream;
30import java.io.FileInputStream;
31import java.io.IOException;
32import java.io.FileNotFoundException;
33import java.io.InputStream;
34import java.net.URISyntaxException;
35import java.security.MessageDigest;
36import java.security.NoSuchAlgorithmException;
37import java.util.ArrayList;
38import java.util.Arrays;
39import java.util.List;
40import java.util.Locale;
41import java.util.regex.Pattern;
42
43import org.jau.io.IOUtil;
44import org.jau.sys.Debug;
45
46/**
47 * Utility class to produce secure hash (SHA) sums over diverse input sources.
48 * <p>
49 * See {@link #updateDigest(MessageDigest, List)}
50 * </p>
51 * <p>
52 * This implementation is being utilized at JogAmp build time to produce various
53 * SHA sums over sources, class files and native libraries to ensure their identity.
54 * See {@link JauVersion#getImplementationSHASources()},
55 * {@link JauVersion#getImplementationSHAClasses()}
56 * and {@link JauVersion#getImplementationSHANatives()}.
57 * </p>
58 * <p>
59 * {@link JauVersion#getImplementationSHASources()} for module gluegen is produced via:
60 * <pre>
61 * java -cp build/gluegen-rt.jar com.jogamp.common.util.SHASum --algorithm 256 --exclude ".*\\.log" --exclude "make/lib/toolchain" src jcpp/src make
62 * </pre>
63 * </p>
64 * @see #SHASum(MessageDigest, List, List, List)
65 * @see #compute(boolean)
66 * @see org.jau.pkg.TempJarSHASum
67 * @see #main(String[])
68 */
69public class SHASum {
70 private static final boolean DEBUG = Debug.debug("SHASum");
71
72 /**
73 * {@link MessageDigest#update(byte[], int, int) Updates} the given {@code digest}
74 * with the bytes contained by the files denoted by the given {@code filenames} in the given order.
75 * <p>
76 * To retrieve the list of all files traversing through directories, one may use {@link IOUtil#filesOf(List, List, List)}.
77 * </p>
78 * <p>
79 * The SHA implementation is sensitive to the order of input bytes and hence the given filename order.
80 * </p>
81 * <p>
82 * It is advised to pass given list of filenames in lexicographically sorted order to ensure reproducible outcome across all platforms,
83 * one may use {@link #sort(ArrayList)}.
84 * </p>
85 * <p>
86 * As an example, one could write
87 * <pre>
88 * final MessageDigest digest = ...;
89 * final long totalBytes = updateDigest(digest, sort(IOUtil.filesOf(Arrays.asList("sources"), null, null)));
90 * </pre>
91 * </p>
92 * @param digest to be updated digest
93 * @param filenames list of filenames denoting files, which bytes will be used to update the digest
94 * @return total number of bytes read.
95 * @throws FileNotFoundException see {@link FileInputStream#FileInputStream(String)}
96 * @throws IOException see {@link InputStream#read(byte[])}
97 */
98 public static long updateDigest(final MessageDigest digest, final List<String> filenames) throws IOException {
99 long numBytes = 0;
100 final byte buffer[] = new byte[4096]; // avoid Platform.getMachineDataInfo().pageSizeInBytes() due to native dependency
101 for(int i=0; i<filenames.size(); i++) {
102 final InputStream in = new BufferedInputStream(new FileInputStream(filenames.get(i)));
103 try {
104 while (true) {
105 int count;
106 if ((count = in.read(buffer)) == -1) {
107 break;
108 }
109 digest.update(buffer, 0, count);
110 numBytes += count;
111 }
112 } finally {
113 in.close();
114 }
115 }
116 return numBytes;
117 }
118
119 /**
120 * Simple helper to print the given byte-array into a string, here appended to StringBuilder
121 * @param shasum the given byte-array
122 * @param sb optional pre-existing StringBuilder, may be null
123 * @return return given or new StringBuilder with appended hex-string
124 */
125 public static StringBuilder toHexString(final byte[] shasum, StringBuilder sb) {
126 if( null == sb ) {
127 sb = new StringBuilder();
128 }
129 for(int i=0; i<shasum.length; i++) {
130 sb.append(String.format((Locale)null, "%02x", shasum[i]));
131 }
132 return sb;
133 }
134
135 /**
136 * Returns the sorted list of given strings using {@link String#compareTo(String)}'s lexicographically comparison.
137 * @param source given input strings
138 * @return sorted list of given strings
139 */
140 public static List<String> sort(final ArrayList<String> source) {
141 final String s[] = source.toArray(new String[source.size()]);
142 Arrays.sort(s, 0, s.length, null);
143 return Arrays.asList(s);
144 }
145
146 final MessageDigest digest;
147 final List<String> origins;
148 final List<Pattern> excludes, includes;
149
150 /**
151 * Instance to ensure proper {@link #compute(boolean)} of identical SHA sums over same contents within given paths across machines.
152 * <p>
153 * Instantiation of this class is lightweight, {@link #compute(boolean)} performs all operations.
154 * </p>
155 *
156 * @param digest the SHA algorithm
157 * @param origins the mandatory path origins to be used for {@link IOUtil#filesOf(List, List, List)}
158 * @param excludes the optional exclude patterns to be used for {@link IOUtil#filesOf(List, List, List)}
159 * @param includes the optional include patterns to be used for {@link IOUtil#filesOf(List, List, List)}
160 * @throws IllegalArgumentException
161 * @throws IOException
162 * @throws URISyntaxException
163 */
164 public SHASum(final MessageDigest digest, final List<String> origins, final List<Pattern> excludes, final List<Pattern> includes) {
165 this.digest = digest;
166 this.origins = origins;
167 this.excludes = excludes;
168 this.includes = includes;
169 }
170
171 /**
172 * Implementation gathers all files traversing through given paths via {@link IOUtil#filesOf(List, List, List)},
173 * sorts the resulting file list via {@link #sort(ArrayList)} and finally
174 * calculates the SHA sum over its byte content via {@link #updateDigest(MessageDigest, List)}.
175 * <p>
176 * This ensures identical SHA sums over same contents within given paths across machines.
177 * </p>
178 * <p>
179 * This method is heavyweight and performs all operations.
180 * </p>
181 *
182 * @param verbose if true, all used files will be dumped as well as the digest result
183 * @return the resulting SHA value
184 * @throws IOException
185 */
186 public final byte[] compute(final boolean verbose) throws IOException {
187 final List<String> fnamesS = SHASum.sort(IOUtil.filesOf(origins, excludes, includes));
188 if( verbose ) {
189 for(int i=0; i<fnamesS.size(); i++) {
190 System.err.println(fnamesS.get(i));
191 }
192 }
193 final long numBytes = SHASum.updateDigest(digest, fnamesS);
194 final byte[] shasum = digest.digest();
195 if( verbose ) {
196 System.err.println("Digested "+numBytes+" bytes, shasum size "+shasum.length+" bytes");
197 System.err.println("Digested result: "+SHASum.toHexString(shasum, null).toString());
198 }
199 return shasum;
200 }
201
202 public final List<String> getOrigins() { return origins; }
203 public final List<Pattern> getExcludes() { return excludes; }
204 public final List<Pattern> getIncludes() { return includes; }
205
206 /**
207 * Main entry point taking var-arg path or gnu-arguments with a leading '--'.
208 * <p>
209 * Implementation gathers all files traversing through given paths via {@link IOUtil#filesOf(List, List, List)},
210 * sorts the resulting file list via {@link #sort(ArrayList)} and finally
211 * calculates the SHA sum over its byte content via {@link #updateDigest(MessageDigest, List)}.
212 * This ensures identical SHA sums over same contents within given paths.
213 * </p>
214 * <p>
215 * Example to calculate the SHA-256 over our source files as performed for {@link JauVersion#getImplementationSHASources()}
216 * <pre>
217 * java -cp build/gluegen-rt.jar com.jogamp.common.util.SHASum --algorithm 256 --exclude ".*\\.log" --exclude "make/lib/toolchain" src jcpp/src make
218 * </pre>
219 * </p>
220 * <p>
221 * To validate the implementation, one can gather the sorted list of files (to ensure same order)
222 * <pre>
223 * java -cp build/gluegen-rt.jar com.jogamp.common.util.SHASum --listfilesonly --exclude ".*\\.log" --exclude "make/lib/toolchain" src jcpp/src make >& java.sorted.txt
224 * </pre>
225 * and then calculate the shasum independently
226 * <pre>
227 * find `cat java.sorted.txt` -exec cat {} + | shasum -a 256 -b - | awk '{print $1}'
228 * </pre>
229 * </p>
230 * @param args
231 * @throws IOException
232 * @throws URISyntaxException
233 * @throws IllegalArgumentException
234 */
235 public static void main(final String[] args) throws IOException {
236 boolean listFilesOnly = false;
237 int shabits = 256;
238 int i;
239 final ArrayList<String> pathU = new ArrayList<String>();
240 final ArrayList<Pattern> excludes = new ArrayList<Pattern>();
241 final ArrayList<Pattern> includes = new ArrayList<Pattern>();
242 {
243 for(i=0; i<args.length; i++) {
244 if(null != args[i]) {
245 if( args[i].startsWith("--") ) {
246 // options
247 if( args[i].equals("--algorithm")) {
248 shabits = Integer.parseInt(args[++i]);
249 } else if( args[i].equals("--exclude")) {
250 excludes.add(Pattern.compile(args[++i]));
251 if( DEBUG ) {
252 System.err.println("adding exclude: <"+args[i]+"> -> <"+excludes.get(excludes.size()-1)+">");
253 }
254 } else if( args[i].equals("--include")) {
255 includes.add(Pattern.compile(args[++i]));
256 if( DEBUG ) {
257 System.err.println("adding include: <"+args[i]+"> -> <"+includes.get(includes.size()-1)+">");
258 }
259 } else if( args[i].equals("--listfilesonly")) {
260 listFilesOnly = true;
261 } else {
262 System.err.println("Abort, unknown argument: "+args[i]);
263 return;
264 }
265 } else {
266 pathU.add(args[i]);
267 if( DEBUG ) {
268 System.err.println("adding path: <"+args[i]+">");
269 }
270 }
271 }
272 }
273 if( listFilesOnly ) {
274 final List<String> fnamesS = sort(IOUtil.filesOf(pathU, excludes, includes));
275 for(i=0; i<fnamesS.size(); i++) {
276 System.out.println(fnamesS.get(i));
277 }
278 return;
279 }
280 }
281 final String shaalgo = "SHA-"+shabits;
282 final MessageDigest digest;
283 try {
284 digest = MessageDigest.getInstance(shaalgo);
285 } catch (final NoSuchAlgorithmException e) {
286 System.err.println("Abort, implementation for "+shaalgo+" not available: "+e.getMessage());
287 return;
288 }
289 final SHASum shaSum = new SHASum(digest, pathU, excludes, includes);
290 System.out.println(toHexString(shaSum.compute(DEBUG), null).toString());
291 }
292}
static ArrayList< String > filesOf(final List< String > paths, final List< Pattern > excludes, final List< Pattern > includes)
Retrieve the list of all filenames traversing through given paths.
Definition: IOUtil.java:1295
Utility class to produce secure hash (SHA) sums over diverse input sources.
Definition: SHASum.java:69
static void main(final String[] args)
Main entry point taking var-arg path or gnu-arguments with a leading '--'.
Definition: SHASum.java:235
final List< Pattern > getIncludes()
Definition: SHASum.java:204
final List< Pattern > getExcludes()
Definition: SHASum.java:203
final List< String > getOrigins()
Definition: SHASum.java:202
SHASum(final MessageDigest digest, final List< String > origins, final List< Pattern > excludes, final List< Pattern > includes)
Instance to ensure proper compute(boolean) of identical SHA sums over same contents within given path...
Definition: SHASum.java:164
static long updateDigest(final MessageDigest digest, final List< String > filenames)
Updates the given digest with the bytes contained by the files denoted by the given filenames in the ...
Definition: SHASum.java:98
static List< String > sort(final ArrayList< String > source)
Returns the sorted list of given strings using String#compareTo(String)'s lexicographically compariso...
Definition: SHASum.java:140
final byte[] compute(final boolean verbose)
Implementation gathers all files traversing through given paths via IOUtil#filesOf(List,...
Definition: SHASum.java:186
static StringBuilder toHexString(final byte[] shasum, StringBuilder sb)
Simple helper to print the given byte-array into a string, here appended to StringBuilder.
Definition: SHASum.java:125
Helper routines for logging and debugging.
Definition: Debug.java:35
static final boolean debug(final String subcomponent)
Definition: Debug.java:63